Fixed up the front end and back end for 32-bit mul, div, cmp and shift in mode64
Backend:
 - separated shifts from other alu ops
 - gave {shift, mul, div, cmp} ops a bool to indicate 32|64bit insn
 - fixed and implemented more mode64 cases

Also improved some IR by moving immediates to the right arg of binops - the backend assumes this.

All integer ppc32 insns now pass switchback tests in 64bit mode.
(ppc64-only insns not yet fully tested)




git-svn-id: svn://svn.valgrind.org/vex/trunk@1498 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host-ppc32/hdefs.c b/priv/host-ppc32/hdefs.c
index 82a2406..0ece9d5 100644
--- a/priv/host-ppc32/hdefs.c
+++ b/priv/host-ppc32/hdefs.c
@@ -573,23 +573,29 @@
    }
 }
 
-HChar* showPPC32AluOp ( PPC32AluOp op, Bool immR, Bool is32Bit ) {
+HChar* showPPC32AluOp ( PPC32AluOp op, Bool immR ) {
    switch (op) {
       case Palu_ADD: return immR ? "addi"  : "add";
       case Palu_SUB: return immR ? "subi"  : "sub";
       case Palu_AND: return immR ? "andi." : "and";
       case Palu_OR:  return immR ? "ori"   : "or";
       case Palu_XOR: return immR ? "xori"  : "xor";
-      case Palu_SHL: return is32Bit ? (immR ? "slwi"  : "slw") : 
-                                      (immR ? "sldi"  : "sld");
-      case Palu_SHR: return is32Bit ? (immR ? "srwi"  : "srw") :
-                                      (immR ? "srdi"  : "srd");
-      case Palu_SAR: return is32Bit ? (immR ? "srawi" : "sraw") :
-                                      (immR ? "sradi" : "srad");
       default: vpanic("showPPC32AluOp");
    }
 }
 
+HChar* showPPC32ShftOp ( PPC32ShftOp op, Bool immR, Bool sz32 ) {
+   switch (op) {
+      case Pshft_SHL: return sz32 ? (immR ? "slwi"  : "slw") : 
+                                    (immR ? "sldi"  : "sld");
+      case Pshft_SHR: return sz32 ? (immR ? "srwi"  : "srw") :
+                                    (immR ? "srdi"  : "srd");
+      case Pshft_SAR: return sz32 ? (immR ? "srawi" : "sraw") :
+                                    (immR ? "sradi" : "srad");
+      default: vpanic("showPPC32ShftOp");
+   }
+}
+
 HChar* showPPC32FpOp ( PPC32FpOp op ) {
    switch (op) {
       case Pfp_ADD:    return "fadd";
@@ -719,6 +725,17 @@
    i->Pin.Alu.srcR = srcR;
    return i;
 }
+PPC32Instr* PPC32Instr_Shft ( PPC32ShftOp op, Bool sz32, 
+                              HReg dst, HReg srcL, PPC32RH* srcR ) {
+   PPC32Instr* i    = LibVEX_Alloc(sizeof(PPC32Instr));
+   i->tag           = Pin_Shft;
+   i->Pin.Shft.op   = op;
+   i->Pin.Shft.sz32 = sz32;
+   i->Pin.Shft.dst  = dst;
+   i->Pin.Shft.srcL = srcL;
+   i->Pin.Shft.srcR = srcR;
+   return i;
+}
 PPC32Instr* PPC32Instr_AddSubC32 ( Bool isAdd, Bool setC,
                                    HReg dst, HReg srcL, HReg srcR ) {
    PPC32Instr* i          = LibVEX_Alloc(sizeof(PPC32Instr));
@@ -730,11 +747,12 @@
    i->Pin.AddSubC32.srcR  = srcR;
    return i;
 }
-PPC32Instr* PPC32Instr_Cmp ( Bool syned, UInt crfD, 
-                             HReg srcL, PPC32RH* srcR ) {
+PPC32Instr* PPC32Instr_Cmp ( Bool syned, Bool sz32, 
+                             UInt crfD, HReg srcL, PPC32RH* srcR ) {
    PPC32Instr* i    = LibVEX_Alloc(sizeof(PPC32Instr));
    i->tag           = Pin_Cmp;
    i->Pin.Cmp.syned = syned;
+   i->Pin.Cmp.sz32  = sz32;
    i->Pin.Cmp.crfD  = crfD;
    i->Pin.Cmp.srcL  = srcL;
    i->Pin.Cmp.srcR  = srcR;
@@ -748,12 +766,13 @@
    i->Pin.Unary32.src = src;
    return i;
 }
-PPC32Instr* PPC32Instr_MulL ( Bool syned, Bool hi, 
+PPC32Instr* PPC32Instr_MulL ( Bool syned, Bool hi, Bool sz32, 
                               HReg dst, HReg srcL, HReg srcR ) {
    PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
    i->tag            = Pin_MulL;
    i->Pin.MulL.syned = syned;
    i->Pin.MulL.hi    = hi;
+   i->Pin.MulL.sz32  = sz32;
    i->Pin.MulL.dst   = dst;
    i->Pin.MulL.srcL  = srcL;
    i->Pin.MulL.srcR  = srcR;
@@ -762,10 +781,12 @@
    if (!hi) vassert(!syned);
    return i;
 }
-PPC32Instr* PPC32Instr_Div ( Bool syned, HReg dst, HReg srcL, HReg srcR ) {
+PPC32Instr* PPC32Instr_Div ( Bool syned, Bool sz32,
+                             HReg dst, HReg srcL, HReg srcR ) {
    PPC32Instr* i      = LibVEX_Alloc(sizeof(PPC32Instr));
    i->tag             = Pin_Div;
    i->Pin.Div.syned   = syned;
+   i->Pin.Div.sz32    = sz32;
    i->Pin.Div.dst     = dst;
    i->Pin.Div.srcL    = srcL;
    i->Pin.Div.srcR    = srcR;
@@ -1137,17 +1158,39 @@
          ppHRegPPC32(i->Pin.Alu.dst);
          vex_printf(",");
          ppHRegPPC32(r_srcL);
-      } else {
-         /* generic */
-         vex_printf("%s ", showPPC32AluOp(i->Pin.Alu.op,
-                                toBool(rh_srcR->tag == Prh_Imm),
-                                toBool(hregClass(r_srcL) == HRcInt32)));
+         return;
+      }
+      /* special-case "li" */
+      if (i->Pin.Alu.op == Palu_ADD &&   // addi Rd,0,imm == li Rd,imm
+          rh_srcR->tag == Prh_Imm &&
+          hregNumber(r_srcL) == 0) {
+         vex_printf("li ");
          ppHRegPPC32(i->Pin.Alu.dst);
          vex_printf(",");
-         ppHRegPPC32(r_srcL);
-         vex_printf(",");
          ppPPC32RH(rh_srcR);
+         return;
       }
+      /* generic */
+      vex_printf("%s ", showPPC32AluOp(i->Pin.Alu.op,
+                                       toBool(rh_srcR->tag == Prh_Imm)));
+      ppHRegPPC32(i->Pin.Alu.dst);
+      vex_printf(",");
+      ppHRegPPC32(r_srcL);
+      vex_printf(",");
+      ppPPC32RH(rh_srcR);
+      return;
+   }
+   case Pin_Shft: {
+      HReg     r_srcL  = i->Pin.Shft.srcL;
+      PPC32RH* rh_srcR = i->Pin.Shft.srcR;
+      vex_printf("%s ", showPPC32ShftOp(i->Pin.Shft.op,
+                                        toBool(rh_srcR->tag == Prh_Imm),
+                                        i->Pin.Shft.sz32));
+      ppHRegPPC32(i->Pin.Shft.dst);
+      vex_printf(",");
+      ppHRegPPC32(r_srcL);
+      vex_printf(",");
+      ppPPC32RH(rh_srcR);
       return;
    }
    case Pin_AddSubC32:
@@ -1161,8 +1204,9 @@
       ppHRegPPC32(i->Pin.AddSubC32.srcR);
       return;
    case Pin_Cmp:
-      vex_printf("%s%s %%cr%u,",
+      vex_printf("%s%c%s %%cr%u,",
                  i->Pin.Cmp.syned ? "cmp" : "cmpl",
+                 i->Pin.Cmp.sz32 ? 'w' : 'd',
                  i->Pin.Cmp.srcR->tag == Prh_Imm ? "i" : "",
                  i->Pin.Cmp.crfD);
       ppHRegPPC32(i->Pin.Cmp.srcL);
@@ -1176,8 +1220,9 @@
       ppHRegPPC32(i->Pin.Unary32.src);
       return;
    case Pin_MulL:
-      vex_printf("mul%s%s ",
-                 i->Pin.MulL.hi ? "hw" : "lw",
+      vex_printf("mul%c%c%s ",
+                 i->Pin.MulL.hi ? 'h' : 'l',
+                 i->Pin.MulL.sz32 ? 'w' : 'd',
                  i->Pin.MulL.hi ? (i->Pin.MulL.syned ? "s" : "u") : "");
       ppHRegPPC32(i->Pin.MulL.dst);
       vex_printf(",");
@@ -1186,7 +1231,8 @@
       ppHRegPPC32(i->Pin.MulL.srcR);
       return;
    case Pin_Div:
-      vex_printf("divw%s ",
+      vex_printf("div%c%s ",
+                 i->Pin.Div.sz32 ? 'w' : 'd',
                  i->Pin.Div.syned ? "" : "u");
       ppHRegPPC32(i->Pin.Div.dst);
       vex_printf(",");
@@ -1555,6 +1601,11 @@
       addRegUsage_PPC32RH(u, i->Pin.Alu.srcR);
       addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
       return;
+   case Pin_Shft:
+      addHRegUse(u, HRmRead,  i->Pin.Shft.srcL);
+      addRegUsage_PPC32RH(u,  i->Pin.Shft.srcR);
+      addHRegUse(u, HRmWrite, i->Pin.Shft.dst);
+      return;
    case Pin_AddSubC32:
       addHRegUse(u, HRmWrite, i->Pin.AddSubC32.dst);
       addHRegUse(u, HRmRead, i->Pin.AddSubC32.srcL);
@@ -1800,6 +1851,11 @@
       mapReg(m, &i->Pin.Alu.srcL);
       mapRegs_PPC32RH(m, i->Pin.Alu.srcR);
       return;
+   case Pin_Shft:
+      mapReg(m, &i->Pin.Shft.dst);
+      mapReg(m, &i->Pin.Shft.srcL);
+      mapRegs_PPC32RH(m, i->Pin.Shft.srcR);
+      return;
    case Pin_AddSubC32:
       mapReg(m, &i->Pin.AddSubC32.dst);
       mapReg(m, &i->Pin.AddSubC32.srcL);
@@ -2429,10 +2485,8 @@
       UInt     r_srcL = iregNo(i->Pin.Alu.srcL, mode64);
       UInt     r_srcR = immR ? (-1)/*bogus*/ :
                                iregNo(srcR->Prh.Reg.reg, mode64);
-      Bool  is32BitOp = toBool(hregClass(i->Pin.Alu.srcL) == HRcInt32);
 
       switch (i->Pin.Alu.op) {
-
       case Palu_ADD:
          if (immR) {
             /* addi (PPC32 p350) */
@@ -2490,9 +2544,26 @@
          }
          break;
 
-      case Palu_SHL:
-         if (is32BitOp) {
-            vassert(!mode64);
+      default:
+         goto bad;
+      }
+      goto done;
+   }
+
+   case Pin_Shft: {
+      PPC32RH* srcR   = i->Pin.Shft.srcR;
+      Bool     sz32   = i->Pin.Shft.sz32;
+      Bool     immR   = toBool(srcR->tag == Prh_Imm);
+      UInt     r_dst  = iregNo(i->Pin.Shft.dst, mode64);
+      UInt     r_srcL = iregNo(i->Pin.Shft.srcL, mode64);
+      UInt     r_srcR = immR ? (-1)/*bogus*/ :
+                               iregNo(srcR->Prh.Reg.reg, mode64);
+      if (!mode64)
+         vassert(sz32);
+
+      switch (i->Pin.Shft.op) {
+      case Pshft_SHL:
+         if (sz32) {
             if (immR) {
                /* rd = rs << n, 1 <= n <= 31
                   is
@@ -2507,7 +2578,6 @@
                p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 24, 0);
             }
          } else {
-            vassert(mode64);
             if (immR) {
                /* rd = rs << n, 1 <= n <= 63
                   is
@@ -2524,10 +2594,9 @@
          }
          break;
 
-      case Palu_SHR:
-         if (is32BitOp) {
-            vassert(!mode64);
-            if (immR) {
+      case Pshft_SHR:
+         if (sz32) {
+             if (immR) {
                /* rd = rs >>u n, 1 <= n <= 31
                   is
                   rlwinm rd,rs,32-n,n,31  (PPC32 p501)
@@ -2541,7 +2610,6 @@
                p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 536, 0);
             }
          } else {
-            vassert(mode64);
             if (immR) {
                /* rd = rs >>u n, 1 <= n <= 63
                   is
@@ -2558,9 +2626,8 @@
          }
          break;
 
-      case Palu_SAR:
-         if (is32BitOp) {
-            vassert(!mode64);
+      case Pshft_SAR:
+         if (sz32) {
             if (immR) {
                /* srawi (PPC32 p507) */
                UInt n = srcR->Prh.Imm.imm16;
@@ -2572,7 +2639,6 @@
                p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 792, 0);
             }
          } else {
-            vassert(mode64);
             if (immR) {
                /* sradi (PPC64 p571) */
                UInt n = srcR->Prh.Imm.imm16;
@@ -2616,29 +2682,34 @@
 
    case Pin_Cmp: {
       Bool syned  = i->Pin.Cmp.syned;
+      Bool sz32   = i->Pin.Cmp.sz32;
       UInt fld1   = i->Pin.Cmp.crfD << 2;
       UInt r_srcL = iregNo(i->Pin.Cmp.srcL, mode64);
       UInt r_srcR, imm_srcR;
       PPC32RH* srcR = i->Pin.Cmp.srcR;
 
+      if (!mode64)        // cmp double word invalid for mode32
+         vassert(sz32);      
+      else if (!sz32)     // mode64 && cmp64: set L=1
+         fld1 |= 1;
+ 
       switch (srcR->tag) {
       case Prh_Imm:
-         /* cmpi  (signed)   (PPC32 p368)  or 
-            cmpli (unsigned) (PPC32 p370) */
+         vassert(syned == srcR->Prh.Imm.syned);
          imm_srcR = srcR->Prh.Imm.imm16;
-         if (syned) {
-            vassert(srcR->Prh.Imm.syned);
+         if (syned) {  // cmpw/di  (signed)   (PPC32 p368)
             vassert(imm_srcR != 0x8000);
-         } else {
-            vassert(!srcR->Prh.Imm.syned);
+            p = mkFormD(p, 11, fld1, r_srcL, imm_srcR);
+         } else {      // cmplw/di (unsigned) (PPC32 p370)
+            p = mkFormD(p, 10, fld1, r_srcL, imm_srcR);
          }
-         p = mkFormD(p, syned ? 11 : 10, fld1, r_srcL, imm_srcR);
          break;
       case Prh_Reg:
-         /* cmpi  (signed)   (PPC32 p367)  or 
-            cmpli (unsigned) (PPC32 p379) */
          r_srcR = iregNo(srcR->Prh.Reg.reg, mode64);
-         p = mkFormX(p, 31, fld1, r_srcL, r_srcR, syned ? 0 : 32, 0);
+         if (syned)  // cmpw/d  (signed)   (PPC32 p367)
+            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 0, 0);
+         else        // cmplw/d (unsigned) (PPC32 p379)
+            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 32, 0);
          break;
       default: 
          goto bad;
@@ -2667,30 +2738,33 @@
 
    case Pin_MulL: {
       Bool syned  = i->Pin.MulL.syned;
+      Bool sz32   = i->Pin.MulL.sz32;
       UInt r_dst  = iregNo(i->Pin.MulL.dst, mode64);
       UInt r_srcL = iregNo(i->Pin.MulL.srcL, mode64);
       UInt r_srcR = iregNo(i->Pin.MulL.srcR, mode64);
-      Bool is32BitOp = toBool(hregClass(i->Pin.MulL.dst) == HRcInt32);
+
+      if (!mode64)
+         vassert(sz32);
 
       if (i->Pin.MulL.hi) {
          // mul hi words, must consider sign
-         if (syned) {
-            if (is32BitOp)  // mulhw r_dst,r_srcL,r_srcR
+         if (sz32) {
+            if (syned)  // mulhw r_dst,r_srcL,r_srcR
                p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 75, 0);
-            else            // mulhd r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 73, 0);
-         } else {
-            if (is32BitOp)  // mulhwu r_dst,r_srcL,r_srcR
+            else        // mulhwu r_dst,r_srcL,r_srcR
                p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 11, 0);
-            else            // mulhdu r_dst,r_srcL,r_srcR
+         } else {
+            if (syned)  // mulhd r_dst,r_srcL,r_srcR
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 73, 0);
+            else        // mulhdu r_dst,r_srcL,r_srcR
                p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 9, 0);
          }
       } else {
          // mul low word, sign is irrelevant
          vassert(!i->Pin.MulL.syned);
-         if (is32BitOp)     // mullw r_dst,r_srcL,r_srcR
+         if (sz32)      // mullw r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 235, 0);
-         else               // mulld r_dst,r_srcL,r_srcR
+         else           // mulld r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 233, 0);
       }
       goto done;
@@ -2698,20 +2772,23 @@
 
    case Pin_Div: {
       Bool syned  = i->Pin.Div.syned;
+      Bool sz32   = i->Pin.Div.sz32;
       UInt r_dst  = iregNo(i->Pin.Div.dst, mode64);
       UInt r_srcL = iregNo(i->Pin.Div.srcL, mode64);
       UInt r_srcR = iregNo(i->Pin.Div.srcR, mode64);
-      Bool is32BitOp = toBool(hregClass(i->Pin.Div.dst) == HRcInt32);
 
-      if (syned == True) {
-         if (is32BitOp)  // divw r_dst,r_srcL,r_srcR
+      if (!mode64)
+         vassert(sz32);
+
+      if (sz32) {
+         if (syned)  // divw r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 491, 0);
-         else
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 489, 0);
-      } else {
-         if (is32BitOp)  // divwu r_dst,r_srcL,r_srcR
+         else        // divwu r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 459, 0);
-         else
+      } else {
+         if (syned)  // divd r_dst,r_srcL,r_srcR
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 489, 0);
+         else        // divdu r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 457, 0);
       }
       goto done;
diff --git a/priv/host-ppc32/hdefs.h b/priv/host-ppc32/hdefs.h
index 6fd011f..686c2f8 100644
--- a/priv/host-ppc32/hdefs.h
+++ b/priv/host-ppc32/hdefs.h
@@ -339,14 +339,26 @@
       Palu_INVALID,
       Palu_ADD, Palu_SUB,
       Palu_AND, Palu_OR, Palu_XOR,
-      Palu_SHL, Palu_SHR, Palu_SAR, 
    }
    PPC32AluOp;
 
 extern 
 HChar* showPPC32AluOp ( PPC32AluOp, 
-                        Bool /* is the 2nd operand an immediate? */,
-                        Bool /* is this a 32bit or 64bit op? */ );
+                        Bool /* is the 2nd operand an immediate? */);
+
+
+/* --------- */
+typedef 
+   enum {
+      Pshft_INVALID,
+      Pshft_SHL, Pshft_SHR, Pshft_SAR, 
+   }
+   PPC32ShftOp;
+
+extern 
+HChar* showPPC32ShftOp ( PPC32ShftOp, 
+                         Bool /* is the 2nd operand an immediate? */,
+                         Bool /* is this a 32bit or 64bit op? */ );
 
 
 /* --------- */
@@ -427,7 +439,8 @@
 typedef
    enum {
       Pin_LI,         /* load word (32/64-bit) immediate (fake insn) */
-      Pin_Alu,        /* word add/sub/and/or/xor/shl/shr/sar */
+      Pin_Alu,        /* word add/sub/and/or/xor */
+      Pin_Shft,       /* word shl/shr/sar */
       Pin_AddSubC32,  /* 32-bit add/sub with read/write carry */
       Pin_Cmp,        /* word compare */
       Pin_Unary,      /* not, neg, clz */
@@ -485,7 +498,7 @@
             HReg dst;
             ULong imm64;
          } LI;
-         /* Integer add/sub/and/or/xor/shl/shr/sar.  Limitations:
+         /* Integer add/sub/and/or/xor.  Limitations:
             - For add, the immediate, if it exists, is a signed 16.
             - For sub, the immediate, if it exists, is a signed 16
               which may not be -32768, since no such instruction 
@@ -493,8 +506,6 @@
               that is not possible.
             - For and/or/xor,  the immediate, if it exists, 
               is an unsigned 16.
-            - For shr/shr/sar, the immediate, if it exists,
-              is a signed 5-bit value between 1 and 31 inclusive.
          */
          struct {
             PPC32AluOp op;
@@ -502,6 +513,17 @@
             HReg       srcL;
             PPC32RH*   srcR;
          } Alu;
+         /* Integer shl/shr/sar.  Limitations: the immediate, if it
+            exists, is a shift amount between 1 and 31 inclusive for
+            32-bit shifts, or between 1 and 63 inclusive for 64-bit shifts.
+         */
+         struct {
+            PPC32ShftOp op;
+            Bool        sz32;   /* True => 32bit shift, False => 64bit (mode64 only) */
+            HReg        dst;
+            HReg        srcL;
+            PPC32RH*    srcR;
+         } Shft;
          /*  */
          struct {
             Bool isAdd;  /* else sub */
@@ -514,6 +536,7 @@
             else it is an unsigned 16. */
          struct {
             Bool     syned;
+            Bool     sz32;    /* mode64 has both 32 and 64bit cmp */
             UInt     crfD;
             HReg     srcL;
             PPC32RH* srcR;
@@ -527,6 +550,7 @@
          struct {
             Bool syned;  /* meaningless if hi32==False */
             Bool hi;     /* False=>low, True=>high */
+            Bool sz32;   /* mode64 has both 32 & 64bit mull */
             HReg dst;
             HReg srcL;
             HReg srcR;
@@ -534,6 +558,7 @@
          /* ppc32 div/divu instruction. */
          struct {
             Bool syned;
+            Bool sz32;   /* mode64 has both 32 & 64bit div */
             HReg dst;
             HReg srcL;
             HReg srcR;
@@ -564,14 +589,14 @@
          } CMov;
          /* Sign/Zero extending loads.  Dst size is always 32 bits. */
          struct {
-            UChar       sz; /* 1|2|4 */
+            UChar       sz; /* 1|2|4|8 */
             Bool        syned;
             HReg        dst;
             PPC32AMode* src;
          } Load;
          /* 32/16/8 bit stores */
          struct {
-            UChar       sz; /* 1|2|4 */
+            UChar       sz; /* 1|2|4|8 */
             PPC32AMode* dst;
             HReg        src;
          } Store;
@@ -734,11 +759,12 @@
 
 extern PPC32Instr* PPC32Instr_LI         ( HReg, ULong, Bool );
 extern PPC32Instr* PPC32Instr_Alu        ( PPC32AluOp, HReg, HReg, PPC32RH* );
+extern PPC32Instr* PPC32Instr_Shft       ( PPC32ShftOp, Bool sz32, HReg, HReg, PPC32RH* );
 extern PPC32Instr* PPC32Instr_AddSubC32  ( Bool, Bool, HReg, HReg, HReg );
-extern PPC32Instr* PPC32Instr_Cmp        ( Bool,       UInt, HReg, PPC32RH* );
+extern PPC32Instr* PPC32Instr_Cmp        ( Bool, Bool, UInt, HReg, PPC32RH* );
 extern PPC32Instr* PPC32Instr_Unary      ( PPC32UnaryOp op, HReg dst, HReg src );
-extern PPC32Instr* PPC32Instr_MulL       ( Bool syned, Bool hi32, HReg, HReg, HReg );
-extern PPC32Instr* PPC32Instr_Div        ( Bool syned, HReg dst, HReg srcL, HReg srcR );
+extern PPC32Instr* PPC32Instr_MulL       ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg );
+extern PPC32Instr* PPC32Instr_Div        ( Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
 extern PPC32Instr* PPC32Instr_Call       ( PPC32CondCode, Addr64, UInt );
 extern PPC32Instr* PPC32Instr_Goto       ( IRJumpKind, PPC32CondCode cond, PPC32RI* dst );
 extern PPC32Instr* PPC32Instr_CMov       ( PPC32CondCode, HReg dst, PPC32RI* src );
diff --git a/priv/host-ppc32/isel.c b/priv/host-ppc32/isel.c
index ecd8b10..b5f8d84 100644
--- a/priv/host-ppc32/isel.c
+++ b/priv/host-ppc32/isel.c
@@ -767,8 +767,8 @@
       PPC32Instr_Alu(Palu_AND, r_rmIR, r_rmIR, PPC32RH_Imm(False,3)));
 
    // r_rmPPC32 = XOR( r_rmIR, (r_rmIR << 1) & 2)
-   addInstr(env, 
-      PPC32Instr_Alu(Palu_SHL, r_tmp, r_rmIR, PPC32RH_Imm(False,1)));
+   addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                 r_tmp, r_rmIR, PPC32RH_Imm(False,1)));
    addInstr(env, 
       PPC32Instr_Alu(Palu_AND, r_tmp, r_tmp, PPC32RH_Imm(False,2)));
    addInstr(env, 
@@ -1021,6 +1021,7 @@
    /* --------- BINARY OP --------- */
    case Iex_Binop: {
       PPC32AluOp   aluOp;
+      PPC32ShftOp  shftOp;
 
 //..       /* Pattern: Sub32(0,x) */
 //..       if (e->Iex.Binop.op == Iop_Sub32 && isZero32(e->Iex.Binop.arg1)) {
@@ -1043,12 +1044,6 @@
          aluOp = Palu_OR; break;
       case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
          aluOp = Palu_XOR; break;
-      case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
-         aluOp = Palu_SHL; break;
-      case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
-         aluOp = Palu_SHR; break;
-      case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
-         aluOp = Palu_SAR; break;
       default:
          aluOp = Palu_INVALID; break;
       }
@@ -1068,49 +1063,86 @@
             ri_srcR = iselIntExpr_RH(env, False/*signed*/,
                                      e->Iex.Binop.arg2);
             break;
-         case Palu_SHL: case Palu_SHR: case Palu_SAR:
+         default:
+            vpanic("iselIntExpr_R_wrk-aluOp-arg2");
+         }
+         addInstr(env, PPC32Instr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
+         return r_dst;
+      }
+
+      /* a shift? */
+      switch (e->Iex.Binop.op) {
+      case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
+         shftOp = Pshft_SHL; break;
+      case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
+         shftOp = Pshft_SHR; break;
+      case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
+         shftOp = Pshft_SAR; break;
+      default:
+         shftOp = Pshft_INVALID; break;
+      }
+      /* we assume any literal values are on the second operand. */
+      if (shftOp != Pshft_INVALID) {
+         HReg     r_dst   = newVRegI(env);
+         HReg     r_srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         PPC32RH* ri_srcR = NULL;
+         /* get right arg into an RH, in the appropriate way */
+         switch (shftOp) {
+         case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
             if (!mode64)
                ri_srcR = iselIntExpr_RH5u(env, e->Iex.Binop.arg2);
             else
                ri_srcR = iselIntExpr_RH6u(env, e->Iex.Binop.arg2);
             break;
          default:
-            vpanic("iselIntExpr_R_wrk-aluOp-arg2");
+            vpanic("iselIntExpr_R_wrk-shftOp-arg2");
          }
          /* widen the left arg if needed */
-         if ((aluOp == Palu_SHR || aluOp == Palu_SAR)) {
-            if (!mode64 && (ty == Ity_I8 || ty == Ity_I16)) {
+         if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
+            if (ty == Ity_I8 || ty == Ity_I16) {
                PPC32RH* amt = PPC32RH_Imm(False, toUShort(ty == Ity_I8 ? 24 : 16));
                HReg     tmp = newVRegI(env);
-               addInstr(env, PPC32Instr_Alu(Palu_SHL, tmp, r_srcL, amt));
-               addInstr(env, PPC32Instr_Alu(aluOp,    tmp, tmp, amt));
+               addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                             tmp, r_srcL, amt));
+               addInstr(env, PPC32Instr_Shft(shftOp,    True/*32bit shift*/,
+                                             tmp, tmp,    amt));
                r_srcL = tmp;
                vassert(0); /* AWAITING TEST CASE */
             }
-            if (mode64 && (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32)) {
-               PPC32RH* amt = PPC32RH_Imm(False, toUShort(ty == Ity_I8  ? 56 :
-                                                          ty == Ity_I16 ? 48 : 32));
-               HReg     tmp = newVRegI(env);
-               addInstr(env, PPC32Instr_Alu(Palu_SHL, tmp, r_srcL, amt));
-               addInstr(env, PPC32Instr_Alu(aluOp,    tmp, tmp, amt));
-               r_srcL = tmp;
-            }
          }
-         addInstr(env, PPC32Instr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
+         /* Only 64 expressions need 64bit shifts,
+            32bit shifts are fine for all others */
+         if (ty == Ity_I64) {
+            vassert(mode64);
+            addInstr(env, PPC32Instr_Shft(shftOp, False/*64bit shift*/,
+                                          r_dst, r_srcL, ri_srcR));
+         } else {
+            addInstr(env, PPC32Instr_Shft(shftOp, True/*32bit shift*/,
+                                          r_dst, r_srcL, ri_srcR));
+         }
          return r_dst;
       }
 
       /* How about a div? */
       if (e->Iex.Binop.op == Iop_DivS32 || 
-          e->Iex.Binop.op == Iop_DivU32 ||
-          e->Iex.Binop.op == Iop_DivS64 || 
-          e->Iex.Binop.op == Iop_DivU64) {
+          e->Iex.Binop.op == Iop_DivU32) {
+         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS32);
          HReg r_dst  = newVRegI(env);
          HReg r_srcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
          HReg r_srcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
-         Bool syned = toBool(e->Iex.Binop.op == Iop_DivS32 ||
-                             e->Iex.Binop.op == Iop_DivS64);
-         addInstr(env, PPC32Instr_Div(syned, r_dst, r_srcL, r_srcR));
+         addInstr(env, PPC32Instr_Div(syned, True/*32bit div*/,
+                                      r_dst, r_srcL, r_srcR));
+         return r_dst;
+      }
+      if (e->Iex.Binop.op == Iop_DivS64 || 
+          e->Iex.Binop.op == Iop_DivU64) {
+         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS64);
+         HReg r_dst  = newVRegI(env);
+         HReg r_srcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg r_srcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         vassert(mode64);
+         addInstr(env, PPC32Instr_Div(syned, False/*64bit div*/,
+                                      r_dst, r_srcL, r_srcR));
          return r_dst;
       }
 
@@ -1119,25 +1151,61 @@
           e->Iex.Binop.op == Iop_Mul32 ||
           e->Iex.Binop.op == Iop_Mul64) {
          Bool syned       = False;
+         Bool sz32        = (e->Iex.Binop.op != Iop_Mul64);
          HReg r_dst       = newVRegI(env);
          HReg r_srcL      = iselIntExpr_R(env, e->Iex.Binop.arg1);
          HReg r_srcR      = iselIntExpr_R(env, e->Iex.Binop.arg2);
-         addInstr(env, PPC32Instr_MulL(syned, False/*lo32*/, 
+         addInstr(env, PPC32Instr_MulL(syned, False/*lo32*/, sz32,
                                        r_dst, r_srcL, r_srcR));
          return r_dst;
       }      
 
+      /* 32 x 32 -> 64 multiply */
+      if (e->Iex.Binop.op == Iop_MullU32 ||
+          e->Iex.Binop.op == Iop_MullS32) {
+         HReg tLo    = newVRegI(env);
+         HReg tHi    = newVRegI(env);
+         HReg r_dst  = newVRegI(env);
+         Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
+         HReg r_srcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg r_srcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         vassert(mode64);
+         addInstr(env, PPC32Instr_MulL(False/*signedness irrelevant*/, 
+                                       False/*lo32*/, True/*32bit mul*/,
+                                       tLo, r_srcL, r_srcR));
+         addInstr(env, PPC32Instr_MulL(syned,
+                                       True/*hi32*/, True/*32bit mul*/,
+                                       tHi, r_srcL, r_srcR));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, False/*64bit shift*/,
+                                       r_dst, tHi, PPC32RH_Imm(False,32)));
+         addInstr(env, PPC32Instr_Alu(Palu_OR, r_dst, r_dst, PPC32RH_Reg(tLo)));
+         return r_dst;
+      }
+
       /* El-mutanto 3-way compare? */
       if (e->Iex.Binop.op == Iop_CmpORD32S ||
-          e->Iex.Binop.op == Iop_CmpORD32U ||
-          e->Iex.Binop.op == Iop_CmpORD64S ||
-          e->Iex.Binop.op == Iop_CmpORD64U) {
-         Bool     syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S ||
-                                 e->Iex.Binop.op == Iop_CmpORD64S);
+          e->Iex.Binop.op == Iop_CmpORD32U) {
+         Bool     syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
          HReg     dst   = newVRegI(env);
          HReg     srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
          PPC32RH* srcR  = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
-         addInstr(env, PPC32Instr_Cmp(syned, /*cr*/7, srcL, srcR));
+         addInstr(env, PPC32Instr_Cmp(syned, True/*32bit cmp*/,
+                                      7/*cr*/, srcL, srcR));
+         addInstr(env, PPC32Instr_MfCR(dst));
+         addInstr(env, PPC32Instr_Alu(Palu_AND, dst, dst,
+                                      PPC32RH_Imm(False,7<<1)));
+         return dst;
+      }
+
+      if (e->Iex.Binop.op == Iop_CmpORD64S ||
+          e->Iex.Binop.op == Iop_CmpORD64U) {
+         Bool     syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
+         HReg     dst   = newVRegI(env);
+         HReg     srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         PPC32RH* srcR  = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
+         vassert(mode64);
+         addInstr(env, PPC32Instr_Cmp(syned, False/*64bit cmp*/,
+                                      7/*cr*/, srcL, srcR));
          addInstr(env, PPC32Instr_MfCR(dst));
          addInstr(env, PPC32Instr_Alu(Palu_AND, dst, dst,
                                       PPC32RH_Imm(False,7<<1)));
@@ -1217,18 +1285,22 @@
          */
 
          // r_ccIR_b0 = r_ccPPC32[0] | r_ccPPC32[3]
-         addInstr(env, PPC32Instr_Alu(Palu_SHR, r_ccIR_b0, r_ccPPC32, PPC32RH_Imm(False,0x3)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHR, True/*32bit shift*/,
+                                       r_ccIR_b0, r_ccPPC32, PPC32RH_Imm(False,0x3)));
          addInstr(env, PPC32Instr_Alu(Palu_OR,  r_ccIR_b0, r_ccPPC32, PPC32RH_Reg(r_ccIR_b0)));
          addInstr(env, PPC32Instr_Alu(Palu_AND, r_ccIR_b0, r_ccIR_b0, PPC32RH_Imm(False,0x1)));
          
          // r_ccIR_b2 = r_ccPPC32[0]
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, r_ccIR_b2, r_ccPPC32, PPC32RH_Imm(False,0x2)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                       r_ccIR_b2, r_ccPPC32, PPC32RH_Imm(False,0x2)));
          addInstr(env, PPC32Instr_Alu(Palu_AND, r_ccIR_b2, r_ccIR_b2, PPC32RH_Imm(False,0x4)));
 
          // r_ccIR_b6 = r_ccPPC32[0] | r_ccPPC32[1]
-         addInstr(env, PPC32Instr_Alu(Palu_SHR, r_ccIR_b6, r_ccPPC32, PPC32RH_Imm(False,0x1)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHR, True/*32bit shift*/,
+                                       r_ccIR_b6, r_ccPPC32, PPC32RH_Imm(False,0x1)));
          addInstr(env, PPC32Instr_Alu(Palu_OR,  r_ccIR_b6, r_ccPPC32, PPC32RH_Reg(r_ccIR_b6)));
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, r_ccIR_b6, r_ccIR_b6, PPC32RH_Imm(False,0x6)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                       r_ccIR_b6, r_ccIR_b6, PPC32RH_Imm(False,0x6)));
          addInstr(env, PPC32Instr_Alu(Palu_AND, r_ccIR_b6, r_ccIR_b6, PPC32RH_Imm(False,0x40)));
 
          // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
@@ -1322,27 +1394,36 @@
          HReg r_dst = newVRegI(env);
          HReg r_src = iselIntExpr_R(env, e->Iex.Unop.arg);
          vassert(mode64);
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, r_dst, r_src, 
-                                                  PPC32RH_Imm(False,32)));
-         addInstr(env, PPC32Instr_Alu(Palu_SHR, r_dst, r_dst, 
-                                                  PPC32RH_Imm(False,32)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, False/*64bit shift*/,
+                                       r_dst, r_src, PPC32RH_Imm(False,32)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHR, False/*64bit shift*/,
+                                       r_dst, r_dst, PPC32RH_Imm(False,32)));
          return r_dst;
       }
       case Iop_8Sto16:
       case Iop_8Sto32:
-      case Iop_16Sto32:
+      case Iop_16Sto32: {
+         HReg   r_dst = newVRegI(env);
+         HReg   r_src = iselIntExpr_R(env, e->Iex.Unop.arg);
+         UShort amt   = toUShort(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                       r_dst, r_src, PPC32RH_Imm(False,amt)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SAR, True/*32bit shift*/,
+                                       r_dst, r_dst, PPC32RH_Imm(False,amt)));
+         return r_dst;
+      }
+      case Iop_8Sto64:
       case Iop_16Sto64:
       case Iop_32Sto64: {
          HReg   r_dst = newVRegI(env);
          HReg   r_src = iselIntExpr_R(env, e->Iex.Unop.arg);
-         UShort amt   = toUShort(e->Iex.Unop.op==Iop_16Sto64 ? 48 :
-                                 e->Iex.Unop.op==Iop_32Sto64 ? 32 :
-                                 e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
-         vassert(amt<32 || mode64);
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, r_dst, r_src, 
-                                                  PPC32RH_Imm(False,amt)));
-         addInstr(env, PPC32Instr_Alu(Palu_SAR, r_dst, r_dst, 
-                                                  PPC32RH_Imm(False,amt)));
+         UShort amt   = toUShort(e->Iex.Unop.op==Iop_8Sto64  ? 56 :
+                                 e->Iex.Unop.op==Iop_16Sto64 ? 48 : 32);
+         vassert(mode64);
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, False/*64bit shift*/,
+                                       r_dst, r_src, PPC32RH_Imm(False,amt)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SAR, False/*64bit shift*/,
+                                       r_dst, r_dst, PPC32RH_Imm(False,amt)));
          return r_dst;
       }
       case Iop_Not8:
@@ -1362,8 +1443,8 @@
          } else {
             HReg   r_dst = newVRegI(env);
             HReg   r_src = iselIntExpr_R(env, e->Iex.Unop.arg);
-            addInstr(env, PPC32Instr_Alu(Palu_SHR, r_dst, r_src, 
-                                         PPC32RH_Imm(False,32)));
+            addInstr(env, PPC32Instr_Shft(Pshft_SHR, False/*64bit shift*/,
+                                         r_dst, r_src, PPC32RH_Imm(False,32)));
             return r_dst;
          }
       }
@@ -1421,10 +1502,16 @@
          HReg   r_dst = newVRegI(env);
          HReg   r_src = iselIntExpr_R(env, e->Iex.Unop.arg);
          UShort shift = toUShort(e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16);
-         addInstr(env, PPC32Instr_Alu(Palu_SHR, r_dst, r_src, 
-                                      PPC32RH_Imm(False,shift)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHR, True/*32bit shift*/,
+                                       r_dst, r_src, PPC32RH_Imm(False,shift)));
          return r_dst;
       }
+      case Iop_128HIto64: {
+         HReg rHi, rLo;
+         vassert(mode64);
+         iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+         return rHi; /* only the high half is wanted; rLo is deliberately unused */
+      }
       case Iop_128to64: {
          vassert(mode64);
          HReg rHi, rLo;
@@ -1445,10 +1532,10 @@
          HReg          r_dst = newVRegI(env);
          PPC32CondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
          addInstr(env, PPC32Instr_Set32(cond,r_dst));
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, r_dst, r_dst, 
-                                                  PPC32RH_Imm(False,31)));
-         addInstr(env, PPC32Instr_Alu(Palu_SAR, r_dst, r_dst, 
-                                                  PPC32RH_Imm(False,31)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                       r_dst, r_dst, PPC32RH_Imm(False,31)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SAR, True/*32bit shift*/,
+                                       r_dst, r_dst, PPC32RH_Imm(False,31)));
          return r_dst;
       }
 
@@ -1584,7 +1671,8 @@
          HReg     r_tmp  = newVRegI(env);
          addInstr(env, mk_iMOVds_RR(r_dst,rX));
          addInstr(env, PPC32Instr_Alu(Palu_AND, r_tmp, r_cond, PPC32RH_Imm(False,0xFF)));
-         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 7/*cr*/, r_tmp, PPC32RH_Imm(False,0)));
+         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                      7/*cr*/, r_tmp, PPC32RH_Imm(False,0)));
          addInstr(env, PPC32Instr_CMov(cc,r_dst,r0));
          return r_dst;
       }
@@ -1917,8 +2005,8 @@
       // Make a compare that will always be true:
       HReg r_zero = newVRegI(env);
       addInstr(env, PPC32Instr_LI(r_zero, 0, mode64));
-      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, /*cr*/7, 
-                                   r_zero, PPC32RH_Reg(r_zero)));
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                   7/*cr*/, r_zero, PPC32RH_Reg(r_zero)));
       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
    }
 
@@ -1949,15 +2037,14 @@
 //..    }
 
    /* 32to1 */
-   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
+   if (e->tag == Iex_Unop &&
+       (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
       HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
       HReg tmp = newVRegI(env);
       /* could do better, probably -- andi. */
-      addInstr(env, PPC32Instr_Alu(
-                       Palu_AND, tmp, src, PPC32RH_Imm(False,1)));
-      addInstr(env, PPC32Instr_Cmp(
-                       False/*unsigned*/, 7/*cr*/, 
-                       tmp, PPC32RH_Imm(False,1)));
+      addInstr(env, PPC32Instr_Alu(Palu_AND, tmp, src, PPC32RH_Imm(False,1)));
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                   7/*cr*/, tmp, PPC32RH_Imm(False,1)));
       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
    }
 
@@ -1969,9 +2056,10 @@
        && e->Iex.Unop.op == Iop_CmpNEZ8) {
       HReg r_32 = iselIntExpr_R(env, e->Iex.Unop.arg);
       HReg r_l  = newVRegI(env);
-      addInstr(env, PPC32Instr_Alu(Palu_AND, r_l, r_32, PPC32RH_Imm(False,0xFF)));
-      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 7/*cr*/, 
-                                   r_l, PPC32RH_Imm(False,0)));
+      addInstr(env, PPC32Instr_Alu(Palu_AND, r_l, r_32,
+                                   PPC32RH_Imm(False,0xFF)));
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                   7/*cr*/, r_l, PPC32RH_Imm(False,0)));
       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
    }
 
@@ -1981,7 +2069,8 @@
    if (e->tag == Iex_Unop
        && e->Iex.Unop.op == Iop_CmpNEZ32) {
       HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
-      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 7, r1, PPC32RH_Imm(False,0)));
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                   7/*cr*/, r1, PPC32RH_Imm(False,0)));
       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
    }
 
@@ -2040,15 +2129,12 @@
            || e->Iex.Binop.op == Iop_CmpLT32U
            || e->Iex.Binop.op == Iop_CmpLE32S
            || e->Iex.Binop.op == Iop_CmpLE32U)) {
-      PPC32RH* ri2;
-      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
-      Bool syned = False;
-      if (e->Iex.Binop.op == Iop_CmpLT32S ||
-          e->Iex.Binop.op == Iop_CmpLE32S) {
-         syned = True;
-      }
-      ri2 = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
-      addInstr(env, PPC32Instr_Cmp(syned,7,r1,ri2));
+      Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
+                    e->Iex.Binop.op == Iop_CmpLE32S);
+      HReg     r1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      PPC32RH* ri2 = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
+      addInstr(env, PPC32Instr_Cmp(syned, True/*32bit cmp*/,
+                                   7/*cr*/, r1, ri2));
 
       switch (e->Iex.Binop.op) {
       case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
@@ -2069,15 +2155,13 @@
            || e->Iex.Binop.op == Iop_CmpLT64U
            || e->Iex.Binop.op == Iop_CmpLE64S
            || e->Iex.Binop.op == Iop_CmpLE64U)) {
-      PPC32RH* ri2;
-      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
-      Bool syned = False;
-      if (e->Iex.Binop.op == Iop_CmpLT64S ||
-          e->Iex.Binop.op == Iop_CmpLE64S) {
-         syned = True;
-      }
-      ri2 = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
-      addInstr(env, PPC32Instr_Cmp(syned,7,r1,ri2));
+      Bool   syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
+                      e->Iex.Binop.op == Iop_CmpLE64S);
+      HReg      r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      PPC32RH* ri2 = iselIntExpr_RH(env, syned, e->Iex.Binop.arg2);
+      vassert(mode64);
+      addInstr(env, PPC32Instr_Cmp(syned, False/*64bit cmp*/,
+                                   7/*cr*/, r1, ri2));
 
       switch (e->Iex.Binop.op) {
       case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
@@ -2147,11 +2231,13 @@
          iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
          addInstr(env, mk_iMOVds_RR(tmp, lo));
          addInstr(env, PPC32Instr_Alu(Palu_OR, tmp, tmp, PPC32RH_Reg(hi)));
-         addInstr(env, PPC32Instr_Cmp(False/*sign*/,7/*cr*/,tmp,PPC32RH_Imm(False,0)));
+         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                      7/*cr*/, tmp,PPC32RH_Imm(False,0)));
          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
       } else {  // mode64
          HReg r_src = iselIntExpr_R(env, e->Iex.Binop.arg1);
-         addInstr(env, PPC32Instr_Cmp(False/*sign*/,7/*cr*/,r_src,PPC32RH_Imm(False,0)));
+         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, False/*64bit cmp*/,
+                                      7/*cr*/, r_src,PPC32RH_Imm(False,0)));
          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
       }
    }
@@ -2161,7 +2247,8 @@
       HReg r_src      = lookupIRTemp(env, e->Iex.Tmp.tmp);
       HReg src_masked = newVRegI(env);
       addInstr(env, PPC32Instr_Alu(Palu_AND, src_masked, r_src, PPC32RH_Imm(False,1)));
-      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 7/*cr*/, src_masked, PPC32RH_Imm(False,1)));
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                   7/*cr*/, src_masked, PPC32RH_Imm(False,1)));
       return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
    }
 
@@ -2217,9 +2304,11 @@
          HReg     r_srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
          HReg     r_srcR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
          addInstr(env, PPC32Instr_MulL(False/*signedness irrelevant*/, 
-                                       False/*lo64*/, tLo, r_srcL, r_srcR));
+                                       False/*lo64*/, False/*64bit mul*/,
+                                       tLo, r_srcL, r_srcR));
          addInstr(env, PPC32Instr_MulL(syned,
-                                       True/*hi64*/, tHi, r_srcL, r_srcR));
+                                       True/*hi64*/, False/*64bit mul*/,
+                                       tHi, r_srcL, r_srcR));
          *rHi = tHi;
          *rLo = tLo;
          return;
@@ -2357,7 +2446,7 @@
       
       addInstr(env, PPC32Instr_Alu(Palu_AND, 
                                    r_tmp, r_cond, PPC32RH_Imm(False,0xFF)));
-      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 
+      addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/, 
                                    7/*cr*/, r_tmp, PPC32RH_Imm(False,0)));
       
       addInstr(env, PPC32Instr_CMov(cc,tHi,PPC32RI_Reg(e0Hi)));
@@ -2379,9 +2468,11 @@
          HReg     r_srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
          HReg     r_srcR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
          addInstr(env, PPC32Instr_MulL(False/*signedness irrelevant*/, 
-                                       False/*lo32*/, tLo, r_srcL, r_srcR));
+                                       False/*lo32*/, True/*32bit mul*/,
+                                       tLo, r_srcL, r_srcR));
          addInstr(env, PPC32Instr_MulL(syned,
-                                       True/*hi32*/, tHi, r_srcL, r_srcR));
+                                       True/*hi32*/, True/*32bit mul*/,
+                                       tHi, r_srcL, r_srcR));
          *rHi = tHi;
          *rLo = tLo;
          return;
@@ -2749,7 +2840,8 @@
       case Iop_32Sto64: {
          HReg tHi = newVRegI(env);
          HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
-         addInstr(env, PPC32Instr_Alu(Palu_SAR, tHi, src, PPC32RH_Imm(False,31)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SAR, True/*32bit shift*/,
+                                       tHi, src, PPC32RH_Imm(False,31)));
          *rHi = tHi;
          *rLo = src;
          return;
@@ -2801,8 +2893,10 @@
          HReg tHi = newVRegI(env);
          PPC32CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
          addInstr(env, PPC32Instr_Set32(cond,tLo));
-         addInstr(env, PPC32Instr_Alu(Palu_SHL, tLo, tLo, PPC32RH_Imm(False,31)));
-         addInstr(env, PPC32Instr_Alu(Palu_SAR, tLo, tLo, PPC32RH_Imm(False,31)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SHL, True/*32bit shift*/,
+                                       tLo, tLo, PPC32RH_Imm(False,31)));
+         addInstr(env, PPC32Instr_Shft(Pshft_SAR, True/*32bit shift*/,
+                                       tLo, tLo, PPC32RH_Imm(False,31)));
          addInstr(env, mk_iMOVds_RR(tHi, tLo));
          *rHi = tHi;
          *rLo = tLo;
@@ -3239,7 +3333,8 @@
          HReg r_tmp  = newVRegI(env);
          addInstr(env, PPC32Instr_Alu(Palu_AND, r_tmp, r_cond, PPC32RH_Imm(False,0xFF)));
          addInstr(env, PPC32Instr_FpUnary( Pfp_MOV, fr_dst, frX ));
-         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, 7/*cr*/, r_tmp, PPC32RH_Imm(False,0)));
+         addInstr(env, PPC32Instr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+                                      7/*cr*/, r_tmp, PPC32RH_Imm(False,0)));
          addInstr(env, PPC32Instr_FpCMov( cc, fr_dst, fr0 ));
          return fr_dst;
       }
@@ -4024,6 +4119,14 @@
          addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
          return;
       }
+      if (mode64 && ty == Ity_I128) {
+         HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
+         iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.Tmp.data);
+         lookupIRTemp128( &r_dstHi, &r_dstLo, env, tmp);
+         addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
+         addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
+         return;
+      }
       if (ty == Ity_I1) {
          PPC32CondCode cond = iselCondCode(env, stmt->Ist.Tmp.data);
          HReg r_dst = lookupIRTemp(env, tmp);
@@ -4207,22 +4310,21 @@
       case Ity_I1:
       case Ity_I8:
       case Ity_I16:
-      case Ity_I32:  hreg   = mkHReg(j++, HRcIntWRDSZ,  True);
-      case Ity_I64:  
-         if (mode64) {
-                     hreg   = mkHReg(j++, HRcInt64,  True);
-         } else {
-                     hreg   = mkHReg(j++, HRcInt32,  True);
-                     hregHI = mkHReg(j++, HRcInt32,  True);
+      case Ity_I32:
+         if (mode64) { hreg   = mkHReg(j++, HRcInt64,  True); break;
+         } else {      hreg   = mkHReg(j++, HRcInt32,  True); break;
          }
-         break;
-      case Ity_I128: vassert(mode64);
-                     hreg   = mkHReg(j++, HRcInt64,  True);
-                     hregHI = mkHReg(j++, HRcInt64,  True);
-
+      case Ity_I64:  
+         if (mode64) { hreg   = mkHReg(j++, HRcInt64,  True); break;
+         } else {      hreg   = mkHReg(j++, HRcInt32,  True);
+                       hregHI = mkHReg(j++, HRcInt32,  True); break;
+         }
+      case Ity_I128:   vassert(mode64);
+                       hreg   = mkHReg(j++, HRcInt64,  True);
+                       hregHI = mkHReg(j++, HRcInt64,  True); break;
       case Ity_F32:
-      case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64,  True); break;
-      case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
+      case Ity_F64:    hreg   = mkHReg(j++, HRcFlt64,  True); break;
+      case Ity_V128:   hreg   = mkHReg(j++, HRcVec128, True); break;
       default:
          ppIRType(bb->tyenv->types[i]);
          if (mode64)