Fix some isel cases pertaining to 1-bit values.  This makes lackey
work on amd64.  Yay!


git-svn-id: svn://svn.valgrind.org/vex/trunk@1078 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host-amd64/hdefs.c b/priv/host-amd64/hdefs.c
index 27629a9..7dc0cc6 100644
--- a/priv/host-amd64/hdefs.c
+++ b/priv/host-amd64/hdefs.c
@@ -772,13 +772,13 @@
    vassert(sz == 1 || sz == 2 || sz == 4);
    return i;
 }
-//.. AMD64Instr* AMD64Instr_Set32 ( AMD64CondCode cond, HReg dst ) {
-//..    AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
-//..    i->tag            = Xin_Set32;
-//..    i->Xin.Set32.cond = cond;
-//..    i->Xin.Set32.dst  = dst;
-//..    return i;
-//.. }
+AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
+   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag            = Ain_Set64;
+   i->Ain.Set64.cond = cond;
+   i->Ain.Set64.dst  = dst;
+   return i;
+}
 //.. AMD64Instr* AMD64Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
 //..    AMD64Instr* i          = LibVEX_Alloc(sizeof(AMD64Instr));
 //..    i->tag               = Xin_Bsfr32;
@@ -1116,10 +1116,10 @@
          vex_printf(",");
          ppAMD64AMode(i->Ain.Store.dst);
          return;
-//..       case Xin_Set32:
-//..          vex_printf("setl%s ", showAMD64CondCode(i->Xin.Set32.cond));
-//..          ppHRegAMD64(i->Xin.Set32.dst);
-//..          return;
+      case Ain_Set64:
+         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
+         ppHRegAMD64(i->Ain.Set64.dst);
+         return;
 //..       case Xin_Bsfr32:
 //..          vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
 //..          ppHRegAMD64(i->Xin.Bsfr32.src);
@@ -1430,9 +1430,9 @@
          addHRegUse(u, HRmRead, i->Ain.Store.src);
          addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
          return;
-//..       case Xin_Set32:
-//..          addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
-//..          return;
+      case Ain_Set64:
+         addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
+         return;
 //..       case Xin_Bsfr32:
 //..          addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
 //..          addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
@@ -1628,9 +1628,9 @@
          mapReg(m, &i->Ain.Store.src);
          mapRegs_AMD64AMode(m, i->Ain.Store.dst);
          return;
-//..       case Xin_Set32:
-//..          mapReg(m, &i->Xin.Set32.dst);
-//..          return;
+      case Ain_Set64:
+         mapReg(m, &i->Ain.Set64.dst);
+         return;
 //..       case Xin_Bsfr32:
 //..          mapReg(m, &i->Xin.Bsfr32.src);
 //..          mapReg(m, &i->Xin.Bsfr32.dst);
@@ -2150,6 +2150,7 @@
 {
    UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
    UInt   xtra;
+   UInt   reg;
    UChar  rex;
    UChar* p = &buf[0];
    UChar* ptmp;
@@ -2670,39 +2671,30 @@
       }
       break;
 
-//..    case Xin_Set32:
-//..       /* Make the destination register be 1 or 0, depending on whether
-//..          the relevant condition holds.  We have to dodge and weave
-//..          when the destination is %esi or %edi as we cannot directly
-//..          emit the native 'setb %reg' for those.  Further complication:
-//..          the top 24 bits of the destination should be forced to zero,
-//..          but doing 'xor %r,%r' kills the flag(s) we are about to read.
-//..          Sigh.  So start off my moving $0 into the dest. */
-//.. 
-//..       /* Do we need to swap in %eax? */
-//..       if (iregNo(i->Xin.Set32.dst) >= 4) {
-//..          /* xchg %eax, %dst */
-//..          *p++ = 0x90 + iregNo(i->Xin.Set32.dst);
-//..          /* movl $0, %eax */
-//..          *p++ = 0xB8 + iregNo(hregAMD64_EAX());
-//..          p = emit32(p, 0);
-//..          /* setb lo8(%eax) */
-//..          *p++ = 0x0F; 
-//..          *p++ = 0x90 + (UChar)(i->Xin.Set32.cond);
-//..          p = doAMode_R(p, fake(0), hregAMD64_EAX());
-//..          /* xchg %eax, %dst */
-//..          *p++ = 0x90 + iregNo(i->Xin.Set32.dst);
-//..       } else {
-//..          /* movl $0, %dst */
-//..          *p++ = 0xB8 + iregNo(i->Xin.Set32.dst);
-//..          p = emit32(p, 0);
-//..          /* setb lo8(%dst) */
-//..          *p++ = 0x0F; 
-//..          *p++ = 0x90 + (UChar)(i->Xin.Set32.cond);
-//..          p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
-//..       }
-//..       goto done;
-//.. 
+   case Ain_Set64:
+      /* Make the destination register be 1 or 0, depending on whether
+         the relevant condition holds.  Complication: the top 56 bits
+         of the destination should be forced to zero, but doing 'xorq
+         %r,%r' kills the flag(s) we are about to read.  Sigh.  So
+         start off by moving $0 into the dest. */
+
+      reg = iregNo(i->Ain.Set64.dst);
+      vassert(reg < 16);
+
+      /* movq $0, %dst */
+      *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
+      *p++ = 0xC7;
+      *p++ = toUChar(0xC0 + (reg & 7));
+      p = emit32(p, 0);
+
+      /* setb lo8(%dst) */
+      /* Note: 8-bit register REX trickiness.  REX is always emitted so regs 4..7 mean %spl..%dil, not %ah..%bh. */
+      *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
+      *p++ = 0x0F; 
+      *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
+      *p++ = toUChar(0xC0 + (reg & 7));
+      goto done;
+
 //..    case Xin_Bsfr32:
 //..       *p++ = 0x0F;
 //..       if (i->Xin.Bsfr32.isFwds) {
diff --git a/priv/host-amd64/hdefs.h b/priv/host-amd64/hdefs.h
index cf1b006..ed9299f 100644
--- a/priv/host-amd64/hdefs.h
+++ b/priv/host-amd64/hdefs.h
@@ -369,7 +369,7 @@
       Ain_MovZLQ,    /* reg-reg move, zeroing out top half */
       Ain_LoadEX,    /* mov{s,z}{b,w,l}q from mem to reg */
       Ain_Store,     /* store 32/16/8 bit value in memory */
-//..       Xin_Set32,     /* convert condition code to 32-bit value */
+      Ain_Set64,     /* convert condition code to 64-bit value */
 //..       Xin_Bsfr32,    /* 32-bit bsf/bsr */
       Ain_MFence,    /* mem fence */
 //.. 
@@ -498,11 +498,11 @@
             HReg        src;
             AMD64AMode* dst;
          } Store;
-//..          /* Convert a x86 condition code to a 32-bit value (0 or 1). */
-//..          struct {
-//..             X86CondCode cond;
-//..             HReg        dst;
-//..          } Set32;
+         /* Convert an amd64 condition code to a 64-bit value (0 or 1). */
+         struct {
+            AMD64CondCode cond;
+            HReg          dst;
+         } Set64;
 //..          /* 32-bit bsf or bsr. */
 //..          struct {
 //..             Bool isFwds;
@@ -669,7 +669,7 @@
 extern AMD64Instr* AMD64Instr_LoadEX    ( UChar szSmall, Bool syned,
                                           AMD64AMode* src, HReg dst );
 extern AMD64Instr* AMD64Instr_Store     ( UChar sz, HReg src, AMD64AMode* dst );
-//.. extern AMD64Instr* AMD64Instr_Set32     ( AMD64CondCode cond, HReg dst );
+extern AMD64Instr* AMD64Instr_Set64     ( AMD64CondCode cond, HReg dst );
 //.. extern AMD64Instr* AMD64Instr_Bsfr32    ( Bool isFwds, HReg src, HReg dst );
 extern AMD64Instr* AMD64Instr_MFence    ( void );
 //.. 
diff --git a/priv/host-amd64/isel.c b/priv/host-amd64/isel.c
index 122c2ae..3678be8 100644
--- a/priv/host-amd64/isel.c
+++ b/priv/host-amd64/isel.c
@@ -1897,15 +1897,15 @@
 //..          default: vpanic("iselCondCode(x86): CmpXX64");
 //..       }
 //..    }
-//.. 
-//..    /* var */
-//..    if (e->tag == Iex_Tmp) {
-//..       HReg r32 = lookupIRTemp(env, e->Iex.Tmp.tmp);
-//..       HReg dst = newVRegI(env);
-//..       addInstr(env, mk_iMOVsd_RR(r32,dst));
-//..       addInstr(env, X86Instr_Alu32R(Xalu_AND,X86RMI_Imm(1),dst));
-//..       return Xcc_NZ;
-//..    }
+
+   /* var */
+   if (e->tag == Iex_Tmp) {
+      HReg r64 = lookupIRTemp(env, e->Iex.Tmp.tmp);
+      HReg dst = newVRegI(env);
+      addInstr(env, mk_iMOVsd_RR(r64,dst));
+      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
+      return Acc_NZ;
+   }
 
    ppIRExpr(e);
    vpanic("iselCondCode(amd64)");
@@ -3545,12 +3545,12 @@
          addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
          return;
       }
-//..       if (ty == Ity_I1) {
-//..          X86CondCode cond = iselCondCode(env, stmt->Ist.Tmp.data);
-//..          HReg dst = lookupIRTemp(env, tmp);
-//..          addInstr(env, X86Instr_Set32(cond, dst));
-//..          return;
-//..       }
+      if (ty == Ity_I1) {
+         AMD64CondCode cond = iselCondCode(env, stmt->Ist.Tmp.data);
+         HReg dst = lookupIRTemp(env, tmp);
+         addInstr(env, AMD64Instr_Set64(cond, dst));
+         return;
+      }
       if (ty == Ity_F64) {
          HReg dst = lookupIRTemp(env, tmp);
          HReg src = iselDblExpr(env, stmt->Ist.Tmp.data);