Merge patch from JeremyF:

50-fast-cond

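Implement Julian's idea for fast conditional jumps. Rather than fully
restoring the eflags register with an expensive push-popf pair, just
test the relevant flag bits directly in the copy of eflags saved in the
base block. This applies, when VG_(clo_fast_jcc) is enabled, to
conditions that depend only on the C, Z, S or P flags (and their
negations); all other conditions still take the slow path. Faster, and
smaller code too!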


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1339 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 11843ee..645988c 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1535,8 +1535,16 @@
 }
 
 
+static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ); /* forward declaration */
+static void synth_nonshiftop_lit_reg ( Bool upd_cc,   /* forward declaration */
+                                       Opcode opcode, Int size, 
+                                       UInt lit, Int reg ); /* NOTE(review): neither prototype is called in the hunks shown -- confirm both are still needed */
+
 static void synth_jcond_lit ( Condcode cond, Addr addr )
 {
+   UInt mask;    /* eflags bits tested on the fast path */
+   Int delta;    /* displacement of the short jump over the jump sequence */
+
   /* Do the following:
         get eflags
         jmp short if not cond to xyxyxy
@@ -1550,7 +1558,6 @@
    5 0008 FFE3                  jmp     *%ebx
    6                    xyxyxy:
   */
-   emit_get_eflags();
    if (VG_(clo_chain_bb)) {
       /* When using BB chaining, the jump sequence is:
         jmp short if not cond to xyxyxy
@@ -1562,16 +1569,75 @@
 		mov    $0x4000d190,%eax			// 5
 		mov    %eax, VGOFF_(m_eip)(%ebp)	// 3
 		call   0x40050f9a <vgPlain_patch_me>	// 5
+		$01					// 1
 	1:	mov    $0x4000d042,%eax
 		call   0x40050f9a <vgPlain_patch_me>
       */
-      VG_(emit_jcondshort_delta) ( invertCondition(cond), 5+3+5 );
+      delta = 5+3+5+1 -1;
    } else
-      VG_(emit_jcondshort_delta) ( invertCondition(cond), 5+1 );
+      delta = 5+1;
+   
+   /* Fast path: when the condition depends only on a simple set of
+      flag bits, test those bits directly in the eflags word held at
+      VGOFF_(m_eflags)*4(%ebp) instead of calling emit_get_eflags().
+      Otherwise fall back to the slow way and branch on the inverse. */
+   if (!VG_(clo_fast_jcc)) {
+      /* We're forced to do it the slow way. */
+      emit_get_eflags();
+      cond = invertCondition(cond);
+   } else {
+      /* The low bit of cond selects between a condition and its
+         negation, so strip it to handle each pair in a single case. */
+      switch (cond & ~1) {
+         case CondB:  mask = EFlagC;          goto common;  /* C=1        */
+         case CondZ:  mask = EFlagZ;          goto common;  /* Z=1        */
+         case CondBE: mask = EFlagC | EFlagZ; goto common;  /* C=1 || Z=1 */
+         case CondS:  mask = EFlagS;          goto common;  /* S=1        */
+         case CondP:  mask = EFlagP;          goto common;  /* P=1        */
+         default:
+            /* Too complex .. we have to do it the slow way. */
+            emit_get_eflags();
+            cond = invertCondition(cond);
+            break;
+
+        common:
+            VG_(new_emit)();
+            if ((mask & 0xff) == mask) {
+               VG_(emitB) ( 0xF6 );   /* Grp3 */
+               VG_(emit_amode_offregmem_reg)(
+                  VGOFF_(m_eflags) * 4, R_EBP, 0 /* subcode for TEST */);
+               VG_(emitB) (mask);
+               if (dis)
+                  VG_(printf)("\n\t\ttestb $%x, %d(%%ebp)\n",
+                              mask, VGOFF_(m_eflags) * 4);
+            } else {
+               VG_(emitB) ( 0xF7 );   /* Grp3, 32-bit operand */
+               VG_(emit_amode_offregmem_reg)(
+                  VGOFF_(m_eflags) * 4, R_EBP, 0 /* subcode for TEST */);
+               VG_(emitL) (mask);     /* F7 /0 takes a 4-byte immediate */
+               if (dis)
+                  VG_(printf)("\n\t\ttestl $%x, %d(%%ebp)\n",
+                              mask, VGOFF_(m_eflags) * 4);
+            }
+
+            /* TEST sets ZF iff none of the masked flags are set.  The
+               branch emitted below skips the jump when the original
+               condition is false: for a flag-set condition (low bit
+               clear) that means ZF=1, for its negation ZF=0. */
+            if (cond & 1)
+               cond = CondNZ;
+            else
+               cond = CondZ;
+            break;
+      }
+   }
+
+   VG_(emit_jcondshort_delta) ( cond, delta );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
 
+
 static void synth_jmp_ifzero_reg_lit ( Int reg, Addr addr )
 {
    /* 0000 83FF00                cmpl    $0, %edi