Merge, from CGTUNE branch:

r1768:
Cosmetic (non-functional) changes associated with r1767.

r1767:
Add a second spill-code-avoidance optimisation, which could be called
'directReload' for lack of a better name.

If an instruction reads exactly one vreg which is currently in a spill
slot, and this is the last use of that vreg, see if the instruction can
be converted into one that reads directly from the spill slot.  This is
clearly only possible for x86 and amd64 targets, since ppc is a
load-store architecture.  So, for example,

   orl %vreg, %dst

where %vreg is in a spill slot, and this is its last use, would
previously be converted to

   movl $spill-offset(%ebp), %tmp
   orl %tmp, %dst

whereas now it becomes

   orl $spill-offset(%ebp), %dst

This not only avoids an instruction, it eliminates the need for a
reload temporary (%tmp in this example) and so potentially further
reduces spilling.

Implementation is in two parts: an architecture-independent part, in
reg_alloc2.c, which finds candidate instructions, and a host-dependent
function (directReload_ARCH) for each arch supporting the
optimisation.  The directReload_ARCH function does the instruction form
conversion, when possible.  Currently only x86 hosts are supported.

As a side effect, change the form of the X86_Test32 instruction from
reg-only to reg/mem so it can participate in such transformations.

This gives a code size reduction of 0.6% for perf/bz2 on x86 memcheck,
but tends to be more effective for long blocks of x86 FP code.



git-svn-id: svn://svn.valgrind.org/vex/trunk@1779 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/main/vex_main.c b/priv/main/vex_main.c
index 108720c..9fabf70 100644
--- a/priv/main/vex_main.c
+++ b/priv/main/vex_main.c
@@ -186,17 +186,18 @@
       from the target instruction set. */
    HReg* available_real_regs;
    Int   n_available_real_regs;
-   Bool         (*isMove)      ( HInstr*, HReg*, HReg* );
-   void         (*getRegUsage) ( HRegUsage*, HInstr*, Bool );
-   void         (*mapRegs)     ( HRegRemap*, HInstr*, Bool );
-   HInstr*      (*genSpill)    ( HReg, Int, Bool );
-   HInstr*      (*genReload)   ( HReg, Int, Bool );
-   void         (*ppInstr)     ( HInstr*, Bool );
-   void         (*ppReg)       ( HReg );
-   HInstrArray* (*iselSB)      ( IRSB*, VexArch, VexArchInfo*, 
-                                                 VexAbiInfo* );
-   Int          (*emit)        ( UChar*, Int, HInstr*, Bool, void* );
-   IRExpr*      (*specHelper)  ( HChar*, IRExpr** );
+   Bool         (*isMove)       ( HInstr*, HReg*, HReg* );
+   void         (*getRegUsage)  ( HRegUsage*, HInstr*, Bool );
+   void         (*mapRegs)      ( HRegRemap*, HInstr*, Bool );
+   HInstr*      (*genSpill)     ( HReg, Int, Bool );
+   HInstr*      (*genReload)    ( HReg, Int, Bool );
+   HInstr*      (*directReload) ( HInstr*, HReg, Short );
+   void         (*ppInstr)      ( HInstr*, Bool );
+   void         (*ppReg)        ( HReg );
+   HInstrArray* (*iselSB)       ( IRSB*, VexArch, VexArchInfo*, 
+                                                  VexAbiInfo* );
+   Int          (*emit)         ( UChar*, Int, HInstr*, Bool, void* );
+   IRExpr*      (*specHelper)   ( HChar*, IRExpr** );
    Bool         (*preciseMemExnsFn) ( Int, Int );
 
    DisOneInstrFn disInstrFn;
@@ -221,6 +222,7 @@
    mapRegs                = NULL;
    genSpill               = NULL;
    genReload              = NULL;
+   directReload           = NULL;
    ppInstr                = NULL;
    ppReg                  = NULL;
    iselSB                 = NULL;
@@ -246,18 +248,19 @@
    switch (vta->arch_host) {
 
       case VexArchX86:
-         mode64      = False;
+         mode64       = False;
          getAllocableRegs_X86 ( &n_available_real_regs,
                                 &available_real_regs );
-         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_X86Instr;
-         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_X86Instr;
-         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_X86Instr;
-         genSpill    = (HInstr*(*)(HReg,Int, Bool)) genSpill_X86;
-         genReload   = (HInstr*(*)(HReg,Int, Bool)) genReload_X86;
-         ppInstr     = (void(*)(HInstr*, Bool)) ppX86Instr;
-         ppReg       = (void(*)(HReg)) ppHRegX86;
-         iselSB      = iselSB_X86;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
+         isMove       = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_X86Instr;
+         getRegUsage  = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_X86Instr;
+         mapRegs      = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_X86Instr;
+         genSpill     = (HInstr*(*)(HReg,Int, Bool)) genSpill_X86;
+         genReload    = (HInstr*(*)(HReg,Int, Bool)) genReload_X86;
+         directReload = (HInstr*(*)(HInstr*,HReg,Short)) directReload_X86;
+         ppInstr      = (void(*)(HInstr*, Bool)) ppX86Instr;
+         ppReg        = (void(*)(HReg)) ppHRegX86;
+         iselSB       = iselSB_X86;
+         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
          host_is_bigendian = False;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
@@ -581,7 +584,8 @@
    rcode = doRegisterAllocation ( vcode, available_real_regs,
                                   n_available_real_regs,
                                   isMove, getRegUsage, mapRegs, 
-                                  genSpill, genReload, guest_sizeB,
+                                  genSpill, genReload, directReload, 
+                                  guest_sizeB,
                                   ppInstr, ppReg, mode64 );
 
    vexAllocSanityCheck();