Track Vex API change (r1239: introduction of endianness indications in
IR loads and stores).



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@4072 a5019735-40e9-0310-863c-91ae7b9d1cf9
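
In outline: Vex r1239 replaces the little-endian-only IR forms Iex_LDle
and Ist_STle with endianness-parameterised Iex_Load and Ist_Store, so
every load and store now carries an IREndness tag (Iend_LE or Iend_BE).
A minimal sketch of the shapes tools now match on (constructor
signatures assumed from the field names used in the diff below):

   /* before r1239: little-endian implied */
   IRExpr* ld = IRExpr_LDle ( Ity_I32, addr );
   IRStmt* st = IRStmt_STle ( addr, data );

   /* after r1239: endianness explicit */
   IRExpr* ld2 = IRExpr_Load ( Iend_LE, Ity_I32, addr );
   IRStmt* st2 = IRStmt_Store( Iend_LE, addr, data );

Only the little-endian cases are implemented below; the big-endian
paths panic or assert, awaiting test cases.
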
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index f32a16a..06dd4c9 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -433,22 +433,23 @@
 
    case Ist_Tmp: {
       IRExpr* data = st->Ist.Tmp.data;
-      if (data->tag == Iex_LDle) {
-         IRExpr* aexpr = data->Iex.LDle.addr;
+      if (data->tag == Iex_Load) {
+         IRExpr* aexpr = data->Iex.Load.addr;
          tl_assert( isIRAtom(aexpr) );
-
+         // Note also that the endianness info is ignored, since it
+         // makes no difference to cache behaviour.
          // XXX: repe cmpsb does two loads... the first one is ignored here!
          //tl_assert( NULL == *loadAddrExpr );          // XXX: ???
          *loadAddrExpr = aexpr;
-         *dataSize = sizeofIRType(data->Iex.LDle.ty);
+         *dataSize = sizeofIRType(data->Iex.Load.ty);
       }
       addStmtToIRBB( bbOut, st );
       break;
    }
       
-   case Ist_STle: {
-      IRExpr* data  = st->Ist.STle.data;
-      IRExpr* aexpr = st->Ist.STle.addr;
+   case Ist_Store: {
+      IRExpr* data  = st->Ist.Store.data;
+      IRExpr* aexpr = st->Ist.Store.addr;
       tl_assert( isIRAtom(aexpr) );
       tl_assert( NULL == *storeAddrExpr );          // XXX: ???
       *storeAddrExpr = aexpr;
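
An illustrative aside, not applied by this patch: cachegrind ignores
the new endianness field, which sits on the same union arm and is the
one memcheck consumes below.

   IREndness end = data->Iex.Load.end;   /* Iend_LE or Iend_BE */
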
diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h
index 2d1c460..e27418c 100644
--- a/memcheck/mc_include.h
+++ b/memcheck/mc_include.h
@@ -60,15 +60,15 @@
 extern void MC_(helperc_value_check1_fail) ( void );
 extern void MC_(helperc_value_check0_fail) ( void );
 
-extern VG_REGPARM(1) void MC_(helperc_STOREV8) ( Addr, ULong );
-extern VG_REGPARM(2) void MC_(helperc_STOREV4) ( Addr, UWord );
-extern VG_REGPARM(2) void MC_(helperc_STOREV2) ( Addr, UWord );
-extern VG_REGPARM(2) void MC_(helperc_STOREV1) ( Addr, UWord );
+extern VG_REGPARM(1) void MC_(helperc_STOREV8le) ( Addr, ULong );
+extern VG_REGPARM(2) void MC_(helperc_STOREV4le) ( Addr, UWord );
+extern VG_REGPARM(2) void MC_(helperc_STOREV2le) ( Addr, UWord );
+extern VG_REGPARM(2) void MC_(helperc_STOREV1le) ( Addr, UWord );
 
-extern VG_REGPARM(1) UWord MC_(helperc_LOADV1)  ( Addr );
-extern VG_REGPARM(1) UWord MC_(helperc_LOADV2)  ( Addr );
-extern VG_REGPARM(1) UWord MC_(helperc_LOADV4)  ( Addr );
-extern VG_REGPARM(1) ULong MC_(helperc_LOADV8)  ( Addr );
+extern VG_REGPARM(1) UWord MC_(helperc_LOADV1le)  ( Addr );
+extern VG_REGPARM(1) UWord MC_(helperc_LOADV2le)  ( Addr );
+extern VG_REGPARM(1) UWord MC_(helperc_LOADV4le)  ( Addr );
+extern VG_REGPARM(1) ULong MC_(helperc_LOADV8le)  ( Addr );
 
 extern void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
 
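
These helpers are reached via dirty calls from instrumented code; a
sketch of the call-site shape, mirroring the mc_translate.c sites
below:

   /* sketch: emit a call to the 4-byte LE store helper */
   di = unsafeIRDirty_0_N( 2/*regparms*/, "MC_(helperc_STOREV4le)",
                           &MC_(helperc_STOREV4le),
                           mkIRExprVec_2( addrAct, vdata ) );

The 8-byte variants stay at 1 regparm, presumably because a ULong
cannot be passed in a single x86 register.
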
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index a2e2e40..fa3dbe6 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1473,9 +1473,9 @@
 /* ------------------------ Size = 8 ------------------------ */
 
 VG_REGPARM(1)
-ULong MC_(helperc_LOADV8) ( Addr aA )
+ULong MC_(helperc_LOADV8le) ( Addr aA )
 {
-   PROF_EVENT(200, "helperc_LOADV8");
+   PROF_EVENT(200, "helperc_LOADV8le");
 
 #  if VG_DEBUG_MEMORY >= 2
    return mc_LOADVn_slow( aA, 8, False/*littleendian*/ );
@@ -1488,7 +1488,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(201, "helperc_LOADV8-slow1");
+      PROF_EVENT(201, "helperc_LOADV8le-slow1");
       return (UWord)mc_LOADVn_slow( aA, 8, False/*littleendian*/ );
    }
 
@@ -1509,7 +1509,7 @@
       return ((ULong*)(sm->vbyte))[ v_off >> 3 ];
    } else {
       /* Slow but general case. */
-      PROF_EVENT(202, "helperc_LOADV8-slow2");
+      PROF_EVENT(202, "helperc_LOADV8le-slow2");
       return mc_LOADVn_slow( a, 8, False/*littleendian*/ );
    }
 
@@ -1517,9 +1517,9 @@
 }
 
 VG_REGPARM(1)
-void MC_(helperc_STOREV8) ( Addr aA, ULong vbytes )
+void MC_(helperc_STOREV8le) ( Addr aA, ULong vbytes )
 {
-   PROF_EVENT(210, "helperc_STOREV8");
+   PROF_EVENT(210, "helperc_STOREV8le");
 
 #  if VG_DEBUG_MEMORY >= 2
    mc_STOREVn_slow( aA, 8, vbytes, False/*littleendian*/ );
@@ -1532,7 +1532,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(211, "helperc_STOREV8-slow1");
+      PROF_EVENT(211, "helperc_STOREV8le-slow1");
       mc_STOREVn_slow( aA, 8, vbytes, False/*littleendian*/ );
       return;
    }
@@ -1555,7 +1555,7 @@
       ((ULong*)(sm->vbyte))[ v_off >> 3 ] = vbytes;
    } else {
       /* Slow but general case. */
-      PROF_EVENT(212, "helperc_STOREV8-slow2");
+      PROF_EVENT(212, "helperc_STOREV8le-slow2");
       mc_STOREVn_slow( aA, 8, vbytes, False/*littleendian*/ );
    }
 #  endif
@@ -1564,9 +1564,9 @@
 /* ------------------------ Size = 4 ------------------------ */
 
 VG_REGPARM(1)
-UWord MC_(helperc_LOADV4) ( Addr aA )
+UWord MC_(helperc_LOADV4le) ( Addr aA )
 {
-   PROF_EVENT(220, "helperc_LOADV4");
+   PROF_EVENT(220, "helperc_LOADV4le");
 
 #  if VG_DEBUG_MEMORY >= 2
    return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
@@ -1579,7 +1579,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(221, "helperc_LOADV4-slow1");
+      PROF_EVENT(221, "helperc_LOADV4le-slow1");
       return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
    }
 
@@ -1607,7 +1607,7 @@
       return ret;
    } else {
       /* Slow but general case. */
-      PROF_EVENT(222, "helperc_LOADV4-slow2");
+      PROF_EVENT(222, "helperc_LOADV4le-slow2");
       return (UWord)mc_LOADVn_slow( a, 4, False/*littleendian*/ );
    }
 
@@ -1615,9 +1615,9 @@
 }
 
 VG_REGPARM(2)
-void MC_(helperc_STOREV4) ( Addr aA, UWord vbytes )
+void MC_(helperc_STOREV4le) ( Addr aA, UWord vbytes )
 {
-   PROF_EVENT(230, "helperc_STOREV4");
+   PROF_EVENT(230, "helperc_STOREV4le");
 
 #  if VG_DEBUG_MEMORY >= 2
    mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
@@ -1630,7 +1630,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(231, "helperc_STOREV4-slow1");
+      PROF_EVENT(231, "helperc_STOREV4le-slow1");
       mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
       return;
    }
@@ -1654,7 +1654,7 @@
       ((UInt*)(sm->vbyte))[ v_off >> 2 ] = (UInt)vbytes;
    } else {
       /* Slow but general case. */
-      PROF_EVENT(232, "helperc_STOREV4-slow2");
+      PROF_EVENT(232, "helperc_STOREV4le-slow2");
       mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
    }
 #  endif
@@ -1663,9 +1663,9 @@
 /* ------------------------ Size = 2 ------------------------ */
 
 VG_REGPARM(1)
-UWord MC_(helperc_LOADV2) ( Addr aA )
+UWord MC_(helperc_LOADV2le) ( Addr aA )
 {
-   PROF_EVENT(240, "helperc_LOADV2");
+   PROF_EVENT(240, "helperc_LOADV2le");
 
 #  if VG_DEBUG_MEMORY >= 2
    return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
@@ -1678,7 +1678,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(241, "helperc_LOADV2-slow1");
+      PROF_EVENT(241, "helperc_LOADV2le-slow1");
       return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
    }
 
@@ -1703,7 +1703,7 @@
              (UWord)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
    } else {
       /* Slow but general case. */
-      PROF_EVENT(242, "helperc_LOADV2-slow2");
+      PROF_EVENT(242, "helperc_LOADV2le-slow2");
       return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
    }
 
@@ -1711,9 +1711,9 @@
 }
 
 VG_REGPARM(2)
-void MC_(helperc_STOREV2) ( Addr aA, UWord vbytes )
+void MC_(helperc_STOREV2le) ( Addr aA, UWord vbytes )
 {
-   PROF_EVENT(250, "helperc_STOREV2");
+   PROF_EVENT(250, "helperc_STOREV2le");
 
 #  if VG_DEBUG_MEMORY >= 2
    mc_STOREVn_slow( aA, 2, (ULong)vbytes, False/*littleendian*/ );
@@ -1726,7 +1726,7 @@
       naturally aligned, or 'a' exceeds the range covered by the
       primary map.  Either way we defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(251, "helperc_STOREV2-slow1");
+      PROF_EVENT(251, "helperc_STOREV2le-slow1");
       mc_STOREVn_slow( aA, 2, (ULong)vbytes, False/*littleendian*/ );
       return;
    }
@@ -1747,7 +1747,7 @@
       ((UShort*)(sm->vbyte))[ v_off >> 1 ] = (UShort)vbytes;
    } else {
       /* Slow but general case. */
-      PROF_EVENT(252, "helperc_STOREV2-slow2");
+      PROF_EVENT(252, "helperc_STOREV2le-slow2");
       mc_STOREVn_slow( aA, 2, (ULong)vbytes, False/*littleendian*/ );
    }
 #  endif
@@ -1756,9 +1756,9 @@
 /* ------------------------ Size = 1 ------------------------ */
 
 VG_REGPARM(1)
-UWord MC_(helperc_LOADV1) ( Addr aA )
+UWord MC_(helperc_LOADV1le) ( Addr aA )
 {
-   PROF_EVENT(260, "helperc_LOADV1");
+   PROF_EVENT(260, "helperc_LOADV1le");
 
 #  if VG_DEBUG_MEMORY >= 2
    return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
@@ -1771,7 +1771,7 @@
       exceeds the range covered by the primary map.  In which case we
       defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(261, "helperc_LOADV1-slow1");
+      PROF_EVENT(261, "helperc_LOADV1le-slow1");
       return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
    }
 
@@ -1796,7 +1796,7 @@
              (UWord)( ((UChar*)(sm->vbyte))[ v_off ] );
    } else {
       /* Slow but general case. */
-      PROF_EVENT(262, "helperc_LOADV1-slow2");
+      PROF_EVENT(262, "helperc_LOADV1le-slow2");
       return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
    }
 #  endif
@@ -1804,9 +1804,9 @@
 
 
 VG_REGPARM(2)
-void MC_(helperc_STOREV1) ( Addr aA, UWord vbyte )
+void MC_(helperc_STOREV1le) ( Addr aA, UWord vbyte )
 {
-   PROF_EVENT(270, "helperc_STOREV1");
+   PROF_EVENT(270, "helperc_STOREV1le");
 
 #  if VG_DEBUG_MEMORY >= 2
    mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
@@ -1818,7 +1818,7 @@
       exceeds the range covered by the primary map.  In which case we
       defer to the slow-path case. */
    if (EXPECTED_NOT_TAKEN(a & mask)) {
-      PROF_EVENT(271, "helperc_STOREV1-slow1");
+      PROF_EVENT(271, "helperc_STOREV1le-slow1");
       mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
       return;
    }
@@ -1839,7 +1839,7 @@
          lives in is addressible. */
       ((UChar*)(sm->vbyte))[ v_off ] = (UChar)vbyte;
    } else {
-      PROF_EVENT(272, "helperc_STOREV1-slow2");
+      PROF_EVENT(272, "helperc_STOREV1le-slow2");
       mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
    }
 
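
The 'le' suffix leaves room for big-endian counterparts.  A
hypothetical sketch (not in this patch) of such a helper, differing
from its LE twin only in the flag handed to the generic slow path:

   VG_REGPARM(1)
   UWord MC_(helperc_LOADV4be) ( Addr aA )
   {
      /* no fast path sketched; defer to the generic routine */
      return (UWord)mc_LOADVn_slow( aA, 4, True/*bigendian*/ );
   }
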
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index ce165bf..c87bdca 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -1936,7 +1936,9 @@
 
 /* Worker function; do not call directly. */
 static
-IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
+IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, 
+                              IREndness end, IRType ty, 
+                              IRAtom* addr, UInt bias )
 {
    void*    helper;
    Char*    hname;
@@ -1945,6 +1947,7 @@
    IRAtom*  addrAct;
 
    tl_assert(isOriginalAtom(mce,addr));
+   tl_assert(end == Iend_LE || end == Iend_BE);
 
    /* First, emit a definedness test for the address.  This also sets
       the address (shadow) to 'defined' following the test. */
@@ -1953,21 +1956,26 @@
    /* Now cook up a call to the relevant helper function, to read the
       data V bits from shadow memory. */
    ty = shadowType(ty);
-   switch (ty) {
-      case Ity_I64: helper = &MC_(helperc_LOADV8);
-                    hname = "MC_(helperc_LOADV8)";
-                    break;
-      case Ity_I32: helper = &MC_(helperc_LOADV4);
-                    hname = "MC_(helperc_LOADV4)";
-                    break;
-      case Ity_I16: helper = &MC_(helperc_LOADV2);
-                    hname = "MC_(helperc_LOADV2)";
-                    break;
-      case Ity_I8:  helper = &MC_(helperc_LOADV1);
-                    hname = "MC_(helperc_LOADV1)";
-                    break;
-      default:      ppIRType(ty);
-                    VG_(tool_panic)("memcheck:do_shadow_LDle");
+
+   if (end == Iend_LE) {   
+      switch (ty) {
+         case Ity_I64: helper = &MC_(helperc_LOADV8le);
+                       hname = "MC_(helperc_LOADV8le)";
+                       break;
+         case Ity_I32: helper = &MC_(helperc_LOADV4le);
+                       hname = "MC_(helperc_LOADV4le)";
+                       break;
+         case Ity_I16: helper = &MC_(helperc_LOADV2le);
+                       hname = "MC_(helperc_LOADV2le)";
+                       break;
+         case Ity_I8:  helper = &MC_(helperc_LOADV1le);
+                       hname = "MC_(helperc_LOADV1le)";
+                       break;
+         default:      ppIRType(ty);
+                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
+      }
+   } else {
+      VG_(tool_panic)("memcheck:do_shadow_Load(BE):bigendian not implemented");
    }
 
    /* Generate the actual address into addrAct. */
@@ -1997,23 +2005,32 @@
 
 
 static
-IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
+IRAtom* expr2vbits_Load ( MCEnv* mce, 
+                          IREndness end, IRType ty, 
+                          IRAtom* addr, UInt bias )
 {
    IRAtom *v64hi, *v64lo;
+   tl_assert(end == Iend_LE || end == Iend_BE);
    switch (shadowType(ty)) {
       case Ity_I8: 
       case Ity_I16: 
       case Ity_I32: 
       case Ity_I64:
-         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
+         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
       case Ity_V128:
-         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
-         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
+         if (end == Iend_LE) {
+            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
+            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
+         } else {
+            tl_assert(0 /* awaiting test case */);
+            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
+            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
+         }
          return assignNew( mce, 
                            Ity_V128, 
                            binop(Iop_64HLtoV128, v64hi, v64lo));
       default:
-         VG_(tool_panic)("expr2vbits_LDle");
+         VG_(tool_panic)("expr2vbits_Load");
    }
 }
 
@@ -2073,9 +2090,10 @@
       case Iex_Unop:
          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
 
-      case Iex_LDle:
-         return expr2vbits_LDle( mce, e->Iex.LDle.ty, 
-                                      e->Iex.LDle.addr, 0/*addr bias*/ );
+      case Iex_Load:
+         return expr2vbits_Load( mce, e->Iex.Load.end,
+                                      e->Iex.Load.ty, 
+                                      e->Iex.Load.addr, 0/*addr bias*/ );
 
       case Iex_CCall:
          return mkLazyN( mce, e->Iex.CCall.args, 
@@ -2142,25 +2160,27 @@
    obviously not both.  */
 
 static 
-void do_shadow_STle ( MCEnv* mce, 
-                      IRAtom* addr, UInt bias,
-                      IRAtom* data, IRAtom* vdata )
+void do_shadow_Store ( MCEnv* mce, 
+                       IREndness end,
+                       IRAtom* addr, UInt bias,
+                       IRAtom* data, IRAtom* vdata )
 {
    IROp     mkAdd;
    IRType   ty, tyAddr;
    IRDirty  *di, *diLo64, *diHi64;
    IRAtom   *addrAct, *addrLo64, *addrHi64;
    IRAtom   *vdataLo64, *vdataHi64;
-   IRAtom   *eBias, *eBias0, *eBias8;
+   IRAtom   *eBias, *eBiasLo64, *eBiasHi64;
    void*    helper = NULL;
    Char*    hname = NULL;
 
    tyAddr = mce->hWordTy;
    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
+   tl_assert( end == Iend_LE || end == Iend_BE );
 
    di = diLo64 = diHi64 = NULL;
-   eBias = eBias0 = eBias8 = NULL;
+   eBias = eBiasLo64 = eBiasHi64 = NULL;
    addrAct = addrLo64 = addrHi64 = NULL;
    vdataLo64 = vdataHi64 = NULL;
 
@@ -2184,36 +2204,52 @@
 
    /* Now decide which helper function to call to write the data V
       bits into shadow memory. */
-   switch (ty) {
-      case Ity_V128: /* we'll use the helper twice */
-      case Ity_I64: helper = &MC_(helperc_STOREV8);
-                    hname = "MC_(helperc_STOREV8)";
-                    break;
-      case Ity_I32: helper = &MC_(helperc_STOREV4);
-                    hname = "MC_(helperc_STOREV4)";
-                    break;
-      case Ity_I16: helper = &MC_(helperc_STOREV2);
-                    hname = "MC_(helperc_STOREV2)";
-                    break;
-      case Ity_I8:  helper = &MC_(helperc_STOREV1);
-                    hname = "MC_(helperc_STOREV1)";
-                    break;
-      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
+   if (end == Iend_LE) {
+      switch (ty) {
+         case Ity_V128: /* we'll use the helper twice */
+         case Ity_I64: helper = &MC_(helperc_STOREV8le);
+                       hname = "MC_(helperc_STOREV8le)";
+                       break;
+         case Ity_I32: helper = &MC_(helperc_STOREV4le);
+                       hname = "MC_(helperc_STOREV4le)";
+                       break;
+         case Ity_I16: helper = &MC_(helperc_STOREV2le);
+                       hname = "MC_(helperc_STOREV2le)";
+                       break;
+         case Ity_I8:  helper = &MC_(helperc_STOREV1le);
+                       hname = "MC_(helperc_STOREV1le)";
+                       break;
+         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
+      }
+   } else {
+      VG_(tool_panic)("memcheck:do_shadow_Store(BE):bigendian not implemented");
    }
 
    if (ty == Ity_V128) {
 
       /* V128-bit case */
       /* See comment in next clause re 64-bit regparms */
-      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
-      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
+      /* also, need to be careful about endianness */
+
+      Int offLo64, offHi64;
+      if (end == Iend_LE) {
+         offLo64 = 0;
+         offHi64 = 8;
+      } else {
+         tl_assert(0 /* awaiting test case */);
+         offLo64 = 8;
+         offHi64 = 0;
+      }
+
+      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
+      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
       vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
       diLo64    = unsafeIRDirty_0_N( 
                      1/*regparms*/, hname, helper, 
                      mkIRExprVec_2( addrLo64, vdataLo64 ));
 
-      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
-      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
+      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
+      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
       vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
       diHi64    = unsafeIRDirty_0_N( 
                      1/*regparms*/, hname, helper, 
@@ -2273,10 +2309,20 @@
 static
 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
 {
-   Int     i, n, offset, toDo, gSz, gOff;
-   IRAtom  *src, *here, *curr;
-   IRType  tyAddr, tySrc, tyDst;
-   IRTemp  dst;
+   Int       i, n, offset, toDo, gSz, gOff;
+   IRAtom    *src, *here, *curr;
+   IRType    tyAddr, tySrc, tyDst;
+   IRTemp    dst;
+   IREndness end;
+
+   /* What's the native endianness?  We need to know this. */
+#  if defined(VKI_BIG_ENDIAN)
+   end = Iend_BE;
+#  elif defined(VKI_LITTLE_ENDIAN)
+   end = Iend_LE;
+#  else
+#    error "Unknown endianness"
+#  endif
 
    /* First check the guard. */
    complainIfUndefined(mce, d->guard);
@@ -2351,11 +2397,14 @@
    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
       offset = 0;
       toDo   = d->mSize;
-      /* chew off 32-bit chunks */
+      /* chew off 32-bit chunks.  The endianness doesn't matter here,
+         since everything is condensed down to a single definedness
+         bit; nevertheless, choose the endianness native to the
+         platform for these loads. */
       while (toDo >= 4) {
          here = mkPCastTo( 
                    mce, Ity_I32,
-                   expr2vbits_LDle ( mce, Ity_I32, 
+                   expr2vbits_Load ( mce, end, Ity_I32, 
                                      d->mAddr, d->mSize - toDo )
                 );
          curr = mkUifU32(mce, here, curr);
@@ -2365,7 +2414,7 @@
       while (toDo >= 2) {
          here = mkPCastTo( 
                    mce, Ity_I32,
-                   expr2vbits_LDle ( mce, Ity_I16, 
+                   expr2vbits_Load ( mce, end, Ity_I16, 
                                      d->mAddr, d->mSize - toDo )
                 );
          curr = mkUifU32(mce, here, curr);
@@ -2413,22 +2462,23 @@
       }
    }
 
-   /* Outputs: memory that we write or modify. */
+   /* Outputs: memory that we write or modify.  Same comments about
+      endianness as above apply. */
    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
       offset = 0;
       toDo   = d->mSize;
       /* chew off 32-bit chunks */
       while (toDo >= 4) {
-         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
-                         NULL, /* original data */
-                         mkPCastTo( mce, Ity_I32, curr ) );
+         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
+                          NULL, /* original data */
+                          mkPCastTo( mce, Ity_I32, curr ) );
          toDo -= 4;
       }
       /* chew off 16-bit chunks */
       while (toDo >= 2) {
-         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
-                         NULL, /* original data */
-                         mkPCastTo( mce, Ity_I16, curr ) );
+         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
+                          NULL, /* original data */
+                          mkPCastTo( mce, Ity_I16, curr ) );
          toDo -= 2;
       }
       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
@@ -2517,8 +2567,8 @@
                return isBogusAtom(e->Iex.Mux0X.cond)
                       || isBogusAtom(e->Iex.Mux0X.expr0)
                       || isBogusAtom(e->Iex.Mux0X.exprX);
-            case Iex_LDle: 
-               return isBogusAtom(e->Iex.LDle.addr);
+            case Iex_Load: 
+               return isBogusAtom(e->Iex.Load.addr);
             case Iex_CCall:
                for (i = 0; e->Iex.CCall.args[i]; i++)
                   if (isBogusAtom(e->Iex.CCall.args[i]))
@@ -2542,9 +2592,9 @@
       case Ist_PutI:
          return isBogusAtom(st->Ist.PutI.ix) 
                 || isBogusAtom(st->Ist.PutI.data);
-      case Ist_STle:
-         return isBogusAtom(st->Ist.STle.addr) 
-                || isBogusAtom(st->Ist.STle.data);
+      case Ist_Store:
+         return isBogusAtom(st->Ist.Store.addr) 
+                || isBogusAtom(st->Ist.Store.data);
       case Ist_Exit:
          return isBogusAtom(st->Ist.Exit.guard);
       case Ist_AbiHint:
@@ -2649,10 +2699,11 @@
                             st->Ist.PutI.data );
             break;
 
-         case Ist_STle:
-            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
-                                  st->Ist.STle.data,
-                                  NULL /* shadow data */ );
+         case Ist_Store:
+            do_shadow_Store( &mce, st->Ist.Store.end,
+                                   st->Ist.Store.addr, 0/* addr bias */,
+                                   st->Ist.Store.data,
+                                   NULL /* shadow data */ );
             break;
 
          case Ist_Exit:
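
A worked example (illustrative) of the V128 offset selection in
do_shadow_Store above: a 128-bit store at address A writes the low 64
bits at A+offLo64 and the high 64 bits at A+offHi64, so

   /* LE: lo half first in memory; BE: hi half first */
   offLo64 = (end == Iend_LE) ? 0 : 8;
   offHi64 = (end == Iend_LE) ? 8 : 0;

which is exactly the (0,8) / (8,0) choice made before the two
unsafeIRDirty_0_N calls, and the mirror of the load-side split in
expr2vbits_Load.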