Subzero: Fix over-aggressive bool folding.

The problem is that bitcode like this:

  %cond = cmp %var, [mem]
  store ..., mem
  br cond, target1, target2

would be bool-folded into this:

  //deleted cmp
  store ..., mem
  br (%var==[mem]), target1, target2

And if the memory operands point to the same location, results are incorrect.

In addition to stores, this is a problem for RMW instructions, and most call instructions which could perform stores before returning.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4370
R=eholk@chromium.org, jpp@chromium.org

Review URL: https://codereview.chromium.org/1904233002 .
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 6eb8058..ff577b7 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -189,6 +189,10 @@
   llvm::report_fatal_error("Failed to find splice operand");
 }
 
+bool Inst::isMemoryWrite() const {
+  llvm::report_fatal_error("Attempt to call base Inst::isMemoryWrite() method");
+}
+
 void Inst::livenessLightweight(Cfg *Func, LivenessBV &Live) {
   assert(!isDeleted());
   resetLastUses();
diff --git a/src/IceInst.h b/src/IceInst.h
index ad62e09..f21aec8 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -137,6 +137,13 @@
   /// "var_dest=var_src" assignment where the dest and src are both variables.
   virtual bool isVarAssign() const { return false; }
 
+  /// Returns true if the instruction has a possible side effect of changing
+  /// memory, in which case a memory load should not be reordered with respect
+  /// to this instruction.  It should really be pure virtual, but we can't
+  /// because of g++ and llvm::ilist<>, so we implement it as
+  /// report_fatal_error().
+  virtual bool isMemoryWrite() const;
+
   void livenessLightweight(Cfg *Func, LivenessBV &Live);
   /// Calculates liveness for this instruction. Returns true if this instruction
   /// is (tentatively) still live and should be retained, and false if this
@@ -261,6 +268,7 @@
   Operand *getSizeInBytes() const { return getSrc(0); }
   bool getKnownFrameOffset() const { return KnownFrameOffset; }
   void setKnownFrameOffset() { KnownFrameOffset = true; }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Alloca; }
 
@@ -298,6 +306,7 @@
 
   static const char *getOpName(OpKind Op);
   bool isCommutative() const;
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == Arithmetic;
@@ -325,6 +334,7 @@
     return new (Func->allocate<InstAssign>()) InstAssign(Func, Dest, Source);
   }
   bool isVarAssign() const override;
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Assign; }
 
@@ -365,6 +375,7 @@
   NodeList getTerminatorEdges() const override;
   bool isUnconditionalBranch() const override { return isUnconditional(); }
   bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Br; }
 
@@ -404,6 +415,7 @@
   SizeT getNumArgs() const { return getSrcSize() - 1; }
   bool isTailcall() const { return HasTailCall; }
   bool isTargetHelperCall() const { return IsTargetHelperCall; }
+  bool isMemoryWrite() const override { return true; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Call; }
   Type getReturnType() const;
@@ -445,6 +457,7 @@
         InstCast(Func, CastKind, Dest, Source);
   }
   OpKind getCastKind() const { return CastKind; }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Cast; }
 
@@ -467,6 +480,7 @@
         InstExtractElement(Func, Dest, Source1, Source2);
   }
 
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == ExtractElement;
@@ -498,6 +512,7 @@
         InstFcmp(Func, Condition, Dest, Source1, Source2);
   }
   FCond getCondition() const { return Condition; }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Fcmp; }
 
@@ -529,6 +544,7 @@
         InstIcmp(Func, Condition, Dest, Source1, Source2);
   }
   ICond getCondition() const { return Condition; }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Icmp; }
 
@@ -552,6 +568,7 @@
         InstInsertElement(Func, Dest, Source1, Source2, Source3);
   }
 
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == InsertElement;
@@ -581,6 +598,9 @@
   }
 
   Intrinsics::IntrinsicInfo getIntrinsicInfo() const { return Info; }
+  bool isMemoryWrite() const override {
+    return getIntrinsicInfo().IsMemoryWrite;
+  }
 
 private:
   InstIntrinsicCall(Cfg *Func, SizeT NumArgs, Variable *Dest,
@@ -606,6 +626,7 @@
     return new (Func->allocate<InstLoad>()) InstLoad(Func, Dest, SourceAddr);
   }
   Operand *getSourceAddress() const { return getSrc(0); }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Load; }
 
@@ -632,6 +653,7 @@
   void livenessPhiOperand(LivenessBV &Live, CfgNode *Target,
                           Liveness *Liveness);
   Inst *lower(Cfg *Func);
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Phi; }
 
@@ -666,6 +688,7 @@
     return getSrc(0);
   }
   NodeList getTerminatorEdges() const override { return NodeList(); }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Ret; }
 
@@ -688,6 +711,7 @@
   Operand *getCondition() const { return getSrc(0); }
   Operand *getTrueOperand() const { return getSrc(1); }
   Operand *getFalseOperand() const { return getSrc(2); }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Select; }
 
@@ -714,6 +738,7 @@
   Operand *getData() const { return getSrc(0); }
   Variable *getRmwBeacon() const;
   void setRmwBeacon(Variable *Beacon);
+  bool isMemoryWrite() const override { return true; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Store; }
 
@@ -747,6 +772,7 @@
   void addBranch(SizeT CaseIndex, uint64_t Value, CfgNode *Label);
   NodeList getTerminatorEdges() const override;
   bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == Switch; }
 
@@ -775,6 +801,7 @@
     return new (Func->allocate<InstUnreachable>()) InstUnreachable(Func);
   }
   NodeList getTerminatorEdges() const override { return NodeList(); }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == Unreachable;
@@ -799,6 +826,7 @@
   }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg * /* Func */) const override {}
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   Option getOption() const { return BundleOption; }
   static bool classof(const Inst *Instr) {
@@ -822,6 +850,7 @@
   }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg * /* Func */) const override {}
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == BundleUnlock;
@@ -854,6 +883,7 @@
   }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg * /* Func */) const override {}
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == FakeDef; }
 
@@ -879,6 +909,7 @@
   }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg * /* Func */) const override {}
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() == FakeUse; }
 
@@ -907,6 +938,7 @@
   const Inst *getLinked() const { return Linked; }
   void emit(const Cfg *Func) const override;
   void emitIAS(const Cfg * /* Func */) const override {}
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == FakeKill;
@@ -989,6 +1021,7 @@
     assert(I < NumTargets);
     return Targets[I];
   }
+  bool isMemoryWrite() const override { return false; }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) {
     return Instr->getKind() == JumpTable;
@@ -1036,7 +1069,8 @@
   InstBreakpoint(const InstBreakpoint &) = delete;
   InstBreakpoint &operator=(const InstBreakpoint &) = delete;
 
-  InstBreakpoint(Cfg *Func);
+  explicit InstBreakpoint(Cfg *Func);
+  bool isMemoryWrite() const override { return false; }
 
 public:
   static InstBreakpoint *create(Cfg *Func) {
@@ -1057,6 +1091,9 @@
 
 public:
   uint32_t getEmitInstCount() const override { return 1; }
+  bool isMemoryWrite() const override {
+    return true; // conservative answer
+  }
   void dump(const Cfg *Func) const override;
   static bool classof(const Inst *Instr) { return Instr->getKind() >= Target; }
 
diff --git a/src/IceIntrinsics.cpp b/src/IceIntrinsics.cpp
index 4555b21..cf3c976 100644
--- a/src/IceIntrinsics.cpp
+++ b/src/IceIntrinsics.cpp
@@ -31,8 +31,8 @@
 
 namespace {
 
-#define INTRIN(ID, SE, RT)                                                     \
-  { Intrinsics::ID, Intrinsics::SE, Intrinsics::RT }
+#define INTRIN(ID, SE, RT, MW)                                                 \
+  { Intrinsics::ID, Intrinsics::SE, Intrinsics::RT, Intrinsics::MW }
 
 // Build list of intrinsics with their attributes and expected prototypes. List
 // is sorted alphabetically.
@@ -41,18 +41,14 @@
   const char *IntrinsicName;
 } IceIntrinsicsTable[] = {
 
-#define AtomicCmpxchgInit(Overload, NameSuffix)                                \
-  {                                                                            \
-    {                                                                          \
-      INTRIN(AtomicCmpxchg, SideEffects_T, ReturnsTwice_F), {Overload,         \
-                                                             IceType_i32,      \
-                                                             Overload,         \
-                                                             Overload,         \
-                                                             IceType_i32,      \
-                                                             IceType_i32},     \
-          6                                                                    \
-    }                                                                          \
-    , "llvm.nacl.atomic.cmpxchg." NameSuffix                                   \
+#define AtomicCmpxchgInit(Overload, NameSuffix)                                  \
+  {                                                                              \
+    {                                                                            \
+      INTRIN(AtomicCmpxchg, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),       \
+          {Overload, IceType_i32, Overload, Overload, IceType_i32, IceType_i32}, \
+          6                                                                      \
+    }                                                                            \
+    , "llvm.nacl.atomic.cmpxchg." NameSuffix                                     \
   }
     AtomicCmpxchgInit(IceType_i8, "i8"),
     AtomicCmpxchgInit(IceType_i16, "i16"),
@@ -60,13 +56,15 @@
     AtomicCmpxchgInit(IceType_i64, "i64"),
 #undef AtomicCmpxchgInit
 
-    {{INTRIN(AtomicFence, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(AtomicFence, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),
       {IceType_void, IceType_i32},
       2},
      "llvm.nacl.atomic.fence"},
-    {{INTRIN(AtomicFenceAll, SideEffects_T, ReturnsTwice_F), {IceType_void}, 1},
+    {{INTRIN(AtomicFenceAll, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),
+      {IceType_void},
+      1},
      "llvm.nacl.atomic.fence.all"},
-    {{INTRIN(AtomicIsLockFree, SideEffects_F, ReturnsTwice_F),
+    {{INTRIN(AtomicIsLockFree, SideEffects_F, ReturnsTwice_F, MemoryWrite_F),
       {IceType_i1, IceType_i32, IceType_i32},
       3},
      "llvm.nacl.atomic.is.lock.free"},
@@ -74,7 +72,7 @@
 #define AtomicLoadInit(Overload, NameSuffix)                                   \
   {                                                                            \
     {                                                                          \
-      INTRIN(AtomicLoad, SideEffects_T, ReturnsTwice_F),                       \
+      INTRIN(AtomicLoad, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),        \
           {Overload, IceType_i32, IceType_i32}, 3                              \
     }                                                                          \
     , "llvm.nacl.atomic.load." NameSuffix                                      \
@@ -88,7 +86,7 @@
 #define AtomicRMWInit(Overload, NameSuffix)                                    \
   {                                                                            \
     {                                                                          \
-      INTRIN(AtomicRMW, SideEffects_T, ReturnsTwice_F)                         \
+      INTRIN(AtomicRMW, SideEffects_T, ReturnsTwice_F, MemoryWrite_T)          \
       , {Overload, IceType_i32, IceType_i32, Overload, IceType_i32}, 5         \
     }                                                                          \
     , "llvm.nacl.atomic.rmw." NameSuffix                                       \
@@ -102,7 +100,7 @@
 #define AtomicStoreInit(Overload, NameSuffix)                                  \
   {                                                                            \
     {                                                                          \
-      INTRIN(AtomicStore, SideEffects_T, ReturnsTwice_F)                       \
+      INTRIN(AtomicStore, SideEffects_T, ReturnsTwice_F, MemoryWrite_T)        \
       , {IceType_void, Overload, IceType_i32, IceType_i32}, 4                  \
     }                                                                          \
     , "llvm.nacl.atomic.store." NameSuffix                                     \
@@ -116,7 +114,7 @@
 #define BswapInit(Overload, NameSuffix)                                        \
   {                                                                            \
     {                                                                          \
-      INTRIN(Bswap, SideEffects_F, ReturnsTwice_F)                             \
+      INTRIN(Bswap, SideEffects_F, ReturnsTwice_F, MemoryWrite_F)              \
       , {Overload, Overload}, 2                                                \
     }                                                                          \
     , "llvm.bswap." NameSuffix                                                 \
@@ -129,7 +127,7 @@
 #define CtlzInit(Overload, NameSuffix)                                         \
   {                                                                            \
     {                                                                          \
-      INTRIN(Ctlz, SideEffects_F, ReturnsTwice_F)                              \
+      INTRIN(Ctlz, SideEffects_F, ReturnsTwice_F, MemoryWrite_F)               \
       , {Overload, Overload, IceType_i1}, 3                                    \
     }                                                                          \
     , "llvm.ctlz." NameSuffix                                                  \
@@ -141,7 +139,7 @@
 #define CtpopInit(Overload, NameSuffix)                                        \
   {                                                                            \
     {                                                                          \
-      INTRIN(Ctpop, SideEffects_F, ReturnsTwice_F)                             \
+      INTRIN(Ctpop, SideEffects_F, ReturnsTwice_F, MemoryWrite_F)              \
       , {Overload, Overload}, 2                                                \
     }                                                                          \
     , "llvm.ctpop." NameSuffix                                                 \
@@ -153,7 +151,7 @@
 #define CttzInit(Overload, NameSuffix)                                         \
   {                                                                            \
     {                                                                          \
-      INTRIN(Cttz, SideEffects_F, ReturnsTwice_F)                              \
+      INTRIN(Cttz, SideEffects_F, ReturnsTwice_F, MemoryWrite_F)               \
       , {Overload, Overload, IceType_i1}, 3                                    \
     }                                                                          \
     , "llvm.cttz." NameSuffix                                                  \
@@ -164,7 +162,10 @@
 
 #define FabsInit(Overload, NameSuffix)                                         \
   {                                                                            \
-    { INTRIN(Fabs, SideEffects_F, ReturnsTwice_F), {Overload, Overload}, 2 }   \
+    {                                                                          \
+      INTRIN(Fabs, SideEffects_F, ReturnsTwice_F, MemoryWrite_F),              \
+          {Overload, Overload}, 2                                              \
+    }                                                                          \
     , "llvm.fabs." NameSuffix                                                  \
   }
     FabsInit(IceType_f32, "f32"),
@@ -172,48 +173,57 @@
     FabsInit(IceType_v4f32, "v4f32"),
 #undef FabsInit
 
-    {{INTRIN(Longjmp, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(Longjmp, SideEffects_T, ReturnsTwice_F, MemoryWrite_F),
       {IceType_void, IceType_i32, IceType_i32},
       3},
      "llvm.nacl.longjmp"},
-    {{INTRIN(Memcpy, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(Memcpy, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),
       {IceType_void, IceType_i32, IceType_i32, IceType_i32, IceType_i32,
        IceType_i1},
       6},
      "llvm.memcpy.p0i8.p0i8.i32"},
-    {{INTRIN(Memmove, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(Memmove, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),
       {IceType_void, IceType_i32, IceType_i32, IceType_i32, IceType_i32,
        IceType_i1},
       6},
      "llvm.memmove.p0i8.p0i8.i32"},
-    {{INTRIN(Memset, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(Memset, SideEffects_T, ReturnsTwice_F, MemoryWrite_T),
       {IceType_void, IceType_i32, IceType_i8, IceType_i32, IceType_i32,
        IceType_i1},
       6},
      "llvm.memset.p0i8.i32"},
-    {{INTRIN(NaClReadTP, SideEffects_F, ReturnsTwice_F), {IceType_i32}, 1},
+    {{INTRIN(NaClReadTP, SideEffects_F, ReturnsTwice_F, MemoryWrite_F),
+      {IceType_i32},
+      1},
      "llvm.nacl.read.tp"},
-    {{INTRIN(Setjmp, SideEffects_T, ReturnsTwice_T),
+    {{INTRIN(Setjmp, SideEffects_T, ReturnsTwice_T, MemoryWrite_T),
       {IceType_i32, IceType_i32},
       2},
      "llvm.nacl.setjmp"},
 
 #define SqrtInit(Overload, NameSuffix)                                         \
   {                                                                            \
-    { INTRIN(Sqrt, SideEffects_F, ReturnsTwice_F), {Overload, Overload}, 2 }   \
+    {                                                                          \
+      INTRIN(Sqrt, SideEffects_F, ReturnsTwice_F, MemoryWrite_F),              \
+          {Overload, Overload}, 2                                              \
+    }                                                                          \
     , "llvm.sqrt." NameSuffix                                                  \
   }
     SqrtInit(IceType_f32, "f32"),
     SqrtInit(IceType_f64, "f64"),
 #undef SqrtInit
 
-    {{INTRIN(Stacksave, SideEffects_T, ReturnsTwice_F), {IceType_i32}, 1},
+    {{INTRIN(Stacksave, SideEffects_T, ReturnsTwice_F, MemoryWrite_F),
+      {IceType_i32},
+      1},
      "llvm.stacksave"},
-    {{INTRIN(Stackrestore, SideEffects_T, ReturnsTwice_F),
+    {{INTRIN(Stackrestore, SideEffects_T, ReturnsTwice_F, MemoryWrite_F),
       {IceType_void, IceType_i32},
       2},
      "llvm.stackrestore"},
-    {{INTRIN(Trap, SideEffects_T, ReturnsTwice_F), {IceType_void}, 1},
+    {{INTRIN(Trap, SideEffects_T, ReturnsTwice_F, MemoryWrite_F),
+      {IceType_void},
+      1},
      "llvm.trap"}};
 const size_t IceIntrinsicsTableSize = llvm::array_lengthof(IceIntrinsicsTable);
 
diff --git a/src/IceIntrinsics.h b/src/IceIntrinsics.h
index aea0bd2..4b2fbc4 100644
--- a/src/IceIntrinsics.h
+++ b/src/IceIntrinsics.h
@@ -106,15 +106,19 @@
 
   enum ReturnsTwice { ReturnsTwice_F = 0, ReturnsTwice_T = 1 };
 
+  enum MemoryWrite { MemoryWrite_F = 0, MemoryWrite_T = 1 };
+
   /// Basic attributes related to each intrinsic, that are relevant to code
   /// generation. Perhaps the attributes representation can be shared with
   /// general function calls, but PNaCl currently strips all attributes from
   /// functions.
   struct IntrinsicInfo {
-    enum IntrinsicID ID : 30;
+    enum IntrinsicID ID : 29;
     enum SideEffects HasSideEffects : 1;
     enum ReturnsTwice ReturnsTwice : 1;
+    enum MemoryWrite IsMemoryWrite : 1;
   };
+  static_assert(sizeof(IntrinsicInfo) == 4, "IntrinsicInfo should be 32 bits");
 
   /// The types of validation values for FullIntrinsicInfo.validateCall.
   enum ValidateCallValue {
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index ebc5a2e..d96c0ca 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -151,6 +151,7 @@
     return Element != Producers.end() && Element->second.Instr != nullptr;
   }
   void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
+  void invalidateProducersOnStore(const Inst *Instr);
   /// Producers maps Variable::Number to a BoolFoldingEntry.
   CfgUnorderedMap<SizeT, BoolFoldingEntry<Traits>> Producers;
 };
@@ -252,10 +253,12 @@
 template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
   Producers.clear();
   for (Inst &Instr : Node->getInsts()) {
+    if (Instr.isDeleted())
+      continue;
+    invalidateProducersOnStore(&Instr);
     // Check whether Instr is a valid producer.
     Variable *Var = Instr.getDest();
-    if (!Instr.isDeleted() // only consider non-deleted instructions
-        && Var             // only instructions with an actual dest var
+    if (Var // only consider instructions with an actual dest var
         && Var->getType() == IceType_i1          // only bool-type dest vars
         && getProducerKind(&Instr) != PK_None) { // white-listed instructions
       Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
@@ -338,6 +341,43 @@
   }
 }
 
+/// If the given instruction has potential memory side effects (e.g. store, rmw,
+/// or a call instruction with potential memory side effects), then we must not
+/// allow a pre-store Producer instruction with memory operands to be folded
+/// into a post-store Consumer instruction.  If this is detected, the Producer
+/// is invalidated.
+///
+/// We use the Producer's IsLiveOut field to determine whether any potential
+/// Consumers come after this store instruction.  The IsLiveOut field is
+/// initialized to true, and BoolFolding::init() sets IsLiveOut to false when it
+/// sees the variable's definitive last use (indicating the variable is not in
+/// the node's live-out set).  Thus if we see here that IsLiveOut is false, we
+/// know that there can be no consumers after the store, and therefore we know
+/// the folding is safe despite the store instruction.
+template <typename Traits>
+void BoolFolding<Traits>::invalidateProducersOnStore(const Inst *Instr) {
+  if (!Instr->isMemoryWrite())
+    return;
+  for (auto &ProducerPair : Producers) {
+    if (!ProducerPair.second.IsLiveOut)
+      continue;
+    Inst *PInst = ProducerPair.second.Instr;
+    if (PInst == nullptr)
+      continue;
+    bool HasMemOperand = false;
+    const SizeT SrcSize = PInst->getSrcSize();
+    for (SizeT I = 0; I < SrcSize; ++I) {
+      if (llvm::isa<typename Traits::X86OperandMem>(PInst->getSrc(I))) {
+        HasMemOperand = true;
+        break;
+      }
+    }
+    if (!HasMemOperand)
+      continue;
+    setInvalid(ProducerPair.first);
+  }
+}
+
 template <typename TraitsType>
 void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
   FoldingInfo.init(Node);
diff --git a/tests_lit/llvm2ice_tests/bool-folding.ll b/tests_lit/llvm2ice_tests/bool-folding.ll
index 6f87128..d0fd53e 100644
--- a/tests_lit/llvm2ice_tests/bool-folding.ll
+++ b/tests_lit/llvm2ice_tests/bool-folding.ll
@@ -269,6 +269,58 @@
 ; ARM32: movne
 ; ARM32: bx lr
 
+; Cmp/branch non-folding due to load folding and intervening store.
+define internal i32 @no_fold_cmp_br_store(i32 %arg2, i32 %argaddr) {
+entry:
+  %addr = inttoptr i32 %argaddr to i32*
+  %arg1 = load i32, i32* %addr, align 1
+  %cmp1 = icmp slt i32 %arg1, %arg2
+  store i32 1, i32* %addr, align 1
+  br i1 %cmp1, label %branch1, label %branch2
+branch1:
+  ret i32 1
+branch2:
+  ret i32 2
+}
+
+; CHECK-LABEL: no_fold_cmp_br_store
+; CHECK: cmp
+; CHECK: set
+; CHECK: cmp
+
+; Cmp/select non-folding due to load folding and intervening store.
+define internal i32 @no_fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
+entry:
+  %addr = inttoptr i32 %argaddr to i32*
+  %arg2 = load i32, i32* %addr, align 1
+  %cmp1 = icmp slt i32 %arg1, %arg2
+  store i32 1, i32* %addr, align 1
+  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
+  ret i32 %result
+}
+
+; CHECK-LABEL: no_fold_cmp_select_store
+; CHECK: cmp
+; CHECK: setl
+; CHECK: mov DWORD PTR
+; CHECK: cmp
+; CHECK: cmovne
+
+; Cmp/select folding due to load folding and non-intervening store.
+define internal i32 @fold_cmp_select_store(i32 %arg1, i32 %argaddr) {
+entry:
+  %addr = inttoptr i32 %argaddr to i32*
+  %arg2 = load i32, i32* %addr, align 1
+  %cmp1 = icmp slt i32 %arg1, %arg2
+  %result = select i1 %cmp1, i32 %arg1, i32 %argaddr
+  store i32 1, i32* %addr, align 1
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_cmp_select_store
+; CHECK: cmp {{.*}},DWORD PTR
+; CHECK: cmovl
+
 ; Cmp/multi-select non-folding because of extra non-whitelisted uses.
 define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1,
                                                             i32 %arg2) {