Fix bug when atomic load is fused with an arith op (and not in the entry BB)

Normally, the FakeUse that keeps an atomic load from being elided is
placed on the load's Dest. However, when the load is fused with an add,
the load instruction is deleted and its Dest is never defined. That
trips up the liveness analysis when the fused pair sits in a non-entry
basic block. In that case, the FakeUse should name the add's Dest
instead.
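
To make the failure concrete, here is a self-contained toy model of the
def-use property that the liveness analysis checks. ToyInst,
allUsesDefined, and the variable names are illustration-only inventions,
not Subzero's real types, and the entry-block special case is ignored:

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    struct ToyInst {
      std::string Def;               // variable defined ("" for a FakeUse)
      std::vector<std::string> Uses; // variables used
    };

    static bool allUsesDefined(const std::vector<ToyInst> &Block) {
      std::set<std::string> Defined;
      for (const ToyInst &I : Block) {
        for (const std::string &U : I.Uses)
          if (!Defined.count(U))
            return false; // a use with no reaching definition
        if (!I.Def.empty())
          Defined.insert(I.Def);
      }
      return true;
    }

    int main() {
      // Unfused: the load defines r, so FakeUse(r) is fine.
      std::vector<ToyInst> Unfused = {
          {"r", {}},     // r = atomic load [ptr]
          {"r2", {"r"}}, // r2 = add r, 32
          {"", {"r"}}};  // FakeUse(r)
      assert(allUsesDefined(Unfused));

      // Fused: the load is deleted, r is never defined, FakeUse(r) dangles.
      std::vector<ToyInst> Fused = {
          {"r2", {}},   // r2 = add [ptr], 32 (load folded into the add)
          {"", {"r"}}}; // FakeUse(r)  <-- the bug
      assert(!allUsesDefined(Fused));

      // The fix: put the FakeUse on the add's dest instead.
      std::vector<ToyInst> FusedFixed = {
          {"r2", {}},    // r2 = add [ptr], 32
          {"", {"r2"}}}; // FakeUse(r2)
      assert(allUsesDefined(FusedFixed));
      return 0;
    }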

We have no handle on the add at that point, so introduce a
getLastInserted() helper. A couple of ways to implement it, both
sketched below:
- modify insert() to track the last insertion explicitly
- rewind from Next one step
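
As a toy comparison of the two options (Inst is reduced to an int, and
negative values stand in for deleted instructions; these stand-ins and
ToyLoweringContext are illustration-only assumptions, while the diff
below implements the rewind variant on the real InstList):

    #include <cassert>
    #include <list>

    struct ToyLoweringContext {
      std::list<int> Insts;
      std::list<int>::iterator Next = Insts.end();
      int *LastInserted = nullptr;

      // Option 1: have insert() remember the instruction explicitly.
      void insert(int Inst) { LastInserted = &*Insts.insert(Next, Inst); }

      // Option 2 (what the diff implements): rewind one step from Next,
      // skipping instructions that were deleted after insertion.
      int rewindFromNext() const {
        std::list<int>::const_iterator I = Next;
        assert(I != Insts.begin());
        do {
          --I;
        } while (I != Insts.begin() && *I < 0);
        return *I;
      }
    };

    int main() {
      ToyLoweringContext Ctx;
      Ctx.insert(1);
      Ctx.insert(2);
      *Ctx.LastInserted = -2;            // pretend lowering later deleted it
      assert(*Ctx.LastInserted == -2);   // explicit tracking still sees it
      assert(Ctx.rewindFromNext() == 1); // rewinding skips deleted insts
      return 0;
    }

The rewind variant has to skip instructions that were deleted after
insertion, which is why the diff also adds advanceBackward() and a Begin
iterator to stop at.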

Failing that, we could simply disable the fusing for atomic loads.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=3882
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/417353003
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 2a9b8c4..0034de5 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -26,11 +26,12 @@
 
 void LoweringContext::init(CfgNode *N) {
   Node = N;
-  Cur = getNode()->getInsts().begin();
+  Begin = getNode()->getInsts().begin();
+  Cur = Begin;
   End = getNode()->getInsts().end();
   skipDeleted(Cur);
   Next = Cur;
-  advance(Next);
+  advanceForward(Next);
 }
 
 void LoweringContext::insert(Inst *Inst) {
@@ -43,13 +44,26 @@
     ++I;
 }
 
-void LoweringContext::advance(InstList::iterator &I) const {
+void LoweringContext::advanceForward(InstList::iterator &I) const {
   if (I != End) {
     ++I;
     skipDeleted(I);
   }
 }
 
+void LoweringContext::advanceBackward(InstList::iterator &I) const {
+  assert(I != Begin);
+  do {
+    --I;
+  } while (I != Begin && (*I)->isDeleted());
+}
+
+Inst *LoweringContext::getLastInserted() const {
+  InstList::iterator Cursor = Next;
+  advanceBackward(Cursor);
+  return *Cursor;
+}
+
 TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
   // These statements can be #ifdef'd to specialize the code generator
   // to a subset of the available targets.  TODO: use CRTP.
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index ec86cff..91512a9 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -42,7 +42,7 @@
     return *Next;
   }
   Inst *getNextInst(InstList::iterator &Iter) const {
-    advance(Iter);
+    advanceForward(Iter);
     if (Iter == End)
       return NULL;
     return *Iter;
@@ -52,8 +52,9 @@
   InstList::iterator getCur() const { return Cur; }
   InstList::iterator getEnd() const { return End; }
   void insert(Inst *Inst);
+  Inst *getLastInserted() const;
   void advanceCur() { Cur = Next; }
-  void advanceNext() { advance(Next); }
+  void advanceNext() { advanceForward(Next); }
   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
 
 private:
@@ -71,11 +72,14 @@
   // insertion point", to avoid confusion when previously-deleted
   // instructions come between the two points.
   InstList::iterator Next;
+  // Begin is a copy of Insts.begin(), used if iterators are moved backward.
+  InstList::iterator Begin;
   // End is a copy of Insts.end(), used if Next needs to be advanced.
   InstList::iterator End;
 
   void skipDeleted(InstList::iterator &I) const;
-  void advance(InstList::iterator &I) const;
+  void advanceForward(InstList::iterator &I) const;
+  void advanceBackward(InstList::iterator &I) const;
   LoweringContext(const LoweringContext &) LLVM_DELETED_FUNCTION;
   LoweringContext &operator=(const LoweringContext &) LLVM_DELETED_FUNCTION;
 };
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 2db795b..00db25a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2724,15 +2724,18 @@
       // Then cast the bits back out of the XMM register to the i64 Dest.
       InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
       lowerCast(Cast);
-      // Make sure that the atomic load isn't elided.
+      // Make sure that the atomic load isn't elided when unused.
       Context.insert(InstFakeUse::create(Func, Dest->getLo()));
       Context.insert(InstFakeUse::create(Func, Dest->getHi()));
       return;
     }
     InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
     lowerLoad(Load);
-    // Make sure the atomic load isn't elided.
-    Context.insert(InstFakeUse::create(Func, Dest));
+    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
+    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
+    // insert the FakeUse on the last-inserted instruction's dest.
+    Context.insert(InstFakeUse::create(Func,
+                                       Context.getLastInserted()->getDest()));
     return;
   }
   case Intrinsics::AtomicRMW:
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
index 2c325df..82f975f 100644
--- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
@@ -88,6 +88,9 @@
 
 define i32 @test_atomic_load_32_with_arith(i32 %iptr) {
 entry:
+  br label %next
+
+next:
   %ptr = inttoptr i32 %iptr to i32*
   %r = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6)
   %r2 = add i32 %r, 32
@@ -96,6 +99,11 @@
 ; CHECK-LABEL: test_atomic_load_32_with_arith
 ; CHECK: mov {{.*}}, dword
 ; The next instruction may be a separate load or folded into an add.
+;
+; In O2 mode, we know that the load and add are going to be fused.
+; CHECKO2-LABEL: test_atomic_load_32_with_arith
+; CHECKO2: mov {{.*}}, dword
+; CHECKO2: add {{.*}}, dword
 
 define i32 @test_atomic_load_32_ignored(i32 %iptr) {
 entry:
@@ -106,6 +114,9 @@
 ; CHECK-LABEL: test_atomic_load_32_ignored
 ; CHECK: mov {{.*}}, dword
 ; CHECK: mov {{.*}}, dword
+; CHECKO2-LABEL: test_atomic_load_32_ignored
+; CHECKO2: mov {{.*}}, dword
+; CHECKO2: mov {{.*}}, dword
 
 define i64 @test_atomic_load_64_ignored(i32 %iptr) {
 entry: