Subzero, MIPS32: Atomic intrinsics fixes

This patch fixes the MIPS32 lowering of the atomic intrinsics so that
the PNaCl smoke tests pass.

It also changes how addresses relative to the frame pointer are
computed, since MIPS differs from ARM and x86 in this respect.

R=stichnot@chromium.org

Patch from Stefan Maksimovic <makdstefan@gmail.com>.

Review-Url: https://codereview.chromium.org/2619363003 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index c76c097..802abfc 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -939,7 +939,7 @@
       // Addressing is relative to the frame pointer.  Subtract the offset after
       // adding the size of the alloca, because it grows downwards from the
       // frame pointer.
-      Offsets.push_back(-(CurrentOffset + Size));
+      Offsets.push_back(Target->getFramePointerOffset(CurrentOffset, Size));
     } else {
       // Addressing is relative to the stack pointer or to a user pointer.  Add
       // the offset before adding the size of the object, because it grows
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 3de6965..41108f8 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -253,7 +253,13 @@
   virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
   virtual int32_t getFrameFixedAllocaOffset() const = 0;
   virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
-
+  /// Returns the frame-pointer-relative offset for an object of Size bytes at
+  /// CurrentOffset. Virtual because MIPS, unlike X86/ARM, decrements its stack
+  /// pointer before saving it in the frame pointer register.
+  virtual uint32_t getFramePointerOffset(uint32_t CurrentOffset,
+                                         uint32_t Size) const {
+    return -(CurrentOffset + Size); // negated sum, wrapped into uint32_t
+  }
   /// Return whether a 64-bit Variable should be split into a Variable64On32.
   virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
 
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
index acbe422..5110c86 100644
--- a/src/IceTargetLoweringMIPS32.cpp
+++ b/src/IceTargetLoweringMIPS32.cpp
@@ -4476,8 +4476,8 @@
       Context.insert<InstFakeUse>(T_Hi);
     } else {
       auto *T = makeReg(DestTy);
-      lowerLoad(InstLoad::create(Func, T,
-                                 formMemoryOperand(Instr->getArg(0), DestTy)));
+      auto *Base = legalizeToReg(Instr->getArg(0));
+      lowerLoad(InstLoad::create(Func, T, formMemoryOperand(Base, DestTy)));
       _sync();
       _mov(Dest, T);
       // Adding a fake-use of T to ensure the atomic load is not removed if Dest
@@ -4524,8 +4524,8 @@
     } else {
       _sync();
       auto *Val = legalizeToReg(Instr->getArg(0));
-      lowerStore(InstStore::create(
-          Func, Val, formMemoryOperand(Instr->getArg(1), DestTy)));
+      auto *Base = legalizeToReg(Instr->getArg(1));
+      lowerStore(InstStore::create(Func, Val, formMemoryOperand(Base, DestTy)));
       _sync();
     }
     return;
@@ -4552,15 +4552,25 @@
       InstMIPS32Label *Retry1 = InstMIPS32Label::create(Func, this);
       auto *T1 = I32Reg();
       auto *T2 = I32Reg();
+      auto *T3 = I32Reg();
+      auto *T4 = I32Reg();
       _sync();
       Variable *ValHi, *ValLo, *ExpectedLo, *ExpectedHi;
       if (llvm::isa<ConstantUndef>(Expected)) {
         ExpectedLo = legalizeToReg(Ctx->getConstantZero(IceType_i32));
         ExpectedHi = legalizeToReg(Ctx->getConstantZero(IceType_i32));
-      } else {
-        auto *Expected64 = llvm::cast<Variable64On32>(Expected);
+      } else if (auto *Expected64 = llvm::dyn_cast<Variable64On32>(Expected)) {
         ExpectedLo = legalizeToReg(loOperand(Expected64));
         ExpectedHi = legalizeToReg(hiOperand(Expected64));
+      } else if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Expected)) {
+        const uint64_t Value = C64->getValue();
+        uint64_t Upper32Bits = (Value >> INT32_BITS) & 0xFFFFFFFF;
+        uint64_t Lower32Bits = Value & 0xFFFFFFFF;
+        ExpectedLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
+        ExpectedHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
+      } else {
+        llvm::report_fatal_error(
+            "AtomicCmpxchg: getArg(1) is nor Constant neither Variable64On32");
       }
       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(New)) {
         const uint64_t Value = C64->getValue();
@@ -4581,20 +4591,22 @@
       auto *AddrHi = OperandMIPS32Mem::create(
           Func, IceType_i32, BaseR,
           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(4)));
+      lowerLoad(InstLoad::create(Func, T3, AddrLo));
+      lowerLoad(InstLoad::create(Func, T4, AddrHi));
       Context.insert(Retry);
-      _ll(T1, AddrLo);
+      Sandboxer(this).ll(T1, AddrLo);
       _br(NoTarget, NoTarget, T1, ExpectedLo, Exit, CondMIPS32::Cond::NE);
-      _sc(ValLo, AddrLo);
+      Sandboxer(this).sc(ValLo, AddrLo);
       _br(NoTarget, NoTarget, ValLo, getZero(), Retry, CondMIPS32::Cond::EQ);
-      _mov(Dest64->getLo(), T1);
       Context.insert(Retry1);
-      _ll(T2, AddrHi);
+      Sandboxer(this).ll(T2, AddrHi);
       _br(NoTarget, NoTarget, T2, ExpectedHi, Exit, CondMIPS32::Cond::NE);
-      _sc(ValHi, AddrHi);
+      Sandboxer(this).sc(ValHi, AddrHi);
       _br(NoTarget, NoTarget, ValHi, getZero(), Retry1, CondMIPS32::Cond::EQ);
-      _mov(Dest64->getHi(), T2);
       Context.insert<InstFakeUse>(getZero());
       Context.insert(Exit);
+      _mov(Dest64->getLo(), T3);
+      _mov(Dest64->getHi(), T4);
       _sync();
     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
       auto *NewR = legalizeToReg(New);
@@ -4626,12 +4638,12 @@
       _andi(RegAt, NewR, Mask);
       _sllv(T6, RegAt, T2);
       Context.insert(Retry);
-      _ll(T7, formMemoryOperand(T1, DestTy));
+      Sandboxer(this).ll(T7, formMemoryOperand(T1, DestTy));
       _and(T8, T7, T3);
       _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
       _and(RegAt, T7, T4);
       _or(T9, RegAt, T6);
-      _sc(T9, formMemoryOperand(T1, DestTy));
+      Sandboxer(this).sc(T9, formMemoryOperand(T1, DestTy));
       _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       Context.insert(Exit);
@@ -4644,14 +4656,14 @@
       Context.insert<InstFakeUse>(NewR);
     } else {
       auto *T1 = I32Reg();
+      _sync();
+      Context.insert(Retry);
       auto *NewR = legalizeToReg(New);
       auto *ExpectedR = legalizeToReg(Expected);
       auto *ActualAddressR = legalizeToReg(ActualAddress);
-      _sync();
-      Context.insert(Retry);
-      _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
+      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
       _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
-      _sc(NewR, formMemoryOperand(ActualAddressR, DestTy));
+      Sandboxer(this).sc(NewR, formMemoryOperand(ActualAddressR, DestTy));
       _br(NoTarget, NoTarget, NewR, getZero(), Retry, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       Context.insert(Exit);
@@ -4707,9 +4719,10 @@
       auto *T2 = I32Reg();
       auto *T3 = I32Reg();
       Context.insert(Retry);
-      _ll(T1, AddrLo);
+      Sandboxer(this).ll(T1, AddrLo);
       if (Operation == Intrinsics::AtomicExchange) {
         _mov(RegAt, ValLo);
+        Context.insert<InstFakeUse>(T1);
       } else if (Operation == Intrinsics::AtomicAdd) {
         createArithInst(Operation, RegAt, T1, ValLo);
         _sltu(T2, RegAt, T1);
@@ -4719,22 +4732,23 @@
       } else {
         createArithInst(Operation, RegAt, T1, ValLo);
       }
-      _sc(RegAt, AddrLo);
+      Sandboxer(this).sc(RegAt, AddrLo);
       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       _mov(Dest64->getLo(), T1);
       Context.insert(Retry1);
-      _ll(T3, AddrHi);
+      Sandboxer(this).ll(T3, AddrHi);
       if (Operation == Intrinsics::AtomicAdd ||
           Operation == Intrinsics::AtomicSub) {
         _addu(RegAt, T2, ValHi);
         createArithInst(Operation, RegAt, T3, RegAt);
       } else if (Operation == Intrinsics::AtomicExchange) {
         _mov(RegAt, ValHi);
+        Context.insert<InstFakeUse>(T3);
       } else {
         createArithInst(Operation, RegAt, T3, ValHi);
       }
-      _sc(RegAt, AddrHi);
+      Sandboxer(this).sc(RegAt, AddrHi);
       _br(NoTarget, NoTarget, RegAt, getZero(), Retry1, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       _mov(Dest64->getHi(), T3);
@@ -4765,7 +4779,7 @@
       _nor(T4, getZero(), T3);
       _sllv(T5, NewR, T2);
       Context.insert(Retry);
-      _ll(T6, formMemoryOperand(T1, DestTy));
+      Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
       if (Operation != Intrinsics::AtomicExchange) {
         createArithInst(Operation, RegAt, T6, T5);
         _and(RegAt, RegAt, T3);
@@ -4776,7 +4790,7 @@
       } else {
         _or(RegAt, T7, RegAt);
       }
-      _sc(RegAt, formMemoryOperand(T1, DestTy));
+      Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       _and(RegAt, T6, T3);
@@ -4794,13 +4808,13 @@
       auto *ActualAddressR = legalizeToReg(ActualAddress);
       _sync();
       Context.insert(Retry);
-      _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
+      Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
       if (Operation == Intrinsics::AtomicExchange) {
         _mov(T2, NewR);
       } else {
         createArithInst(Operation, T2, T1, NewR);
       }
-      _sc(T2, formMemoryOperand(ActualAddressR, DestTy));
+      Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
       Context.insert<InstFakeUse>(getZero());
       _mov(Dest, T1);
@@ -4831,7 +4845,6 @@
     case 1:
     case 2:
     case 4:
-    case 8:
       Result = LockFree;
       break;
     }
@@ -5063,12 +5076,15 @@
     return;
   }
   case Intrinsics::NaClReadTP: {
-    if (getFlags().getUseSandboxing()) {
-      UnimplementedLoweringError(this, Instr);
-    } else {
-      InstCall *Call =
-          makeHelperCall(RuntimeHelper::H_call_read_tp, Instr->getDest(), 0);
-      lowerCall(Call);
+    if (SandboxingType != ST_NaCl)
+      llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
+    else {
+      auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
+      Context.insert<InstFakeDef>(T8);
+      Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
+          Func, getPointerType(), T8,
+          llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
+      _mov(Dest, TP);
     }
     return;
   }
@@ -6038,7 +6054,8 @@
     Target->_addiu(SP, SP, StackOffset);
     return;
   }
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
+  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+  Target->Context.insert<InstFakeDef>(T7);
   createAutoBundle();
   Target->_addiu(SP, SP, StackOffset);
   Target->_and(SP, SP, T7);
@@ -6046,20 +6063,53 @@
 
 void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
   Variable *Base = Mem->getBase();
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
-  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
+      (RegMIPS32::Reg_T8 != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     createAutoBundle();
     Target->_and(Base, Base, T7);
   }
   Target->_lw(Dest, Mem);
-  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg()))
+  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     Target->_and(Dest, Dest, T7);
+  }
+}
+// Emits LL via Mem; when sandboxing, first ANDs a non-stack Base with T7.
+void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
+  Variable *Base = Mem->getBase(); // register the LL address is formed from
+  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7); // T7 gets no real def here
+    createAutoBundle(); // NOTE(review): presumably bundles and+ll - confirm
+    Target->_and(Base, Base, T7); // Base is overwritten with Base & T7
+  }
+  Target->_ll(Dest, Mem); // emitted whether or not sandboxing is enabled
+  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7); // T7 gets no real def here
+    Target->_and(Dest, Dest, T7); // only when the load targets the stack reg
+  }
+}
+// Emits SC via Mem; when sandboxing, first ANDs a non-stack Base with T7.
+void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
+  Variable *Base = Mem->getBase(); // register the SC address is formed from
+  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7); // T7 gets no real def here
+    createAutoBundle(); // NOTE(review): presumably bundles and+sc - confirm
+    Target->_and(Base, Base, T7); // Base is overwritten with Base & T7
+  }
+  Target->_sc(Dest, Mem); // unlike ll(), Dest is not masked afterwards
+}
 
 void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
   Variable *Base = Mem->getBase();
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     createAutoBundle();
     Target->_and(Base, Base, T7);
   }
@@ -6069,34 +6119,43 @@
 void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                    RelocOp Reloc) {
   Variable *Base = Mem->getBase();
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     createAutoBundle();
     Target->_and(Base, Base, T7);
   }
   Target->_lwc1(Dest, Mem, Reloc);
-  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg()))
+  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     Target->_and(Dest, Dest, T7);
+  }
 }
 
 void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                    RelocOp Reloc) {
   Variable *Base = Mem->getBase();
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     createAutoBundle();
     Target->_and(Base, Base, T7);
   }
   Target->_ldc1(Dest, Mem, Reloc);
-  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg()))
+  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
+    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+    Target->Context.insert<InstFakeDef>(T7);
     Target->_and(Dest, Dest, T7);
+  }
 }
 
 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
   if (!Target->NeedSandboxing) {
     Target->_ret(RetAddr, RetValue);
   }
-  Variable *T6 = Target->getPhysicalRegister(RegMIPS32::Reg_T6);
+  auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
+  Target->Context.insert<InstFakeDef>(T6);
   createAutoBundle();
   Target->_and(RetAddr, RetAddr, T6);
   Target->_ret(RetAddr, RetValue);
@@ -6108,7 +6167,8 @@
     Target->_mov(SP, Src);
     return;
   }
-  Variable *T7 = Target->getPhysicalRegister(RegMIPS32::Reg_T7);
+  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
+  Target->Context.insert<InstFakeDef>(T7);
   createAutoBundle();
   Target->_mov(SP, Src);
   Target->_and(SP, SP, T7);
@@ -6120,7 +6180,8 @@
   if (Target->NeedSandboxing) {
     createAutoBundle();
     if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
-      Variable *T6 = Target->getPhysicalRegister(RegMIPS32::Reg_T6);
+      auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
+      Target->Context.insert<InstFakeDef>(T6);
       Target->_and(CallTargetR, CallTargetR, T6);
     }
   }
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 6f47e21..1695f50 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -112,6 +112,12 @@
 
   uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
 
+  uint32_t getFramePointerOffset(uint32_t CurrentOffset,
+                                 uint32_t Size) const override {
+    (void)Size; // Size does not enter the MIPS32 computation
+    return CurrentOffset + MaxOutArgsSizeBytes; // biased by out-args size
+  }
+
   bool shouldSplitToVariable64On32(Type Ty) const override {
     return Ty == IceType_i64;
   }
@@ -613,6 +619,8 @@
     void addiu_sp(uint32_t StackOffset);
     void lw(Variable *Dest, OperandMIPS32Mem *Mem);
     void sw(Variable *Dest, OperandMIPS32Mem *Mem);
+    void ll(Variable *Dest, OperandMIPS32Mem *Mem);
+    void sc(Variable *Dest, OperandMIPS32Mem *Mem);
     void lwc1(Variable *Dest, OperandMIPS32Mem *Mem, RelocOp Reloc = RO_No);
     void ldc1(Variable *Dest, OperandMIPS32Mem *Mem, RelocOp Reloc = RO_No);
     void ret(Variable *RetAddr, Variable *RetValue);
diff --git a/tests_lit/llvm2ice_tests/fused-alloca.ll b/tests_lit/llvm2ice_tests/fused-alloca.ll
index 6cc64fe..4dcc2b7 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca.ll
@@ -156,7 +156,7 @@
 ; MIPS32: 	addiu	v0,sp,0
 ; MIPS32: 	addiu	v1,sp,16
 ; MIPS32: 	move	a1,a0
-; MIPS32: 	sw	a1,16(s8)
+; MIPS32: 	sw	a1,32(s8)
 ; MIPS32: 	move	a1,a0
 ; MIPS32: 	sw	a1,0(v0)
 ; MIPS32: 	sw	a0,0(v1)
@@ -221,9 +221,9 @@
 ; MIPS32: 	move	a1,a0
 ; MIPS32: 	sw	a1,32(v0)
 ; MIPS32: 	move	v0,a0
-; MIPS32: 	sw	v0,64(s8)
+; MIPS32: 	sw	v0,80(s8)
 ; MIPS32: 	move	v0,a0
-; MIPS32: 	sw	v0,48(s8)
+; MIPS32: 	sw	v0,96(s8)
 ; MIPS32: 	sw	a0,0(v1)
 ; MIPS32: 	move	sp,s8
 ; MIPS32: 	lw	s8,{{.*}}(sp)