Initial implementation of __atomic_* (everything except __atomic_is_lock_free).



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@141632 91177308-0d34-0410-b5e6-96231b3b80d8
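
For reference, the new builtins take the four argument forms documented in
SemaChecking.cpp below. A minimal usage sketch (assuming the matching
Builtins.def declarations, which are not part of this section; ordering
arguments are plain ints using the 0..5 memory_order mapping from
CGExpr.cpp):

    _Atomic(int) ai;
    int v, expected = 0;

    v = __atomic_load(&ai, 5);          /* 5 = memory_order_seq_cst */
    __atomic_store(&ai, 1, 3);          /* 3 = memory_order_release */
    v = __atomic_fetch_add(&ai, 1, 0);  /* 0 = memory_order_relaxed;
                                           returns the old value */
    _Bool ok = __atomic_compare_exchange_strong(&ai, &expected, 2,
                                                5 /* success order */,
                                                2 /* failure order */);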
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index dc37ac9..465b490 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1538,6 +1538,7 @@
   }
   case CompoundAssignOperatorClass:
   case VAArgExprClass:
+  case AtomicExprClass:
     return false;
 
   case ConditionalOperatorClass: {
diff --git a/lib/AST/ExprClassification.cpp b/lib/AST/ExprClassification.cpp
index 624e9d2..49c6821 100644
--- a/lib/AST/ExprClassification.cpp
+++ b/lib/AST/ExprClassification.cpp
@@ -162,6 +162,7 @@
   case Expr::SubstNonTypeTemplateParmPackExprClass:
   case Expr::AsTypeExprClass:
   case Expr::ObjCIndirectCopyRestoreExprClass:
+  case Expr::AtomicExprClass:
     return Cl::CL_PRValue;
 
     // Next come the complicated cases.
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index c0f913d..85cb40f 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -2857,6 +2857,7 @@
   case Expr::AsTypeExprClass:
   case Expr::ObjCIndirectCopyRestoreExprClass:
   case Expr::MaterializeTemporaryExprClass:
+  case Expr::AtomicExprClass:
     return ICEDiag(2, E->getLocStart());
 
   case Expr::InitListExprClass:
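
Since AtomicExpr maps to ICEDiag(2, ...) here, atomic operations are never
integral constant expressions. A sketch of what this rejects (hypothetical
declarations):

    _Atomic(int) ai;
    enum { N = __atomic_load(&ai, 5) };  /* error: not an integer constant
                                            expression */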
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp
index 2b93250..d4ac722 100644
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -2255,6 +2255,7 @@
   case Expr::CXXNoexceptExprClass:
   case Expr::CUDAKernelCallExprClass:
   case Expr::AsTypeExprClass:
+  case Expr::AtomicExprClass:
   {
     // As bad as this diagnostic is, it's better than crashing.
     DiagnosticsEngine &Diags = Context.getDiags();
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index dfa2612..78d9a59 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -1011,6 +1011,59 @@
   OS << ")";
 }
 
+void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
+  const char *Name;
+  switch (Node->getOp()) {
+    case AtomicExpr::Load:
+      Name = "__atomic_load(";
+      break;
+    case AtomicExpr::Store:
+      Name = "__atomic_store(";
+      break;
+    case AtomicExpr::CmpXchgStrong:
+      Name = "__atomic_compare_exchange_strong(";
+      break;
+    case AtomicExpr::CmpXchgWeak:
+      Name = "__atomic_compare_exchange_weak(";
+      break;
+    case AtomicExpr::Xchg:
+      Name = "__atomic_exchange(";
+      break;
+    case AtomicExpr::Add:
+      Name = "__atomic_fetch_add(";
+      break;
+    case AtomicExpr::Sub:
+      Name = "__atomic_fetch_sub(";
+      break;
+    case AtomicExpr::And:
+      Name = "__atomic_fetch_and(";
+      break;
+    case AtomicExpr::Or:
+      Name = "__atomic_fetch_or(";
+      break;
+    case AtomicExpr::Xor:
+      Name = "__atomic_fetch_xor(";
+      break;
+  }
+  OS << Name;
+  PrintExpr(Node->getPtr());
+  OS << ", ";
+  if (Node->getOp() != AtomicExpr::Load) {
+    PrintExpr(Node->getVal1());
+    OS << ", ";
+  }
+  if (Node->isCmpXChg()) {
+    PrintExpr(Node->getVal2());
+    OS << ", ";
+  }
+  PrintExpr(Node->getOrder());
+  if (Node->isCmpXChg()) {
+    OS << ", ";
+    PrintExpr(Node->getOrderFail());
+  }
+  OS << ")";
+}
+
 // C++
 void StmtPrinter::VisitCXXOperatorCallExpr(CXXOperatorCallExpr *Node) {
   const char *OpStrings[NUM_OVERLOADED_OPERATORS] = {
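
For reference, the printer above reproduces the builtin call syntax, one
spelling per AtomicExpr::AtomicOp; a sketch of the emitted forms (not
verified -ast-print output):

    __atomic_load(p, order)
    __atomic_store(p, v, order)
    __atomic_fetch_add(p, v, order)
    __atomic_compare_exchange_strong(p, expected, desired, order, order_fail)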
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index 12321ef..df49e84 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -468,6 +468,10 @@
   }
 }
 
+void StmtProfiler::VisitAtomicExpr(const AtomicExpr *S) {
+  VisitExpr(S);
+}
+
 static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S,
                                           UnaryOperatorKind &UnaryOp,
                                           BinaryOperatorKind &BinaryOp) {
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index f7179be..ec0ca42 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -948,6 +948,72 @@
     return RValue::get(0);
   }
 
+  case Builtin::BI__atomic_thread_fence:
+  case Builtin::BI__atomic_signal_fence: {
+    llvm::SynchronizationScope Scope;
+    if (BuiltinID == Builtin::BI__atomic_signal_fence)
+      Scope = llvm::SingleThread;
+    else
+      Scope = llvm::CrossThread;
+    Value *Order = EmitScalarExpr(E->getArg(0));
+    if (isa<llvm::ConstantInt>(Order)) {
+      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      switch (ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        break;
+      case 1:  // memory_order_consume
+      case 2:  // memory_order_acquire
+        Builder.CreateFence(llvm::Acquire, Scope);
+        break;
+      case 3:  // memory_order_release
+        Builder.CreateFence(llvm::Release, Scope);
+        break;
+      case 4:  // memory_order_acq_rel
+        Builder.CreateFence(llvm::AcquireRelease, Scope);
+        break;
+      case 5:  // memory_order_seq_cst
+        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+        break;
+      }
+      return RValue::get(0);
+    }
+
+    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
+    AcquireBB = createBasicBlock("acquire", CurFn);
+    ReleaseBB = createBasicBlock("release", CurFn);
+    AcqRelBB = createBasicBlock("acqrel", CurFn);
+    SeqCstBB = createBasicBlock("seqcst", CurFn);
+    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
+
+    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
+
+    Builder.SetInsertPoint(AcquireBB);
+    Builder.CreateFence(llvm::Acquire, Scope);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(1), AcquireBB);
+    SI->addCase(Builder.getInt32(2), AcquireBB);
+
+    Builder.SetInsertPoint(ReleaseBB);
+    Builder.CreateFence(llvm::Release, Scope);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(3), ReleaseBB);
+
+    Builder.SetInsertPoint(AcqRelBB);
+    Builder.CreateFence(llvm::AcquireRelease, Scope);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(4), AcqRelBB);
+
+    Builder.SetInsertPoint(SeqCstBB);
+    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+    Builder.SetInsertPoint(ContBB);
+    return RValue::get(0);
+  }
+
     // Library functions with special handling.
   case Builtin::BIsqrt:
   case Builtin::BIsqrtf:
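
A sketch of how the two fence builtins above lower (again assuming their
Builtins.def declarations): a constant ordering folds to a single fence
instruction, while a runtime ordering goes through the switch emitted above.

    void fences(int runtime_order) {
      __atomic_thread_fence(5);   /* single seq_cst fence */
      __atomic_signal_fence(2);   /* acquire fence, SingleThread scope */

      /* Dispatches through the acquire/release/acq_rel/seq_cst blocks;
         relaxed (0) and invalid values emit no fence. */
      __atomic_thread_fence(runtime_order);
    }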
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index b242061..cb60df1 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2478,3 +2478,280 @@
 
   return MakeAddrLValue(AddV, MPT->getPointeeType());
 }
+
+static void
+EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
+             llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
+             uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
+  if (E->isCmpXChg()) {
+    // Note that cmpxchg only supports specifying one ordering and
+    // doesn't support weak cmpxchg, at least at the moment.
+    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
+    LoadVal1->setAlignment(Align);
+    llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2);
+    LoadVal2->setAlignment(Align);
+    llvm::AtomicCmpXchgInst *CXI =
+        CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order);
+    CXI->setVolatile(E->isVolatile());
+    llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1);
+    StoreVal1->setAlignment(Align);
+    llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1);
+    CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
+    return;
+  }
+
+  if (E->getOp() == AtomicExpr::Load) {
+    llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
+    Load->setAtomic(Order);
+    Load->setAlignment(Size);
+    Load->setVolatile(E->isVolatile());
+    llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Load, Dest);
+    StoreDest->setAlignment(Align);
+    return;
+  }
+
+  if (E->getOp() == AtomicExpr::Store) {
+    assert(!Dest && "Store does not return a value");
+    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
+    LoadVal1->setAlignment(Align);
+    llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
+    Store->setAtomic(Order);
+    Store->setAlignment(Size);
+    Store->setVolatile(E->isVolatile());
+    return;
+  }
+
+  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
+  switch (E->getOp()) {
+    case AtomicExpr::CmpXchgWeak:
+    case AtomicExpr::CmpXchgStrong:
+    case AtomicExpr::Store:
+    case AtomicExpr::Load:  assert(0 && "Already handled!");
+    case AtomicExpr::Add:   Op = llvm::AtomicRMWInst::Add;  break;
+    case AtomicExpr::Sub:   Op = llvm::AtomicRMWInst::Sub;  break;
+    case AtomicExpr::And:   Op = llvm::AtomicRMWInst::And;  break;
+    case AtomicExpr::Or:    Op = llvm::AtomicRMWInst::Or;   break;
+    case AtomicExpr::Xor:   Op = llvm::AtomicRMWInst::Xor;  break;
+    case AtomicExpr::Xchg:  Op = llvm::AtomicRMWInst::Xchg; break;
+  }
+  llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
+  LoadVal1->setAlignment(Align);
+  llvm::AtomicRMWInst *RMWI =
+      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order);
+  RMWI->setVolatile(E->isVolatile());
+  llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(RMWI, Dest);
+  StoreDest->setAlignment(Align);
+}
+
+// This function emits any expression (scalar, complex, or aggregate)
+// into a temporary alloca.
+static llvm::Value *
+EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
+  llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp");
+  CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(),
+                       /*Init*/ true);
+  return DeclPtr;
+}
+
+static RValue ConvertTempToRValue(CodeGenFunction &CGF, QualType Ty,
+                                  llvm::Value *Dest) {
+  if (Ty->isAnyComplexType())
+    return RValue::getComplex(CGF.LoadComplexFromAddr(Dest, false));
+  if (CGF.hasAggregateLLVMType(Ty))
+    return RValue::getAggregate(Dest);
+  return RValue::get(CGF.EmitLoadOfScalar(CGF.MakeAddrLValue(Dest, Ty)));
+}
+
+RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
+  QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
+  QualType MemTy = AtomicTy->getAs<AtomicType>()->getValueType();
+  CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy);
+  uint64_t Size = sizeChars.getQuantity();
+  CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy);
+  unsigned Align = alignChars.getQuantity();
+  // FIXME: Bound on Size should not be hardcoded.
+  bool UseLibcall = (sizeChars != alignChars || !llvm::isPowerOf2_64(Size) ||
+                     Size > 8);
+
+  llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0;
+  Ptr = EmitScalarExpr(E->getPtr());
+  Order = EmitScalarExpr(E->getOrder());
+  if (E->isCmpXChg()) {
+    Val1 = EmitScalarExpr(E->getVal1());
+    Val2 = EmitValToTemp(*this, E->getVal2());
+    OrderFail = EmitScalarExpr(E->getOrderFail());
+    (void)OrderFail; // OrderFail is unused at the moment
+  } else if ((E->getOp() == AtomicExpr::Add || E->getOp() == AtomicExpr::Sub) &&
+             MemTy->isPointerType()) {
+    // For pointers, we're required to do a bit of math: adding 1 to an int*
+    // is not the same as adding 1 to a uintptr_t.
+    QualType Val1Ty = E->getVal1()->getType();
+    llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
+    uint64_t PointeeIncAmt =
+        getContext().getTypeSizeInChars(MemTy->getPointeeType()).getQuantity();
+    llvm::Value *PointeeIncAmtVal =
+        llvm::ConstantInt::get(Val1Scalar->getType(), PointeeIncAmt);
+    Val1Scalar = Builder.CreateMul(Val1Scalar, PointeeIncAmtVal);
+    Val1 = CreateMemTemp(Val1Ty, ".atomictmp");
+    EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty));
+  } else if (E->getOp() != AtomicExpr::Load) {
+    Val1 = EmitValToTemp(*this, E->getVal1());
+  }
+
+  if (E->getOp() != AtomicExpr::Store && !Dest)
+    Dest = CreateMemTemp(E->getType(), ".atomicdst");
+
+  if (UseLibcall) {
+    // FIXME: Finalize what the libcalls are actually supposed to look like.
+    // See also http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
+    return EmitUnsupportedRValue(E, "atomic library call");
+  }
+#if 0
+  if (UseLibcall) {
+    const char* LibCallName;
+    switch (E->getOp()) {
+    case AtomicExpr::CmpXchgWeak:
+      LibCallName = "__atomic_compare_exchange_generic"; break;
+    case AtomicExpr::CmpXchgStrong:
+      LibCallName = "__atomic_compare_exchange_generic"; break;
+    case AtomicExpr::Add:   LibCallName = "__atomic_fetch_add_generic"; break;
+    case AtomicExpr::Sub:   LibCallName = "__atomic_fetch_sub_generic"; break;
+    case AtomicExpr::And:   LibCallName = "__atomic_fetch_and_generic"; break;
+    case AtomicExpr::Or:    LibCallName = "__atomic_fetch_or_generic"; break;
+    case AtomicExpr::Xor:   LibCallName = "__atomic_fetch_xor_generic"; break;
+    case AtomicExpr::Xchg:  LibCallName = "__atomic_exchange_generic"; break;
+    case AtomicExpr::Store: LibCallName = "__atomic_store_generic"; break;
+    case AtomicExpr::Load:  LibCallName = "__atomic_load_generic"; break;
+    }
+    llvm::SmallVector<QualType, 4> Params;
+    CallArgList Args;
+    QualType RetTy = getContext().VoidTy;
+    if (E->getOp() != AtomicExpr::Store && !E->isCmpXChg())
+      Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
+               getContext().VoidPtrTy);
+    Args.add(RValue::get(EmitCastToVoidPtr(Ptr)),
+             getContext().VoidPtrTy);
+    if (E->getOp() != AtomicExpr::Load)
+      Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
+               getContext().VoidPtrTy);
+    if (E->isCmpXChg()) {
+      Args.add(RValue::get(EmitCastToVoidPtr(Val2)),
+               getContext().VoidPtrTy);
+      RetTy = getContext().IntTy;
+    }
+    Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
+             getContext().getSizeType());
+    const CGFunctionInfo &FuncInfo =
+        CGM.getTypes().getFunctionInfo(RetTy, Args, FunctionType::ExtInfo());
+    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo, false);
+    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
+    RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
+    if (E->isCmpXChg())
+      return Res;
+    if (E->getOp() == AtomicExpr::Store)
+      return RValue::get(0);
+    return ConvertTempToRValue(*this, E->getType(), Dest);
+  }
+#endif
+  llvm::Type *IPtrTy =
+      llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo();
+  llvm::Value *OrigDest = Dest;
+  Ptr = Builder.CreateBitCast(Ptr, IPtrTy);
+  if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy);
+  if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy);
+  if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy);
+
+  if (isa<llvm::ConstantInt>(Order)) {
+    int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+    switch (ord) {
+    case 0:  // memory_order_relaxed
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                   llvm::Monotonic);
+      break;
+    case 1:  // memory_order_consume
+    case 2:  // memory_order_acquire
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                   llvm::Acquire);
+      break;
+    case 3:  // memory_order_release
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                   llvm::Release);
+      break;
+    case 4:  // memory_order_acq_rel
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                   llvm::AcquireRelease);
+      break;
+    case 5:  // memory_order_seq_cst
+      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                   llvm::SequentiallyConsistent);
+      break;
+    default: // invalid order
+      // We should not ever get here normally, but it's hard to
+      // enforce that in general.
+      break;
+    }
+    if (E->getOp() == AtomicExpr::Store)
+      return RValue::get(0);
+    return ConvertTempToRValue(*this, E->getType(), OrigDest);
+  }
+
+  // Long case, when Order isn't obviously constant.
+
+  // Create all the relevant BB's
+  llvm::BasicBlock *MonotonicBB, *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
+  MonotonicBB = createBasicBlock("monotonic", CurFn);
+  if (E->getOp() != AtomicExpr::Store)
+    AcquireBB = createBasicBlock("acquire", CurFn);
+  if (E->getOp() != AtomicExpr::Load)
+    ReleaseBB = createBasicBlock("release", CurFn);
+  if (E->getOp() != AtomicExpr::Load && E->getOp() != AtomicExpr::Store)
+    AcqRelBB = createBasicBlock("acqrel", CurFn);
+  SeqCstBB = createBasicBlock("seqcst", CurFn);
+  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
+
+  // Create the switch for the split
+  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
+  // doesn't matter unless someone is crazy enough to use something that
+  // doesn't fold to a constant for the ordering.
+  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
+
+  // Emit all the different atomics
+  Builder.SetInsertPoint(MonotonicBB);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+               llvm::Monotonic);
+  Builder.CreateBr(ContBB);
+  if (E->getOp() != AtomicExpr::Store) {
+    Builder.SetInsertPoint(AcquireBB);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                 llvm::Acquire);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(1), AcquireBB);
+    SI->addCase(Builder.getInt32(2), AcquireBB);
+  }
+  if (E->getOp() != AtomicExpr::Load) {
+    Builder.SetInsertPoint(ReleaseBB);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                 llvm::Release);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(3), ReleaseBB);
+  }
+  if (E->getOp() != AtomicExpr::Load && E->getOp() != AtomicExpr::Store) {
+    Builder.SetInsertPoint(AcqRelBB);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+                 llvm::AcquireRelease);
+    Builder.CreateBr(ContBB);
+    SI->addCase(Builder.getInt32(4), AcqRelBB);
+  }
+  Builder.SetInsertPoint(SeqCstBB);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
+               llvm::SequentiallyConsistent);
+  Builder.CreateBr(ContBB);
+  SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+  // Cleanup and return
+  Builder.SetInsertPoint(ContBB);
+  if (E->getOp() == AtomicExpr::Store)
+    return RValue::get(0);
+  return ConvertTempToRValue(*this, E->getType(), OrigDest);
+}
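
One subtlety in EmitAtomicExpr worth illustrating is the pointer-arithmetic
scaling: the ptrdiff_t operand of an atomic pointer add/sub is multiplied by
the pointee size before the atomicrmw is emitted. A sketch (hypothetical
variable names):

    int buf[4];
    _Atomic(int *) ap;

    __atomic_store(&ap, buf, 5);
    /* Advances the stored pointer by sizeof(int) bytes, not by one byte,
       and returns the previous pointer value. */
    int *old = __atomic_fetch_add(&ap, 1, 5);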
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index ff82bfd..97754d5 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -154,6 +154,9 @@
   void EmitNullInitializationToLValue(LValue Address);
   //  case Expr::ChooseExprClass:
   void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); }
+  void VisitAtomicExpr(AtomicExpr *E) {
+    CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr());
+  }
 };
 }  // end anonymous namespace.
 
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index e3f02ac..4a31bcf 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -266,6 +266,10 @@
   ComplexPairTy VisitInitListExpr(InitListExpr *E);
 
   ComplexPairTy VisitVAArgExpr(VAArgExpr *E);
+
+  ComplexPairTy VisitAtomicExpr(AtomicExpr *E) {
+    return CGF.EmitAtomicExpr(E).getComplexVal();
+  }
 };
 }  // end anonymous namespace.
 
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 9d95620..26a3e94 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -513,6 +513,7 @@
     return CGF.EmitObjCStringLiteral(E);
   }
   Value *VisitAsTypeExpr(AsTypeExpr *CE);
+  Value *VisitAtomicExpr(AtomicExpr *AE);
 };
 }  // end anonymous namespace.
 
@@ -2637,6 +2638,10 @@
   return Builder.CreateBitCast(Src, DstTy, "astype");
 }
 
+Value *ScalarExprEmitter::VisitAtomicExpr(AtomicExpr *E) {
+  return CGF.EmitAtomicExpr(E).getScalarVal();
+}
+
 //===----------------------------------------------------------------------===//
 //                         Entry Point into this File
 //===----------------------------------------------------------------------===//
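
Together with the CGExprAgg and CGExprComplex visitors above, this routes
each AtomicExpr to the emitter matching its result kind. A sketch of the
scalar and aggregate paths (hypothetical types; note that the inline path in
EmitAtomicExpr requires the atomic's size to be a power of two no larger
than 8 and equal to its alignment, so a type like _Complex float would
currently reach the unfinished libcall path instead):

    struct Wide { double d; };   /* size == align == 8: inline path   */
    _Atomic(struct Wide) ag;     /* aggregate result via CGExprAgg    */
    _Atomic(int)         sg;     /* scalar result via CGExprScalar    */

    struct Wide w = __atomic_load(&ag, 5);
    int         s = __atomic_load(&sg, 5);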
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index e53ed30..157623d 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2283,6 +2283,8 @@
 
   void EmitCXXThrowExpr(const CXXThrowExpr *E);
 
+  RValue EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest = 0);
+
   //===--------------------------------------------------------------------===//
   //                         Annotations Emission
   //===--------------------------------------------------------------------===//
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 412ed0e..a9d79bb 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -197,6 +197,28 @@
   case Builtin::BI__sync_lock_release:
   case Builtin::BI__sync_swap:
     return SemaBuiltinAtomicOverloaded(move(TheCallResult));
+  case Builtin::BI__atomic_load:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Load);
+  case Builtin::BI__atomic_store:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Store);
+  case Builtin::BI__atomic_exchange:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Xchg);
+  case Builtin::BI__atomic_compare_exchange_strong:
+    return SemaAtomicOpsOverloaded(move(TheCallResult),
+                                   AtomicExpr::CmpXchgStrong);
+  case Builtin::BI__atomic_compare_exchange_weak:
+    return SemaAtomicOpsOverloaded(move(TheCallResult),
+                                   AtomicExpr::CmpXchgWeak);
+  case Builtin::BI__atomic_fetch_add:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Add);
+  case Builtin::BI__atomic_fetch_sub:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Sub);
+  case Builtin::BI__atomic_fetch_and:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::And);
+  case Builtin::BI__atomic_fetch_or:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Or);
+  case Builtin::BI__atomic_fetch_xor:
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Xor);
   case Builtin::BI__builtin_annotation:
     if (CheckBuiltinAnnotationString(*this, TheCall->getArg(1)))
       return ExprError();
@@ -414,6 +436,153 @@
   return false;
 }
 
+ExprResult
+Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op) {
+  CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  Expr *Ptr, *Order, *Val1, *Val2, *OrderFail;
+
+  // All these operations take one of the following four forms:
+  // T     __atomic_load(_Atomic(T)*, int)                           (loads)
+  // T*    __atomic_fetch_add(_Atomic(T*)*, ptrdiff_t, int) (pointer add/sub)
+  // _Bool __atomic_compare_exchange_strong(_Atomic(T)*, T*, T, int, int)
+  //                                                               (cmpxchg)
+  // T     __atomic_exchange(_Atomic(T)*, T, int)          (everything else)
+  // where T is an appropriate type, and the int parameters are for orderings.
+  unsigned NumVals = 1;
+  unsigned NumOrders = 1;
+  if (Op == AtomicExpr::Load) {
+    NumVals = 0;
+  } else if (Op == AtomicExpr::CmpXchgWeak || Op == AtomicExpr::CmpXchgStrong) {
+    NumVals = 2;
+    NumOrders = 2;
+  }
+
+  if (TheCall->getNumArgs() < NumVals+NumOrders+1) {
+    Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
+      << 0 << NumVals+NumOrders+1 << TheCall->getNumArgs()
+      << TheCall->getCallee()->getSourceRange();
+    return ExprError();
+  } else if (TheCall->getNumArgs() > NumVals+NumOrders+1) {
+    Diag(TheCall->getArg(NumVals+NumOrders+1)->getLocStart(),
+         diag::err_typecheck_call_too_many_args)
+      << 0 << NumVals+NumOrders+1 << TheCall->getNumArgs()
+      << TheCall->getCallee()->getSourceRange();
+    return ExprError();
+  }
+
+  // Inspect the first argument of the atomic operation.  This should always be
+  // a pointer to an _Atomic type.
+  Ptr = TheCall->getArg(0);
+  Ptr = DefaultFunctionArrayLvalueConversion(Ptr).get();
+  const PointerType *pointerType = Ptr->getType()->getAs<PointerType>();
+  if (!pointerType) {
+    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic)
+      << Ptr->getType() << Ptr->getSourceRange();
+    return ExprError();
+  }
+
+  QualType AtomTy = pointerType->getPointeeType();
+  if (!AtomTy->isAtomicType()) {
+    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic)
+      << Ptr->getType() << Ptr->getSourceRange();
+    return ExprError();
+  }
+  QualType ValType = AtomTy->getAs<AtomicType>()->getValueType();
+
+  if ((Op == AtomicExpr::Add || Op == AtomicExpr::Sub) &&
+      !ValType->isIntegerType() && !ValType->isPointerType()) {
+    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic_int_or_ptr)
+      << Ptr->getType() << Ptr->getSourceRange();
+    return ExprError();
+  }
+
+  if (!ValType->isIntegerType() &&
+      (Op == AtomicExpr::And || Op == AtomicExpr::Or || Op == AtomicExpr::Xor)){
+    Diag(DRE->getLocStart(), diag::err_atomic_op_logical_needs_atomic_int)
+      << Ptr->getType() << Ptr->getSourceRange();
+    return ExprError();
+  }
+
+  switch (ValType.getObjCLifetime()) {
+  case Qualifiers::OCL_None:
+  case Qualifiers::OCL_ExplicitNone:
+    // okay
+    break;
+
+  case Qualifiers::OCL_Weak:
+  case Qualifiers::OCL_Strong:
+  case Qualifiers::OCL_Autoreleasing:
+    Diag(DRE->getLocStart(), diag::err_arc_atomic_ownership)
+      << ValType << Ptr->getSourceRange();
+    return ExprError();
+  }
+
+  QualType ResultType = ValType;
+  if (Op == AtomicExpr::Store)
+    ResultType = Context.VoidTy;
+  else if (Op == AtomicExpr::CmpXchgWeak || Op == AtomicExpr::CmpXchgStrong)
+    ResultType = Context.BoolTy;
+
+  // The first argument --- the pointer --- has a fixed type; we
+  // deduce the types of the rest of the arguments accordingly.  Walk
+  // the remaining arguments, converting them to the deduced value type.
+  for (unsigned i = 1; i != NumVals+NumOrders+1; ++i) {
+    ExprResult Arg = TheCall->getArg(i);
+    QualType Ty;
+    if (i < NumVals+1) {
+      // The second argument to a cmpxchg is a pointer to the data which will
+      // be exchanged. The second argument to a pointer add/subtract is the
+      // amount to add/subtract, which must be a ptrdiff_t.  The third
+      // argument to a cmpxchg and the second argument in all other cases
+      // is the type of the value.
+      if (i == 1 && (Op == AtomicExpr::CmpXchgWeak ||
+                     Op == AtomicExpr::CmpXchgStrong))
+         Ty = Context.getPointerType(ValType.getUnqualifiedType());
+      else if (!ValType->isIntegerType() &&
+               (Op == AtomicExpr::Add || Op == AtomicExpr::Sub))
+        Ty = Context.getPointerDiffType();
+      else
+        Ty = ValType;
+    } else {
+      // The order(s) are always converted to int.
+      Ty = Context.IntTy;
+    }
+    InitializedEntity Entity =
+        InitializedEntity::InitializeParameter(Context, Ty, false);
+    Arg = PerformCopyInitialization(Entity, SourceLocation(), Arg);
+    if (Arg.isInvalid())
+      return ExprError();
+    TheCall->setArg(i, Arg.get());
+  }
+
+  if (Op == AtomicExpr::Load) {
+    Order = TheCall->getArg(1);
+    return Owned(new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),
+                                          Ptr, Order, ResultType, Op,
+                                          TheCall->getRParenLoc(), false,
+                                          false));
+  } else if (Op != AtomicExpr::CmpXchgWeak && Op != AtomicExpr::CmpXchgStrong) {
+    Val1 = TheCall->getArg(1);
+    Order = TheCall->getArg(2);
+    return Owned(new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),
+                                          Ptr, Val1, Order, ResultType, Op,
+                                          TheCall->getRParenLoc(), false,
+                                          false));
+  } else {
+    Val1 = TheCall->getArg(1);
+    Val2 = TheCall->getArg(2);
+    Order = TheCall->getArg(3);
+    OrderFail = TheCall->getArg(4);
+    return Owned(new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),
+                                          Ptr, Val1, Val2, Order, OrderFail,
+                                          ResultType, Op,
+                                          TheCall->getRParenLoc(), false,
+                                          false));
+  }
+}
+
 /// checkBuiltinArgument - Given a call to a builtin function, perform
 /// normal type-checking on the given argument, updating the call in
 /// place.  This is useful when a builtin function requires custom
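
The type checks in SemaAtomicOpsOverloaded reject mismatched operand kinds;
a sketch of inputs each new diagnostic fires on (hypothetical declarations):

    int plain;
    _Atomic(float) af;
    _Atomic(int *) ap;

    __atomic_load(&plain, 0);      /* err_atomic_op_needs_atomic: first
                                      argument must point to an _Atomic type */
    __atomic_fetch_add(&af, 1, 0); /* err_atomic_op_needs_atomic_int_or_ptr */
    __atomic_fetch_and(&ap, 1, 0); /* err_atomic_op_logical_needs_atomic_int */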
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index 2a18afa..4a2ad18 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -8099,6 +8099,13 @@
 TreeTransform<Derived>::TransformAsTypeExpr(AsTypeExpr *E) {
   llvm_unreachable("Cannot transform asType expressions yet");
 }
+
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformAtomicExpr(AtomicExpr *E) {
+  assert(false && "Cannot transform atomic expressions yet");
+  return SemaRef.Owned(E);
+}
   
 //===----------------------------------------------------------------------===//
 // Type reconstruction
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index 7653d5f..ab07b85 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -774,6 +774,25 @@
   E->RParenLoc = ReadSourceLocation(Record, Idx);
 }
 
+void ASTStmtReader::VisitAtomicExpr(AtomicExpr *E) {
+  VisitExpr(E);
+  E->setOp(AtomicExpr::AtomicOp(Record[Idx++]));
+  E->setPtr(Reader.ReadSubExpr());
+  E->setOrder(Reader.ReadSubExpr());
+  E->setNumSubExprs(2);
+  if (E->getOp() != AtomicExpr::Load) {
+    E->setVal1(Reader.ReadSubExpr());
+    E->setNumSubExprs(3);
+  }
+  if (E->isCmpXChg()) {
+    E->setOrderFail(Reader.ReadSubExpr());
+    E->setVal2(Reader.ReadSubExpr());
+    E->setNumSubExprs(5);
+  }
+  E->setBuiltinLoc(ReadSourceLocation(Record, Idx));
+  E->setRParenLoc(ReadSourceLocation(Record, Idx));
+}
+
 //===----------------------------------------------------------------------===//
 // Objective-C Expressions and Statements
 
@@ -2010,6 +2029,10 @@
     case EXPR_ASTYPE:
       S = new (Context) AsTypeExpr(Empty);
       break;
+
+    case EXPR_ATOMIC:
+      S = new (Context) AtomicExpr(Empty);
+      break;
     }
     
     // We hit a STMT_STOP, so we're done with this expression.
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index fb56175..7e2d45c 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -736,6 +736,21 @@
   Code = serialization::EXPR_GENERIC_SELECTION;
 }
 
+void ASTStmtWriter::VisitAtomicExpr(AtomicExpr *E) {
+  VisitExpr(E);
+  Record.push_back(E->getOp());
+  Writer.AddStmt(E->getPtr());
+  Writer.AddStmt(E->getOrder());
+  if (E->getOp() != AtomicExpr::Load)
+    Writer.AddStmt(E->getVal1());
+  if (E->isCmpXChg()) {
+    Writer.AddStmt(E->getOrderFail());
+    Writer.AddStmt(E->getVal2());
+  }
+  Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
+  Writer.AddSourceLocation(E->getRParenLoc(), Record);
+}
+
 //===----------------------------------------------------------------------===//
 // Objective-C Expressions and Statements.
 //===----------------------------------------------------------------------===//
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 11be71a..5489c10 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -564,6 +564,7 @@
     case Stmt::CUDAKernelCallExprClass:
     case Stmt::OpaqueValueExprClass:
     case Stmt::AsTypeExprClass:
+    case Stmt::AtomicExprClass:
         // Fall through.
 
     // Cases we intentionally don't evaluate, since they don't need