Implement support for 18 of the GNU-compatible __atomic builtins.

This is not quite sufficient for libstdc++'s <atomic>: we still need
__atomic_test_and_set and __atomic_clear, and may need a more complete
__atomic_is_lock_free implementation.

We are also missing an implementation of __atomic_always_lock_free,
__atomic_nand_fetch, and __atomic_fetch_nand, but those aren't needed
for libstdc++.
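
For reference, the 18 are the _n and plain forms of load, store,
exchange and compare_exchange, plus the five fetch-op and five op-fetch
arithmetic/bitwise forms. A rough usage sketch (the __ATOMIC_* ordering
macros are the usual predefined ones, not part of this patch):

  int x = 0;
  int old = __atomic_fetch_add(&x, 1, __ATOMIC_SEQ_CST);  // old == 0
  int now = __atomic_add_fetch(&x, 1, __ATOMIC_SEQ_CST);  // now == 2
  __atomic_store_n(&x, 5, __ATOMIC_RELEASE);
  int v = __atomic_load_n(&x, __ATOMIC_ACQUIRE);          // v == 5
  int expected = 5;
  bool ok = __atomic_compare_exchange_n(&x, &expected, 6, /*weak*/false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);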


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154579 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 2bb79a0..eb185b2 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -3858,20 +3858,44 @@
 
 unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
   switch (Op) {
-  case Init:
-  case Load:
+  case AO__c11_atomic_init:
+  case AO__c11_atomic_load:
+  case AO__atomic_load_n:
     return 2;
-  case Store:
-  case Xchg:
-  case Add:
-  case Sub:
-  case And:
-  case Or:
-  case Xor:
+
+  case AO__c11_atomic_store:
+  case AO__c11_atomic_exchange:
+  case AO__atomic_load:
+  case AO__atomic_store:
+  case AO__atomic_store_n:
+  case AO__atomic_exchange_n:
+  case AO__c11_atomic_fetch_add:
+  case AO__c11_atomic_fetch_sub:
+  case AO__c11_atomic_fetch_and:
+  case AO__c11_atomic_fetch_or:
+  case AO__c11_atomic_fetch_xor:
+  case AO__atomic_fetch_add:
+  case AO__atomic_fetch_sub:
+  case AO__atomic_fetch_and:
+  case AO__atomic_fetch_or:
+  case AO__atomic_fetch_xor:
+  case AO__atomic_add_fetch:
+  case AO__atomic_sub_fetch:
+  case AO__atomic_and_fetch:
+  case AO__atomic_or_fetch:
+  case AO__atomic_xor_fetch:
     return 3;
-  case CmpXchgStrong:
-  case CmpXchgWeak:
+
+  case AO__atomic_exchange:
+    return 4;
+
+  case AO__c11_atomic_compare_exchange_strong:
+  case AO__c11_atomic_compare_exchange_weak:
     return 5;
+
+  case AO__atomic_compare_exchange:
+  case AO__atomic_compare_exchange_n:
+    return 6;
   }
   llvm_unreachable("unknown atomic op");
 }
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 651b88b..3a44183 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -1108,52 +1108,34 @@
 void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
   const char *Name = 0;
   switch (Node->getOp()) {
-    case AtomicExpr::Init:
-      Name = "__c11_atomic_init(";
-      break;
-    case AtomicExpr::Load:
-      Name = "__c11_atomic_load(";
-      break;
-    case AtomicExpr::Store:
-      Name = "__c11_atomic_store(";
-      break;
-    case AtomicExpr::CmpXchgStrong:
-      Name = "__c11_atomic_compare_exchange_strong(";
-      break;
-    case AtomicExpr::CmpXchgWeak:
-      Name = "__c11_atomic_compare_exchange_weak(";
-      break;
-    case AtomicExpr::Xchg:
-      Name = "__c11_atomic_exchange(";
-      break;
-    case AtomicExpr::Add:
-      Name = "__c11_atomic_fetch_add(";
-      break;
-    case AtomicExpr::Sub:
-      Name = "__c11_atomic_fetch_sub(";
-      break;
-    case AtomicExpr::And:
-      Name = "__c11_atomic_fetch_and(";
-      break;
-    case AtomicExpr::Or:
-      Name = "__c11_atomic_fetch_or(";
-      break;
-    case AtomicExpr::Xor:
-      Name = "__c11_atomic_fetch_xor(";
-      break;
+#define BUILTIN(ID, TYPE, ATTRS)
+#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
+  case AtomicExpr::AO ## ID: \
+    Name = #ID "("; \
+    break;
+#include "clang/Basic/Builtins.def"
   }
   OS << Name;
+
+  // AtomicExpr stores its subexpressions in a permuted order.
   PrintExpr(Node->getPtr());
   OS << ", ";
-  if (Node->getOp() != AtomicExpr::Load) {
+  if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
+      Node->getOp() != AtomicExpr::AO__atomic_load_n) {
     PrintExpr(Node->getVal1());
     OS << ", ";
   }
-  if (Node->isCmpXChg()) {
+  if (Node->getOp() == AtomicExpr::AO__atomic_exchange ||
+      Node->isCmpXChg()) {
     PrintExpr(Node->getVal2());
     OS << ", ";
   }
-  if (Node->getOp() != AtomicExpr::Init)
+  if (Node->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+      Node->getOp() == AtomicExpr::AO__atomic_compare_exchange_n) {
+    PrintExpr(Node->getWeak());
+    OS << ", ";
+  }
+  if (Node->getOp() != AtomicExpr::AO__c11_atomic_init)
     PrintExpr(Node->getOrder());
   if (Node->isCmpXChg()) {
     OS << ", ";
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 260fa5b..147e727 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2688,7 +2688,17 @@
 EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
              llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
              uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
-  if (E->isCmpXChg()) {
+  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
+  llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
+
+  switch (E->getOp()) {
+  case AtomicExpr::AO__c11_atomic_init:
+    llvm_unreachable("Already handled!");
+
+  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__atomic_compare_exchange:
+  case AtomicExpr::AO__atomic_compare_exchange_n: {
     // Note that cmpxchg only supports specifying one ordering and
     // doesn't support weak cmpxchg, at least at the moment.
     llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
@@ -2705,7 +2715,9 @@
     return;
   }
 
-  if (E->getOp() == AtomicExpr::Load) {
+  case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__atomic_load_n:
+  case AtomicExpr::AO__atomic_load: {
     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
     Load->setAtomic(Order);
     Load->setAlignment(Size);
@@ -2715,7 +2727,9 @@
     return;
   }
 
-  if (E->getOp() == AtomicExpr::Store) {
+  case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__atomic_store:
+  case AtomicExpr::AO__atomic_store_n: {
     assert(!Dest && "Store does not return a value");
     llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     LoadVal1->setAlignment(Align);
@@ -2726,26 +2740,66 @@
     return;
   }
 
-  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
-  switch (E->getOp()) {
-    case AtomicExpr::CmpXchgWeak:
-    case AtomicExpr::CmpXchgStrong:
-    case AtomicExpr::Store:
-    case AtomicExpr::Init:
-    case AtomicExpr::Load:  assert(0 && "Already handled!");
-    case AtomicExpr::Add:   Op = llvm::AtomicRMWInst::Add;  break;
-    case AtomicExpr::Sub:   Op = llvm::AtomicRMWInst::Sub;  break;
-    case AtomicExpr::And:   Op = llvm::AtomicRMWInst::And;  break;
-    case AtomicExpr::Or:    Op = llvm::AtomicRMWInst::Or;   break;
-    case AtomicExpr::Xor:   Op = llvm::AtomicRMWInst::Xor;  break;
-    case AtomicExpr::Xchg:  Op = llvm::AtomicRMWInst::Xchg; break;
+  case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__atomic_exchange_n:
+  case AtomicExpr::AO__atomic_exchange:
+    Op = llvm::AtomicRMWInst::Xchg;
+    break;
+
+  case AtomicExpr::AO__atomic_add_fetch:
+    PostOp = llvm::Instruction::Add;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_add:
+    Op = llvm::AtomicRMWInst::Add;
+    break;
+
+  case AtomicExpr::AO__atomic_sub_fetch:
+    PostOp = llvm::Instruction::Sub;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_fetch_sub:
+    Op = llvm::AtomicRMWInst::Sub;
+    break;
+
+  case AtomicExpr::AO__atomic_and_fetch:
+    PostOp = llvm::Instruction::And;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__atomic_fetch_and:
+    Op = llvm::AtomicRMWInst::And;
+    break;
+
+  case AtomicExpr::AO__atomic_or_fetch:
+    PostOp = llvm::Instruction::Or;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__atomic_fetch_or:
+    Op = llvm::AtomicRMWInst::Or;
+    break;
+
+  case AtomicExpr::AO__atomic_xor_fetch:
+    PostOp = llvm::Instruction::Xor;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_fetch_xor:
+    Op = llvm::AtomicRMWInst::Xor;
+    break;
   }
+
   llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   LoadVal1->setAlignment(Align);
   llvm::AtomicRMWInst *RMWI =
       CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order);
   RMWI->setVolatile(E->isVolatile());
-  llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(RMWI, Dest);
+
+  // For the __atomic_*_fetch operations, apply the operation once more to
+  // recompute the value that was stored (atomicrmw returns the old value).
+  llvm::Value *Result = RMWI;
+  if (PostOp)
+    Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1);
+
+  llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Result, Dest);
   StoreDest->setAlignment(Align);
 }
 
@@ -2770,7 +2824,9 @@
 
 RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
-  QualType MemTy = AtomicTy->getAs<AtomicType>()->getValueType();
+  QualType MemTy = AtomicTy;
+  if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
+    MemTy = AT->getValueType();
   CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy);
   uint64_t Size = sizeChars.getQuantity();
   CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy);
@@ -2784,7 +2840,7 @@
   llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0;
   Ptr = EmitScalarExpr(E->getPtr());
 
-  if (E->getOp() == AtomicExpr::Init) {
+  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
     assert(!Dest && "Init does not return a value");
     if (!hasAggregateLLVMType(E->getVal1()->getType())) {
       llvm::StoreInst *Store =
@@ -2805,26 +2861,80 @@
   }
 
   Order = EmitScalarExpr(E->getOrder());
-  if (E->isCmpXChg()) {
+
+  switch (E->getOp()) {
+  case AtomicExpr::AO__c11_atomic_init:
+    llvm_unreachable("Already handled!");
+
+  case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__atomic_load_n:
+    break;
+
+  case AtomicExpr::AO__atomic_load:
+    Dest = EmitScalarExpr(E->getVal1());
+    break;
+
+  case AtomicExpr::AO__atomic_store:
     Val1 = EmitScalarExpr(E->getVal1());
-    Val2 = EmitValToTemp(*this, E->getVal2());
+    break;
+
+  case AtomicExpr::AO__atomic_exchange:
+    Val1 = EmitScalarExpr(E->getVal1());
+    Dest = EmitScalarExpr(E->getVal2());
+    break;
+
+  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__atomic_compare_exchange_n:
+  case AtomicExpr::AO__atomic_compare_exchange:
+    Val1 = EmitScalarExpr(E->getVal1());
+    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+      Val2 = EmitScalarExpr(E->getVal2());
+    else
+      Val2 = EmitValToTemp(*this, E->getVal2());
     OrderFail = EmitScalarExpr(E->getOrderFail());
-  } else if ((E->getOp() == AtomicExpr::Add || E->getOp() == AtomicExpr::Sub) &&
-             MemTy->isPointerType()) {
-    // For pointers, we're required to do a bit of math: adding 1 to an int*
-    // is not the same as adding 1 to a uintptr_t.
-    QualType Val1Ty = E->getVal1()->getType();
-    llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
-    CharUnits PointeeIncAmt =
-        getContext().getTypeSizeInChars(MemTy->getPointeeType());
-    Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
-    Val1 = CreateMemTemp(Val1Ty, ".atomictmp");
-    EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty));
-  } else if (E->getOp() != AtomicExpr::Load) {
+    // Evaluate and discard the 'weak' argument.
+    if (E->getNumSubExprs() == 6)
+      EmitScalarExpr(E->getWeak());
+    break;
+
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_sub_fetch:
+    if (MemTy->isPointerType()) {
+      // For pointer arithmetic, we're required to do a bit of math:
+      // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
+      QualType Val1Ty = E->getVal1()->getType();
+      llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
+      CharUnits PointeeIncAmt =
+          getContext().getTypeSizeInChars(MemTy->getPointeeType());
+      Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
+      Val1 = CreateMemTemp(Val1Ty, ".atomictmp");
+      EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty));
+      break;
+    }
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__atomic_store_n:
+  case AtomicExpr::AO__atomic_exchange_n:
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_fetch_and:
+  case AtomicExpr::AO__atomic_fetch_or:
+  case AtomicExpr::AO__atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_and_fetch:
+  case AtomicExpr::AO__atomic_or_fetch:
+  case AtomicExpr::AO__atomic_xor_fetch:
     Val1 = EmitValToTemp(*this, E->getVal1());
+    break;
   }
 
-  if (E->getOp() != AtomicExpr::Store && !Dest)
+  if (!E->getType()->isVoidType() && !Dest)
     Dest = CreateMemTemp(E->getType(), ".atomicdst");
 
   // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
@@ -2846,9 +2956,11 @@
     // optimisation benefit possible from a libcall version of a weak compare
     // and exchange.
     // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
-     //                               void *desired, int success, int failure)
-    case AtomicExpr::CmpXchgWeak:
-    case AtomicExpr::CmpXchgStrong:
+    //                                void *desired, int success, int failure)
+    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__atomic_compare_exchange:
+    case AtomicExpr::AO__atomic_compare_exchange_n:
       LibCallName = "__atomic_compare_exchange";
       RetTy = getContext().BoolTy;
       Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
@@ -2861,7 +2973,9 @@
       break;
     // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
     //                        int order)
-    case AtomicExpr::Xchg:
+    case AtomicExpr::AO__c11_atomic_exchange:
+    case AtomicExpr::AO__atomic_exchange_n:
+    case AtomicExpr::AO__atomic_exchange:
       LibCallName = "__atomic_exchange";
       Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
                getContext().VoidPtrTy);
@@ -2869,13 +2983,17 @@
                getContext().VoidPtrTy);
       break;
     // void __atomic_store(size_t size, void *mem, void *val, int order)
-    case AtomicExpr::Store:
+    case AtomicExpr::AO__c11_atomic_store:
+    case AtomicExpr::AO__atomic_store:
+    case AtomicExpr::AO__atomic_store_n:
       LibCallName = "__atomic_store";
       Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
                getContext().VoidPtrTy);
       break;
     // void __atomic_load(size_t size, void *mem, void *return, int order)
-    case AtomicExpr::Load:
+    case AtomicExpr::AO__c11_atomic_load:
+    case AtomicExpr::AO__atomic_load:
+    case AtomicExpr::AO__atomic_load_n:
       LibCallName = "__atomic_load";
       Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
                getContext().VoidPtrTy);
@@ -2903,7 +3021,7 @@
     RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
     if (E->isCmpXChg())
       return Res;
-    if (E->getOp() == AtomicExpr::Store)
+    if (E->getType()->isVoidType())
       return RValue::get(0);
     return ConvertTempToRValue(*this, E->getType(), Dest);
   }
@@ -2943,24 +3061,31 @@
     default: // invalid order
       // We should not ever get here normally, but it's hard to
       // enforce that in general.
-      break; 
+      break;
     }
-    if (E->getOp() == AtomicExpr::Store || E->getOp() == AtomicExpr::Init)
+    if (E->getType()->isVoidType())
       return RValue::get(0);
     return ConvertTempToRValue(*this, E->getType(), OrigDest);
   }
 
   // Long case, when Order isn't obviously constant.
 
+  bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+                 E->getOp() == AtomicExpr::AO__atomic_store ||
+                 E->getOp() == AtomicExpr::AO__atomic_store_n;
+  bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+                E->getOp() == AtomicExpr::AO__atomic_load ||
+                E->getOp() == AtomicExpr::AO__atomic_load_n;
+
   // Create all the relevant BB's
   llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0,
                    *AcqRelBB = 0, *SeqCstBB = 0;
   MonotonicBB = createBasicBlock("monotonic", CurFn);
-  if (E->getOp() != AtomicExpr::Store)
+  if (!IsStore)
     AcquireBB = createBasicBlock("acquire", CurFn);
-  if (E->getOp() != AtomicExpr::Load)
+  if (!IsLoad)
     ReleaseBB = createBasicBlock("release", CurFn);
-  if (E->getOp() != AtomicExpr::Load && E->getOp() != AtomicExpr::Store)
+  if (!IsLoad && !IsStore)
     AcqRelBB = createBasicBlock("acqrel", CurFn);
   SeqCstBB = createBasicBlock("seqcst", CurFn);
   llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
@@ -2977,7 +3102,7 @@
   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                llvm::Monotonic);
   Builder.CreateBr(ContBB);
-  if (E->getOp() != AtomicExpr::Store) {
+  if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                  llvm::Acquire);
@@ -2985,14 +3110,14 @@
     SI->addCase(Builder.getInt32(1), AcquireBB);
     SI->addCase(Builder.getInt32(2), AcquireBB);
   }
-  if (E->getOp() != AtomicExpr::Load) {
+  if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                  llvm::Release);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32(3), ReleaseBB);
   }
-  if (E->getOp() != AtomicExpr::Load && E->getOp() != AtomicExpr::Store) {
+  if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                  llvm::AcquireRelease);
@@ -3007,7 +3132,7 @@
 
   // Cleanup and return
   Builder.SetInsertPoint(ContBB);
-  if (E->getOp() == AtomicExpr::Store)
+  if (E->getType()->isVoidType())
     return RValue::get(0);
   return ConvertTempToRValue(*this, E->getType(), OrigDest);
 }
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index c4ed0b0..1606e33 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -250,41 +250,11 @@
   case Builtin::BI__sync_swap_8:
   case Builtin::BI__sync_swap_16:
     return SemaBuiltinAtomicOverloaded(move(TheCallResult));
-  case Builtin::BI__atomic_load:
-  case Builtin::BI__c11_atomic_load:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Load);
-  case Builtin::BI__atomic_store:
-  case Builtin::BI__c11_atomic_store:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Store);
-  case Builtin::BI__atomic_init:
-  case Builtin::BI__c11_atomic_init:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Init);
-  case Builtin::BI__atomic_exchange:
-  case Builtin::BI__c11_atomic_exchange:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Xchg);
-  case Builtin::BI__atomic_compare_exchange_strong:
-  case Builtin::BI__c11_atomic_compare_exchange_strong:
-    return SemaAtomicOpsOverloaded(move(TheCallResult),
-                                   AtomicExpr::CmpXchgStrong);
-  case Builtin::BI__atomic_compare_exchange_weak:
-  case Builtin::BI__c11_atomic_compare_exchange_weak:
-    return SemaAtomicOpsOverloaded(move(TheCallResult),
-                                   AtomicExpr::CmpXchgWeak);
-  case Builtin::BI__atomic_fetch_add:
-  case Builtin::BI__c11_atomic_fetch_add:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Add);
-  case Builtin::BI__atomic_fetch_sub:
-  case Builtin::BI__c11_atomic_fetch_sub:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Sub);
-  case Builtin::BI__atomic_fetch_and:
-  case Builtin::BI__c11_atomic_fetch_and:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::And);
-  case Builtin::BI__atomic_fetch_or:
-  case Builtin::BI__c11_atomic_fetch_or:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Or);
-  case Builtin::BI__atomic_fetch_xor:
-  case Builtin::BI__c11_atomic_fetch_xor:
-    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::Xor);
+#define BUILTIN(ID, TYPE, ATTRS)
+#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
+  case Builtin::BI##ID: \
+    return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::AO##ID);
+#include "clang/Basic/Builtins.def"
   case Builtin::BI__builtin_annotation:
     if (CheckBuiltinAnnotationString(*this, TheCall->getArg(1)))
       return ExprError();
@@ -515,74 +485,174 @@
   return false;
 }
 
-ExprResult
-Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op) {
+ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
+                                         AtomicExpr::AtomicOp Op) {
   CallExpr *TheCall = cast<CallExpr>(TheCallResult.get());
   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 
-  // All these operations take one of the following four forms:
-  // T   __c11_atomic_load(_Atomic(T)*, int)                             (loads)
-  // T*  __c11_atomic_add(_Atomic(T*)*, ptrdiff_t, int)        (pointer add/sub)
-  // int __c11_atomic_compare_exchange_strong(_Atomic(T)*, T*, T, int, int)
-  //                                                                   (cmpxchg)
-  // T   __c11_atomic_exchange(_Atomic(T)*, T, int)            (everything else)
-  // where T is an appropriate type, and the int paremeterss are for orderings.
-  unsigned NumVals = 1;
-  unsigned NumOrders = 1;
-  if (Op == AtomicExpr::Load) {
-    NumVals = 0;
-  } else if (Op == AtomicExpr::CmpXchgWeak || Op == AtomicExpr::CmpXchgStrong) {
-    NumVals = 2;
-    NumOrders = 2;
-  }
-  if (Op == AtomicExpr::Init)
-    NumOrders = 0;
+  // All these operations take one of the following forms:
+  enum {
+    // C    __c11_atomic_init(A *, C)
+    Init,
+    // C    __c11_atomic_load(A *, int)
+    Load,
+    // void __atomic_load(A *, CP, int)
+    Copy,
+    // C    __c11_atomic_add(A *, M, int)
+    Arithmetic,
+    // C    __atomic_exchange_n(A *, CP, int)
+    Xchg,
+    // void __atomic_exchange(A *, C *, CP, int)
+    GNUXchg,
+    // bool __c11_atomic_compare_exchange_strong(A *, C *, CP, int, int)
+    C11CmpXchg,
+    // bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
+    GNUCmpXchg
+  } Form = Init;
+  const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 4, 5, 6 };
+  const unsigned NumVals[] = { 1, 0, 1, 1, 1, 2, 2, 3 };
+  // where:
+  //   C is an appropriate type,
+  //   A is volatile _Atomic(C) for __c11 builtins and is C for GNU builtins,
+  //   CP is C for __c11 builtins and GNU _n builtins and is C * otherwise,
+  //   M is C if C is an integer, and ptrdiff_t if C is a pointer, and
+  //   the int parameters are for orderings.
 
-  if (TheCall->getNumArgs() < NumVals+NumOrders+1) {
+  assert(AtomicExpr::AO__c11_atomic_init == 0 &&
+         AtomicExpr::AO__c11_atomic_fetch_xor + 1 == AtomicExpr::AO__atomic_load
+         && "need to update code for modified C11 atomics");
+  bool IsC11 = Op >= AtomicExpr::AO__c11_atomic_init &&
+               Op <= AtomicExpr::AO__c11_atomic_fetch_xor;
+  bool IsN = Op == AtomicExpr::AO__atomic_load_n ||
+             Op == AtomicExpr::AO__atomic_store_n ||
+             Op == AtomicExpr::AO__atomic_exchange_n ||
+             Op == AtomicExpr::AO__atomic_compare_exchange_n;
+  bool IsAddSub = false;
+
+  switch (Op) {
+  case AtomicExpr::AO__c11_atomic_init:
+    Form = Init;
+    break;
+
+  case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__atomic_load_n:
+    Form = Load;
+    break;
+
+  case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__atomic_load:
+  case AtomicExpr::AO__atomic_store:
+  case AtomicExpr::AO__atomic_store_n:
+    Form = Copy;
+    break;
+
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_sub_fetch:
+    IsAddSub = true;
+    // Fall through.
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_fetch_and:
+  case AtomicExpr::AO__atomic_fetch_or:
+  case AtomicExpr::AO__atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_and_fetch:
+  case AtomicExpr::AO__atomic_or_fetch:
+  case AtomicExpr::AO__atomic_xor_fetch:
+    Form = Arithmetic;
+    break;
+
+  case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__atomic_exchange_n:
+    Form = Xchg;
+    break;
+
+  case AtomicExpr::AO__atomic_exchange:
+    Form = GNUXchg;
+    break;
+
+  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+    Form = C11CmpXchg;
+    break;
+
+  case AtomicExpr::AO__atomic_compare_exchange:
+  case AtomicExpr::AO__atomic_compare_exchange_n:
+    Form = GNUCmpXchg;
+    break;
+  }
+
+  // Check we have the right number of arguments.
+  if (TheCall->getNumArgs() < NumArgs[Form]) {
     Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
-      << 0 << NumVals+NumOrders+1 << TheCall->getNumArgs()
+      << 0 << NumArgs[Form] << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
-  } else if (TheCall->getNumArgs() > NumVals+NumOrders+1) {
-    Diag(TheCall->getArg(NumVals+NumOrders+1)->getLocStart(),
+  } else if (TheCall->getNumArgs() > NumArgs[Form]) {
+    Diag(TheCall->getArg(NumArgs[Form])->getLocStart(),
          diag::err_typecheck_call_too_many_args)
-      << 0 << NumVals+NumOrders+1 << TheCall->getNumArgs()
+      << 0 << NumArgs[Form] << TheCall->getNumArgs()
       << TheCall->getCallee()->getSourceRange();
     return ExprError();
   }
 
-  // Inspect the first argument of the atomic operation.  This should always be
-  // a pointer to an _Atomic type.
+  // Inspect the first argument of the atomic operation.
   Expr *Ptr = TheCall->getArg(0);
   Ptr = DefaultFunctionArrayLvalueConversion(Ptr).get();
   const PointerType *pointerType = Ptr->getType()->getAs<PointerType>();
   if (!pointerType) {
-    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic)
+    Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer)
       << Ptr->getType() << Ptr->getSourceRange();
     return ExprError();
   }
 
-  QualType AtomTy = pointerType->getPointeeType();
-  if (!AtomTy->isAtomicType()) {
-    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic)
-      << Ptr->getType() << Ptr->getSourceRange();
-    return ExprError();
+  // For a __c11 builtin, this should be a pointer to an _Atomic type.
+  QualType AtomTy = pointerType->getPointeeType(); // 'A'
+  QualType ValType = AtomTy; // 'C'
+  if (IsC11) {
+    if (!AtomTy->isAtomicType()) {
+      Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic)
+        << Ptr->getType() << Ptr->getSourceRange();
+      return ExprError();
+    }
+    ValType = AtomTy->getAs<AtomicType>()->getValueType();
   }
-  QualType ValType = AtomTy->getAs<AtomicType>()->getValueType();
 
-  if ((Op == AtomicExpr::Add || Op == AtomicExpr::Sub) &&
-      !ValType->isIntegerType() && !ValType->isPointerType()) {
+  // For an arithmetic operation, the implied arithmetic must be well-formed.
+  if (Form == Arithmetic) {
+    // GCC does not enforce these rules for GNU atomics, but we enforce them for sanity.
+    if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType()) {
+      Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic_int_or_ptr)
+        << IsC11 << Ptr->getType() << Ptr->getSourceRange();
+      return ExprError();
+    }
+    if (!IsAddSub && !ValType->isIntegerType()) {
+      Diag(DRE->getLocStart(), diag::err_atomic_op_bitwise_needs_atomic_int)
+        << IsC11 << Ptr->getType() << Ptr->getSourceRange();
+      return ExprError();
+    }
+  } else if (IsN && !ValType->isIntegerType() && !ValType->isPointerType()) {
+    // For __atomic_*_n operations, the value type must be a scalar integral or
+    // pointer type which is 1, 2, 4, 8 or 16 bytes in length.
     Diag(DRE->getLocStart(), diag::err_atomic_op_needs_atomic_int_or_ptr)
+      << IsC11 << Ptr->getType() << Ptr->getSourceRange();
+    return ExprError();
+  }
+
+  if (!IsC11 && !AtomTy.isTriviallyCopyableType(Context)) {
+    // For GNU atomics, require a trivially-copyable type. This is not part of
+    // the GNU atomics specification, but we enforce it for sanity.
+    Diag(DRE->getLocStart(), diag::err_atomic_op_needs_trivial_copy)
       << Ptr->getType() << Ptr->getSourceRange();
     return ExprError();
   }
 
-  if (!ValType->isIntegerType() &&
-      (Op == AtomicExpr::And || Op == AtomicExpr::Or || Op == AtomicExpr::Xor)){
-    Diag(DRE->getLocStart(), diag::err_atomic_op_logical_needs_atomic_int)
-      << Ptr->getType() << Ptr->getSourceRange();
-    return ExprError();
-  }
+  // FIXME: For any builtin other than a load, the ValType must not be
+  // const-qualified.
 
   switch (ValType.getObjCLifetime()) {
   case Qualifiers::OCL_None:
@@ -593,63 +663,107 @@
   case Qualifiers::OCL_Weak:
   case Qualifiers::OCL_Strong:
   case Qualifiers::OCL_Autoreleasing:
+    // FIXME: Can this happen? By this point, ValType should be known
+    // to be trivially copyable.
     Diag(DRE->getLocStart(), diag::err_arc_atomic_ownership)
       << ValType << Ptr->getSourceRange();
     return ExprError();
   }
 
   QualType ResultType = ValType;
-  if (Op == AtomicExpr::Store || Op == AtomicExpr::Init)
+  if (Form == Copy || Form == GNUXchg || Form == Init)
     ResultType = Context.VoidTy;
-  else if (Op == AtomicExpr::CmpXchgWeak || Op == AtomicExpr::CmpXchgStrong)
+  else if (Form == C11CmpXchg || Form == GNUCmpXchg)
     ResultType = Context.BoolTy;
 
+  // The type of a parameter passed 'by value'. In the GNU atomics, such
+  // arguments are actually passed as pointers.
+  QualType ByValType = ValType; // 'CP'
+  if (!IsC11 && !IsN)
+    ByValType = Ptr->getType();
+
   // The first argument --- the pointer --- has a fixed type; we
   // deduce the types of the rest of the arguments accordingly.  Walk
   // the remaining arguments, converting them to the deduced value type.
-  for (unsigned i = 1; i != NumVals+NumOrders+1; ++i) {
-    ExprResult Arg = TheCall->getArg(i);
+  for (unsigned i = 1; i != NumArgs[Form]; ++i) {
     QualType Ty;
-    if (i < NumVals+1) {
-      // The second argument to a cmpxchg is a pointer to the data which will
-      // be exchanged. The second argument to a pointer add/subtract is the
-      // amount to add/subtract, which must be a ptrdiff_t.  The third
-      // argument to a cmpxchg and the second argument in all other cases
-      // is the type of the value.
-      if (i == 1 && (Op == AtomicExpr::CmpXchgWeak ||
-                     Op == AtomicExpr::CmpXchgStrong))
-         Ty = Context.getPointerType(ValType.getUnqualifiedType());
-      else if (!ValType->isIntegerType() &&
-               (Op == AtomicExpr::Add || Op == AtomicExpr::Sub))
-        Ty = Context.getPointerDiffType();
-      else
-        Ty = ValType;
+    if (i < NumVals[Form] + 1) {
+      switch (i) {
+      case 1:
+        // The second argument is the non-atomic operand. For arithmetic, this
+        // is always passed by value, and for a compare_exchange it is always
+        // passed by address. For the rest, GNU uses by-address and C11 uses
+        // by-value.
+        assert(Form != Load);
+        if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()))
+          Ty = ValType;
+        else if (Form == Copy || Form == Xchg)
+          Ty = ByValType;
+        else if (Form == Arithmetic)
+          Ty = Context.getPointerDiffType();
+        else
+          Ty = Context.getPointerType(ValType.getUnqualifiedType());
+        break;
+      case 2:
+        // The third argument to compare_exchange / GNU exchange is a
+        // (pointer to a) desired value.
+        Ty = ByValType;
+        break;
+      case 3:
+        // The fourth argument to GNU compare_exchange is a 'weak' flag.
+        Ty = Context.BoolTy;
+        break;
+      }
     } else {
       // The order(s) are always converted to int.
       Ty = Context.IntTy;
     }
+
     InitializedEntity Entity =
         InitializedEntity::InitializeParameter(Context, Ty, false);
+    ExprResult Arg = TheCall->getArg(i);
     Arg = PerformCopyInitialization(Entity, SourceLocation(), Arg);
     if (Arg.isInvalid())
       return true;
     TheCall->setArg(i, Arg.get());
   }
 
+  // Permute the arguments into a 'consistent' order.
   SmallVector<Expr*, 5> SubExprs;
   SubExprs.push_back(Ptr);
-  if (Op == AtomicExpr::Load) {
-    SubExprs.push_back(TheCall->getArg(1)); // Order
-  } else if (Op == AtomicExpr::Init) {
+  switch (Form) {
+  case Init:
+    // Note, AtomicExpr::getVal1() has a special case for this atomic.
     SubExprs.push_back(TheCall->getArg(1)); // Val1
-  } else if (Op != AtomicExpr::CmpXchgWeak && Op != AtomicExpr::CmpXchgStrong) {
+    break;
+  case Load:
+    SubExprs.push_back(TheCall->getArg(1)); // Order
+    break;
+  case Copy:
+  case Arithmetic:
+  case Xchg:
     SubExprs.push_back(TheCall->getArg(2)); // Order
     SubExprs.push_back(TheCall->getArg(1)); // Val1
-  } else {
+    break;
+  case GNUXchg:
+    // Note, AtomicExpr::getVal2() has a special case for this atomic.
+    SubExprs.push_back(TheCall->getArg(3)); // Order
+    SubExprs.push_back(TheCall->getArg(1)); // Val1
+    SubExprs.push_back(TheCall->getArg(2)); // Val2
+    break;
+  case C11CmpXchg:
     SubExprs.push_back(TheCall->getArg(3)); // Order
     SubExprs.push_back(TheCall->getArg(1)); // Val1
     SubExprs.push_back(TheCall->getArg(4)); // OrderFail
     SubExprs.push_back(TheCall->getArg(2)); // Val2
+    break;
+  case GNUCmpXchg:
+    SubExprs.push_back(TheCall->getArg(4)); // Order
+    SubExprs.push_back(TheCall->getArg(1)); // Val1
+    SubExprs.push_back(TheCall->getArg(5)); // OrderFail
+    SubExprs.push_back(TheCall->getArg(2)); // Val2
+    SubExprs.push_back(TheCall->getArg(3)); // Weak
+    break;
   }
 
   return Owned(new (Context) AtomicExpr(TheCall->getCallee()->getLocStart(),