Atomics: support min/max orthogonally

We seem to have been gradually growing support for atomic min/max operations
(exposing longstanding IR atomicrmw instructions). But until now there have
been gaps in the expected intrinsics. This adds support for the C11-style
intrinsics (i.e. taking _Atomic, rather than individually blessed by C11
standard), and the variants that return the new value instead of the original
one.

That way, people won't be misled by trying one form and it not working, and the
front-end is more friendly to people using _Atomic types, as we recommend.
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 5059163..039fe6d 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -488,13 +488,36 @@
   CGF.Builder.SetInsertPoint(ContBB);
 }
 
+/// Duplicate the atomic min/max operation in conventional IR for the builtin
+/// variants that return the new rather than the original value.
+static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
+                                         AtomicExpr::AtomicOp Op,
+                                         bool IsSigned,
+                                         llvm::Value *OldVal,
+                                         llvm::Value *RHS) {
+  llvm::CmpInst::Predicate Pred;
+  switch (Op) {
+  default:
+    llvm_unreachable("Unexpected min/max operation");
+  case AtomicExpr::AO__atomic_max_fetch:
+    Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
+    break;
+  case AtomicExpr::AO__atomic_min_fetch:
+    Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
+    break;
+  }
+  llvm::Value *Cmp = Builder.CreateICmp(Pred, OldVal, RHS, "tst");
+  return Builder.CreateSelect(Cmp, OldVal, RHS, "newval");
+}
+
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
                          uint64_t Size, llvm::AtomicOrdering Order,
                          llvm::SyncScope::ID Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
-  llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
+  bool PostOpMinMax = false;
+  unsigned PostOp = 0;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -588,12 +611,20 @@
     Op = llvm::AtomicRMWInst::Sub;
     break;
 
+  case AtomicExpr::AO__atomic_min_fetch:
+    PostOpMinMax = true;
+    LLVM_FALLTHROUGH;
+  case AtomicExpr::AO__c11_atomic_fetch_min:
   case AtomicExpr::AO__opencl_atomic_fetch_min:
   case AtomicExpr::AO__atomic_fetch_min:
     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
                                                   : llvm::AtomicRMWInst::UMin;
     break;
 
+  case AtomicExpr::AO__atomic_max_fetch:
+    PostOpMinMax = true;
+    LLVM_FALLTHROUGH;
+  case AtomicExpr::AO__c11_atomic_fetch_max:
   case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_max:
     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
@@ -643,8 +674,13 @@
   // For __atomic_*_fetch operations, perform the operation again to
   // determine the value which was written.
   llvm::Value *Result = RMWI;
-  if (PostOp)
-    Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1);
+  if (PostOpMinMax)
+    Result = EmitPostAtomicMinMax(CGF.Builder, E->getOp(),
+                                  E->getValueType()->isSignedIntegerType(),
+                                  RMWI, LoadVal1);
+  else if (PostOp)
+    Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
+                                     LoadVal1);
   if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
     Result = CGF.Builder.CreateNot(Result);
   CGF.Builder.CreateStore(Result, Dest);
@@ -853,6 +889,8 @@
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__c11_atomic_fetch_min:
   case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__opencl_atomic_fetch_xor:
@@ -866,8 +904,10 @@
   case AtomicExpr::AO__atomic_or_fetch:
   case AtomicExpr::AO__atomic_xor_fetch:
   case AtomicExpr::AO__atomic_nand_fetch:
-  case AtomicExpr::AO__atomic_fetch_min:
+  case AtomicExpr::AO__atomic_max_fetch:
+  case AtomicExpr::AO__atomic_min_fetch:
   case AtomicExpr::AO__atomic_fetch_max:
+  case AtomicExpr::AO__atomic_fetch_min:
     Val1 = EmitValToTemp(*this, E->getVal1());
     break;
   }
@@ -916,14 +956,18 @@
     case AtomicExpr::AO__opencl_atomic_fetch_min:
     case AtomicExpr::AO__opencl_atomic_fetch_max:
     case AtomicExpr::AO__atomic_fetch_xor:
+    case AtomicExpr::AO__c11_atomic_fetch_max:
+    case AtomicExpr::AO__c11_atomic_fetch_min:
     case AtomicExpr::AO__atomic_add_fetch:
     case AtomicExpr::AO__atomic_and_fetch:
     case AtomicExpr::AO__atomic_nand_fetch:
     case AtomicExpr::AO__atomic_or_fetch:
     case AtomicExpr::AO__atomic_sub_fetch:
     case AtomicExpr::AO__atomic_xor_fetch:
-    case AtomicExpr::AO__atomic_fetch_min:
     case AtomicExpr::AO__atomic_fetch_max:
+    case AtomicExpr::AO__atomic_fetch_min:
+    case AtomicExpr::AO__atomic_max_fetch:
+    case AtomicExpr::AO__atomic_min_fetch:
       // For these, only library calls for certain sizes exist.
       UseOptimizedLibcall = true;
       break;
@@ -991,6 +1035,7 @@
     QualType RetTy;
     bool HaveRetTy = false;
     llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
+    bool PostOpMinMax = false;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
     case AtomicExpr::AO__opencl_atomic_init:
@@ -1112,6 +1157,10 @@
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__atomic_min_fetch:
+      PostOpMinMax = true;
+      LLVM_FALLTHROUGH;
+    case AtomicExpr::AO__c11_atomic_fetch_min:
     case AtomicExpr::AO__atomic_fetch_min:
     case AtomicExpr::AO__opencl_atomic_fetch_min:
       LibCallName = E->getValueType()->isSignedIntegerType()
@@ -1120,6 +1169,10 @@
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         LoweredMemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__atomic_max_fetch:
+      PostOpMinMax = true;
+      LLVM_FALLTHROUGH;
+    case AtomicExpr::AO__c11_atomic_fetch_max:
     case AtomicExpr::AO__atomic_fetch_max:
     case AtomicExpr::AO__opencl_atomic_fetch_max:
       LibCallName = E->getValueType()->isSignedIntegerType()
@@ -1171,7 +1224,7 @@
     // PostOp is only needed for the atomic_*_fetch operations, and
     // thus is only needed for and implemented in the
     // UseOptimizedLibcall codepath.
-    assert(UseOptimizedLibcall || !PostOp);
+    assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));
 
     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
     // The value is returned directly from the libcall.
@@ -1182,7 +1235,12 @@
     // provided an out-param.
     if (UseOptimizedLibcall && Res.getScalarVal()) {
       llvm::Value *ResVal = Res.getScalarVal();
-      if (PostOp) {
+      if (PostOpMinMax) {
+        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
+        ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
+                                      E->getValueType()->isSignedIntegerType(),
+                                      ResVal, LoadVal1);
+      } else if (PostOp) {
         llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
         ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
       }