Promote atomic type sizes up to a power of two, capped by
MaxAtomicPromoteWidth.  Fix a ton of terrible bugs with
_Atomic types and (non-intrinsic-mediated) loads and stores
thereto.
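
For reference, the promotion rule described above amounts to rounding an
_Atomic type's size up to the next power of two, unless that would exceed
the target's MaxAtomicPromoteWidth.  A minimal sketch of that rule follows;
the helper name is made up and the real logic lives in the ASTContext
layout code, so treat it as illustration only:

    #include <stdint.h>

    // Sizes are in bits; maxPromoteWidth plays the role of
    // TargetInfo::getMaxAtomicPromoteWidth().
    static uint64_t promoteAtomicSize(uint64_t valueSize,
                                      uint64_t maxPromoteWidth) {
      if (valueSize > maxPromoteWidth)
        return valueSize;            // too wide to promote; leave as-is
      uint64_t promoted = 1;
      while (promoted < valueSize)   // round up to a power of two
        promoted <<= 1;
      return promoted;
    }

So, for example, a 24-bit value type would be promoted to a 32-bit atomic
size (with explicit padding in the IR layout), while a type already wider
than MaxAtomicPromoteWidth is left at its natural size.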

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@176658 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index f17e48d..817d5c4 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -17,10 +17,169 @@
 #include "clang/AST/ASTContext.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
 
 using namespace clang;
 using namespace CodeGen;
 
+// The ABI values for various atomic memory orderings.
+enum AtomicOrderingKind {
+  AO_ABI_memory_order_relaxed = 0,
+  AO_ABI_memory_order_consume = 1,
+  AO_ABI_memory_order_acquire = 2,
+  AO_ABI_memory_order_release = 3,
+  AO_ABI_memory_order_acq_rel = 4,
+  AO_ABI_memory_order_seq_cst = 5
+};
+
+namespace {
+  class AtomicInfo {
+    CodeGenFunction &CGF;
+    QualType AtomicTy;
+    QualType ValueTy;
+    uint64_t AtomicSizeInBits;
+    uint64_t ValueSizeInBits;
+    CharUnits AtomicAlign;
+    CharUnits ValueAlign;
+    CharUnits LValueAlign;
+    TypeEvaluationKind EvaluationKind;
+    bool UseLibcall;
+  public:
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
+      assert(lvalue.isSimple());
+
+      AtomicTy = lvalue.getType();
+      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
+      EvaluationKind = CGF.getEvaluationKind(ValueTy);
+
+      ASTContext &C = CGF.getContext();
+
+      uint64_t valueAlignInBits;
+      llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);
+
+      uint64_t atomicAlignInBits;
+      llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);
+
+      assert(ValueSizeInBits <= AtomicSizeInBits);
+      assert(valueAlignInBits <= atomicAlignInBits);
+
+      AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits);
+      ValueAlign = C.toCharUnitsFromBits(valueAlignInBits);
+      if (lvalue.getAlignment().isZero())
+        lvalue.setAlignment(AtomicAlign);
+
+      UseLibcall =
+        (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
+         AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+    }
+
+    QualType getAtomicType() const { return AtomicTy; }
+    QualType getValueType() const { return ValueTy; }
+    CharUnits getAtomicAlignment() const { return AtomicAlign; }
+    CharUnits getValueAlignment() const { return ValueAlign; }
+    uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
+    TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
+    bool shouldUseLibcall() const { return UseLibcall; }
+
+    /// Is the atomic size larger than the underlying value type?
+    ///
+    /// Note that the absence of padding does not mean that atomic
+    /// objects are completely interchangeable with non-atomic
+    /// objects: we might have promoted the alignment of a type
+    /// without making it bigger.
+    bool hasPadding() const {
+      return (ValueSizeInBits != AtomicSizeInBits);
+    }
+
+    void emitMemSetZeroIfNecessary(LValue dest) const;
+
+    llvm::Value *getAtomicSizeValue() const {
+      CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
+      return CGF.CGM.getSize(size);
+    }
+
+    /// Cast the given pointer to an integer pointer suitable for
+    /// atomic operations.
+    llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;
+
+    /// Turn an atomic-layout object into an r-value.
+    RValue convertTempToRValue(llvm::Value *addr,
+                               AggValueSlot resultSlot) const;
+
+    /// Copy an atomic r-value into atomic-layout memory.
+    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;
+
+    /// Project an l-value down to the value field.
+    LValue projectValue(LValue lvalue) const {
+      llvm::Value *addr = lvalue.getAddress();
+      if (hasPadding())
+        addr = CGF.Builder.CreateStructGEP(addr, 0);
+
+      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
+                              CGF.getContext(), lvalue.getTBAAInfo());
+    }
+
+    /// Materialize an atomic r-value in atomic-layout memory.
+    llvm::Value *materializeRValue(RValue rvalue) const;
+
+  private:
+    bool requiresMemSetZero(llvm::Type *type) const;
+  };
+}
+
+static RValue emitAtomicLibcall(CodeGenFunction &CGF,
+                                StringRef fnName,
+                                QualType resultType,
+                                CallArgList &args) {
+  const CGFunctionInfo &fnInfo =
+    CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args,
+            FunctionType::ExtInfo(), RequiredArgs::All);
+  llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
+  llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName);
+  return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args);
+}
+
+/// Does a store of the given IR type modify the full expected width?
+static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
+                           uint64_t expectedSize) {
+  return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
+}
+
+/// Does the atomic type require memsetting to zero before initialization?
+///
+/// The IR type is provided as a way of making certain queries faster.
+bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
+  // If the atomic type has size padding, we definitely need a memset.
+  if (hasPadding()) return true;
+
+  // Otherwise, do some simple heuristics to try to avoid it:
+  switch (getEvaluationKind()) {
+  // For scalars and complexes, check whether the store size of the
+  // type uses the full size.
+  case TEK_Scalar:
+    return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
+  case TEK_Complex:
+    return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
+                           AtomicSizeInBits / 2);
+
+  // Just be pessimistic about aggregates.
+  case TEK_Aggregate:
+    return true;
+  }
+  llvm_unreachable("bad evaluation kind");
+}
+
+void AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
+  llvm::Value *addr = dest.getAddress();
+  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
+    return;
+
+  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+                           AtomicSizeInBits / 8,
+                           dest.getAlignment().getQuantity());
+}
+
 static void
 EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
              llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
@@ -177,24 +336,9 @@
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
     assert(!Dest && "Init does not return a value");
-    LValue LV = MakeAddrLValue(Ptr, AtomicTy, alignChars);
-    switch (getEvaluationKind(E->getVal1()->getType())) {
-    case TEK_Scalar:
-      EmitScalarInit(EmitScalarExpr(E->getVal1()), LV);
-      return RValue::get(0);
-    case TEK_Complex:
-      EmitComplexExprIntoLValue(E->getVal1(), LV, /*isInit*/ true);
-      return RValue::get(0);
-    case TEK_Aggregate: {
-      AggValueSlot Slot = AggValueSlot::forLValue(LV,
-                                        AggValueSlot::IsNotDestructed,
-                                        AggValueSlot::DoesNotNeedGCBarriers,
-                                        AggValueSlot::IsNotAliased);
-      EmitAggExpr(E->getVal1(), Slot);
-      return RValue::get(0);
-    }
-    }
-    llvm_unreachable("bad evaluation kind");
+    LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext());
+    EmitAtomicInit(E->getVal1(), lvalue);
+    return RValue::get(0);
   }
 
   Order = EmitScalarExpr(E->getOrder());
@@ -385,30 +529,30 @@
   if (isa<llvm::ConstantInt>(Order)) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     switch (ord) {
-    case 0:  // memory_order_relaxed
+    case AO_ABI_memory_order_relaxed:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                    llvm::Monotonic);
       break;
-    case 1:  // memory_order_consume
-    case 2:  // memory_order_acquire
+    case AO_ABI_memory_order_consume:
+    case AO_ABI_memory_order_acquire:
       if (IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                    llvm::Acquire);
       break;
-    case 3:  // memory_order_release
+    case AO_ABI_memory_order_release:
       if (IsLoad)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                    llvm::Release);
       break;
-    case 4:  // memory_order_acq_rel
+    case AO_ABI_memory_order_acq_rel:
       if (IsLoad || IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                    llvm::AcquireRelease);
       break;
-    case 5:  // memory_order_seq_cst
+    case AO_ABI_memory_order_seq_cst:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                    llvm::SequentiallyConsistent);
       break;
@@ -483,3 +627,316 @@
     return RValue::get(0);
   return convertTempToRValue(OrigDest, E->getType());
 }
+
+llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
+  unsigned addrspace =
+    cast<llvm::PointerType>(addr->getType())->getAddressSpace();
+  llvm::IntegerType *ty =
+    llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
+  return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
+}
+
+RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
+                                       AggValueSlot resultSlot) const {
+  if (EvaluationKind == TEK_Aggregate) {
+    // Nothing to do if the result is ignored.
+    if (resultSlot.isIgnored()) return resultSlot.asRValue();
+
+    assert(resultSlot.getAddr() == addr || hasPadding());
+
+    // In these cases, we should have emitted directly into the result slot.
+    if (!hasPadding() || resultSlot.isValueOfAtomic())
+      return resultSlot.asRValue();
+
+    // Otherwise, fall into the common path.
+  }
+
+  // Drill into the padding structure if we have one.
+  if (hasPadding())
+    addr = CGF.Builder.CreateStructGEP(addr, 0);
+
+  // If we're emitting to an aggregate, copy into the result slot.
+  if (EvaluationKind == TEK_Aggregate) {
+    CGF.EmitAggregateCopy(resultSlot.getAddr(), addr, getValueType(),
+                          resultSlot.isVolatile());
+    return resultSlot.asRValue();
+  }
+
+  // Otherwise, just convert the temporary to an r-value using the
+  // normal conversion routine.
+  return CGF.convertTempToRValue(addr, getValueType());
+}
+
+/// Emit a load from an l-value of atomic type.  Note that the r-value
+/// we produce is an r-value of the atomic *value* type.
+RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) {
+  AtomicInfo atomics(*this, src);
+
+  // Check whether we should use a library call.
+  if (atomics.shouldUseLibcall()) {
+    llvm::Value *tempAddr;
+    if (resultSlot.isValueOfAtomic()) {
+      assert(atomics.getEvaluationKind() == TEK_Aggregate);
+      tempAddr = resultSlot.getPaddedAtomicAddr();
+    } else if (!resultSlot.isIgnored() && !atomics.hasPadding()) {
+      assert(atomics.getEvaluationKind() == TEK_Aggregate);
+      tempAddr = resultSlot.getAddr();
+    } else {
+      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+    }
+
+    // void __atomic_load(size_t size, void *mem, void *return, int order);
+    CallArgList args;
+    args.add(RValue::get(atomics.getAtomicSizeValue()),
+             getContext().getSizeType());
+    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
+             getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
+             getContext().VoidPtrTy);
+    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
+                                                AO_ABI_memory_order_seq_cst)),
+             getContext().IntTy);
+    emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
+
+    // Produce the r-value.
+    return atomics.convertTempToRValue(tempAddr, resultSlot);
+  }
+
+  // Okay, we're doing this natively.
+  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
+  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
+  load->setAtomic(llvm::SequentiallyConsistent);
+
+  // Other decoration.
+  load->setAlignment(src.getAlignment().getQuantity());
+  if (src.isVolatileQualified())
+    load->setVolatile(true);
+  if (src.getTBAAInfo())
+    CGM.DecorateInstruction(load, src.getTBAAInfo());
+
+  // Okay, turn that back into the original value type.
+  QualType valueType = atomics.getValueType();
+  llvm::Value *result = load;
+
+  // If we're ignoring an aggregate return, don't do anything.
+  if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
+    return RValue::getAggregate(0, false);
+
+  // The easiest way to do this is to go through memory, but we
+  // try not to in some easy cases.
+  if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) {
+    llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
+    if (isa<llvm::IntegerType>(resultTy)) {
+      assert(result->getType() == resultTy);
+      result = EmitFromMemory(result, valueType);
+    } else if (isa<llvm::PointerType>(resultTy)) {
+      result = Builder.CreateIntToPtr(result, resultTy);
+    } else {
+      result = Builder.CreateBitCast(result, resultTy);
+    }
+    return RValue::get(result);
+  }
+
+  // Create a temporary.  This needs to be big enough to hold the
+  // atomic integer.
+  llvm::Value *temp;
+  bool tempIsVolatile = false;
+  CharUnits tempAlignment;
+  if (atomics.getEvaluationKind() == TEK_Aggregate &&
+      (!atomics.hasPadding() || resultSlot.isValueOfAtomic())) {
+    assert(!resultSlot.isIgnored());
+    if (resultSlot.isValueOfAtomic()) {
+      temp = resultSlot.getPaddedAtomicAddr();
+      tempAlignment = atomics.getAtomicAlignment();
+    } else {
+      temp = resultSlot.getAddr();
+      tempAlignment = atomics.getValueAlignment();
+    }
+    tempIsVolatile = resultSlot.isVolatile();
+  } else {
+    temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+    tempAlignment = atomics.getAtomicAlignment();
+  }
+
+  // Slam the integer into the temporary.
+  llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp);
+  Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity())
+    ->setVolatile(tempIsVolatile);
+
+  return atomics.convertTempToRValue(temp, resultSlot);
+}
+
+
+
+/// Copy an r-value into memory as part of storing to an atomic type.
+/// This needs to create a bit-pattern suitable for atomic operations.
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
+  // If we have an aggregate r-value, it should already be of the atomic
+  // type, which means the caller is responsible for having zeroed any
+  // padding.  Just do an aggregate copy of that type.
+  if (rvalue.isAggregate()) {
+    CGF.EmitAggregateCopy(dest.getAddress(),
+                          rvalue.getAggregateAddr(),
+                          getAtomicType(),
+                          (rvalue.isVolatileQualified()
+                           || dest.isVolatileQualified()),
+                          dest.getAlignment());
+    return;
+  }
+
+  // Otherwise, we're copying a scalar or complex value into the buffer.
+
+  // Zero out the buffer if necessary.
+  emitMemSetZeroIfNecessary(dest);
+
+  // Drill past the padding if present.
+  dest = projectValue(dest);
+
+  // Okay, store the rvalue in.
+  if (rvalue.isScalar()) {
+    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
+  } else {
+    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
+  }
+}
+
+
+/// Materialize an r-value into memory for the purposes of storing it
+/// to an atomic type.
+llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {
+  // Aggregate r-values are already in memory, and EmitAtomicStore
+  // requires them to be values of the atomic type.
+  if (rvalue.isAggregate())
+    return rvalue.getAggregateAddr();
+
+  // Otherwise, make a temporary and materialize into it.
+  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
+  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
+  emitCopyIntoMemory(rvalue, tempLV);
+  return temp;
+}
+
+/// Emit a store to an l-value of atomic type.
+///
+/// Note that the r-value is expected to be an r-value *of the atomic
+/// type*; this means that for aggregate r-values, it should include
+/// storage for any padding that was necessary.
+void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
+                                      bool isInit) {
+  // If this is an aggregate r-value, it should agree in type except
+  // maybe for address-space qualification.
+  assert(!rvalue.isAggregate() ||
+         rvalue.getAggregateAddr()->getType()->getPointerElementType()
+           == dest.getAddress()->getType()->getPointerElementType());
+
+  AtomicInfo atomics(*this, dest);
+
+  // If this is an initialization, just put the value there normally.
+  if (isInit) {
+    atomics.emitCopyIntoMemory(rvalue, dest);
+    return;
+  }
+
+  // Check whether we should use a library call.
+  if (atomics.shouldUseLibcall()) {
+    // Produce a source address.
+    llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
+
+    // void __atomic_store(size_t size, void *mem, void *val, int order)
+    CallArgList args;
+    args.add(RValue::get(atomics.getAtomicSizeValue()),
+             getContext().getSizeType());
+    args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
+             getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
+             getContext().VoidPtrTy);
+    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
+                                                AO_ABI_memory_order_seq_cst)),
+             getContext().IntTy);
+    emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+    return;
+  }
+
+  // Okay, we're doing this natively.
+  llvm::Value *intValue;
+
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory.
+  if (rvalue.isScalar() && !atomics.hasPadding()) {
+    llvm::Value *value = rvalue.getScalarVal();
+    if (isa<llvm::IntegerType>(value->getType())) {
+      intValue = value;
+    } else {
+      llvm::IntegerType *inputIntTy =
+        llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
+      if (isa<llvm::PointerType>(value->getType())) {
+        intValue = Builder.CreatePtrToInt(value, inputIntTy);
+      } else {
+        intValue = Builder.CreateBitCast(value, inputIntTy);
+      }
+    }
+
+  // Otherwise, we need to go through memory.
+  } else {
+    // Put the r-value in memory.
+    llvm::Value *addr = atomics.materializeRValue(rvalue);
+
+    // Cast the temporary to the atomic int type and pull a value out.
+    addr = atomics.emitCastToAtomicIntPointer(addr);
+    intValue = Builder.CreateAlignedLoad(addr,
+                                 atomics.getAtomicAlignment().getQuantity());
+  }
+
+  // Do the atomic store.
+  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
+  llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+
+  // Initializations don't need to be atomic.
+  if (!isInit) store->setAtomic(llvm::SequentiallyConsistent);
+
+  // Other decoration.
+  store->setAlignment(dest.getAlignment().getQuantity());
+  if (dest.isVolatileQualified())
+    store->setVolatile(true);
+  if (dest.getTBAAInfo())
+    CGM.DecorateInstruction(store, dest.getTBAAInfo());
+}
+
+void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
+  AtomicInfo atomics(*this, dest);
+
+  switch (atomics.getEvaluationKind()) {
+  case TEK_Scalar: {
+    llvm::Value *value = EmitScalarExpr(init);
+    atomics.emitCopyIntoMemory(RValue::get(value), dest);
+    return;
+  }
+
+  case TEK_Complex: {
+    ComplexPairTy value = EmitComplexExpr(init);
+    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
+    return;
+  }
+
+  case TEK_Aggregate: {
+    // Memset the buffer first if there's any possibility of
+    // uninitialized internal bits.
+    atomics.emitMemSetZeroIfNecessary(dest);
+
+    // HACK: whether the initializer actually has an atomic type
+    // doesn't really seem reliable right now.
+    if (!init->getType()->isAtomicType()) {
+      dest = atomics.projectValue(dest);
+    }
+
+    // Evaluate the expression directly into the destination.
+    AggValueSlot slot = AggValueSlot::forLValue(dest,
+                                        AggValueSlot::IsNotDestructed,
+                                        AggValueSlot::DoesNotNeedGCBarriers,
+                                        AggValueSlot::IsNotAliased);
+    EmitAggExpr(init, slot);
+    return;
+  }
+  }
+  llvm_unreachable("bad evaluation kind");
+}
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index bb5d638..0e00130 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -1129,11 +1129,15 @@
     return;
   }
   case TEK_Aggregate:
-    // TODO: how can we delay here if D is captured by its initializer?
-    EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
+    if (type->isAtomicType()) {
+      EmitAtomicInit(const_cast<Expr*>(init), lvalue);
+    } else {
+      // TODO: how can we delay here if D is captured by its initializer?
+      EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
                                               AggValueSlot::IsDestructed,
                                          AggValueSlot::DoesNotNeedGCBarriers,
                                               AggValueSlot::IsNotAliased));
+    }
     MaybeEmitStdInitializerListCleanup(lvalue.getAddress(), init);
     return;
   }
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index ba816af..a170028 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -1144,6 +1144,14 @@
       return EmitFromMemory(V, Ty);
     }
   }
+
+  // Atomic operations have to be done on integral types.
+  if (Ty->isAtomicType()) {
+    LValue lvalue = LValue::MakeAddr(Addr, Ty,
+                                     CharUnits::fromQuantity(Alignment),
+                                     getContext(), TBAAInfo);
+    return EmitAtomicLoad(lvalue).getScalarVal();
+  }
   
   llvm::LoadInst *Load = Builder.CreateLoad(Addr);
   if (Volatile)
@@ -1152,9 +1160,6 @@
     Load->setAlignment(Alignment);
   if (TBAAInfo)
     CGM.DecorateInstruction(Load, TBAAInfo);
-  // If this is an atomic type, all normal reads must be atomic
-  if (Ty->isAtomicType())
-    Load->setAtomic(llvm::SequentiallyConsistent);
 
   if ((SanOpts->Bool && hasBooleanRepresentation(Ty)) ||
       (SanOpts->Enum && Ty->getAs<EnumType>())) {
@@ -1251,13 +1256,20 @@
   
   Value = EmitToMemory(Value, Ty);
 
+  if (Ty->isAtomicType()) {
+    EmitAtomicStore(RValue::get(Value),
+                    LValue::MakeAddr(Addr, Ty,
+                                     CharUnits::fromQuantity(Alignment),
+                                     getContext(), TBAAInfo),
+                    isInit);
+    return;
+  }
+
   llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
   if (Alignment)
     Store->setAlignment(Alignment);
   if (TBAAInfo)
     CGM.DecorateInstruction(Store, TBAAInfo);
-  if (!isInit && Ty->isAtomicType())
-    Store->setAtomic(llvm::SequentiallyConsistent);
 }
 
 void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue,
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index f8921db..1ac13c0 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -29,6 +29,14 @@
 //                        Aggregate Expression Emitter
 //===----------------------------------------------------------------------===//
 
+llvm::Value *AggValueSlot::getPaddedAtomicAddr() const {
+  assert(isValueOfAtomic());
+  llvm::GEPOperator *op = cast<llvm::GEPOperator>(getAddr());
+  assert(op->getNumIndices() == 2);
+  assert(op->hasAllZeroIndices());
+  return op->getPointerOperand();
+}
+
 namespace  {
 class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
   CodeGenFunction &CGF;
@@ -190,6 +198,38 @@
     CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddr());
   }
 };
+
+/// A helper class for emitting expressions into the value sub-object
+/// of a padded atomic type.
+class ValueDestForAtomic {
+  AggValueSlot Dest;
+public:
+  ValueDestForAtomic(CodeGenFunction &CGF, AggValueSlot dest, QualType type)
+    : Dest(dest) {
+    assert(!Dest.isValueOfAtomic());
+    if (!Dest.isIgnored() && CGF.CGM.isPaddedAtomicType(type)) {
+      llvm::Value *valueAddr = CGF.Builder.CreateStructGEP(Dest.getAddr(), 0);
+      Dest = AggValueSlot::forAddr(valueAddr,
+                                   Dest.getAlignment(),
+                                   Dest.getQualifiers(),
+                                   Dest.isExternallyDestructed(),
+                                   Dest.requiresGCollection(),
+                                   Dest.isPotentiallyAliased(),
+                                   Dest.isZeroed(),
+                                   AggValueSlot::IsValueOfAtomic);
+    }
+  }
+
+  const AggValueSlot &getDest() const { return Dest; }
+
+  ~ValueDestForAtomic() {
+    // Kill the GEP if we made one and it didn't end up used.
+    if (Dest.isValueOfAtomic()) {
+      llvm::Instruction *addr = cast<llvm::GetElementPtrInst>(Dest.getAddr());
+      if (addr->use_empty()) addr->eraseFromParent();
+    }
+  }
+};
 }  // end anonymous namespace.
 
 //===----------------------------------------------------------------------===//
@@ -201,6 +241,14 @@
 /// then loads the result into DestPtr.
 void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
   LValue LV = CGF.EmitLValue(E);
+
+  // If the type of the l-value is atomic, then do an atomic load.
+  if (LV.getType()->isAtomicType()) {
+    ValueDestForAtomic valueDest(CGF, Dest, LV.getType());
+    CGF.EmitAtomicLoad(LV, valueDest.getDest());
+    return;
+  }
+
   EmitFinalDestCopy(E->getType(), LV);
 }
 
@@ -543,6 +591,20 @@
   CGF.EmitAggExpr(E->getInitializer(), Slot);
 }
 
+/// Attempt to look through various unimportant expressions to find a
+/// cast of the given kind.
+static Expr *findPeephole(Expr *op, CastKind kind) {
+  while (true) {
+    op = op->IgnoreParens();
+    if (CastExpr *castE = dyn_cast<CastExpr>(op)) {
+      if (castE->getCastKind() == kind)
+        return castE->getSubExpr();
+      if (castE->getCastKind() == CK_NoOp)
+        continue;
+    }
+    return 0;
+  }
+}
 
 void AggExprEmitter::VisitCastExpr(CastExpr *E) {
   switch (E->getCastKind()) {
@@ -582,6 +644,75 @@
                 "should have been unpacked before we got here");
   }
 
+  case CK_NonAtomicToAtomic:
+  case CK_AtomicToNonAtomic: {
+    bool isToAtomic = (E->getCastKind() == CK_NonAtomicToAtomic);
+
+    // Determine the atomic and value types.
+    QualType atomicType = E->getSubExpr()->getType();
+    QualType valueType = E->getType();
+    if (isToAtomic) std::swap(atomicType, valueType);
+
+    assert(atomicType->isAtomicType());
+    assert(CGF.getContext().hasSameUnqualifiedType(valueType,
+                          atomicType->castAs<AtomicType>()->getValueType()));
+
+    // Just recurse normally if we're ignoring the result or the
+    // atomic type doesn't change representation.
+    if (Dest.isIgnored() || !CGF.CGM.isPaddedAtomicType(atomicType)) {
+      return Visit(E->getSubExpr());
+    }
+
+    CastKind peepholeTarget =
+      (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic);
+
+    // These two cases are reverses of each other; try to peephole them.
+    if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) {
+      assert(CGF.getContext().hasSameUnqualifiedType(op->getType(),
+                                                     E->getType()) &&
+           "peephole significantly changed types?");
+      return Visit(op);
+    }
+
+    // If we're converting an r-value of non-atomic type to an r-value
+    // of atomic type, just make an atomic temporary, emit into that,
+    // and then copy the value out.  (FIXME: do we need to
+    // zero-initialize it first?)
+    if (isToAtomic) {
+      ValueDestForAtomic valueDest(CGF, Dest, atomicType);
+      CGF.EmitAggExpr(E->getSubExpr(), valueDest.getDest());
+      return;
+    }
+
+    // Otherwise, we're converting an atomic type to a non-atomic type.
+
+    // If the dest is a value-of-atomic subobject, drill back out.
+    if (Dest.isValueOfAtomic()) {
+      AggValueSlot atomicSlot =
+        AggValueSlot::forAddr(Dest.getPaddedAtomicAddr(),
+                              Dest.getAlignment(),
+                              Dest.getQualifiers(),
+                              Dest.isExternallyDestructed(),
+                              Dest.requiresGCollection(),
+                              Dest.isPotentiallyAliased(),
+                              Dest.isZeroed(),
+                              AggValueSlot::IsNotValueOfAtomic);
+      CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
+      return;
+    }
+
+    // Otherwise, make an atomic temporary, emit into that, and then
+    // copy the value out.
+    AggValueSlot atomicSlot =
+      CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp");
+    CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
+
+    llvm::Value *valueAddr =
+      Builder.CreateStructGEP(atomicSlot.getAddr(), 0);
+    RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile());
+    return EmitFinalDestCopy(valueType, rvalue);
+  }
+
   case CK_LValueToRValue:
     // If we're loading from a volatile type, force the destination
     // into existence.
@@ -589,11 +720,10 @@
       EnsureDest(E->getType());
       return Visit(E->getSubExpr());
     }
+
     // fallthrough
 
   case CK_NoOp:
-  case CK_AtomicToNonAtomic:
-  case CK_NonAtomicToAtomic:
   case CK_UserDefinedConversion:
   case CK_ConstructorConversion:
     assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(),
@@ -775,6 +905,12 @@
     // Now emit the LHS and copy into it.
     LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
 
+    // That copy is an atomic copy if the LHS is atomic.
+    if (LHS.getType()->isAtomicType()) {
+      CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
+      return;
+    }
+
     EmitCopy(E->getLHS()->getType(),
              AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
                                      needsGC(E->getLHS()->getType()),
@@ -785,6 +921,15 @@
   
   LValue LHS = CGF.EmitLValue(E->getLHS());
 
+  // If we have an atomic type, evaluate into the destination and then
+  // do an atomic copy.
+  if (LHS.getType()->isAtomicType()) {
+    EnsureDest(E->getRHS()->getType());
+    Visit(E->getRHS());
+    CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
+    return;
+  }
+
   // Codegen the RHS so that it stores directly into the LHS.
   AggValueSlot LHSSlot =
     AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, 
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 840463b..5fc73aa 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -42,7 +42,6 @@
   : public StmtVisitor<ComplexExprEmitter, ComplexPairTy> {
   CodeGenFunction &CGF;
   CGBuilderTy &Builder;
-  // True is we should ignore the value of a
   bool IgnoreReal;
   bool IgnoreImag;
 public:
@@ -286,6 +285,9 @@
 /// load the real and imaginary pieces, returning them as Real/Imag.
 ComplexPairTy ComplexExprEmitter::EmitLoadOfLValue(LValue lvalue) {
   assert(lvalue.isSimple() && "non-simple complex l-value?");
+  if (lvalue.getType()->isAtomicType())
+    return CGF.EmitAtomicLoad(lvalue).getComplexVal();
+
   llvm::Value *SrcPtr = lvalue.getAddress();
   bool isVolatile = lvalue.isVolatileQualified();
 
@@ -310,6 +312,9 @@
 void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val,
                                             LValue lvalue,
                                             bool isInit) {
+  if (lvalue.getType()->isAtomicType())
+    return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit);
+
   llvm::Value *Ptr = lvalue.getAddress();
   llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, "real");
   llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, "imag");
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 0bbd373..6b0c271 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -350,11 +350,23 @@
   /// evaluating an expression which constructs such an object.
   bool AliasedFlag : 1;
 
+  /// ValueOfAtomicFlag - This is set to true if the slot is the value
+  /// subobject of an object the size of an _Atomic(T).  The specific
+  /// guarantees this makes are:
+  ///   - the address is guaranteed to be a getelementptr into the
+  ///     padding struct and
+  ///   - it is okay to store something the width of an _Atomic(T)
+  ///     into the address.
+  /// Tracking this allows us to avoid some obviously unnecessary
+  /// memcpys.
+  bool ValueOfAtomicFlag : 1;
+
 public:
   enum IsAliased_t { IsNotAliased, IsAliased };
   enum IsDestructed_t { IsNotDestructed, IsDestructed };
   enum IsZeroed_t { IsNotZeroed, IsZeroed };
   enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
+  enum IsValueOfAtomic_t { IsNotValueOfAtomic, IsValueOfAtomic };
 
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
@@ -378,7 +390,9 @@
                               IsDestructed_t isDestructed,
                               NeedsGCBarriers_t needsGC,
                               IsAliased_t isAliased,
-                              IsZeroed_t isZeroed = IsNotZeroed) {
+                              IsZeroed_t isZeroed = IsNotZeroed,
+                              IsValueOfAtomic_t isValueOfAtomic
+                                = IsNotValueOfAtomic) {
     AggValueSlot AV;
     AV.Addr = addr;
     AV.Alignment = align.getQuantity();
@@ -387,6 +401,7 @@
     AV.ObjCGCFlag = needsGC;
     AV.ZeroedFlag = isZeroed;
     AV.AliasedFlag = isAliased;
+    AV.ValueOfAtomicFlag = isValueOfAtomic;
     return AV;
   }
 
@@ -394,9 +409,12 @@
                                 IsDestructed_t isDestructed,
                                 NeedsGCBarriers_t needsGC,
                                 IsAliased_t isAliased,
-                                IsZeroed_t isZeroed = IsNotZeroed) {
+                                IsZeroed_t isZeroed = IsNotZeroed,
+                                IsValueOfAtomic_t isValueOfAtomic
+                                  = IsNotValueOfAtomic) {
     return forAddr(LV.getAddress(), LV.getAlignment(),
-                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
+                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed,
+                   isValueOfAtomic);
   }
 
   IsDestructed_t isExternallyDestructed() const {
@@ -428,6 +446,12 @@
     return Addr;
   }
 
+  IsValueOfAtomic_t isValueOfAtomic() const {
+    return IsValueOfAtomic_t(ValueOfAtomicFlag);
+  }
+
+  llvm::Value *getPaddedAtomicAddr() const;
+
   bool isIgnored() const {
     return Addr == 0;
   }
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 4e6c72e..2be8dcc 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2149,6 +2149,13 @@
 
   RValue convertTempToRValue(llvm::Value *addr, QualType type);
 
+  void EmitAtomicInit(Expr *E, LValue lvalue);
+
+  RValue EmitAtomicLoad(LValue lvalue,
+                        AggValueSlot slot = AggValueSlot::ignored());
+
+  void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit);
+
   /// EmitToMemory - Change a scalar value from its value
   /// representation to its in-memory representation.
   llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty);
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index f5ae9cc..2bddb6f 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -47,6 +47,7 @@
 namespace clang {
   class TargetCodeGenInfo;
   class ASTContext;
+  class AtomicType;
   class FunctionDecl;
   class IdentifierInfo;
   class ObjCMethodDecl;
@@ -494,6 +495,9 @@
 
   bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor);
 
+  bool isPaddedAtomicType(QualType type);
+  bool isPaddedAtomicType(const AtomicType *type);
+
   static void DecorateInstruction(llvm::Instruction *Inst,
                                   llvm::MDNode *TBAAInfo);
 
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 259d106..8fc78e3 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -582,7 +582,21 @@
   }
 
   case Type::Atomic: {
-    ResultType = ConvertType(cast<AtomicType>(Ty)->getValueType());
+    QualType valueType = cast<AtomicType>(Ty)->getValueType();
+    ResultType = ConvertTypeForMem(valueType);
+
+    // Pad out to the inflated size if necessary.
+    uint64_t valueSize = Context.getTypeSize(valueType);
+    uint64_t atomicSize = Context.getTypeSize(Ty);
+    if (valueSize != atomicSize) {
+      assert(valueSize < atomicSize);
+      llvm::Type *elts[] = {
+        ResultType,
+        llvm::ArrayType::get(CGM.Int8Ty, (atomicSize - valueSize) / 8)
+      };
+      ResultType = llvm::StructType::get(getLLVMContext(),
+                                         llvm::makeArrayRef(elts));
+    }
     break;
   }
   }
@@ -593,6 +607,14 @@
   return ResultType;
 }
 
+bool CodeGenModule::isPaddedAtomicType(QualType type) {
+  return isPaddedAtomicType(type->castAs<AtomicType>());
+}
+
+bool CodeGenModule::isPaddedAtomicType(const AtomicType *type) {
+  return Context.getTypeSize(type) != Context.getTypeSize(type->getValueType());
+}
+
 /// ConvertRecordDeclType - Lay out a tagged decl type like struct or union.
 llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) {
   // TagDecl's are not necessarily unique, instead use the (clang)