Introduce f[no-]max-unknown-pointer-align=[number] option
to instruct the code generator to not enforce a higher alignment 
than the given number (of bytes) when accessing memory via an opaque 
pointer or reference. Patch reviewed by John McCall (with post-commit
review pending). rdar://16254558

llvm-svn: 214911
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 9c64b04..6f66c4d 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -1111,6 +1111,37 @@
    This option restricts the generated code to use general registers
    only. This only applies to the AArch64 architecture.
 
+**-f[no-]max-unknown-pointer-align=[number]**
+   Instruct the code generator to not enforce a higher alignment than the given
+   number (of bytes) when accessing memory via an opaque pointer or reference.
+   This cap is ignored when directly accessing a variable or when the pointee
+   type has an explicit “aligned” attribute.
+
+   The value should usually be determined by the properties of the system allocator.
+   Some builtin types, especially vector types, have very high natural alignments;
+   when working with values of those types, Clang usually wants to use instructions
+   that take advantage of that alignment.  However, many system allocators do
+   not promise to return memory that is more than 8-byte or 16-byte-aligned.  Use
+   this option to limit the alignment that the compiler can assume for an arbitrary
+   pointer, which may point onto the heap.
+
+   This option does not affect the ABI alignment of types; the layout of structs and
+   unions and the value returned by the alignof operator remain the same.
+
+   This option can be overridden on a case-by-case basis by putting an explicit
+   “aligned” alignment on a struct, union, or typedef.  For example:
+
+   .. code-block:: console
+
+      #include <immintrin.h>
+      // Make an aligned typedef of the AVX-512 16-int vector type.
+      typedef __v16si __aligned_v16si __attribute__((aligned(64)));
+
+      void initialize_vector(__aligned_v16si *v) {
+        // The compiler may assume that ‘v’ is 64-byte aligned, regardless of the
+        // value of -fmax-unknown-pointer-align.
+      }
+
 
 Profile Guided Optimization
 ---------------------------
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index a297a4c..c29686a 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -106,6 +106,8 @@
 LANGOPT(Static            , 1, 0, "__STATIC__ predefined macro (as opposed to __DYNAMIC__)")
 VALUE_LANGOPT(PackStruct  , 32, 0,
               "default struct packing maximum alignment")
+VALUE_LANGOPT(MaxTypeAlign  , 32, 0,
+              "default maximum alignment for types")
 VALUE_LANGOPT(PICLevel    , 2, 0, "__PIC__ level")
 VALUE_LANGOPT(PIELevel    , 2, 0, "__PIE__ level")
 LANGOPT(GNUInline         , 1, 0, "GNU inline semantics")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 759c5b5..7759b44 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -804,6 +804,9 @@
 def fno_pack_struct : Flag<["-"], "fno-pack-struct">, Group<f_Group>;
 def fpack_struct_EQ : Joined<["-"], "fpack-struct=">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Specify the default maximum struct packing alignment">;
+def fmax_type_align_EQ : Joined<["-"], "fmax-type-align=">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Specify the maximum alignment to enforce on pointers lacking an explicit alignment">;
+def fno_max_type_align : Flag<["-"], "fno-max-type-align">, Group<f_Group>;
 def fpascal_strings : Flag<["-"], "fpascal-strings">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Recognize and construct Pascal-style string literals">;
 def fpcc_struct_return : Flag<["-"], "fpcc-struct-return">, Group<f_Group>, Flags<[CC1Option]>,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8496292..a200548 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1399,8 +1399,13 @@
 
   LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
     CharUnits Alignment;
-    if (!T->isIncompleteType())
+    if (!T->isIncompleteType()) {
       Alignment = getContext().getTypeAlignInChars(T);
+      unsigned MaxAlign = getContext().getLangOpts().MaxTypeAlign;
+      if (MaxAlign && Alignment.getQuantity() > MaxAlign &&
+          !getContext().isAlignmentRequired(T))
+        Alignment = CharUnits::fromQuantity(MaxAlign);
+    }
     return LValue::MakeAddr(V, T, Alignment, getContext(),
                             CGM.getTBAAInfo(T));
   }
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index 9a205b3..93a1d91 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -4134,6 +4134,21 @@
     CmdArgs.push_back("-fpack-struct=1");
   }
 
+  // Handle -fmax-type-align=N and -fno-type-align
+  bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align);
+  if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) {
+    if (!SkipMaxTypeAlign) {
+      std::string MaxTypeAlignStr = "-fmax-type-align=";
+      MaxTypeAlignStr += A->getValue();
+      CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
+    }
+  } else if (getToolChain().getTriple().isOSDarwin()) {
+    if (!SkipMaxTypeAlign) {
+      std::string MaxTypeAlignStr = "-fmax-type-align=16";
+      CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
+    }
+  }
+
   if (KernelOrKext || isNoCommonDefault(getToolChain().getTriple())) {
     if (!Args.hasArg(options::OPT_fcommon))
       CmdArgs.push_back("-fno-common");
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 0fbbf1a..e4be487 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1476,6 +1476,7 @@
     Args.hasArg(OPT_fencode_extended_block_signature);
   Opts.EmitAllDecls = Args.hasArg(OPT_femit_all_decls);
   Opts.PackStruct = getLastArgIntValue(Args, OPT_fpack_struct_EQ, 0, Diags);
+  Opts.MaxTypeAlign = getLastArgIntValue(Args, OPT_fmax_type_align_EQ, 0, Diags);
   Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
   Opts.PIELevel = getLastArgIntValue(Args, OPT_pie_level, 0, Diags);
   Opts.Static = Args.hasArg(OPT_static_define);
diff --git a/clang/test/CodeGenCXX/align-avx-complete-objects.cpp b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp
new file mode 100644
index 0000000..25f4ef1
--- /dev/null
+++ b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -x c++ %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -fmax-type-align=16 -emit-llvm -o - -Werror | FileCheck %s
+// rdar://16254558
+
+typedef float AVX2Float __attribute__((__vector_size__(32)));
+
+
+volatile float TestAlign(void)
+{
+       volatile AVX2Float *p = new AVX2Float;
+        *p = *p;
+        AVX2Float r = *p;
+        return r[0];
+}
+
+// CHECK: [[R:%.*]] = alloca <8 x float>, align 32
+// CHECK-NEXT:  [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32)
+// CHECK-NEXT:  [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>*
+// CHECK-NEXT:  store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8
+// CHECK-NEXT:  [[ONE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 16
+// CHECK-NEXT:  [[THREE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16
+// CHECK-NEXT:  [[FOUR:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 16
+// CHECK-NEXT:  store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[SIX:%.*]] = load <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0
+// CHECK-NEXT:  ret float [[VECEXT]]
+
+typedef float AVX2Float_Explicitly_aligned __attribute__((__vector_size__(32))) __attribute__((aligned (32)));
+
+typedef AVX2Float_Explicitly_aligned AVX2Float_indirect;
+
+typedef AVX2Float_indirect AVX2Float_use_existing_align;
+
+volatile float TestAlign2(void)
+{
+       volatile AVX2Float_use_existing_align *p = new AVX2Float_use_existing_align;
+        *p = *p;
+        AVX2Float_use_existing_align r = *p;
+        return r[0];
+}
+
+// CHECK: [[R:%.*]] = alloca <8 x float>, align 32
+// CHECK-NEXT:  [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32)
+// CHECK-NEXT:  [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>*
+// CHECK-NEXT:  store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8
+// CHECK-NEXT:  [[ONE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 32
+// CHECK-NEXT:  [[THREE:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 32
+// CHECK-NEXT:  [[FOUR:%.*]] = load <8 x float>** [[P]], align 8
+// CHECK-NEXT:  [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 32
+// CHECK-NEXT:  store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[SIX:%.*]] = load <8 x float>* [[R]], align 32
+// CHECK-NEXT:  [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0
+// CHECK-NEXT:  ret float [[VECEXT]]
diff --git a/clang/test/Driver/darwin-max-type-align.c b/clang/test/Driver/darwin-max-type-align.c
new file mode 100644
index 0000000..6532f4a
--- /dev/null
+++ b/clang/test/Driver/darwin-max-type-align.c
@@ -0,0 +1,15 @@
+// Check the -fmax-type-align=N flag
+// rdar://16254558
+//
+// RUN: %clang -no-canonical-prefixes -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN:   FileCheck -check-prefix=TEST0 %s
+// TEST0: -fmax-type-align=16
+// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN:   FileCheck -check-prefix=TEST1 %s
+// TEST1: -fmax-type-align=32
+// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN:   FileCheck -check-prefix=TEST2 %s
+// TEST2-NOT: -fmax-type-align
+// RUN: %clang -no-canonical-prefixes -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \
+// RUN:   FileCheck -check-prefix=TEST3 %s
+// TEST3-NOT: -fmax-type-align
diff --git a/clang/test/Driver/rewrite-legacy-objc.m b/clang/test/Driver/rewrite-legacy-objc.m
index 0c6404e..b0b78d0 100644
--- a/clang/test/Driver/rewrite-legacy-objc.m
+++ b/clang/test/Driver/rewrite-legacy-objc.m
@@ -3,11 +3,11 @@
 // TEST0: clang{{.*}}" "-cc1"
 // TEST0: "-rewrite-objc"
 // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead.
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fdiagnostics-show-option"
+// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option"
 // TEST0: rewrite-legacy-objc.m"
 // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.9.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST1 %s
 // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.6.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \
 // RUN:   FileCheck -check-prefix=TEST2 %s
-// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fdiagnostics-show-option"
-// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fdiagnostics-show-option"
+// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option"
+// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option"
diff --git a/clang/test/Driver/rewrite-objc.m b/clang/test/Driver/rewrite-objc.m
index 95db582..ba5f835 100644
--- a/clang/test/Driver/rewrite-objc.m
+++ b/clang/test/Driver/rewrite-objc.m
@@ -3,4 +3,4 @@
 // TEST0: clang{{.*}}" "-cc1"
 // TEST0: "-rewrite-objc"
 // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead.
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fdiagnostics-show-option"
+// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option"