Implement initial memory alignment awareness for SSE instructions. Vector loads
and stores with a specified alignment of less than 16 bytes now use instructions
that support misaligned memory references (movups/movupd/movdqu rather than the
aligned movaps/movapd/movdqa forms).
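For example (a sketch based on the checks in the tests below, not verbatim
compiler output), the alignment attribute on an IR load or store is what
selects between the two forms:

  %a = load <4 x float>* %p           ; default ABI alignment (16) -> movaps
  %u = load <4 x float>* %p, align 4  ; under-aligned              -> movups

The new tests grep for movap/movup and movdqa/movdqu accordingly.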

llvm-svn: 40015
diff --git a/llvm/test/CodeGen/X86/sse-align-0.ll b/llvm/test/CodeGen/X86/sse-align-0.ll
new file mode 100644
index 0000000..0b28067
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-0.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep mov
+
+define <4 x float> @foo(<4 x float>* %p, <4 x float> %x)
+{
+  %t = load <4 x float>* %p
+  %z = mul <4 x float> %t, %x
+  ret <4 x float> %z
+}
+define <2 x double> @bar(<2 x double>* %p, <2 x double> %x)
+{
+  %t = load <2 x double>* %p
+  %z = mul <2 x double> %t, %x
+  ret <2 x double> %z
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-1.ll b/llvm/test/CodeGen/X86/sse-align-1.ll
new file mode 100644
index 0000000..1841535
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-1.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | wc -l | grep 2
+
+define <4 x float> @foo(<4 x float>* %p)
+{
+  %t = load <4 x float>* %p
+  ret <4 x float> %t
+}
+define <2 x double> @bar(<2 x double>* %p)
+{
+  %t = load <2 x double>* %p
+  ret <2 x double> %t
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-10.ll b/llvm/test/CodeGen/X86/sse-align-10.ll
new file mode 100644
index 0000000..e94c090
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-10.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movdqu | wc -l | grep 1
+
+define <2 x i64> @bar(<2 x i64>* %p)
+{
+  %t = load <2 x i64>* %p, align 8
+  ret <2 x i64> %t
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-2.ll b/llvm/test/CodeGen/X86/sse-align-2.ll
new file mode 100644
index 0000000..b5c0674
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-2.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | wc -l | grep 2
+
+define <4 x float> @foo(<4 x float>* %p, <4 x float> %x)
+{
+  %t = load <4 x float>* %p, align 4
+  %z = mul <4 x float> %t, %x
+  ret <4 x float> %z
+}
+define <2 x double> @bar(<2 x double>* %p, <2 x double> %x)
+{
+  %t = load <2 x double>* %p, align 8
+  %z = mul <2 x double> %t, %x
+  ret <2 x double> %z
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-3.ll b/llvm/test/CodeGen/X86/sse-align-3.ll
new file mode 100644
index 0000000..6f96bba
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-3.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movap | wc -l | grep 2
+
+define void @foo(<4 x float>* %p, <4 x float> %x)
+{
+  store <4 x float> %x, <4 x float>* %p
+  ret void
+}
+define void @bar(<2 x double>* %p, <2 x double> %x)
+{
+  store <2 x double> %x, <2 x double>* %p
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-4.ll b/llvm/test/CodeGen/X86/sse-align-4.ll
new file mode 100644
index 0000000..4bf83b3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-4.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | wc -l | grep 2
+
+define void @foo(<4 x float>* %p, <4 x float> %x)
+{
+  store <4 x float> %x, <4 x float>* %p, align 4
+  ret void
+}
+define void @bar(<2 x double>* %p, <2 x double> %x)
+{
+  store <2 x double> %x, <2 x double>* %p, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-5.ll b/llvm/test/CodeGen/X86/sse-align-5.ll
new file mode 100644
index 0000000..48c568b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-5.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movdqa | wc -l | grep 1
+
+define <2 x i64> @bar(<2 x i64>* %p)
+{
+  %t = load <2 x i64>* %p
+  ret <2 x i64> %t
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-6.ll b/llvm/test/CodeGen/X86/sse-align-6.ll
new file mode 100644
index 0000000..9b1b781
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-6.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movdqu | wc -l | grep 1
+
+define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x)
+{
+  %t = load <2 x i64>* %p, align 8
+  %z = mul <2 x i64> %t, %x
+  ret <2 x i64> %z
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-7.ll b/llvm/test/CodeGen/X86/sse-align-7.ll
new file mode 100644
index 0000000..04b013c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-7.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movdqa | wc -l | grep 1
+
+define void @bar(<2 x i64>* %p, <2 x i64> %x)
+{
+  store <2 x i64> %x, <2 x i64>* %p
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-8.ll b/llvm/test/CodeGen/X86/sse-align-8.ll
new file mode 100644
index 0000000..14fc76c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-8.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movdqu | wc -l | grep 1
+
+define void @bar(<2 x i64>* %p, <2 x i64> %x)
+{
+  store <2 x i64> %x, <2 x i64>* %p, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/sse-align-9.ll b/llvm/test/CodeGen/X86/sse-align-9.ll
new file mode 100644
index 0000000..fb4fe92
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-align-9.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | wc -l | grep 2
+
+define <4 x float> @foo(<4 x float>* %p)
+{
+  %t = load <4 x float>* %p, align 4
+  ret <4 x float> %t
+}
+define <2 x double> @bar(<2 x double>* %p)
+{
+  %t = load <2 x double>* %p, align 8
+  ret <2 x double> %t
+}