prevent folding a scalar FP load into a packed logical FP instruction (PR22371)

Change the memory operands in sse12_fp_packed_scalar_logical_alias from scalars to vectors. 
That's what the hardware packed logical FP instructions define: 128-bit memory operands.
There are no scalar versions of these instructions...because this is x86.

Generating the wrong code (folding a scalar load into a 128-bit load) is still possible
using the peephole optimization pass and the load folding tables. We won't completely
solve this bug until we either fix the lowering in fabs/fneg/fcopysign and any other
places where scalar FP logic is created or fix the load folding in foldMemoryOperandImpl()
to make sure it isn't changing the size of the load.

Differential Revision: http://reviews.llvm.org/D7474

llvm-svn: 229531
diff --git a/llvm/test/CodeGen/X86/logical-load-fold.ll b/llvm/test/CodeGen/X86/logical-load-fold.ll
new file mode 100644
index 0000000..6051a5e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/logical-load-fold.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=sse2,sse-unaligned-mem | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mcpu=x86-64 -mattr=avx                    | FileCheck %s --check-prefix=AVX
+
+; Although we have the ability to fold an unaligned load with AVX 
+; and under special conditions with some SSE implementations, we
+; cannot fold the load under any circumstances in these test
+; cases because they are not 16-byte loads. The load must be
+; executed as a scalar ('movs*') with a zero extension to
+; 128 bits and then used in the packed logical ('andp*') op.
+; PR22371 - http://llvm.org/bugs/show_bug.cgi?id=22371
+
+define double @load_double_no_fold(double %x, double %y) {
+; SSE2-LABEL: load_double_no_fold:
+; SSE2:       ## BB#0:
+; SSE2-NEXT:    cmplesd %xmm0, %xmm1
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    andpd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: load_double_no_fold:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcmplesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+
+  %cmp = fcmp oge double %x, %y
+  %zext = zext i1 %cmp to i32
+  %conv = sitofp i32 %zext to double
+  ret double %conv
+}
+
+define float @load_float_no_fold(float %x, float %y) {
+; SSE2-LABEL: load_float_no_fold:
+; SSE2:       ## BB#0:
+; SSE2-NEXT:    cmpless %xmm0, %xmm1
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    andps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: load_float_no_fold:
+; AVX:       ## BB#0:
+; AVX-NEXT:    vcmpless %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+
+  %cmp = fcmp oge float %x, %y
+  %zext = zext i1 %cmp to i32
+  %conv = sitofp i32 %zext to float
+  ret float %conv
+}
+
diff --git a/llvm/test/CodeGen/X86/stack-align.ll b/llvm/test/CodeGen/X86/stack-align.ll
index eafb7c2..74f4c78 100644
--- a/llvm/test/CodeGen/X86/stack-align.ll
+++ b/llvm/test/CodeGen/X86/stack-align.ll
@@ -1,7 +1,10 @@
 ; RUN: llc < %s -relocation-model=static -mcpu=yonah | FileCheck %s
 
-; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
-; fold the load into the andpd.
+; The double argument is at 4(esp) which is 16-byte aligned, but we
+; are required to read in extra bytes of memory in order to fold the
+; load. Bad Things may happen when reading/processing undefined bytes,
+; so don't fold the load.
+; PR22371 / http://reviews.llvm.org/D7474
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
@@ -15,22 +18,31 @@
 	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp1 = load volatile double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 ) readnone	; <double> [#uses=1]
-    ; CHECK: andpd{{.*}}4(%esp), %xmm
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
 	store volatile double %tmp6, double* %P, align 8
 	ret void
+
+; CHECK-LABEL: test:
+; CHECK:       movsd	{{.*}}G, %xmm{{.*}}
+; CHECK:       andpd	%xmm{{.*}}, %xmm{{.*}}
+; CHECK:       movsd	4(%esp), %xmm{{.*}}
+; CHECK:       andpd	%xmm{{.*}}, %xmm{{.*}}
+
+
 }
 
 define void @test2() alignstack(16) nounwind {
 entry:
-    ; CHECK: andl{{.*}}$-16, %esp
+; CHECK-LABEL: test2:
+; CHECK: andl{{.*}}$-16, %esp
     ret void
 }
 
 ; Use a call to force a spill.
 define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
 entry:
-    ; CHECK: andl{{.*}}$-32, %esp
+; CHECK-LABEL: test3:
+; CHECK: andl{{.*}}$-32, %esp
     call void @test2()
     %A = fmul <2 x double> %x, %y
     ret <2 x double> %A