Implement a README entry, compiling the code in shift-combine.ll into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time in a wide variety of
situations. For example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see regressions like this one, which can be fixed
in the X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44656 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
new file mode 100644
index 0000000..543bb22
--- /dev/null
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc | not grep shrl
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+@array = weak global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
+
+define i32 @foo(i32 %x) { ; returns @array[(%x >> 2) & 3]; the RUN line requires that llc's output for this contain no right-shift (see the grep)
+entry:
+ %tmp2 = lshr i32 %x, 2 ; <i32> [#uses=1] -- logical shift right: drop the low two bits of %x
+ %tmp3 = and i32 %tmp2, 3 ; <i32> [#uses=1] -- keep two bits, so the index is in 0..3
+ %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3 ; <i32*> [#uses=1] -- address of element %tmp3 of @array
+ %tmp5 = load i32* %tmp4, align 4 ; <i32> [#uses=1] -- read the selected i32 element
+ ret i32 %tmp5
+}
+