llvm/test/CodeGen/X86/fold-and-shift.ll - toolchain/llvm-project - Gitiles

 ; RUN: llc < %s -march=x86 | FileCheck %s

 define i32 @t1(i8* %X, i32 %i) {
 ; CHECK-LABEL: t1:
 ; CHECK-NOT: and
 ; CHECK: movzbl
 ; CHECK: movl (%{{...}},%{{...}},4),
 ; CHECK: ret

 entry:
   %tmp2 = shl i32 %i, 2
   %tmp4 = and i32 %tmp2, 1020
   %tmp7 = getelementptr i8, i8* %X, i32 %tmp4
   %tmp78 = bitcast i8* %tmp7 to i32*
   %tmp9 = load i32, i32* %tmp78
   ret i32 %tmp9
 }

 define i32 @t2(i16* %X, i32 %i) {
 ; CHECK-LABEL: t2:
 ; CHECK-NOT: and
 ; CHECK: movzwl
 ; CHECK: movl (%{{...}},%{{...}},4),
 ; CHECK: ret

 entry:
   %tmp2 = shl i32 %i, 1
   %tmp4 = and i32 %tmp2, 131070
   %tmp7 = getelementptr i16, i16* %X, i32 %tmp4
   %tmp78 = bitcast i16* %tmp7 to i32*
   %tmp9 = load i32, i32* %tmp78
   ret i32 %tmp9
 }

 define i32 @t3(i16* %i.ptr, i32* %arr) {
 ; This case is tricky. The lshr followed by a gep will produce a lshr followed
 ; by an and to remove the low bits. This can be simplified by doing the lshr by
 ; a greater constant and using the addressing mode to scale the result back up.
 ; To make matters worse, because of the two-phase zext of %i and their reuse in
 ; the function, the DAG can get confusing trying to re-use both of them and
 ; prevent easy analysis of the mask in order to match this.
 ; CHECK-LABEL: t3:
 ; CHECK-NOT: and
 ; CHECK: shrl
 ; CHECK: addl (%{{...}},%{{...}},4),
 ; CHECK: ret

 entry:
   %i = load i16, i16* %i.ptr
   %i.zext = zext i16 %i to i32
   %index = lshr i32 %i.zext, 11
   %val.ptr = getelementptr inbounds i32, i32* %arr, i32 %index
   %val = load i32, i32* %val.ptr
   %sum = add i32 %val, %i.zext
   ret i32 %sum
 }

 define i32 @t4(i16* %i.ptr, i32* %arr) {
 ; A version of @t3 that has more zero extends and more re-use of intermediate
 ; values. This exercise slightly different bits of canonicalization.
 ; CHECK-LABEL: t4:
 ; CHECK-NOT: and
 ; CHECK: shrl
 ; CHECK: addl (%{{...}},%{{...}},4),
 ; CHECK: ret

 entry:
   %i = load i16, i16* %i.ptr
   %i.zext = zext i16 %i to i32
   %index = lshr i32 %i.zext, 11
   %index.zext = zext i32 %index to i64
   %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
   %val = load i32, i32* %val.ptr
   %sum.1 = add i32 %val, %i.zext
   %sum.2 = add i32 %sum.1, %index
   ret i32 %sum.2
 }
	; RUN: llc < %s -march=x86 \| FileCheck %s

	define i32 @t1(i8* %X, i32 %i) {
	; CHECK-LABEL: t1:
	; CHECK-NOT: and
	; CHECK: movzbl
	; CHECK: movl (%{{...}},%{{...}},4),
	; CHECK: ret

	entry:
	%tmp2 = shl i32 %i, 2
	%tmp4 = and i32 %tmp2, 1020
	%tmp7 = getelementptr i8, i8* %X, i32 %tmp4
	%tmp78 = bitcast i8* %tmp7 to i32*
	%tmp9 = load i32, i32* %tmp78
	ret i32 %tmp9
	}

	define i32 @t2(i16* %X, i32 %i) {
	; CHECK-LABEL: t2:
	; CHECK-NOT: and
	; CHECK: movzwl
	; CHECK: movl (%{{...}},%{{...}},4),
	; CHECK: ret

	entry:
	%tmp2 = shl i32 %i, 1
	%tmp4 = and i32 %tmp2, 131070
	%tmp7 = getelementptr i16, i16* %X, i32 %tmp4
	%tmp78 = bitcast i16* %tmp7 to i32*
	%tmp9 = load i32, i32* %tmp78
	ret i32 %tmp9
	}

	define i32 @t3(i16* %i.ptr, i32* %arr) {
	; This case is tricky. The lshr followed by a gep will produce a lshr followed
	; by an and to remove the low bits. This can be simplified by doing the lshr by
	; a greater constant and using the addressing mode to scale the result back up.
	; To make matters worse, because of the two-phase zext of %i and their reuse in
	; the function, the DAG can get confusing trying to re-use both of them and
	; prevent easy analysis of the mask in order to match this.
	; CHECK-LABEL: t3:
	; CHECK-NOT: and
	; CHECK: shrl
	; CHECK: addl (%{{...}},%{{...}},4),
	; CHECK: ret

	entry:
	%i = load i16, i16* %i.ptr
	%i.zext = zext i16 %i to i32
	%index = lshr i32 %i.zext, 11
	%val.ptr = getelementptr inbounds i32, i32* %arr, i32 %index
	%val = load i32, i32* %val.ptr
	%sum = add i32 %val, %i.zext
	ret i32 %sum
	}

	define i32 @t4(i16* %i.ptr, i32* %arr) {
	; A version of @t3 that has more zero extends and more re-use of intermediate
	; values. This exercise slightly different bits of canonicalization.
	; CHECK-LABEL: t4:
	; CHECK-NOT: and
	; CHECK: shrl
	; CHECK: addl (%{{...}},%{{...}},4),
	; CHECK: ret

	entry:
	%i = load i16, i16* %i.ptr
	%i.zext = zext i16 %i to i32
	%index = lshr i32 %i.zext, 11
	%index.zext = zext i32 %index to i64
	%val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
	%val = load i32, i32* %val.ptr
	%sum.1 = add i32 %val, %i.zext
	%sum.2 = add i32 %sum.1, %index
	ret i32 %sum.2
	}