Jakob Stoklund Olesen | 6660ed5 | 2012-06-08 23:15:12 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -march=x86 -mattr=+mmx | grep esi |
Evan Cheng | 530d47f | 2008-02-22 20:38:49 +0000 | [diff] [blame] | 2 | ; PR2082 |
| 3 | ; The local (fast) register allocator was refusing to use ESI, EDI, and EBP, so it |
| 4 | ; ran out of registers. The RUN line greps for esi to check that it is now used. |
| 5 | define void @transpose4x4(i8* %dst, i8* %src, i32 %dst_stride, i32 %src_stride) { |
| 6 | entry: |
| 7 | %dst_addr = alloca i8* ; <i8**> [#uses=5] |
| 8 | %src_addr = alloca i8* ; <i8**> [#uses=5] |
| 9 | %dst_stride_addr = alloca i32 ; <i32*> [#uses=4] |
| 10 | %src_stride_addr = alloca i32 ; <i32*> [#uses=4] |
| 11 | %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] |
| 12 | store i8* %dst, i8** %dst_addr |
| 13 | store i8* %src, i8** %src_addr |
| 14 | store i32 %dst_stride, i32* %dst_stride_addr |
| 15 | store i32 %src_stride, i32* %src_stride_addr |
| 16 | %tmp = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] |
| 17 | %tmp1 = getelementptr i8* %tmp, i32 0 ; <i8*> [#uses=1] |
| 18 | %tmp12 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1] |
| 19 | %tmp3 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] |
| 20 | %tmp4 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] |
| 21 | %tmp5 = getelementptr i8* %tmp3, i32 %tmp4 ; <i8*> [#uses=1] |
| 22 | %tmp56 = bitcast i8* %tmp5 to i32* ; <i32*> [#uses=1] |
| 23 | %tmp7 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] |
| 24 | %tmp8 = mul i32 %tmp7, 2 ; <i32> [#uses=1] |
| 25 | %tmp9 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] |
| 26 | %tmp10 = getelementptr i8* %tmp9, i32 %tmp8 ; <i8*> [#uses=1] |
| 27 | %tmp1011 = bitcast i8* %tmp10 to i32* ; <i32*> [#uses=1] |
| 28 | %tmp13 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1] |
| 29 | %tmp14 = mul i32 %tmp13, 3 ; <i32> [#uses=1] |
| 30 | %tmp15 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1] |
| 31 | %tmp16 = getelementptr i8* %tmp15, i32 %tmp14 ; <i8*> [#uses=1] |
| 32 | %tmp1617 = bitcast i8* %tmp16 to i32* ; <i32*> [#uses=1] |
| 33 | %tmp18 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] |
| 34 | %tmp19 = getelementptr i8* %tmp18, i32 0 ; <i8*> [#uses=1] |
| 35 | %tmp1920 = bitcast i8* %tmp19 to i32* ; <i32*> [#uses=1] |
| 36 | %tmp21 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] |
| 37 | %tmp22 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] |
| 38 | %tmp23 = getelementptr i8* %tmp21, i32 %tmp22 ; <i8*> [#uses=1] |
| 39 | %tmp2324 = bitcast i8* %tmp23 to i32* ; <i32*> [#uses=1] |
| 40 | %tmp25 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] |
| 41 | %tmp26 = mul i32 %tmp25, 2 ; <i32> [#uses=1] |
| 42 | %tmp27 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] |
| 43 | %tmp28 = getelementptr i8* %tmp27, i32 %tmp26 ; <i8*> [#uses=1] |
| 44 | %tmp2829 = bitcast i8* %tmp28 to i32* ; <i32*> [#uses=1] |
| 45 | %tmp30 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1] |
| 46 | %tmp31 = mul i32 %tmp30, 3 ; <i32> [#uses=1] |
| 47 | %tmp32 = load i8** %src_addr, align 4 ; <i8*> [#uses=1] |
| 48 | %tmp33 = getelementptr i8* %tmp32, i32 %tmp31 ; <i8*> [#uses=1] |
| 49 | %tmp3334 = bitcast i8* %tmp33 to i32* ; <i32*> [#uses=1] |
| 50 | call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind |
| 51 | br label %return |
| 52 | |
| 53 | return: ; preds = %entry |
| 54 | ret void |
| 55 | } |