Dan Gohman | 0a06310 | 2009-09-08 23:54:48 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s |
Evan Cheng | 3b57033 | 2009-07-16 18:44:05 +0000 | [diff] [blame] | 2 | |
| 3 | ; CHECK: _foo: |
| 4 | ; CHECK: pavgw LCPI1_4(%rip) |
| 5 | |
| 6 | ; rdar://7057804 |
| 7 | |
; @foo: codegen test input made of straight-line SSE2 intrinsic calls on
; <8 x i16> vectors, ending in six 16-byte-aligned stores through %out8x8.
; Only %out8x8 is used in the body; %in8x8 and %lastrow are never referenced.
; Many operands are undef or zeroinitializer (presumably left over from
; test-case reduction -- TODO confirm against the rdar referenced above).
; The FileCheck directives at the top of the file expect a pavgw instruction
; with an LCPI...(%rip) memory operand in the code generated for this function.
| 8 | define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
| 9 | entry: |
; Recurring instruction group (appears six times in this function):
;   pmulh.w(splat C, x), pcmpeq.w(<pmulh result>, splat 16384), psrli.w(.., 14),
;   pavg.w(<psrli result>, zero), add, psubs.w, padds.w
; Group 1: C = 6518, x = undef; result is %5.
| 10 | %0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 11 | %1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 12 | %2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 13 | %3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 14 | %tmp.i.i10 = add <8 x i16> %0, %3 ; <<8 x i16>> [#uses=1] |
| 15 | %4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 16 | %5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone ; <<8 x i16>> [#uses=3] |
| 17 | %6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Group 2: C = 6518, x = undef; result is %12.
| 18 | %7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 19 | %8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 20 | %9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 21 | %10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 22 | %tmp.i.i8 = add <8 x i16> %7, %10 ; <<8 x i16>> [#uses=1] |
| 23 | %11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 24 | %12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Saturating add/subtract combinations of %5, %6 and %12 with undef operands.
; %19 and %20 feed groups 5 and 6 below; %21 is bitcast to %22 for the final store.
| 25 | %13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 26 | %14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 27 | %15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 28 | %16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 29 | %17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 30 | %18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 31 | %19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 32 | %20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=4] |
| 33 | %21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 34 | %22 = bitcast <8 x i16> %21 to <2 x i64> ; <<2 x i64>> [#uses=1] |
; Group 3: C = 23170, x = undef; result is %28.
| 35 | %23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 36 | %24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 37 | %25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 38 | %26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 39 | %tmp.i.i6 = add <8 x i16> %23, %26 ; <<8 x i16>> [#uses=1] |
| 40 | %27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 41 | %28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Group 4: C = -23170, x = undef; result is %34.
| 42 | %29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 43 | %30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 44 | %31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 45 | %32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 46 | %tmp.i.i4 = add <8 x i16> %29, %32 ; <<8 x i16>> [#uses=1] |
| 47 | %33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 48 | %34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Group 5: C = 23170, x = %20. Unlike groups 1-4, psrli.w here shifts the
; product of a plain `mul` of %20 by the same splat, and psubs.w takes %19
; as its first operand. Result is %40.
| 49 | %35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 50 | %tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170> ; <<8 x i16>> [#uses=1] |
| 51 | %36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 52 | %37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 53 | %38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 54 | %tmp.i.i2 = add <8 x i16> %35, %38 ; <<8 x i16>> [#uses=1] |
| 55 | %39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 56 | %40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Group 6: same shape as group 5 with C = -23170; result is %46.
| 57 | %41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone ; <<8 x i16>> [#uses=2] |
| 58 | %tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170> ; <<8 x i16>> [#uses=1] |
| 59 | %42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 60 | %43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 61 | %44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 62 | %tmp.i.i = add <8 x i16> %41, %44 ; <<8 x i16>> [#uses=1] |
| 63 | %45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 64 | %46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone ; <<8 x i16>> [#uses=1] |
| 65 | %47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone ; <<8 x i16>> [#uses=1] |
; Store phase: each result is bitcast to <2 x i64> and stored with align 16
; at an i16 element offset from %out8x8:
;   %28 -> 8, %40 -> 16, %47 -> 24, %46 -> 40, %34 -> 48, %21 -> 56.
; Element offsets 0 and 32 are not written by this function.
| 66 | %48 = bitcast <8 x i16> %47 to <2 x i64> ; <<2 x i64>> [#uses=1] |
| 67 | %49 = bitcast <8 x i16> %28 to <2 x i64> ; <<2 x i64>> [#uses=1] |
| 68 | %50 = getelementptr i16* %out8x8, i64 8 ; <i16*> [#uses=1] |
| 69 | %51 = bitcast i16* %50 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 70 | store <2 x i64> %49, <2 x i64>* %51, align 16 |
| 71 | %52 = bitcast <8 x i16> %40 to <2 x i64> ; <<2 x i64>> [#uses=1] |
| 72 | %53 = getelementptr i16* %out8x8, i64 16 ; <i16*> [#uses=1] |
| 73 | %54 = bitcast i16* %53 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 74 | store <2 x i64> %52, <2 x i64>* %54, align 16 |
| 75 | %55 = getelementptr i16* %out8x8, i64 24 ; <i16*> [#uses=1] |
| 76 | %56 = bitcast i16* %55 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 77 | store <2 x i64> %48, <2 x i64>* %56, align 16 |
| 78 | %57 = bitcast <8 x i16> %46 to <2 x i64> ; <<2 x i64>> [#uses=1] |
| 79 | %58 = getelementptr i16* %out8x8, i64 40 ; <i16*> [#uses=1] |
| 80 | %59 = bitcast i16* %58 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 81 | store <2 x i64> %57, <2 x i64>* %59, align 16 |
| 82 | %60 = bitcast <8 x i16> %34 to <2 x i64> ; <<2 x i64>> [#uses=1] |
| 83 | %61 = getelementptr i16* %out8x8, i64 48 ; <i16*> [#uses=1] |
| 84 | %62 = bitcast i16* %61 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 85 | store <2 x i64> %60, <2 x i64>* %62, align 16 |
| 86 | %63 = getelementptr i16* %out8x8, i64 56 ; <i16*> [#uses=1] |
| 87 | %64 = bitcast i16* %63 to <2 x i64>* ; <<2 x i64>*> [#uses=1] |
| 88 | store <2 x i64> %22, <2 x i64>* %64, align 16 |
| 89 | ret void |
| 90 | } |
| 91 | |
; Declarations of the six x86 SSE2 intrinsics called by @foo. All are marked
; nounwind readnone and return <8 x i16>. Five take two <8 x i16> operands;
; psrli.w takes an <8 x i16> value and an i32 (passed as 14 at every call site).
| 92 | declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone |
| 93 | |
| 94 | declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone |
| 95 | |
| 96 | declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone |
| 97 | |
| 98 | declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone |
| 99 | |
| 100 | declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone |
| 101 | |
| 102 | declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone |