Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s |
| 2 | |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 3 | ; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 4 | |
; funcA: byte splat from a 256-bit vector.
; The shuffle mask below is 32 copies of index 5, so every i8 lane of the
; result is a copy of element 5 of %a. The directives that follow list the
; instruction sequence FileCheck expects in the llc output, in order.
; The punpck* entries are the non-VEX forms; see the FIXME near the top of
; the file about switching them to the AVX versions.
| 5 | ; CHECK: vextractf128 $0 |
| 6 | ; CHECK-NEXT: punpcklbw |
| 7 | ; CHECK-NEXT: punpckhbw |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 8 | ; CHECK-NEXT: vinsertf128 $1 |
| 9 | ; CHECK-NEXT: vpermilps $85 |
| 10 | define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { |
| 11 | entry: |
; Second operand is undef: only elements of %a are selected by the mask.
| 12 | %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 13 | ret <32 x i8> %shuffle |
| 14 | } |
| 15 | |
; funcB: 16-bit splat from a 256-bit vector.
; The shuffle mask below is 16 copies of index 5, so every i16 lane of the
; result is a copy of element 5 of %a. The directives that follow list the
; instruction sequence FileCheck expects in the llc output, in order.
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 16 | ; CHECK: vextractf128 $0 |
| 17 | ; CHECK-NEXT: punpckhwd |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 18 | ; CHECK-NEXT: vinsertf128 $1 |
| 19 | ; CHECK-NEXT: vpermilps $85 |
| 20 | define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { |
| 21 | entry: |
; Second operand is undef: only elements of %a are selected by the mask.
| 22 | %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 23 | ret <16 x i16> %shuffle |
| 24 | } |
| 25 | |
; funcC: splat of a scalar i64 into a <4 x i64>.
; The same value %q is inserted into lanes 0, 1, 2 and 3 in turn, starting
; from an undef vector, so all four lanes of the returned vector equal %q.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 26 | ; CHECK: vmovd |
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 27 | ; CHECK-NEXT: vinsertf128 $1 |
Bruno Cardoso Lopes | fc0a702 | 2011-08-17 02:29:10 +0000 | [diff] [blame^] | 28 | ; CHECK-NEXT: vpermilps $0 |
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 29 | define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { |
| 30 | entry: |
; Each insertelement builds on the previous result; only the lane index changes.
| 31 | %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 |
| 32 | %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 |
| 33 | %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 |
| 34 | %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 |
| 35 | ret <4 x i64> %vecinit6.i |
| 36 | } |
| 37 | |
; funcD: splat of a scalar double into a <4 x double>.
; The same value %q is inserted into lanes 0, 1, 2 and 3 in turn, starting
; from an undef vector, so all four lanes of the returned vector equal %q.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
Bruno Cardoso Lopes | fc0a702 | 2011-08-17 02:29:10 +0000 | [diff] [blame^] | 38 | ; CHECK: vinsertf128 $1 |
| 39 | ; CHECK-NEXT: vpermilps $0 |
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 40 | define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { |
| 41 | entry: |
; Each insertelement builds on the previous result; only the lane index changes.
| 42 | %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 |
| 43 | %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 |
| 44 | %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 |
| 45 | %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 |
| 46 | ret <4 x double> %vecinit6.i |
| 47 | } |
Bruno Cardoso Lopes | ac5f13f | 2011-08-02 16:06:18 +0000 | [diff] [blame] | 48 | |
| 49 | ; Test this simple opt: |
| 50 | ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> |
| 51 | ; To: |
| 52 | ; shuffle (vload ptr), undef, <1, 1, 1, 1> |
; funcE: load one float-sized element from a stack array and place it in the
; top two lanes of an <8 x float>.
; Control flow: a self-looping preheader and an exit block both branch on
; undef conditions; the load happens only on the load.i1247 path, and the
; final block merges that vector with undef through a phi.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
| 53 | ; CHECK: vmovaps |
Bruno Cardoso Lopes | 5f1d8ab | 2011-08-11 02:49:44 +0000 | [diff] [blame] | 54 | ; CHECK-NEXT: vinsertf128 $1 |
| 55 | ; CHECK-NEXT: vpermilps $-1 |
| 56 | define <8 x float> @funcE() nounwind { |
Bruno Cardoso Lopes | ac5f13f | 2011-08-02 16:06:18 +0000 | [diff] [blame] | 57 | allocas: |
; 18x18 float array on the stack, 32-byte aligned; never initialized here.
| 58 | %udx495 = alloca [18 x [18 x float]], align 32 |
| 59 | br label %for_test505.preheader |
| 60 | |
| 61 | for_test505.preheader: ; preds = %for_test505.preheader, %allocas |
| 62 | br i1 undef, label %for_exit499, label %for_test505.preheader |
| 63 | |
| 64 | for_exit499: ; preds = %for_test505.preheader |
| 65 | br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 |
| 66 | |
| 67 | load.i1247: ; preds = %for_exit499 |
; Address of element [1][1], reinterpreted as i32* so the value is loaded as
; a 32-bit integer.
| 68 | %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 |
| 69 | %ptr.i1237 = bitcast float* %ptr1227 to i32* |
| 70 | %val.i1238 = load i32* %ptr.i1237, align 4 |
; Only lanes 6 and 7 are written; lanes 0-5 stay undef.
| 71 | %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 |
| 72 | %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 |
| 73 | %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> |
| 74 | br label %__load_and_broadcast_32.exit1249 |
| 75 | |
| 76 | __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 |
; Result is the loaded vector on the load path, undef otherwise.
| 77 | %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] |
Bruno Cardoso Lopes | 5f1d8ab | 2011-08-11 02:49:44 +0000 | [diff] [blame] | 78 | ret <8 x float> %load_broadcast12281250 |
Bruno Cardoso Lopes | ac5f13f | 2011-08-02 16:06:18 +0000 | [diff] [blame] | 79 | } |
| 80 | |
; funcF: load an i32 through %ptr and place it in lanes 6 and 7 of an
; <8 x i32> whose other lanes are undef, then return the vector reinterpreted
; as <8 x float>.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
Bruno Cardoso Lopes | fc0a702 | 2011-08-17 02:29:10 +0000 | [diff] [blame^] | 81 | ; CHECK: vinsertf128 $1 |
| 82 | ; CHECK-NEXT: vpermilps $0 |
Bruno Cardoso Lopes | a5134a0 | 2011-08-11 02:49:41 +0000 | [diff] [blame] | 83 | define <8 x float> @funcF(i32* %ptr) nounwind { |
| 84 | %val = load i32* %ptr, align 4 |
; Only lanes 6 and 7 are written; lanes 0-5 stay undef.
| 85 | %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6 |
| 86 | %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7 |
| 87 | %tmp = bitcast <8 x i32> %ret7 to <8 x float> |
| 88 | ret <8 x float> %tmp |
| 89 | } |
| 90 | |
; funcG: float splat of element 0.
; The shuffle mask below is eight copies of index 0, so every lane of the
; result is a copy of element 0 of %a.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
Bruno Cardoso Lopes | 5f1d8ab | 2011-08-11 02:49:44 +0000 | [diff] [blame] | 91 | ; CHECK: vinsertf128 $1 |
| 92 | ; CHECK-NEXT: vpermilps $0 |
| 93 | define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { |
| 94 | entry: |
; Second operand is undef: only elements of %a are selected by the mask.
| 95 | %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> |
| 96 | ret <8 x float> %shuffle |
| 97 | } |
| 98 | |
; funcH: float splat of element 5.
; The shuffle mask below is eight copies of index 5, so every lane of the
; result is a copy of element 5 of %a. Element 5 is one of the upper four
; lanes (4-7) of the 8-lane input.
; The directives that follow list the instruction sequence FileCheck expects
; in the llc output, in order.
| 99 | ; CHECK: vextractf128 $1 |
| 100 | ; CHECK-NEXT: vinsertf128 $1 |
| 101 | ; CHECK-NEXT: vpermilps $85 |
| 102 | define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { |
| 103 | entry: |
; Second operand is undef: only elements of %a are selected by the mask.
| 104 | %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 105 | ret <8 x float> %shuffle |
| 106 | } |
| 107 | |