Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s |
| 2 | |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 3 | ; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 4 | |
| 5 | ; CHECK: vextractf128 $0 |
| 6 | ; CHECK-NEXT: punpcklbw |
| 7 | ; CHECK-NEXT: punpckhbw |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 8 | ; CHECK-NEXT: vinsertf128 $1 |
| 9 | ; CHECK-NEXT: vpermilps $85 |
; funcA: byte splat. Every one of the 32 result lanes is taken from
; element 5 of %a; the second shuffle operand is undef and is never selected.
| 10 | define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { |
| 11 | entry: |
; All 32 mask indices below are 5.
| 12 | %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 13 | ret <32 x i8> %shuffle |
| 14 | } |
| 15 | |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 16 | ; CHECK: vextractf128 $0 |
| 17 | ; CHECK-NEXT: punpckhwd |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 18 | ; CHECK-NEXT: vinsertf128 $1 |
| 19 | ; CHECK-NEXT: vpermilps $85 |
; funcB: 16-bit splat. Every one of the 16 result lanes is taken from
; element 5 of %a; the second shuffle operand is undef and is never selected.
| 20 | define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { |
| 21 | entry: |
; All 16 mask indices below are 5.
| 22 | %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 23 | ret <16 x i16> %shuffle |
| 24 | } |
| 25 | |
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 26 | ; CHECK: vmovd |
| 27 | ; CHECK-NEXT: movlhps |
| 28 | ; CHECK-NEXT: vinsertf128 $1 |
; funcC: builds a <4 x i64> whose four lanes all hold the scalar argument %q,
; by chaining one insertelement per lane starting from an undef vector.
| 29 | define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { |
| 30 | entry: |
; Lanes 0..3 are filled in order; each step feeds the previous partial vector.
| 31 | %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 |
| 32 | %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 |
| 33 | %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 |
| 34 | %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 |
| 35 | ret <4 x i64> %vecinit6.i |
| 36 | } |
| 37 | |
| 38 | ; CHECK: vshufpd |
| 39 | ; CHECK-NEXT: vinsertf128 $1 |
; funcD: builds a <4 x double> whose four lanes all hold the scalar argument
; %q, by chaining one insertelement per lane starting from an undef vector.
| 40 | define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { |
| 41 | entry: |
; Lanes 0..3 are filled in order; each step feeds the previous partial vector.
| 42 | %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 |
| 43 | %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 |
| 44 | %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 |
| 45 | %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 |
| 46 | ret <4 x double> %vecinit6.i |
| 47 | } |
Bruno Cardoso Lopes | ac5f13f | 2011-08-02 16:06:18 +0000 | [diff] [blame] | 48 | |
| 49 | ; Test this simple opt: |
| 50 | ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> |
| 51 | ; To: |
| 52 | ; shuffle (vload ptr), undef, <1, 1, 1, 1> |
| 53 | ; CHECK: vmovaps |
| 54 | ; CHECK-NEXT: vpextrd |
; funcE: loads one 32-bit value from a stack-allocated float array and inserts
; it into lanes 6 and 7 of an otherwise-undef <8 x i32>. All branch conditions
; are undef and the function returns void, so only the vector construction in
; %load.i1247 carries meaning.
| 55 | define void @funcE() nounwind { |
| 56 | allocas: |
; 18 x 18 float array on the stack, 32-byte aligned.
| 57 | %udx495 = alloca [18 x [18 x float]], align 32 |
| 58 | br label %for_test505.preheader |
| 59 | |
; Self-looping block: branches either back to itself or on to %for_exit499.
| 60 | for_test505.preheader: ; preds = %for_test505.preheader, %allocas |
| 61 | br i1 undef, label %for_exit499, label %for_test505.preheader |
| 62 | |
| 63 | for_exit499: ; preds = %for_test505.preheader |
| 64 | br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 |
| 65 | |
| 66 | load.i1247: ; preds = %for_exit499 |
; Address of element [1][1] of the array, reinterpreted as i32* so the value
; is loaded as an integer rather than a float.
| 67 | %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 |
| 68 | %ptr.i1237 = bitcast float* %ptr1227 to i32* |
| 69 | %val.i1238 = load i32* %ptr.i1237, align 4 |
; Only lanes 6 and 7 receive the loaded value; lanes 0..5 stay undef.
| 70 | %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 |
| 71 | %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 |
| 72 | %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> |
| 73 | br label %__load_and_broadcast_32.exit1249 |
| 74 | |
| 75 | __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 |
; Merges the built vector with undef from the bypass edge; the phi result has
; no further use in this function.
| 76 | %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] |
| 77 | ret void |
| 78 | } |
| 79 | |
Bruno Cardoso Lopes | a5134a0 | 2011-08-11 02:49:41 +0000 | [diff] [blame^] | 80 | ; CHECK: vpshufd $0 |
| 81 | ; CHECK-NEXT: vinsertf128 $1 |
; funcF: loads an i32 from %ptr, inserts it into lanes 6 and 7 of an
; otherwise-undef <8 x i32>, and returns the vector bitcast to <8 x float>.
| 82 | define <8 x float> @funcF(i32* %ptr) nounwind { |
| 83 | %val = load i32* %ptr, align 4 |
; Only lanes 6 and 7 receive the loaded value; lanes 0..5 stay undef.
| 84 | %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6 |
| 85 | %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7 |
; Bit-level reinterpretation only: the lane count and total width are unchanged.
| 86 | %tmp = bitcast <8 x i32> %ret7 to <8 x float> |
| 87 | ret <8 x float> %tmp |
| 88 | } |
| 89 | |