Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s |
| 2 | |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 3 | ; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 4 | |
| 5 | ; CHECK: vextractf128 $0 |
| 6 | ; CHECK-NEXT: punpcklbw |
| 7 | ; CHECK-NEXT: punpckhbw |
Bruno Cardoso Lopes | 65b74e1 | 2011-07-21 01:55:47 +0000 | [diff] [blame] | 8 | ; CHECK-NEXT: vinsertf128 $1 |
| 9 | ; CHECK-NEXT: vpermilps $85 |
; funcA: byte splat on a 256-bit vector. The shuffle mask is 32 copies of
; index 5, so every lane of the <32 x i8> result is element 5 of %a.
| 10 | define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { |
| 11 | entry: |
; The second shuffle operand is undef; only %a contributes to the result.
| 12 | %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 13 | ret <32 x i8> %shuffle |
| 14 | } |
| 15 | |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 16 | ; CHECK: vextractf128 $0 |
| 17 | ; CHECK-NEXT: punpckhwd |
Bruno Cardoso Lopes | dbd4fe2 | 2011-07-21 02:24:08 +0000 | [diff] [blame] | 18 | ; CHECK-NEXT: vinsertf128 $1 |
| 19 | ; CHECK-NEXT: vpermilps $85 |
; funcB: 16-bit splat on a 256-bit vector. The shuffle mask is 16 copies of
; index 5, so every lane of the <16 x i16> result is element 5 of %a.
| 20 | define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { |
| 21 | entry: |
; The second shuffle operand is undef; only %a contributes to the result.
| 22 | %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> |
| 23 | ret <16 x i16> %shuffle |
| 24 | } |
| 25 | |
Bruno Cardoso Lopes | 6a32adc | 2011-07-25 23:05:25 +0000 | [diff] [blame] | 26 | ; CHECK: vmovd |
| 27 | ; CHECK-NEXT: movlhps |
| 28 | ; CHECK-NEXT: vinsertf128 $1 |
; funcC: splat of a scalar i64 argument. The same value %q is inserted into
; lanes 0, 1, 2 and 3 of a <4 x i64> that starts out undef, so the returned
; vector holds %q in every lane.
| 29 | define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { |
| 30 | entry: |
| 31 | %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 |
| 32 | %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1 |
| 33 | %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2 |
| 34 | %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3 |
| 35 | ret <4 x i64> %vecinit6.i |
| 36 | } |
| 37 | |
| 38 | ; CHECK: vshufpd |
| 39 | ; CHECK-NEXT: vinsertf128 $1 |
; funcD: splat of a scalar double argument. The same value %q is inserted
; into lanes 0, 1, 2 and 3 of a <4 x double> that starts out undef, so the
; returned vector holds %q in every lane.
| 40 | define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { |
| 41 | entry: |
| 42 | %vecinit.i = insertelement <4 x double> undef, double %q, i32 0 |
| 43 | %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1 |
| 44 | %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2 |
| 45 | %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3 |
| 46 | ret <4 x double> %vecinit6.i |
| 47 | } |
Bruno Cardoso Lopes | ac5f13f | 2011-08-02 16:06:18 +0000 | [diff] [blame^] | 48 | |
| 49 | ; Test this simple opt: |
| 50 | ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> |
| 51 | ; To: |
| 52 | ; shuffle (vload ptr), undef, <1, 1, 1, 1> |
| 53 | ; CHECK: vmovaps |
| 54 | ; CHECK-NEXT: vpextrd |
; funcE: takes no arguments and returns void. Both conditional branches test
; undef, and the value produced by the final phi is never used.
; NOTE(review): this looks like a reduced reproducer whose purpose is the
; scalar-load-into-vector pattern in %load.i1247 - confirm against the
; originating commit.
| 55 | define void @funcE() nounwind { |
| 56 | allocas: |
; 18x18 float array on the stack, 32-byte aligned.
| 57 | %udx495 = alloca [18 x [18 x float]], align 32 |
| 58 | br label %for_test505.preheader |
| 59 | |
| 60 | for_test505.preheader: ; preds = %for_test505.preheader, %allocas |
; Self-loop with an undef condition.
| 61 | br i1 undef, label %for_exit499, label %for_test505.preheader |
| 62 | |
| 63 | for_exit499: ; preds = %for_test505.preheader |
| 64 | br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247 |
| 65 | |
| 66 | load.i1247: ; preds = %for_exit499 |
; Address of element [1][1] of the stack array, reinterpreted as i32* and
; loaded with 4-byte alignment.
| 67 | %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1 |
| 68 | %ptr.i1237 = bitcast float* %ptr1227 to i32* |
| 69 | %val.i1238 = load i32* %ptr.i1237, align 4 |
; The loaded scalar is written to lanes 6 and 7 only; lanes 0-5 stay undef.
| 70 | %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6 |
| 71 | %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7 |
| 72 | %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float> |
| 73 | br label %__load_and_broadcast_32.exit1249 |
| 74 | |
| 75 | __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499 |
; Merges the built vector with undef from the path that skips the load.
| 76 | %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ] |
| 77 | ret void |
| 78 | } |
| 79 | |