; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd

; funcA: splat byte element 5 of %a into all 32 lanes of a <32 x i8> result
; (shufflevector with an all-5 mask and an undef second operand).
; The expected lowering below works on the low 128-bit half, then rebuilds
; the 256-bit value with vinsertf128 and finishes with vpermilps.
; CHECK: vextractf128 $0
; CHECK-NEXT: punpcklbw
; CHECK-NEXT: punpckhbw
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; funcB: splat 16-bit element 5 of %a into all 16 lanes of a <16 x i16>
; result (shufflevector with an all-5 mask and an undef second operand).
; CHECK: vextractf128 $0
; CHECK-NEXT: punpckhwd
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; funcC: build a <4 x i64> whose four lanes all hold the scalar %q, using a
; chain of insertelement instructions starting from undef.
; CHECK: vmovd
; CHECK-NEXT: movlhps
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; funcD: build a <4 x double> whose four lanes all hold the scalar %q, using
; a chain of insertelement instructions starting from undef.
; CHECK: vshufpd
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this simple opt:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
;   shuffle (vload ptr), undef, <1, 1, 1, 1>
;
; funcE: a 32-bit value is loaded from element [1][1] of a 32-byte-aligned
; stack array of floats (through an i32* bitcast), inserted into lanes 6 and 7
; of an otherwise-undef <8 x i32>, bitcast to <8 x float>, and merged through
; a phi with undef before being returned. Branch conditions are undef; only
; the code generated for the load/insert path is of interest.
; CHECK: vmovaps
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  ; Address of %udx495[1][1], reinterpreted as i32 for the scalar load.
  %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32* %ptr.i1237, align 4
  ; Only lanes 6 and 7 are defined; the remaining lanes stay undef.
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; funcF: load an i32 from %ptr, insert it into lanes 6 and 7 of an
; otherwise-undef <8 x i32>, and return the vector bitcast to <8 x float>.
; CHECK: vpshufd $0
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32* %ptr) nounwind {
  %val = load i32* %ptr, align 4
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; funcG: splat float element 0 of %a into all 8 lanes of a <8 x float>
; result (shufflevector with an all-0 mask and an undef second operand).
; CHECK: vinsertf128 $1
; CHECK-NEXT: vpermilps $0
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; funcH: splat float element 5 of %a into all 8 lanes of a <8 x float>
; result (shufflevector with an all-5 mask and an undef second operand).
; Element 5 lies in the upper 128-bit half, which the expected lowering
; extracts first (vextractf128 $1).
; CHECK: vextractf128 $1
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
