blob: c7acb27cab741be037bc0685fe4920b2bcbd7693 [file] [log] [blame]
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +00001// Test host codegen.
2// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
3// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
5// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
6// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
8
9// Test target codegen - host bc file has to be created first.
10// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
11// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
12// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
13// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
14// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
15// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
16// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
17// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
18
19// expected-no-diagnostics
20#ifndef HEADER
21#define HEADER
22
23// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
24// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
25// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
26
27// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
28// CHECK-DAG: [[S1:%.+]] = type { double }
29// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
30// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
31// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
32
33// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
34
35// We have 8 target regions, but only 7 that actually will generate offloading
36// code, only 6 will have mapped arguments, and only 4 have all-constant map
37// sizes.
38
39// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2]
40// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
41// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
42// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288]
43// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 35, i32 288, i32 35, i32 35, i32 288, i32 288, i32 35, i32 35]
44// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
45// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 35]
46// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
47// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 35]
48// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 35]
49// CHECK-DAG: @{{.*}} = private constant i8 0
50// CHECK-DAG: @{{.*}} = private constant i8 0
51// CHECK-DAG: @{{.*}} = private constant i8 0
52// CHECK-DAG: @{{.*}} = private constant i8 0
53// CHECK-DAG: @{{.*}} = private constant i8 0
54// CHECK-DAG: @{{.*}} = private constant i8 0
55// CHECK-DAG: @{{.*}} = private constant i8 0
56
57// TCHECK: @{{.+}} = constant [[ENTTY]]
58// TCHECK: @{{.+}} = constant [[ENTTY]]
59// TCHECK: @{{.+}} = constant [[ENTTY]]
60// TCHECK: @{{.+}} = constant [[ENTTY]]
61// TCHECK: @{{.+}} = constant [[ENTTY]]
62// TCHECK: @{{.+}} = constant [[ENTTY]]
63// TCHECK: @{{.+}} = constant [[ENTTY]]
64// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
65
66// Check if offloading descriptor is created.
67// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
68// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
69// CHECK: [[DEVBEGIN:@.+]] = external constant i8
70// CHECK: [[DEVEND:@.+]] = external constant i8
71// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
72// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
73
74// Check that the target registration function is registered as a global constructor.
75// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
76
77
78template<typename tx, typename ty>
79struct TT{ // Two-member aggregate; foo() declares a TT<long long, char> and modifies it inside a target region.
80 tx X; // Incremented in foo()'s last target region (d.X += 1).
81 ty Y; // Incremented in foo()'s last target region (d.Y += 1).
82};
83
84// CHECK: define {{.*}}[[FOO:@.+]](
85int foo(int n) { // Exercises five 'target parallel' regions that differ in their if(target:) clause and in what they capture; returns a.
86 int a = 0;
87 short aa = 0;
88 float b[10];
89 float bn[n]; // Variable-length array: its byte size is matched below as a run-time multiply by 4.
90 double c[5][10];
91 double cn[5][n]; // Variable-length array: its byte size is matched below as 5 * n * 8.
92 TT<long long, char> d;
93
Arpith Chacko Jacob33c849a2017-01-25 00:57:16 +000094 // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 1, i32 0)
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +000095 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
96 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
97 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
98 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
99 // CHECK: [[FAIL]]
100 // CHECK: call void [[HVT0:@.+]]()
101 // CHECK-NEXT: br label %[[END]]
102 // CHECK: [[END]]
103 #pragma omp target parallel
104 { // Region 1: empty body, nothing captured; the directives above match a call with 0 arguments and null argument arrays.
105 }
106
107 // CHECK: store i32 0, i32* [[RHV:%.+]], align 4
108 // CHECK: store i32 -1, i32* [[RHV]], align 4
109 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
110 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
111 // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
112 #pragma omp target parallel if(target: 0)
113 { // Region 2: constant-false if clause; the directives above match a stored -1 followed by the host fallback call.
114 a += 1;
115 }
116
Arpith Chacko Jacob33c849a2017-01-25 00:57:16 +0000117 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +0000118 // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
119 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
120 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
121 // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
122 // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
123 // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
124 // CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
125 // CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
126
127 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
128 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
129 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
130 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
131 // CHECK: [[FAIL]]
132 // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}})
133 // CHECK-NEXT: br label %[[END]]
134 // CHECK: [[END]]
135 #pragma omp target parallel if(target: 1)
136 { // Region 3: constant-true if clause; one argument (aa) is passed using the constant size/map-type arrays SIZET2/MAPT2.
137 aa += 1;
138 }
139
140 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
141 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
142 // CHECK: [[IFTHEN]]
Arpith Chacko Jacob33c849a2017-01-25 00:57:16 +0000143 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +0000144 // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
145 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
146
147 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
148 // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
149 // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
150 // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
151 // CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
152 // CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
153
154 // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
155 // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
156 // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
157 // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
158 // CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] %{{.+}} to i8*
159 // CHECK-DAG: [[P1]] = inttoptr i[[SZ]] %{{.+}} to i8*
160 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
161 // CHECK-NEXT: br label %[[IFEND:.+]]
162
163 // CHECK: [[IFELSE]]
164 // CHECK: store i32 -1, i32* [[RHV]], align 4
165 // CHECK-NEXT: br label %[[IFEND:.+]]
166
167 // CHECK: [[IFEND]]
168 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
169 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
170 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
171 // CHECK: [[FAIL]]
172 // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
173 // CHECK-NEXT: br label %[[END]]
174 // CHECK: [[END]]
175 #pragma omp target parallel if(target: n>10)
176 { // Region 4: run-time condition n>10; two arguments (a, aa); offload call in the then-branch, -1 stored in the else-branch.
177 a += 1;
178 aa += 1;
179 }
180
181 // We capture 3 VLA sizes in this target region
182 // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}},
183 // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
184 // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]],
185 // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
186
187 // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}},
188 // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
189 // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
190
191 // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
192 // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
193 // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
194
195 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
196 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
197 // CHECK: [[TRY]]
Arpith Chacko Jacob33c849a2017-01-25 00:57:16 +0000198 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +0000199 // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
200 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
201 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
202
203 // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
204 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
205 // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
206 // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
207 // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
208 // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
209 // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
210 // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
211 // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
212 // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
213 // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
214 // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
215 // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
216 // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
217 // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
218 // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
219 // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
220 // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
221 // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
222 // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
223 // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
224 // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
225 // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
226 // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
227 // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
228 // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
229 // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
230
231 // The names below are not necessarily consistent with the names used for the
232 // addresses above as some are repeated.
233 // CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
234 // CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
235 // CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
236 // CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
237 // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
238
239 // CHECK-DAG: [[BP1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
240 // CHECK-DAG: [[P1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
241 // CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}}
242 // CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}}
243 // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
244
245 // CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
246 // CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
247 // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
248
249 // CHECK-DAG: [[BP3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
250 // CHECK-DAG: [[P3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
251 // CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
252 // CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
253 // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
254
255 // CHECK-DAG: [[BP4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
256 // CHECK-DAG: [[P4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
257 // CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
258 // CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
259 // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
260
261 // CHECK-DAG: [[BP5:%[^,]+]] = bitcast float* %{{.+}} to i8*
262 // CHECK-DAG: [[P5:%[^,]+]] = bitcast float* %{{.+}} to i8*
263 // CHECK-DAG: store i8* [[BP5]], i8** {{%[^,]+}}
264 // CHECK-DAG: store i8* [[P5]], i8** {{%[^,]+}}
265 // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
266
267 // CHECK-DAG: [[BP6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
268 // CHECK-DAG: [[P6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
269 // CHECK-DAG: store i8* [[BP6]], i8** {{%[^,]+}}
270 // CHECK-DAG: store i8* [[P6]], i8** {{%[^,]+}}
271 // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
272
273 // CHECK-DAG: [[BP7:%[^,]+]] = bitcast double* %{{.+}} to i8*
274 // CHECK-DAG: [[P7:%[^,]+]] = bitcast double* %{{.+}} to i8*
275 // CHECK-DAG: store i8* [[BP7]], i8** {{%[^,]+}}
276 // CHECK-DAG: store i8* [[P7]], i8** {{%[^,]+}}
277 // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
278
279 // CHECK-DAG: [[BP8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
280 // CHECK-DAG: [[P8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
281 // CHECK-DAG: store i8* [[BP8]], i8** {{%[^,]+}}
282 // CHECK-DAG: store i8* [[P8]], i8** {{%[^,]+}}
283 // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
284
285 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
286 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
287 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
288 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
289
290 // CHECK: [[FAIL]]
291 // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
292 // CHECK-NEXT: br label %[[END]]
293 // CHECK: [[END]]
294 #pragma omp target parallel if(target: n>20)
295 { // Region 5: run-time condition n>20; nine arguments are passed, including the VLA dimension values, with sizes stored at run time.
296 a += 1;
297 b[2] += 1.0;
298 bn[3] += 1.0; // bn is a VLA, so its mapped size is the run-time product matched above.
299 c[1][2] += 1.0;
300 cn[1][3] += 1.0; // cn has a variable inner dimension, so its mapped size is also computed at run time.
301 d.X += 1;
302 d.Y += 1;
303 }
304
305 return a;
306}
307
308// Check that the offloading functions are emitted and that the arguments are
309// correct and loaded correctly for the target regions in foo().
310
311// CHECK: define internal void [[HVT0]]()
312// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*))
313//
314//
315// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.)
316// CHECK: ret void
317// CHECK-NEXT: }
318
319
320// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}})
321// Create stack storage and store argument in there.
322// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
323// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
324// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
325// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
326// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
327// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
328// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
329// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align
330// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align
331// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
332// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
333//
334//
335// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
336// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
337// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
338// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
339// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
340// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
341// CHECK: ret void
342// CHECK-NEXT: }
343
344// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}})
345// Create stack storage and store argument in there.
346// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
347// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
348// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
349// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
350// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
351// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
352// CHECK: store i16 [[AA]], i16* [[AA_C]], align
353// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
354// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
355//
356//
357// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
358// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
359// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
360// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
361// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
362// CHECK: ret void
363// CHECK-NEXT: }
364
365// CHECK: define internal void [[HVT3]]
366// Create stack storage and store argument in there.
367// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align
368// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
369// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align
370// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
371// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
372// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
373// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
374// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
375// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
376// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
377// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
378// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
379// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
380// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
381// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
382// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align
383// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
384// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
385// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
386//
387//
388// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
389// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align
390// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
391// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
392// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
393// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
394// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
395// CHECK: ret void
396// CHECK-NEXT: }
397
398// CHECK: define internal void [[HVT4]]
399// Create local storage for each capture.
400// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
401// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]*
402// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
403// CHECK: [[LOCAL_BN:%.+]] = alloca float*
404// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
405// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
406// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
407// CHECK: [[LOCAL_CN:%.+]] = alloca double*
408// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]*
409// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
410// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
411// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
412// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
413// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
414// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
415// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
416// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
417// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
418// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
419
420// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
421// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
422// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
423// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
424// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
425// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
426// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
427// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
428// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
429
430// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
431// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
432// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
433// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
434// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
435// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
436
437// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]])
438//
439//
440// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}})
441// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
442
443template<typename tx>
444tx ftemplate(int n) { // Function template holding one 'target parallel' region; bar() instantiates it as ftemplate<int>. Returns a.
445 tx a = 0;
446 short aa = 0;
447 tx b[10];
448
449 #pragma omp target parallel if(target: n>40)
450 { // The if clause uses the run-time condition n>40; the body modifies a, aa and b.
451 a += 1;
452 aa += 1;
453 b[2] += 1;
454 }
455
456 return a;
457}
458
459static
460int fstatic(int n) { // Internal-linkage function holding one 'target parallel' region; called from bar(). Returns a.
461 int a = 0;
462 short aa = 0;
463 char aaa = 0;
464 int b[10];
465
466 #pragma omp target parallel if(target: n>50)
467 { // The if clause uses the run-time condition n>50; the body modifies an int, a short, a char and an int[10] element.
468 a += 1;
469 aa += 1;
470 aaa += 1;
471 b[2] += 1;
472 }
473
474 return a;
475}
476
477struct S1 { // Struct with one double member and a member function containing a 'target parallel' region.
478 double a;
479
480 int r1(int n){ // Called from bar() as S.r1(n); returns c[1][1] + b after the region has run.
481 int b = n+1;
482 short int c[2][n]; // Variable-length array whose inner dimension is n.
483
484 #pragma omp target parallel if(target: n>60)
485 { // The if clause uses the run-time condition n>60; the body accesses the member through this and writes the VLA.
486 this->a = (double)b + 1.5;
487 c[1][1] = ++a; // Pre-increments the member a, then stores the result converted to short.
488 }
489
490 return c[1][1] + (int)b;
491 }
492};
493
494// CHECK: define {{.*}}@{{.*}}bar{{.*}}
495int bar(int n){ // Calls foo, S1::r1, fstatic and ftemplate<int> with n and returns the sum of their results.
496 int a = 0;
497
498 // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
499 a += foo(n);
500
501 S1 S;
502 // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
503 a += S.r1(n);
504
505 // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
506 a += fstatic(n); // NOTE(review): presumably this call is what causes the static function to be emitted - confirm.
507
508 // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
509 a += ftemplate<int>(n); // Instantiates the template with tx = int.
510
511 return a;
512}
513
514//
515// CHECK: define {{.*}}[[FS1]]
516//
517// CHECK: i8* @llvm.stacksave()
518// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
519// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]],
520// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
521
522// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
523// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
524
525// We capture 2 VLA sizes in this target region
526// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
527// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
528
529// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
530// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
531// CHECK: [[TRY]]
Arpith Chacko Jacob33c849a2017-01-25 00:57:16 +0000532// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
Arpith Chacko Jacob19b911c2017-01-18 18:18:53 +0000533// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
534// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
535// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
536// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
537// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
538// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
539// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
540// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
541// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
542// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
543// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
544// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
545// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
546// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
547// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
548
549// The names below are not necessarily consistent with the names used for the
550// addresses above as some are repeated.
551// CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
552// CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
553// CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
554// CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
555// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
556
557// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
558// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
559// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
560
561// CHECK-DAG: [[BP2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
562// CHECK-DAG: [[P2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
563// CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}}
564// CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}}
565// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
566
567// CHECK-DAG: [[BP3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
568// CHECK-DAG: [[P3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
569// CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
570// CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
571// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
572
573// CHECK-DAG: [[BP4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
574// CHECK-DAG: [[P4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
575// CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
576// CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
577// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
578
579// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
580// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
581// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
582// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
583
584// CHECK: [[FAIL]]
585// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
586// CHECK-NEXT: br label %[[END]]
587// CHECK: [[END]]
588
589//
590// CHECK: define {{.*}}[[FSTATIC]]
591//
592// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
593// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
594// CHECK: [[IFTHEN]]
595 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
596 // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
597// CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
598
599// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0
600// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
601// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
602// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
603// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
604// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
605
606// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
607// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
608// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
609// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
610// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
611// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
612
613// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
614// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
615// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
616// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
617
618// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3
619// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3
620// CHECK-DAG: store i8* [[BP3:%[^,]+]], i8** [[BPADDR3]]
621// CHECK-DAG: store i8* [[P3:%[^,]+]], i8** [[PADDR3]]
622// CHECK-DAG: [[BP3]] = bitcast [10 x i32]* %{{.+}} to i8*
623// CHECK-DAG: [[P3]] = bitcast [10 x i32]* %{{.+}} to i8*
624
625// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
626// CHECK-NEXT: br label %[[IFEND:.+]]
627
628// CHECK: [[IFELSE]]
629// CHECK: store i32 -1, i32* [[RHV]], align 4
630// CHECK-NEXT: br label %[[IFEND:.+]]
631
632// CHECK: [[IFEND]]
633// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
634// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
635// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
636// CHECK: [[FAIL]]
637// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
638// CHECK-NEXT: br label %[[END]]
639// CHECK: [[END]]
640
641//
642// CHECK: define {{.*}}[[FTEMPLATE]]
643//
644// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
645// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
646// CHECK: [[IFTHEN]]
647 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
648 // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
649// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
650
651// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
652// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
653// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
654// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
655// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
656// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
657
658// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
659// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
660// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
661// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
662// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
663// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
664
665// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
666// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
667// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
668// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
669// CHECK-DAG: [[BP2]] = bitcast [10 x i32]* %{{.+}} to i8*
670// CHECK-DAG: [[P2]] = bitcast [10 x i32]* %{{.+}} to i8*
671
672// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
673// CHECK-NEXT: br label %[[IFEND:.+]]
674
675// CHECK: [[IFELSE]]
676// CHECK: store i32 -1, i32* [[RHV]], align 4
677// CHECK-NEXT: br label %[[IFEND:.+]]
678
679// CHECK: [[IFEND]]
680// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
681// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
682// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
683// CHECK: [[FAIL]]
684// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
685// CHECK-NEXT: br label %[[END]]
686// CHECK: [[END]]
687
688
689
690// Check that the offloading functions are emitted and that the arguments are
691// correct and loaded correctly for the target regions of the callees of bar().
692
693// CHECK: define internal void [[HVT7]]
694// Create local storage for each capture.
695// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]*
696// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]]
697// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
698// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
699// CHECK: [[LOCAL_C:%.+]] = alloca i16*
700// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
701// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
702// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
703// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
704// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
705// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
706// Store captures in the context.
707// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
708// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
709// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
710// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
711// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
712
713// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
714// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
715// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
716// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
717// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
718// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
719
720// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
721//
722//
723// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}})
724// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
725
726
727// CHECK: define internal void [[HVT6]]
728// Create local storage for each capture.
729// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
730// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
731// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
732// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
733// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
734// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
735// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
736// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
737// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
738// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
739// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
740// Store captures in the context.
741// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
742// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
743// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
744// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
745
746// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
747// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
748// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
749// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
750// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
751// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
752
753// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
754// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
755// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align
756// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
757
758// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
759// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
760// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align
761// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
762
763// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
764//
765//
766// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
767// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
768
769// CHECK: define internal void [[HVT5]]
770// Create local storage for each capture.
771// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
772// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
773// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
774// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
775// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
776// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
777// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
778// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
779// Store captures in the context.
780// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
781// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
782// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
783
784// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
785// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
786// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
787// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
788// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
789// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
790
791// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
792// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
793// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align
794// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
795
796// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
797//
798//
799// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
800// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function.
801
802#endif