// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
//
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// Codegen for '#pragma omp for schedule(static) ordered' (not chunked):
// static init/fini runtime calls, with an __kmpc_ordered/__kmpc_end_ordered
// pair around the ordered region in the loop body.
// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(static) ordered
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// Loop header
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (int i = 32000000; i > 33; i += -7) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
// CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 32000000, [[CALC_I_1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]

// ... start of ordered region ...
// CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
  #pragma omp ordered
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// Codegen for '#pragma omp for schedule(dynamic) ordered' over an unsigned
// 64-bit iteration space: dispatch init/next/fini runtime calls (_8u
// variants), with the ordered region inside the dispatched loop body.
// CHECK-LABEL: define {{.*void}} @{{.*}}dynamic1{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(dynamic) ordered
// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned long long i = 131071; i < 2147483647; i += 127) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]

// ... start of ordered region ...
// CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
  #pragma omp ordered
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// Codegen for '#pragma omp for schedule(auto) collapse(2) ordered': the
// collapsed nest is dispatched via the signed 64-bit entry points
// (__kmpc_dispatch_init_8/next_8/fini_8), with an ordered region in the body.
// CHECK-LABEL: define {{.*void}} @{{.*}}test_auto{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void test_auto(float *a, float *b, float *c, float *d) {
  unsigned int x = 0;
  unsigned int y = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(auto) collapse(2) ordered
// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// FIXME: When the iteration count of some nested loop is not a known constant,
// we should pre-calculate it, like we do for the total number of iterations!
  for (char i = static_cast<char>(y); i <= '9'; ++i)
    for (x = 11; x > 0; --x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}

// ... start of ordered region ...
// CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
  #pragma omp ordered
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// Codegen for '#pragma omp for collapse(2) schedule(runtime) ordered': the
// collapsed nest fits a signed 32-bit iteration space, so the _4 dispatch
// entry points are used; the ordered region sits inside the loop body.
// CHECK-LABEL: define {{.*void}} @{{.*}}runtime{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void runtime(float *a, float *b, float *c, float *d) {
  int x = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for collapse(2) schedule(runtime) ordered
// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned char i = '0' ; i <= '9'; ++i)
    for (x = -10; x < 10; ++x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}

// ... start of ordered region ...
// CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
  #pragma omp ordered
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

#endif // HEADER