Douglas Yung | 31ec97a | 2016-11-24 01:53:38 +0000 | [diff] [blame] | 1 | // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
|
| 2 | // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
|
| 3 | // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
| 4 | // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
|
| 5 | // expected-no-diagnostics
|
| 6 | #ifndef HEADER
|
| 7 | #define HEADER
|
| 8 |
|
| 9 | // CHECK: [[SS_TY:%.+]] = type { i32 }
|
| 10 |
|
| 11 | long long get_val() { return 0; }
|
| 12 | double *g_ptr;
|
| 13 |
|
| 14 | // CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
|
| 15 | void simple(float *a, float *b, float *c, float *d) { // Codegen fixture: a sequence of '#pragma omp simd' loops (plain, linear, lastprivate, reduction) whose emitted IR is matched by the directives in the comments.
|
| 16 | #pragma omp simd
|
| 17 | // CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
|
| 18 |
|
| 19 | // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID:[0-9]+]]
|
| 20 | // CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], 6
|
| 21 | // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]]
|
| 22 | for (int i = 3; i < 32; i += 5) { // i = 3, 8, ..., 28: 6 iterations
|
| 23 | // CHECK: [[SIMPLE_LOOP1_BODY]]:
|
| 24 | // Start of body: calculate i from IV:
|
| 25 | // CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
|
| 26 | // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5
|
| 27 | // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]]
|
| 28 | // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
|
| 29 | // ... loop body ...
|
| 30 | // End of body: store into a[i]:
|
| 31 | // CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
|
| 32 | a[i] = b[i] * c[i] * d[i];
|
| 33 | // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
|
| 34 | // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
|
| 35 | // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP1_ID]]
|
| 36 | // br label %{{.+}}, !llvm.loop !{{.+}}
|
| 37 | }
|
| 38 | // CHECK: [[SIMPLE_LOOP1_END]]:
|
| 39 |
|
| 40 | long long k = get_val();
|
| 41 |
|
| 42 | #pragma omp simd linear(k : 3)
|
| 43 | // CHECK: [[K0:%.+]] = call {{.*}}i64 @{{.*}}get_val
|
| 44 | // CHECK-NEXT: store i64 [[K0]], i64* [[K_VAR:%[^,]+]]
|
| 45 | // CHECK: store i32 0, i32* [[OMP_IV2:%[^,]+]]
|
| 46 | // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR]]
|
| 47 | // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
|
| 48 |
|
| 49 | // CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID:[0-9]+]]
|
| 50 | // CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV2]], 9
|
| 51 | // CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]]
|
| 52 | for (int i = 10; i > 1; i--) { // i = 10 down to 2: 9 iterations
|
| 53 | // CHECK: [[SIMPLE_LOOP2_BODY]]:
|
| 54 | // Start of body: calculate i from IV:
|
| 55 | // CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 56 | // FIXME: Why was the following "mul 1" not constant folded?
|
| 57 | // CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1
|
| 58 | // CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]]
|
| 59 | // CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 60 | //
|
| 61 | // CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 62 | // CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 63 | // CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3
|
| 64 | // CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
|
| 65 | // CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
|
| 66 | // Update of the privatized version of linear variable!
|
| 67 | // CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
|
| 68 | a[k]++;
|
| 69 | k = k + 3;
|
| 70 | // CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 71 | // CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1
|
| 72 | // CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP2_ID]]
|
| 73 | // br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]]
|
| 74 | }
|
| 75 | // CHECK: [[SIMPLE_LOOP2_END]]:
|
| 76 | //
|
| 77 | // Update linear vars after loop, as the loop was operating on a private version (9 iterations * step 3 = 27).
|
| 78 | // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
|
| 79 | // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
|
| 80 | // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]]
|
| 81 | //
|
| 82 |
|
| 83 | int lin = 12;
|
| 84 | #pragma omp simd linear(lin : get_val()), linear(g_ptr)
|
| 85 |
|
| 86 | // Init linear private var.
|
| 87 | // CHECK: store i32 12, i32* [[LIN_VAR:%[^,]+]]
|
| 88 | // CHECK: store i64 0, i64* [[OMP_IV3:%[^,]+]]
|
| 89 |
|
| 90 | // CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]]
|
| 91 | // CHECK-NEXT: store i32 [[LIN_LOAD]], i32* [[LIN_START:%[^,]+]]
|
| 92 | // Remember linear step.
|
| 93 | // CHECK: [[CALL_VAL:%.+]] = invoke
|
| 94 | // CHECK: store i64 [[CALL_VAL]], i64* [[LIN_STEP:%[^,]+]]
|
| 95 |
|
| 96 | // CHECK: [[GLIN_LOAD:%.+]] = load double*, double** [[GLIN_VAR:@[^,]+]]
|
| 97 | // CHECK-NEXT: store double* [[GLIN_LOAD]], double** [[GLIN_START:%[^,]+]]
|
| 98 |
|
| 99 | // CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID:[0-9]+]]
|
| 100 | // CHECK-NEXT: [[CMP3:%.+]] = icmp ult i64 [[IV3]], 4
|
| 101 | // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]]
|
| 102 | for (unsigned long long it = 2000; it >= 600; it-=400) { // it = 2000, 1600, 1200, 800: 4 iterations
|
| 103 | // CHECK: [[SIMPLE_LOOP3_BODY]]:
|
| 104 | // Start of body: calculate it from IV:
|
| 105 | // CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 106 | // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400
|
| 107 | // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]]
|
| 108 | // CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 109 | //
|
| 110 | // Linear start and step are used to calculate current value of the linear variable.
|
| 111 | // CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 112 | // CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 113 | // CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 114 | // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 115 | // CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 116 | // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1
|
| 117 | // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]]
|
| 118 | // CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 119 | *g_ptr++ = 0.0;
|
| 120 | // CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 121 | // CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 122 | a[it + lin]++;
|
| 123 | // CHECK: [[FLT_INC:%.+]] = fadd float
|
| 124 | // CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 125 | // CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 126 | // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1
|
| 127 | // CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP3_ID]]
|
| 128 | }
|
| 129 | // CHECK: [[SIMPLE_LOOP3_END]]:
|
| 130 | //
|
| 131 | // Linear start and step are used to calculate final value of the linear variables.
|
| 132 | // CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]
|
| 133 | // CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]
|
| 134 | // CHECK: store i32 {{.+}}, i32* [[LIN_VAR]],
|
| 135 | // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
|
| 136 | // CHECK: store double* {{.*}}[[GLIN_VAR]]
|
| 137 |
|
| 138 | #pragma omp simd
|
| 139 | // CHECK: store i32 0, i32* [[OMP_IV4:%[^,]+]]
|
| 140 |
|
| 141 | // CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID:[0-9]+]]
|
| 142 | // CHECK-NEXT: [[CMP4:%.+]] = icmp slt i32 [[IV4]], 4
|
| 143 | // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]]
|
| 144 | for (short it = 6; it <= 20; it-=-4) { // it = 6, 10, 14, 18: 4 iterations
|
| 145 | // CHECK: [[SIMPLE_LOOP4_BODY]]:
|
| 146 | // Start of body: calculate it from IV:
|
| 147 | // CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
|
| 148 | // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4
|
| 149 | // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]]
|
| 150 | // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16
|
| 151 | // CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
|
| 152 |
|
| 153 | // CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
|
| 154 | // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1
|
| 155 | // CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP4_ID]]
|
| 156 | }
|
| 157 | // CHECK: [[SIMPLE_LOOP4_END]]:
|
| 158 |
|
| 159 | #pragma omp simd
|
| 160 | // CHECK: store i32 0, i32* [[OMP_IV5:%[^,]+]]
|
| 161 |
|
| 162 | // CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID:[0-9]+]]
|
| 163 | // CHECK-NEXT: [[CMP5:%.+]] = icmp slt i32 [[IV5]], 26
|
| 164 | // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]]
|
| 165 | for (unsigned char it = 'z'; it >= 'a'; it+=-1) { // 'z' down to 'a': 26 iterations
|
| 166 | // CHECK: [[SIMPLE_LOOP5_BODY]]:
|
| 167 | // Start of body: calculate it from IV:
|
| 168 | // CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
|
| 169 | // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1
|
| 170 | // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, [[IV5_1]]
|
| 171 | // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8
|
| 172 | // CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
|
| 173 |
|
| 174 | // CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
|
| 175 | // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1
|
| 176 | // CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP5_ID]]
|
| 177 | }
|
| 178 | // CHECK: [[SIMPLE_LOOP5_END]]:
|
| 179 |
|
| 180 | // CHECK-NOT: mul i32 %{{.+}}, 10
|
| 181 | #pragma omp simd
|
| 182 | for (unsigned i=100; i<10; i+=10) { // condition is false on entry: zero iterations
|
| 183 | }
|
| 184 |
|
| 185 | int A;
|
| 186 | // CHECK: store i32 -1, i32* [[A:%.+]],
|
| 187 | A = -1;
|
| 188 | #pragma omp simd lastprivate(A)
|
| 189 | // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
|
| 190 | // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
|
| 191 | // CHECK: [[SIMD_LOOP7_COND]]:
|
| 192 | // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
|
| 193 | // CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7
|
| 194 | // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]]
|
| 195 | for (long long i = -10; i < 10; i += 3) { // i = -10, -7, ..., 8: 7 iterations; i ends at 11
|
| 196 | // CHECK: [[SIMPLE_LOOP7_BODY]]:
|
| 197 | // Start of body: calculate i from IV:
|
| 198 | // CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 199 | // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3
|
| 200 | // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
|
| 201 | // CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 202 | // CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 203 | // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32
|
| 204 | // CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 205 | A = i;
|
| 206 | // CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 207 | // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1
|
| 208 | // CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID]]
|
| 209 | }
|
| 210 | // CHECK: [[SIMPLE_LOOP7_END]]:
|
| 211 | // CHECK-NEXT: store i64 11, i64*
|
| 212 | // CHECK-NEXT: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]],
|
| 213 | // CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]],
|
| 214 | int R;
|
| 215 | // CHECK: store i32 -1, i32* [[R:%[^,]+]],
|
| 216 | R = -1;
|
| 217 | // CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]],
|
| 218 | // CHECK: store i32 1, i32* [[R_PRIV:%[^,]+]],
|
| 219 | #pragma omp simd reduction(*:R)
|
| 220 | // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]]
|
| 221 | // CHECK: [[SIMD_LOOP8_COND]]:
|
| 222 | // CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID:[0-9]+]]
|
| 223 | // CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7
|
| 224 | // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]]
|
| 225 | for (long long i = -10; i < 10; i += 3) { // i = -10, -7, ..., 8: 7 iterations
|
| 226 | // CHECK: [[SIMPLE_LOOP8_BODY]]:
|
| 227 | // Start of body: calculate i from IV:
|
| 228 | // CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 229 | // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3
|
| 230 | // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]]
|
| 231 | // CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 232 | // CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 233 | // CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 234 | R *= i;
|
| 235 | // CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 236 | // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1
|
| 237 | // CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP8_ID]]
|
| 238 | }
|
| 239 | // CHECK: [[SIMPLE_LOOP8_END]]:
|
| 240 | // CHECK-DAG: [[R_VAL:%.+]] = load i32, i32* [[R]],
|
| 241 | // CHECK-DAG: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]],
|
| 242 | // CHECK: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]]
|
| 243 | // CHECK-NEXT: store i32 [[RED]], i32* [[R]],
|
| 244 | // CHECK-NEXT: ret void
|
| 245 | }
|
| 246 |
|
| 247 | template <class T, unsigned K> T tfoo(T a) { return a + K; } // Returns a plus the compile-time constant K, converted to T.
|
| 248 |
|
| 249 | template <typename T, unsigned N>
|
| 250 | int templ1(T a, T *z) { // Stores a + tfoo<T, N>(i + j) into z[i + j] inside a 'simd collapse(N)' loop nest; always returns 0.
|
| 251 | #pragma omp simd collapse(N)
|
| 252 | for (int i = 0; i < N * 2; i++) { // 2*N iterations (4 when N == 2)
|
| 253 | for (long long j = 0; j < (N + N + N + N); j += 2) { // 2*N iterations (4 when N == 2)
|
| 254 | z[i + j] = a + tfoo<T, N>(i + j);
|
| 255 | }
|
| 256 | }
|
| 257 | return 0;
|
| 258 | }
|
| 259 |
|
| 260 | // Instantiation of templ1<float,2>
|
| 261 | // CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}})
|
| 262 | // CHECK: store i64 0, i64* [[T1_OMP_IV:[^,]+]]
|
| 263 | // ...
|
| 264 | // CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID:[0-9]+]]
|
| 265 | // CHECK-NEXT: [[CMP1:%.+]] = icmp slt i64 [[IV]], 16
|
| 266 | // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]]
|
| 267 | // CHECK: [[T1_BODY]]:
|
| 268 | // Loop counters i and j updates:
|
| 269 | // CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 270 | // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4
|
| 271 | // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1
|
| 272 | // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]]
|
| 273 | // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32
|
| 274 | // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 275 | // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 276 | // CHECK-NEXT: [[J_1:%.+]] = srem i64 [[IV2]], 4
|
| 277 | // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2
|
| 278 | // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]]
|
| 279 | // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 280 | // simd.for.inc:
|
| 281 | // CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 282 | // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1
|
| 283 | // CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[T1_ID]]
|
| 284 | // CHECK-NEXT: br label {{%.+}}
|
| 285 | // CHECK: [[T1_END]]:
|
| 286 | // CHECK: ret i32 0
|
| 287 | //
|
| 288 | void inst_templ1() { // Instantiates templ1<float, 2> so that its definition is emitted.
|
| 289 | float a; // left uninitialized; the value is only passed through to templ1
|
| 290 | float z[100];
|
| 291 | templ1<float,2> (a, z);
|
| 292 | }
|
| 293 |
|
| 294 |
|
| 295 | typedef int MyIdx;
|
| 296 |
|
| 297 | class IterDouble { // Minimal iterator-like wrapper around a double*.
|
| 298 | double *Ptr;
|
| 299 | public:
|
| 300 | IterDouble operator++ () const { // Returns a copy advanced by one element; *this is left unchanged.
|
| 301 | IterDouble n;
|
| 302 | n.Ptr = Ptr + 1;
|
| 303 | return n;
|
| 304 | }
|
| 305 | bool operator < (const IterDouble &that) const { // Orders iterators by the wrapped pointer.
|
| 306 | return Ptr < that.Ptr;
|
| 307 | }
|
| 308 | double & operator *() const { // Dereferences the wrapped pointer.
|
| 309 | return *Ptr;
|
| 310 | }
|
| 311 | MyIdx operator - (const IterDouble &that) const { // Element distance between the wrapped pointers, cast to MyIdx.
|
| 312 | return (MyIdx) (Ptr - that.Ptr);
|
| 313 | }
|
| 314 | IterDouble operator + (int Delta) { // Returns a copy advanced by Delta elements.
|
| 315 | IterDouble re;
|
| 316 | re.Ptr = Ptr + Delta;
|
| 317 | return re;
|
| 318 | }
|
| 319 |
|
| 320 | ///~IterDouble() {}
|
| 321 | };
|
| 322 |
|
| 323 | // CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}}
|
| 324 | void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { // simd loop whose counter is a class-type iterator rather than an integer.
|
| 325 | //
|
| 326 | // Calculate number of iterations before the loop body.
|
| 327 | // CHECK: [[DIFF1:%.+]] = invoke {{.*}}i32 @{{.*}}IterDouble{{.*}}
|
| 328 | // CHECK: [[DIFF2:%.+]] = sub nsw i32 [[DIFF1]], 1
|
| 329 | // CHECK-NEXT: [[DIFF3:%.+]] = add nsw i32 [[DIFF2]], 1
|
| 330 | // CHECK-NEXT: [[DIFF4:%.+]] = sdiv i32 [[DIFF3]], 1
|
| 331 | // CHECK-NEXT: [[DIFF5:%.+]] = sub nsw i32 [[DIFF4]], 1
|
| 332 | // CHECK-NEXT: store i32 [[DIFF5]], i32* [[OMP_LAST_IT:%[^,]+]]{{.+}}
|
| 333 | // CHECK: store i32 0, i32* [[IT_OMP_IV:%[^,]+]]
|
| 334 | #pragma omp simd
|
| 335 |
|
| 336 | // CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID:[0-9]+]]
|
| 337 | // CHECK-NEXT: [[LAST_IT:%.+]] = load i32, i32* [[OMP_LAST_IT]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
|
| 338 | // CHECK-NEXT: [[NUM_IT:%.+]] = add nsw i32 [[LAST_IT]], 1
|
| 339 | // CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], [[NUM_IT]]
|
| 340 | // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]]
|
| 341 | for (IterDouble i = ia; i < ib; ++i) {
|
| 342 | // CHECK: [[IT_BODY]]:
|
| 343 | // Start of body: calculate i from index:
|
| 344 | // CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
|
| 345 | // Call of operator+ (i, IV).
|
| 346 | // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}}
|
| 347 | // ... loop body ...
|
| 348 | *i = *ic * 0.5;
|
| 349 | // Float multiply and save result.
|
| 350 | // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01
|
| 351 | // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}}
|
| 352 | // CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
|
| 353 | ++ic; // result discarded: IterDouble::operator++ is const and returns a new object
|
| 354 | //
|
| 355 | // CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
|
| 356 | // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1
|
| 357 | // CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[ITER_LOOP_ID]]
|
| 358 | // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]]
|
| 359 | }
|
| 360 | // CHECK: [[IT_END]]:
|
| 361 | // CHECK: ret void
|
| 362 | }
|
| 363 |
|
| 364 |
|
| 365 | // CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}}
|
| 366 | void collapsed(float *a, float *b, float *c, float *d) { // Four nested loops under 'simd collapse(4)': 2 * 3 * 4 * 5 = 120 logical iterations.
|
| 367 | int i; // outer loop counter
|
| 368 | unsigned j; // middle loop counter, leads to unsigned icmp in loop header.
|
| 369 | // k declared in the loop init below
|
| 370 | short l; // inner loop counter
|
| 371 | // CHECK: store i32 0, i32* [[OMP_IV:[^,]+]]
|
| 372 | //
|
| 373 | #pragma omp simd collapse(4)
|
| 374 |
|
| 375 | // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID:[0-9]+]]
|
| 376 | // CHECK-NEXT: [[CMP:%.+]] = icmp ult i32 [[IV]], 120
|
| 377 | // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]]
|
| 378 | for (i = 1; i < 3; i++) // 2 iterations
|
| 379 | for (j = 2u; j < 5u; j++) // 3 iterations
|
| 380 | for (int k = 3; k <= 6; k++) // 4 iterations
|
| 381 | for (l = 4; l < 9; ++l) // 5 iterations
|
| 382 | {
|
| 383 | // CHECK: [[COLL1_BODY]]:
|
| 384 | // Start of body: calculate i from index:
|
| 385 | // CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 386 | // Calculation of the loop counters values.
|
| 387 | // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60
|
| 388 | // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1
|
| 389 | // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]]
|
| 390 | // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
|
| 391 | // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 392 | // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2]], 20
|
| 393 | // CHECK-NEXT: [[CALC_J_2:%.+]] = urem i32 [[CALC_J_1]], 3
|
| 394 | // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1
|
| 395 | // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]]
|
| 396 | // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]]
|
| 397 | // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 398 | // CHECK-NEXT: [[CALC_K_1:%.+]] = udiv i32 [[IV1_3]], 5
|
| 399 | // CHECK-NEXT: [[CALC_K_2:%.+]] = urem i32 [[CALC_K_1]], 4
|
| 400 | // CHECK-NEXT: [[CALC_K_2_MUL1:%.+]] = mul i32 [[CALC_K_2]], 1
|
| 401 | // CHECK-NEXT: [[CALC_K_3:%.+]] = add i32 3, [[CALC_K_2_MUL1]]
|
| 402 | // CHECK-NEXT: store i32 [[CALC_K_3]], i32* [[LC_K:.+]]
|
| 403 | // CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 404 | // CHECK-NEXT: [[CALC_L_1:%.+]] = urem i32 [[IV1_4]], 5
|
| 405 | // CHECK-NEXT: [[CALC_L_1_MUL1:%.+]] = mul i32 [[CALC_L_1]], 1
|
| 406 | // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[CALC_L_1_MUL1]]
|
| 407 | // CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16
|
| 408 | // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]]
|
| 409 | // ... loop body ...
|
| 410 | // End of body: store into a[i]:
|
| 411 | // CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 412 | float res = b[j] * c[k];
|
| 413 | a[i] = res * d[l];
|
| 414 | // CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 415 | // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1
|
| 416 | // CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[COLL1_LOOP_ID]]
|
| 417 | // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]]
|
| 418 | // CHECK: [[COLL1_END]]:
|
| 419 | }
|
| 420 | // i,j,l are updated; k is not updated. NOTE(review): four stores (3, 5, 7, 9) are matched below; the "7" presumably targets the loop-local k - confirm.
|
| 421 | // CHECK: store i32 3, i32*
|
| 422 | // CHECK-NEXT: store i32 5, i32*
|
| 423 | // CHECK-NEXT: store i32 7, i32*
|
| 424 | // CHECK-NEXT: store i16 9, i16*
|
| 425 | // CHECK: ret void
|
| 426 | }
|
| 427 |
|
| 428 | extern char foo();
|
| 429 | extern double globalfloat;
|
| 430 |
|
| 431 | // CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}}
|
| 432 | void widened(float *a, float *b, float *c, float *d) { // 'simd collapse(2)' nest whose inner bound is the call foo(), with globalfloat and localint privatized.
|
| 433 | int i; // outer loop counter
|
| 434 | short j; // inner loop counter
|
| 435 | globalfloat = 1.0;
|
| 436 | int localint = 1;
|
| 437 | // CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]]
|
| 438 | // Counter is widened to 64 bits.
|
| 439 | // CHECK: store i64 0, i64* [[OMP_IV:[^,]+]]
|
| 440 | //
|
| 441 | #pragma omp simd collapse(2) private(globalfloat, localint)
|
| 442 |
|
| 443 | // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID:[0-9]+]]
|
| 444 | // CHECK-NEXT: [[LI:%.+]] = load i64, i64* [[OMP_LI:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 445 | // CHECK-NEXT: [[NUMIT:%.+]] = add nsw i64 [[LI]], 1
|
| 446 | // CHECK-NEXT: [[CMP:%.+]] = icmp slt i64 [[IV]], [[NUMIT]]
|
| 447 | // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]]
|
| 448 | for (i = 1; i < 3; i++) // 2 iterations
|
| 449 | for (j = 0; j < foo(); j++) // foo() iterations
|
| 450 | {
|
| 451 | // CHECK: [[WIDE1_BODY]]:
|
| 452 | // Start of body: calculate i from index:
|
| 453 | // CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 454 | // Calculation of the loop counters values...
|
| 455 | // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]]
|
| 456 | // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 457 | // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]]
|
| 458 | // ... loop body ...
|
| 459 | //
|
| 460 | // Here we expect store into private double var, not global
|
| 461 | // CHECK-NOT: store double {{.+}}, double* [[GLOBALFLOAT]]
|
| 462 | globalfloat = (float)j/i; // writes the privatized copy named in the private() clause
|
| 463 | float res = b[j] * c[j];
|
| 464 | // Store into a[i]:
|
| 465 | // CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 466 | a[i] = res * d[i];
|
| 467 | // Then there's a store into private var localint:
|
| 468 | // CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 469 | localint = (int)j;
|
| 470 | // CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 471 | // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1
|
| 472 | // CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[WIDE1_LOOP_ID]]
|
| 473 | //
|
| 474 | // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]]
|
| 475 | // CHECK: [[WIDE1_END]]:
|
| 476 | }
|
| 477 | // i,j are updated.
|
| 478 | // CHECK: store i32 3, i32* [[I:%[^,]+]]
|
| 479 | // CHECK: store i16
|
| 480 | //
|
| 481 | // Here we expect store into original localint, not its privatized version.
|
| 482 | // CHECK-NOT: store i32 {{.+}}, i32* [[LOCALINT]]
|
| 483 | localint = (int)j;
|
| 484 | // CHECK: ret void
|
| 485 | }
|
| 486 |
|
| 487 | // CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
|
| 488 | void linear(float *a) {
|
| 489 | // CHECK: [[VAL_ADDR:%.+]] = alloca i64,
|
| 490 | // CHECK: [[K_ADDR:%.+]] = alloca i64*,
|
| 491 | long long val = 0;
|
| 492 | long long &k = val;
|
| 493 |
|
| 494 | #pragma omp simd linear(k : 3)
|
| 495 | // CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
|
| 496 | // CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
|
| 497 | // CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
|
| 498 | // CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
|
| 499 | // CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
|
| 500 | // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
|
| 501 | // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
|
| 502 |
|
| 503 | // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
|
| 504 | // CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
|
| 505 | // CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
|
| 506 | for (int i = 10; i > 1; i--) {
|
| 507 | // CHECK: [[SIMPLE_LOOP_BODY]]:
|
| 508 | // Start of body: calculate i from IV:
|
| 509 | // CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 510 | // FIXME: It is interesting, why the following "mul 1" was not constant folded?
|
| 511 | // CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
|
| 512 | // CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
|
| 513 | // CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 514 | //
|
| 515 | // CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 516 | // CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 517 | // CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
|
| 518 | // CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
|
| 519 | // CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
|
| 520 | // Update of the privatized version of linear variable!
|
| 521 | // CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
|
| 522 | a[k]++;
|
| 523 | k = k + 3;
|
| 524 | // CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 525 | // CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
|
| 526 | // CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 527 | // br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
|
| 528 | }
|
| 529 | // CHECK: [[SIMPLE_LOOP_END]]:
|
| 530 | //
|
| 531 | // Update linear vars after loop, as the loop was operating on a private version.
|
| 532 | // CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
|
| 533 | // CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
|
| 534 | // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
|
| 535 | // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
|
| 536 | // CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
|
| 537 | // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
|
| 538 | //
|
| 539 |
|
| 540 | #pragma omp simd linear(val(k) : 3)
|
| 541 | // CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
|
| 542 | // CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
|
| 543 | // CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
|
| 544 | // CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
|
| 545 | // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
|
| 546 | // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
|
| 547 |
|
| 548 | // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
|
| 549 | // CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
|
| 550 | // CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
|
| 551 | for (int i = 10; i > 1; i--) {
|
| 552 | // CHECK: [[SIMPLE_LOOP_BODY]]:
|
| 553 | // Start of body: calculate i from IV:
|
| 554 | // CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 555 | // FIXME: Why is the following "mul 1" not constant-folded?
|
| 556 | // CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
|
| 557 | // CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
|
| 558 | // CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 559 | //
|
| 560 | // CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 561 | // CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 562 | // CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
|
| 563 | // CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
|
| 564 | // CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
|
| 565 | // Update of the privatized version of linear variable!
|
| 566 | // CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
|
| 567 | a[k]++;
|
| 568 | k = k + 3;
|
| 569 | // CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 570 | // CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
|
| 571 | // CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 572 | // br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
|
| 573 | }
|
| 574 | // CHECK: [[SIMPLE_LOOP_END]]:
|
| 575 | //
|
| 576 | // Update linear vars after loop, as the loop was operating on a private version.
|
| 577 | // CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
|
| 578 | // CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
|
| 579 | // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
|
| 580 | // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
|
| 581 | // CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
|
| 582 | // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
|
| 583 | //
|
| 584 | #pragma omp simd linear(uval(k) : 3)
|
| 585 | // CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
|
| 586 | // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
|
| 587 | // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]
|
| 588 |
|
| 589 | // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID:[0-9]+]]
|
| 590 | // CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
|
| 591 | // CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
|
| 592 | for (int i = 10; i > 1; i--) {
|
| 593 | // CHECK: [[SIMPLE_LOOP_BODY]]:
|
| 594 | // Start of body: calculate i from IV:
|
| 595 | // CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 596 | // FIXME: Why is the following "mul 1" not constant-folded?
|
| 597 | // CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
|
| 598 | // CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
|
| 599 | // CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 600 | //
|
| 601 | // CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 602 | // CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 603 | // CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
|
| 604 | // CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
|
| 605 | // CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
|
| 606 | // Update of the privatized version of linear variable!
|
| 607 | // CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
|
| 608 | a[k]++;
|
| 609 | k = k + 3;
|
| 610 | // CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 611 | // CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
|
| 612 | // CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP_ID]]
|
| 613 | // br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
|
| 614 | }
|
| 615 | // CHECK: [[SIMPLE_LOOP_END]]:
|
| 616 | //
|
| 617 | // Update linear vars after loop, as the loop was operating on a private version.
|
| 618 | // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
|
| 619 | // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
|
| 620 | // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
|
| 621 | //
|
| 622 | }
|
| 623 |
|
| 624 | // TERM_DEBUG-LABEL: bar
|
// Trivial helper that always returns 0. Its call site in the loop body of
// parallel_simd() is what the `invoke ... bar` pattern there matches.
| 625 | int bar() {return 0;};
|
| 626 |
|
| 627 | // TERM_DEBUG-LABEL: parallel_simd
|
// Fixture for the line-tables-only debug run (fourth RUN line): an `omp simd`
// loop nested directly in an `omp parallel` region, whose body adds bar() to
// a[i]. The patterns inside expect the call to bar to be emitted as an
// `invoke` whose unwind label leads to a block calling __clang_call_terminate
// followed by `unreachable`, with no __kmpc_global_thread_num reference before
// the invoke or between the invoke and that landing pad.
// NOTE: the DILocation pattern right after this function uses a relative
// @LINE offset of 11 lines back, which is the `#pragma omp simd` line. Do not
// add or remove lines between that pragma and the pattern.
| 628 | void parallel_simd(float *a) {
|
| 629 | #pragma omp parallel
|
| 630 | #pragma omp simd
|
| 631 | // TERM_DEBUG-NOT: __kmpc_global_thread_num
|
| 632 | // TERM_DEBUG: invoke i32 {{.*}}bar{{.*}}()
|
| 633 | // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]],
|
| 634 | // TERM_DEBUG-NOT: __kmpc_global_thread_num
|
| 635 | // TERM_DEBUG: [[TERM_LPAD]]
|
| 636 | // TERM_DEBUG: call void @__clang_call_terminate
|
| 637 | // TERM_DEBUG: unreachable
|
| 638 | for (unsigned i = 131071; i <= 2147483647; i += 127)
|
| 639 | a[i] += bar();
|
| 640 | }
|
| 641 | // TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],
|
| 642 |
|
| 643 | // CHECK-LABEL: S8
|
| 644 | // CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
|
| 645 | // CHECK-NEXT: and i64 %{{.+}}, 15
|
| 646 | // CHECK-NEXT: icmp eq i64 %{{.+}}, 0
|
| 647 | // CHECK-NEXT: call void @llvm.assume(i1
|
| 648 |
|
| 649 | // CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
|
| 650 | // CHECK-NEXT: and i64 %{{.+}}, 7
|
| 651 | // CHECK-NEXT: icmp eq i64 %{{.+}}, 0
|
| 652 | // CHECK-NEXT: call void @llvm.assume(i1
|
| 653 |
|
| 654 | // CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
|
| 655 | // CHECK-NEXT: and i64 %{{.+}}, 15
|
| 656 | // CHECK-NEXT: icmp eq i64 %{{.+}}, 0
|
| 657 | // CHECK-NEXT: call void @llvm.assume(i1
|
| 658 |
|
| 659 | // CHECK: ptrtoint [[SS_TY]]* %{{.+}} to i64
|
| 660 | // CHECK-NEXT: and i64 %{{.+}}, 3
|
| 661 | // CHECK-NEXT: icmp eq i64 %{{.+}}, 0
|
| 662 | // CHECK-NEXT: call void @llvm.assume(i1
|
// Plain struct with a single int member `a` (0 when default-constructed, `v`
// when constructed from an int) and a nested alias `type` for int. It is used
// as the template argument of S7 (S7<SS>), which S8 derives from.
| 663 | struct SS {
|
| 664 | SS(): a(0) {}
|
| 665 | SS(int v) : a(v) {}
|
| 666 | int a;
|
| 667 | typedef int type;
|
| 668 | };
|
| 669 |
|
// Class template that derives publicly from T and holds a T pointer `a` and a
// two-element T array `b`. The protected default constructor nulls `a`.
// The public constructor points `a` at the T::type object it receives (the
// object's address is converted with a C-style cast to T*) and then runs two
// `omp simd` loops, each incrementing a->a while k < a->a:
//   - the first uses aligned(a), i.e. no explicit alignment value;
//   - the second uses aligned(this->b : 8), i.e. an explicit alignment of 8.
// NOTE(review): these presumably produce the first two alignment-assumption
// patterns listed before struct SS (masks 15 and 7) -- confirm against the
// emitted IR.
| 670 | template <typename T>
|
| 671 | class S7 : public T {
|
| 672 | protected:
|
| 673 | T *a;
|
| 674 | T b[2];
|
| 675 | S7() : a(0) {}
|
| 676 |
|
| 677 | public:
|
| 678 | S7(typename T::type &v) : a((T*)&v) {
|
| 679 | #pragma omp simd aligned(a)
|
| 680 | for (int k = 0; k < a->a; ++k)
|
| 681 | ++this->a->a;
|
| 682 | #pragma omp simd aligned(this->b : 8)
|
| 683 | for (int k = 0; k < a->a; ++k)
|
| 684 | ++a->a;
|
| 685 | }
|
| 686 | };
|
| 687 |
|
// Derives privately from IterDouble (presumably declared earlier in this
// file) and publicly from S7<SS>. The default constructor is private.
// S8(int v) forwards v to the S7<SS> constructor and then runs two
// `omp parallel` + `omp simd` regions over the inherited members:
//   - private(a) with aligned(S7<SS>::a), i.e. no explicit alignment value;
//   - shared(b) with aligned(this->b: 4), i.e. an explicit alignment of 4.
// NOTE(review): these presumably produce the last two alignment-assumption
// patterns listed before struct SS (masks 15 and 3) -- confirm against the
// emitted IR.
| 688 | class S8 : private IterDouble, public S7<SS> {
|
| 689 | S8() {}
|
| 690 |
|
| 691 | public:
|
| 692 | S8(int v) : S7<SS>(v){
|
| 693 | #pragma omp parallel private(a)
|
| 694 | #pragma omp simd aligned(S7<SS>::a)
|
| 695 | for (int k = 0; k < a->a; ++k)
|
| 696 | ++this->a->a;
|
| 697 | #pragma omp parallel shared(b)
|
| 698 | #pragma omp simd aligned(this->b: 4)
|
| 699 | for (int k = 0; k < a->a; ++k)
|
| 700 | ++a->a;
|
| 701 | }
|
| 702 | };
|
// Global instance constructed through S8(int), which in turn uses the
// S7<SS> constructor taking an int reference.
| 703 | S8 s8(0);
|
| 704 |
|
| 705 | #endif // HEADER
|
| 706 |
|