; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S \
; RUN: -polly-codegen -S < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S \
; RUN: -polly-codegen -polly-import-jscop-postfix=pow2 \
; RUN: -S < %s | FileCheck %s -check-prefix=POW2
;
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
;   for (long i = 0; i < N; i++)
;     C[i] += A[i] + B[i] + A[i] + B[i + p];
; }
;
;
; This test case changes the access functions such that the resulting index
; expressions are modulo or division operations. We test that the code we
; generate takes advantage of knowledge about unsigned numerators. This is
; useful as LLVM will translate urem and udiv operations with power-of-two
; denominators to fast bitwise-and or shift operations.

; A[i % 127]
; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127
; CHECK: %polly.access.A{{[0-9]*}} = getelementptr float, float* %A, i64 %pexp.pdiv_r

; B[floor(i / 127)]
;
; Note: without the floor, we would create a map i -> i/127, which only contains
;       values of i that are divisible by 127. All other values of i would not
;       be mapped to any value. However, to generate correct code we require
;       each value of i to indeed be mapped to a value.
;
; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127
; CHECK: %polly.access.B{{[0-9]*}} = getelementptr float, float* %B, i64 %pexp.p_div_q

; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
; CHECK: %pexp.fdiv_q.0 = sub nsw i64 %p, 127
; CHECK: %pexp.fdiv_q.1 = add nsw i64 %pexp.fdiv_q.0, 1
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %p, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %p
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127
; CHECK: %[[r1:[0-9]*]] = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]]
; CHECK: %polly.access.A{{[0-9]*}} = getelementptr float, float* %A, i64 %[[r2]]

; B[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
; CHECK: %polly.access.B{{[0-9]*}} = getelementptr float, float* %B, i64 %pexp.div

; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
; POW2: %polly.access.A{{[0-9]*}} = getelementptr float, float* %A, i64 %pexp.pdiv_r

; B[floor(i / 128)]
; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128
; POW2: %polly.access.B{{[0-9]*}} = getelementptr float, float* %B, i64 %pexp.p_div_q

; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
; POW2: %polly.fdiv_q.shr = ashr i64 %p, 7
; POW2: %[[r1:[0-9]*]] = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]]
; POW2: %polly.access.A{{[0-9]*}} = getelementptr float, float* %A, i64 %[[r2]]

; B[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
; POW2: %polly.access.B{{[0-9]*}} = getelementptr float, float* %B, i64 %pexp.div

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; exprModDiv: input kernel for this test.
;
; Equivalent C:
;   for (long i = 0; i < N; i++)
;     C[i] += A[i] + B[i] + A[i] + B[i + p];
;
; Arguments:
;   %A, %B  float arrays that are only loaded from
;   %C      float array that is loaded from and stored to
;   %N      loop trip bound, compared with a signed less-than
;   %p      loop-invariant offset added to the index of the last B access
;
; The four loads from %A and %B are kept as separate accesses (A, B, A, B in
; that order); they are the accesses whose index expressions the test
; directives at the top of the file inspect in the generated code.
define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {
entry:
  br label %for.cond

; Loop header: i starts at 0 and the loop runs while i < N (signed).
for.cond:                                         ; preds = %for.inc, %entry
  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i64 %i.0, %N
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; 1st access: A[i]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp = load float, float* %arrayidx, align 4
  ; 2nd access: B[i]
  %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0
  %tmp1 = load float, float* %arrayidx1, align 4
  %add = fadd float %tmp, %tmp1
  ; 3rd access: A[i] again, through its own getelementptr and load
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp2 = load float, float* %arrayidx2, align 4
  %add3 = fadd float %add, %tmp2
  ; 4th access: B[p + i]
  %padd = add nsw i64 %p, %i.0
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd
  %tmp3 = load float, float* %arrayidx4, align 4
  %add5 = fadd float %add3, %tmp3
  ; C[i] = C[i] + (sum of the four loads above)
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0
  %tmp4 = load float, float* %arrayidx6, align 4
  %add7 = fadd float %tmp4, %add5
  store float %add7, float* %arrayidx6, align 4
  br label %for.inc

; Loop latch: i = i + 1.
for.inc:                                          ; preds = %for.body
  %inc = add nuw nsw i64 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ret void
}