; RUN: opt -arm-parallel-dsp -mtriple=armv7-a -S %s -o - | FileCheck %s

; -----------------------------------------------------------------------------
; overlap_1
; Three 16-bit products a[i] * b[i] for i = 0, 1, 2 are summed together with
; %acc. %mul.1 feeds both %add and %add.1, so the two candidate pairs
; (elements 0,1 and elements 1,2) share element 1.
; The directives below expect two chained smlad calls: the first uses the i32
; loads taken from %a + 1 and %b + 1 and is seeded with %acc; the second uses
; the i32 loads taken from %a and %b and accumulates onto the first result.
; -----------------------------------------------------------------------------
; CHECK-LABEL: overlap_1
; CHECK: [[ADDR_A_1:%[^ ]+]] = getelementptr i16, i16* %a, i32 1
; CHECK: [[ADDR_B_1:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_A_1:%[^ ]+]] = bitcast i16* [[ADDR_A_1]] to i32*
; CHECK: [[LD_A_1:%[^ ]+]] = load i32, i32* [[CAST_A_1]]
; CHECK: [[CAST_B_1:%[^ ]+]] = bitcast i16* [[ADDR_B_1]] to i32*
; CHECK: [[LD_B_1:%[^ ]+]] = load i32, i32* [[CAST_B_1]]
; CHECK: [[ACC:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A_1]], i32 [[LD_B_1]], i32 %acc)
; CHECK: [[RES:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[ACC]])
; CHECK: ret i32 [[RES]]
define i32 @overlap_1(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Elements 0 and 1 of both arrays.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Element 2 of both arrays.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.a.2, %sext.b.2
  ; %mul.1 is deliberately used by both partial sums.
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.1, %mul.2
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}
; -----------------------------------------------------------------------------
; overlap_2
; Same three products as overlap_1, but %mul.2 takes its operands in the order
; (b, a) and %add.2 takes its operands in the order (%add, %add.1).
; The directives below expect a single smlad on the i32 loads taken from %a
; and %b; %mul.1 and %mul.2 stay as scalar values and are folded into the
; accumulator operand with two plain adds (first %mul.1 + %acc, then %mul.2).
; -----------------------------------------------------------------------------
; CHECK-LABEL: overlap_2
; CHECK: [[ADDR_A_1:%[^ ]+]] = getelementptr i16, i16* %a, i32 1
; CHECK: [[ADDR_B_1:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[ACC1:%[^ ]+]] = add i32 %mul.1, %acc
; CHECK: [[ACC2:%[^ ]+]] = add i32 %mul.2, [[ACC1]]
; CHECK: [[RES:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[ACC2]])
; CHECK: ret i32 [[RES]]
define i32 @overlap_2(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Elements 0 and 1 of both arrays.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Element 2 of both arrays; note the (b, a) operand order on the multiply.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %mul.2 = mul i32 %sext.b.2, %sext.a.2
  ; %mul.1 is used by both partial sums; %add.2 lists %add first here.
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.1, %mul.2
  %add.2 = add i32 %add, %add.1
  %res = add i32 %add.2, %acc
  ret i32 %res
}
; -----------------------------------------------------------------------------
; overlap_3
; Four products: a[0]*b[0], a[1]*b[1], a[2]*b[1] and a[3]*b[2]. The extended
; value of b[1] (%sext.b.1) feeds both %mul.1 and %mul.2, so the i32 loads
; taken from %b and from %b + 1 both cover b[1].
; The directives below expect two chained smlad calls: the first pairs the
; i32 load from %a + 2 with the i32 load from %b + 1 and is seeded with %acc;
; the second pairs the i32 loads from %a and %b.
; -----------------------------------------------------------------------------
; CHECK-LABEL: overlap_3
; CHECK: [[GEP_B:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_B_1:%[^ ]+]] = bitcast i16* [[GEP_B]] to i32*
; CHECK: [[LD_B_1:%[^ ]+]] = load i32, i32* [[CAST_B_1]]
; CHECK: [[GEP_A:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP_A]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[SMLAD:%[^ ]+]] = call i32 @llvm.arm.smlad(i32 [[LD_A_2]], i32 [[LD_B_1]], i32 %acc)
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[SMLAD]])
define i32 @overlap_3(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Elements 0 and 1 of both arrays.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Elements 2 and 3 of %a, element 2 of %b.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; a[2] pairs with b[1] and a[3] pairs with b[2]: b[1] is reused from %mul.1.
  %mul.2 = mul i32 %sext.a.2, %sext.b.1
  %mul.3 = mul i32 %sext.a.3, %sext.b.2
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}
; -----------------------------------------------------------------------------
; overlap_4
; Four products: a[0]*b[0], a[1]*b[1], b[2]*a[2] and b[1]*a[3]. As in
; overlap_3 the extended value of b[1] (%sext.b.1) is used twice, but here
; a[2] is multiplied by b[2] and a[3] by b[1], i.e. the pairing is crossed
; relative to the i32 loads taken from %a + 2 and %b + 1.
; The directives below expect that crossed pair to become a call to
; @llvm.arm.smladx seeded with %acc, followed by a plain smlad on the i32
; loads from %a and %b.
; -----------------------------------------------------------------------------
; CHECK-LABEL: overlap_4
; CHECK: [[GEP_B:%[^ ]+]] = getelementptr i16, i16* %b, i32 1
; CHECK: [[CAST_A:%[^ ]+]] = bitcast i16* %a to i32*
; CHECK: [[LD_A:%[^ ]+]] = load i32, i32* [[CAST_A]]
; CHECK: [[CAST_B:%[^ ]+]] = bitcast i16* %b to i32*
; CHECK: [[LD_B:%[^ ]+]] = load i32, i32* [[CAST_B]]
; CHECK: [[CAST_B_1:%[^ ]+]] = bitcast i16* [[GEP_B]] to i32*
; CHECK: [[LD_B_1:%[^ ]+]] = load i32, i32* [[CAST_B_1]]
; CHECK: [[GEP_A:%[^ ]+]] = getelementptr i16, i16* %a, i32 2
; CHECK: [[CAST_A_2:%[^ ]+]] = bitcast i16* [[GEP_A]] to i32*
; CHECK: [[LD_A_2:%[^ ]+]] = load i32, i32* [[CAST_A_2]]
; CHECK: [[SMLAD:%[^ ]+]] = call i32 @llvm.arm.smladx(i32 [[LD_A_2]], i32 [[LD_B_1]], i32 %acc)
; CHECK: call i32 @llvm.arm.smlad(i32 [[LD_A]], i32 [[LD_B]], i32 [[SMLAD]])
define i32 @overlap_4(i16* %a, i16* %b, i32 %acc) {
entry:
  ; Elements 0 and 1 of both arrays.
  %addr.a.1 = getelementptr i16, i16* %a, i32 1
  %addr.b.1 = getelementptr i16, i16* %b, i32 1
  %ld.a.0 = load i16, i16* %a
  %sext.a.0 = sext i16 %ld.a.0 to i32
  %ld.b.0 = load i16, i16* %b
  %ld.a.1 = load i16, i16* %addr.a.1
  %ld.b.1 = load i16, i16* %addr.b.1
  %sext.a.1 = sext i16 %ld.a.1 to i32
  %sext.b.1 = sext i16 %ld.b.1 to i32
  %sext.b.0 = sext i16 %ld.b.0 to i32
  %mul.0 = mul i32 %sext.a.0, %sext.b.0
  %mul.1 = mul i32 %sext.a.1, %sext.b.1
  ; Elements 2 and 3 of %a, element 2 of %b.
  %addr.a.2 = getelementptr i16, i16* %a, i32 2
  %addr.b.2 = getelementptr i16, i16* %b, i32 2
  %addr.a.3 = getelementptr i16, i16* %a, i32 3
  %ld.a.2 = load i16, i16* %addr.a.2
  %ld.b.2 = load i16, i16* %addr.b.2
  %ld.a.3 = load i16, i16* %addr.a.3
  %sext.a.2 = sext i16 %ld.a.2 to i32
  %sext.b.2 = sext i16 %ld.b.2 to i32
  %sext.a.3 = sext i16 %ld.a.3 to i32
  ; Crossed pairing: b[2] with a[2], and b[1] (reused from %mul.1) with a[3].
  %mul.2 = mul i32 %sext.b.2, %sext.a.2
  %mul.3 = mul i32 %sext.b.1, %sext.a.3
  %add = add i32 %mul.0, %mul.1
  %add.1 = add i32 %mul.2, %mul.3
  %add.2 = add i32 %add.1, %add
  %res = add i32 %add.2, %acc
  ret i32 %res
}