blob: cda1da825f9424e5c5056f625e74c36885e4ea08 [file] [log] [blame]
Matt Arsenault49169a92019-07-15 17:50:31 +00001; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
3; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
4
; Plain i16 multiply with no masking or sign-extension applied to the operands.
; The SI checks (tahiti run line) expect both operands to be zero-extended to
; i32, multiplied through llvm.amdgcn.mul.u24, and the result truncated back to
; i16. The VI checks (fiji run line) expect the original mul to be left alone.
; NOTE(review): presumably fiji keeps the i16 mul because it is handled natively
; there -- confirm against the pass.
5define i16 @mul_i16(i16 %lhs, i16 %rhs) {
6; SI-LABEL: @mul_i16(
7; SI-NEXT: [[TMP1:%.*]] = zext i16 [[LHS:%.*]] to i32
8; SI-NEXT: [[TMP2:%.*]] = zext i16 [[RHS:%.*]] to i32
9; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
10; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
11; SI-NEXT: ret i16 [[TMP4]]
12;
13; VI-LABEL: @mul_i16(
14; VI-NEXT: [[MUL:%.*]] = mul i16 [[LHS:%.*]], [[RHS:%.*]]
15; VI-NEXT: ret i16 [[MUL]]
16;
17 %mul = mul i16 %lhs, %rhs
18 ret i16 %mul
19}
20
; Signed i32 multiply whose operands fit in 24 bits.
; %lhs24 is %lhs shifted left and then arithmetically right by 8, i.e. its low
; 24 bits sign-extended. Both check prefixes expect the mul to be replaced by a
; single llvm.amdgcn.mul.i24 call, with the shift instructions kept as written.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 8), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip (and "lshr" names a shl) -- confirm intent; fixing it means
; regenerating the assertions with utils/update_test_checks.py.
21define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
22; SI-LABEL: @smul24_i32(
23; SI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
24; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
25; SI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
26; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
27; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
28; SI-NEXT: ret i32 [[TMP1]]
29;
30; VI-LABEL: @smul24_i32(
31; VI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
32; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
33; VI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
34; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 8
35; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
36; VI-NEXT: ret i32 [[TMP1]]
37;
38 %shl.lhs = shl i32 %lhs, 8
39 %lhs24 = ashr i32 %shl.lhs, 8
40 %lshr.rhs = shl i32 %rhs, 8
41 %rhs24 = ashr i32 %lhs, 8
42 %mul = mul i32 %lhs24, %rhs24
43 ret i32 %mul
44}
45
; Two-element vector form of the signed 24-bit i32 multiply.
; Both check prefixes expect the vector mul to be scalarized: each lane of
; %lhs24 and %rhs24 is extracted, the lanes are multiplied pairwise with
; llvm.amdgcn.mul.i24, and the two results are inserted into an undef vector.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 8), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
46define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
47; SI-LABEL: @smul24_v2i32(
48; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
49; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
50; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
51; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
52; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
53; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
54; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
55; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
56; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
57; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
58; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
59; SI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
60; SI-NEXT: ret <2 x i32> [[TMP8]]
61;
62; VI-LABEL: @smul24_v2i32(
63; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
64; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
65; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
66; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i32> [[LHS]], <i32 8, i32 8>
67; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
68; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
69; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
70; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
71; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
72; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
73; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
74; VI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
75; VI-NEXT: ret <2 x i32> [[TMP8]]
76;
77 %shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>
78 %lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>
79 %lshr.rhs = shl <2 x i32> %rhs, <i32 8, i32 8>
80 %rhs24 = ashr <2 x i32> %lhs, <i32 8, i32 8>
81 %mul = mul <2 x i32> %lhs24, %rhs24
82 ret <2 x i32> %mul
83}
84
; Unsigned i32 multiply whose operands are masked to their low 24 bits
; (16777215 is 2^24 - 1). Both check prefixes expect the mul to be replaced by
; a single llvm.amdgcn.mul.u24 call on the masked values.
85define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
86; SI-LABEL: @umul24_i32(
87; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
88; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
89; SI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
90; SI-NEXT: ret i32 [[TMP1]]
91;
92; VI-LABEL: @umul24_i32(
93; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
94; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
95; VI-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
96; VI-NEXT: ret i32 [[TMP1]]
97;
98 %lhs24 = and i32 %lhs, 16777215
99 %rhs24 = and i32 %rhs, 16777215
100 %mul = mul i32 %lhs24, %rhs24
101 ret i32 %mul
102}
103
; Two-element vector form of the unsigned 24-bit i32 multiply: every lane of
; both operands is masked with 16777215 (2^24 - 1). Both check prefixes expect
; the vector mul to be scalarized into per-lane extractelement, one
; llvm.amdgcn.mul.u24 call per lane, and insertelement into an undef vector.
104define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
105; SI-LABEL: @umul24_v2i32(
106; SI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
107; SI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
108; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
109; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
110; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
111; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
112; SI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
113; SI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
114; SI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
115; SI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
116; SI-NEXT: ret <2 x i32> [[TMP8]]
117;
118; VI-LABEL: @umul24_v2i32(
119; VI-NEXT: [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
120; VI-NEXT: [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
121; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
122; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
123; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
124; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
125; VI-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
126; VI-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
127; VI-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
128; VI-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
129; VI-NEXT: ret <2 x i32> [[TMP8]]
130;
131 %lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>
132 %rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>
133 %mul = mul <2 x i32> %lhs24, %rhs24
134 ret <2 x i32> %mul
135}
136
; Signed i64 multiply whose operands fit in 24 bits.
; %lhs24 is %lhs shifted left and then arithmetically right by 40, i.e. its
; low 24 bits sign-extended. Both check prefixes expect the operands to be
; truncated to i32, multiplied with llvm.amdgcn.mul.i24, and the i32 result
; sign-extended back to i64.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 40), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
137define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
138; SI-LABEL: @smul24_i64(
139; SI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
140; SI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
141; SI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
142; SI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
143; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
144; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
145; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
146; SI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
147; SI-NEXT: ret i64 [[TMP4]]
148;
149; VI-LABEL: @smul24_i64(
150; VI-NEXT: [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
151; VI-NEXT: [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
152; VI-NEXT: [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
153; VI-NEXT: [[RHS24:%.*]] = ashr i64 [[LHS]], 40
154; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
155; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
156; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
157; VI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
158; VI-NEXT: ret i64 [[TMP4]]
159;
160 %shl.lhs = shl i64 %lhs, 40
161 %lhs24 = ashr i64 %shl.lhs, 40
162 %lshr.rhs = shl i64 %rhs, 40
163 %rhs24 = ashr i64 %lhs, 40
164 %mul = mul i64 %lhs24, %rhs24
165 ret i64 %mul
166}
167
; Unsigned i64 multiply whose operands are masked to their low 24 bits
; (16777215 is 2^24 - 1). Both check prefixes expect the masked operands to be
; truncated to i32, multiplied with llvm.amdgcn.mul.u24, and the i32 result
; zero-extended back to i64.
168define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
169; SI-LABEL: @umul24_i64(
170; SI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
171; SI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
172; SI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
173; SI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
174; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
175; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
176; SI-NEXT: ret i64 [[TMP4]]
177;
178; VI-LABEL: @umul24_i64(
179; VI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
180; VI-NEXT: [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
181; VI-NEXT: [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
182; VI-NEXT: [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
183; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
184; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
185; VI-NEXT: ret i64 [[TMP4]]
186;
187 %lhs24 = and i64 %lhs, 16777215
188 %rhs24 = and i64 %rhs, 16777215
189 %mul = mul i64 %lhs24, %rhs24
190 ret i64 %mul
191}
192
; Signed multiply on the odd-width type i31 with operands that fit in 24 bits.
; %lhs24 is %lhs shifted left and then arithmetically right by 7, i.e. its low
; 24 bits sign-extended. Both check prefixes expect the operands to be
; sign-extended to i32, multiplied with llvm.amdgcn.mul.i24, and the result
; truncated back to i31.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 7), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
193define i31 @smul24_i31(i31 %lhs, i31 %rhs) {
194; SI-LABEL: @smul24_i31(
195; SI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
196; SI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
197; SI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
198; SI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
199; SI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
200; SI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
201; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
202; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
203; SI-NEXT: ret i31 [[TMP4]]
204;
205; VI-LABEL: @smul24_i31(
206; VI-NEXT: [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
207; VI-NEXT: [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
208; VI-NEXT: [[LSHR_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
209; VI-NEXT: [[RHS24:%.*]] = ashr i31 [[LHS]], 7
210; VI-NEXT: [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
211; VI-NEXT: [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
212; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
213; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
214; VI-NEXT: ret i31 [[TMP4]]
215;
216 %shl.lhs = shl i31 %lhs, 7
217 %lhs24 = ashr i31 %shl.lhs, 7
218 %lshr.rhs = shl i31 %rhs, 7
219 %rhs24 = ashr i31 %lhs, 7
220 %mul = mul i31 %lhs24, %rhs24
221 ret i31 %mul
222}
223
; Unsigned multiply on the odd-width type i31 with operands masked to their
; low 24 bits (16777215 is 2^24 - 1). Both check prefixes expect the masked
; operands to be zero-extended to i32, multiplied with llvm.amdgcn.mul.u24,
; and the result truncated back to i31.
224define i31 @umul24_i31(i31 %lhs, i31 %rhs) {
225; SI-LABEL: @umul24_i31(
226; SI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
227; SI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
228; SI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
229; SI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
230; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
231; SI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
232; SI-NEXT: ret i31 [[TMP4]]
233;
234; VI-LABEL: @umul24_i31(
235; VI-NEXT: [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
236; VI-NEXT: [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
237; VI-NEXT: [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
238; VI-NEXT: [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
239; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
240; VI-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
241; VI-NEXT: ret i31 [[TMP4]]
242;
243 %lhs24 = and i31 %lhs, 16777215
244 %rhs24 = and i31 %rhs, 16777215
245 %mul = mul i31 %lhs24, %rhs24
246 ret i31 %mul
247}
248
; Two-element vector of i31 with every lane masked to its low 24 bits
; (16777215 is 2^24 - 1). Both check prefixes expect all four lanes to be
; extracted first; then, lane by lane, the operands are zero-extended to i32,
; multiplied with llvm.amdgcn.mul.u24 and truncated back to i31, and finally
; the two results are inserted into an undef <2 x i31>.
249define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
250; SI-LABEL: @umul24_v2i31(
251; SI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
252; SI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
253; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
254; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
255; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
256; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
257; SI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
258; SI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
259; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
260; SI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
261; SI-NEXT: [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
262; SI-NEXT: [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
263; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
264; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
265; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
266; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
267; SI-NEXT: ret <2 x i31> [[TMP14]]
268;
269; VI-LABEL: @umul24_v2i31(
270; VI-NEXT: [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
271; VI-NEXT: [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
272; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
273; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
274; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
275; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
276; VI-NEXT: [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
277; VI-NEXT: [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
278; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
279; VI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
280; VI-NEXT: [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
281; VI-NEXT: [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
282; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
283; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
284; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
285; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
286; VI-NEXT: ret <2 x i31> [[TMP14]]
287;
288 %lhs24 = and <2 x i31> %lhs, <i31 16777215, i31 16777215>
289 %rhs24 = and <2 x i31> %rhs, <i31 16777215, i31 16777215>
290 %mul = mul <2 x i31> %lhs24, %rhs24
291 ret <2 x i31> %mul
292}
293
; Two-element vector of i31, signed variant. Both check prefixes expect all
; four lanes to be extracted first; then, lane by lane, the operands are
; sign-extended to i32, multiplied with llvm.amdgcn.mul.i24 and truncated back
; to i31, and finally the two results are inserted into an undef <2 x i31>.
; NOTE(review): the shift amount here is 8, so %lhs24 holds the low 23 bits of
; each i31 lane sign-extended, whereas the scalar i31 test shifts by 7 -- confirm
; whether 8 is intentional.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 8), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
294define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
295; SI-LABEL: @smul24_v2i31(
296; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
297; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
298; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
299; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>
300; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
301; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
302; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
303; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
304; SI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
305; SI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
306; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
307; SI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
308; SI-NEXT: [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
309; SI-NEXT: [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
310; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
311; SI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
312; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
313; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
314; SI-NEXT: ret <2 x i31> [[TMP14]]
315;
316; VI-LABEL: @smul24_v2i31(
317; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
318; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
319; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
320; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i31> [[LHS]], <i31 8, i31 8>
321; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
322; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
323; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
324; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
325; VI-NEXT: [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
326; VI-NEXT: [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
327; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
328; VI-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
329; VI-NEXT: [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
330; VI-NEXT: [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
331; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
332; VI-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
333; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
334; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
335; VI-NEXT: ret <2 x i31> [[TMP14]]
336;
337 %shl.lhs = shl <2 x i31> %lhs, <i31 8, i31 8>
338 %lhs24 = ashr <2 x i31> %shl.lhs, <i31 8, i31 8>
339 %lshr.rhs = shl <2 x i31> %rhs, <i31 8, i31 8>
340 %rhs24 = ashr <2 x i31> %lhs, <i31 8, i31 8>
341 %mul = mul <2 x i31> %lhs24, %rhs24
342 ret <2 x i31> %mul
343}
344
; Signed multiply on the odd-width type i33 with operands that fit in 24 bits.
; %lhs24 is %lhs shifted left and then arithmetically right by 9, i.e. its low
; 24 bits sign-extended. Both check prefixes expect the operands to be
; truncated to i32, multiplied with llvm.amdgcn.mul.i24, and the i32 result
; sign-extended back to i33.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 9), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
345define i33 @smul24_i33(i33 %lhs, i33 %rhs) {
346; SI-LABEL: @smul24_i33(
347; SI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
348; SI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
349; SI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
350; SI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
351; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
352; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
353; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
354; SI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
355; SI-NEXT: ret i33 [[TMP4]]
356;
357; VI-LABEL: @smul24_i33(
358; VI-NEXT: [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
359; VI-NEXT: [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
360; VI-NEXT: [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
361; VI-NEXT: [[RHS24:%.*]] = ashr i33 [[LHS]], 9
362; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
363; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
364; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
365; VI-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
366; VI-NEXT: ret i33 [[TMP4]]
367;
368 %shl.lhs = shl i33 %lhs, 9
369 %lhs24 = ashr i33 %shl.lhs, 9
370 %lshr.rhs = shl i33 %rhs, 9
371 %rhs24 = ashr i33 %lhs, 9
372 %mul = mul i33 %lhs24, %rhs24
373 ret i33 %mul
374}
375
; Unsigned multiply on the odd-width type i33 with operands masked to their
; low 24 bits (16777215 is 2^24 - 1). Both check prefixes expect the masked
; operands to be truncated to i32, multiplied with llvm.amdgcn.mul.u24, and the
; i32 result zero-extended back to i33.
376define i33 @umul24_i33(i33 %lhs, i33 %rhs) {
377; SI-LABEL: @umul24_i33(
378; SI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
379; SI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
380; SI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
381; SI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
382; SI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
383; SI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
384; SI-NEXT: ret i33 [[TMP4]]
385;
386; VI-LABEL: @umul24_i33(
387; VI-NEXT: [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
388; VI-NEXT: [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
389; VI-NEXT: [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
390; VI-NEXT: [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
391; VI-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
392; VI-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
393; VI-NEXT: ret i33 [[TMP4]]
394;
395 %lhs24 = and i33 %lhs, 16777215
396 %rhs24 = and i33 %rhs, 16777215
397 %mul = mul i33 %lhs24, %rhs24
398 ret i33 %mul
399}
400
; Negative test for the signed case: the operands are one bit too wide.
; %lhs24 is %lhs shifted left and then arithmetically right by 7, i.e. its low
; 25 bits sign-extended (the "24" in the value names is a leftover). Both check
; prefixes expect the i32 mul to be left unchanged, with no intrinsic call.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 7), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
401define i32 @smul25_i32(i32 %lhs, i32 %rhs) {
402; SI-LABEL: @smul25_i32(
403; SI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
404; SI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
405; SI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
406; SI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 7
407; SI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
408; SI-NEXT: ret i32 [[MUL]]
409;
410; VI-LABEL: @smul25_i32(
411; VI-NEXT: [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
412; VI-NEXT: [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
413; VI-NEXT: [[LSHR_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
414; VI-NEXT: [[RHS24:%.*]] = ashr i32 [[LHS]], 7
415; VI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
416; VI-NEXT: ret i32 [[MUL]]
417;
418 %shl.lhs = shl i32 %lhs, 7
419 %lhs24 = ashr i32 %shl.lhs, 7
420 %lshr.rhs = shl i32 %rhs, 7
421 %rhs24 = ashr i32 %lhs, 7
422 %mul = mul i32 %lhs24, %rhs24
423 ret i32 %mul
424}
425
; Negative test for the unsigned case: the operands are masked with 33554431
; (2^25 - 1), so they keep 25 bits rather than 24 (the "24" in the value names
; is a leftover). Both check prefixes expect the i32 mul to be left unchanged,
; with no intrinsic call.
426define i32 @umul25_i32(i32 %lhs, i32 %rhs) {
427; SI-LABEL: @umul25_i32(
428; SI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
429; SI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
430; SI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
431; SI-NEXT: ret i32 [[MUL]]
432;
433; VI-LABEL: @umul25_i32(
434; VI-NEXT: [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
435; VI-NEXT: [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
436; VI-NEXT: [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
437; VI-NEXT: ret i32 [[MUL]]
438;
439 %lhs24 = and i32 %lhs, 33554431
440 %rhs24 = and i32 %rhs, 33554431
441 %mul = mul i32 %lhs24, %rhs24
442 ret i32 %mul
443}
444
; Two-element vector of i33, signed variant. %lhs24 is each lane of %lhs
; shifted left and then arithmetically right by 9, i.e. its low 24 bits
; sign-extended. Both check prefixes expect all four lanes to be extracted
; first; then, lane by lane, the operands are truncated to i32, multiplied with
; llvm.amdgcn.mul.i24 and sign-extended back to i33, and finally the two
; results are inserted into an undef <2 x i33>.
; NOTE(review): %rhs24 is computed from %lhs (ashr by 9), not from %lshr.rhs,
; so %lshr.rhs is unused and %rhs never reaches the product. This looks like a
; copy/paste slip -- confirm intent; fixing it means regenerating the
; assertions with utils/update_test_checks.py.
445define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
446; SI-LABEL: @smul24_v2i33(
447; SI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
448; SI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
449; SI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
450; SI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
451; SI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
452; SI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
453; SI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
454; SI-NEXT: [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
455; SI-NEXT: [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
456; SI-NEXT: [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
457; SI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
458; SI-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
459; SI-NEXT: [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
460; SI-NEXT: [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
461; SI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
462; SI-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
463; SI-NEXT: [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
464; SI-NEXT: [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
465; SI-NEXT: ret <2 x i33> [[TMP14]]
466;
467; VI-LABEL: @smul24_v2i33(
468; VI-NEXT: [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
469; VI-NEXT: [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
470; VI-NEXT: [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
471; VI-NEXT: [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
472; VI-NEXT: [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
473; VI-NEXT: [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
474; VI-NEXT: [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
475; VI-NEXT: [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
476; VI-NEXT: [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
477; VI-NEXT: [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
478; VI-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
479; VI-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
480; VI-NEXT: [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
481; VI-NEXT: [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
482; VI-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
483; VI-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
484; VI-NEXT: [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
485; VI-NEXT: [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
486; VI-NEXT: ret <2 x i33> [[TMP14]]
487;
488 %shl.lhs = shl <2 x i33> %lhs, <i33 9, i33 9>
489 %lhs24 = ashr <2 x i33> %shl.lhs, <i33 9, i33 9>
490 %lshr.rhs = shl <2 x i33> %rhs, <i33 9, i33 9>
491 %rhs24 = ashr <2 x i33> %lhs, <i33 9, i33 9>
492 %mul = mul <2 x i33> %lhs24, %rhs24
493 ret <2 x i33> %mul
494}