blob: 2dfe1570ab08c7ff23824c397ae8b3f37100363a [file] [log] [blame]
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001// Pattern fragment that combines the value type and the register class
2// into a single parameter.
3// The pat frags in the definitions below need to have a named register,
4// otherwise i32 will be assumed regardless of the register class. The
5// name of the register does not matter.
6def I1 : PatLeaf<(i1 PredRegs:$R)>;
7def I32 : PatLeaf<(i32 IntRegs:$R)>;
8def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
9def F32 : PatLeaf<(f32 IntRegs:$R)>;
10def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
11
12// Pattern fragments to extract the low and high subregisters from a
13// 64-bit value.
14def LoReg: OutPatFrag<(ops node:$Rs),
15 (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
16def HiReg: OutPatFrag<(ops node:$Rs),
17 (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
18
19def orisadd: PatFrag<(ops node:$Addr, node:$off),
20 (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
21
22// SDNode for converting immediate C to C-1.
23def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
24 // Return the byte immediate const-1 as an SDNode.
25 int32_t imm = N->getSExtValue();
26 return XformSToSM1Imm(imm, SDLoc(N));
27}]>;
28
29// SDNode for converting immediate C to C-2.
30def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
31 // Return the byte immediate const-2 as an SDNode.
32 int32_t imm = N->getSExtValue();
33 return XformSToSM2Imm(imm, SDLoc(N));
34}]>;
35
36// SDNode for converting immediate C to C-3.
37def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
38 // Return the byte immediate const-3 as an SDNode.
39 int32_t imm = N->getSExtValue();
40 return XformSToSM3Imm(imm, SDLoc(N));
41}]>;
42
43// SDNodeXForm for converting an unsigned immediate C to C-1.
44def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
45 // Return the byte immediate const-1 as an SDNode.
46 uint32_t imm = N->getZExtValue();
47 return XformUToUM1Imm(imm, SDLoc(N));
48}]>;
49
// Compare of a 32-bit register against an immediate, yielding an i1.
//   MI      - instruction selected for the compare,
//   OpNode  - comparison fragment being matched (e.g. seteq),
//   ImmPred - leaf restricting which immediates are accepted.
class T_CMP_pat<InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
  : Pat<(i1 (OpNode I32:$Rs, ImmPred:$Imm)),
        (MI I32:$Rs, ImmPred:$Imm)>;

def: T_CMP_pat<C2_cmpeqi,  seteq,  s10_0ImmPred>;
def: T_CMP_pat<C2_cmpgti,  setgt,  s10_0ImmPred>;
def: T_CMP_pat<C2_cmpgtui, setugt, u9_0ImmPred>;
57
58def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
59 [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
60
61def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
62def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
63
// Selection patterns for instructions taking two 32-bit register operands.
//   Op   - DAG node being matched,
//   MI   - instruction that is selected,
//   ResT - value type of the result.
class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
  : Pat<(ResT (Op I32:$Rs, I32:$Rt)),
        (ResT (MI I32:$Rs, I32:$Rt))>;

// 32-bit results.
def: BinOp32_pat<add, A2_add, i32>;
def: BinOp32_pat<and, A2_and, i32>;
def: BinOp32_pat<or,  A2_or,  i32>;
def: BinOp32_pat<sub, A2_sub, i32>;
def: BinOp32_pat<xor, A2_xor, i32>;

// 64-bit results produced from two 32-bit operands.
def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
def: BinOp32_pat<HexagonPACKHL,  S2_packhl,   i64>;
77
// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
// that reverse the order of the operands. The operand list is declared as
// ($rhs, $lhs) while F.Fragment is reused unchanged, so the first operand
// written at the use site binds to the name "rhs" and the second to "lhs".
// This presumably relies on F.Fragment naming its operands "lhs" and "rhs"
// (verify against the definition of the wrapped fragment).
// The predicate code and the operand transform of F are forwarded too, the
// same way SwapSt does it, so that wrapping a fragment which carries either
// of them does not silently drop it.
class RevCmp<PatFrag F>
  : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode,
            F.OperandTransform>;
81
82// Pats for compares. They use PatFrags as operands, not SDNodes,
83// since seteq/setgt/etc. are defined as PatFrags.
// Register-register compare of two i32 values.
//   MI - compare instruction that is selected,
//   Op - comparison fragment being matched,
//   VT - value type of the compare result.
class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
  : Pat<(VT (Op I32:$Rs, I32:$Rt)),
        (VT (MI I32:$Rs, I32:$Rt))>;

// Comparisons matched with the operands in their written order.
def: T_cmp32_rr_pat<C2_cmpeq,  seteq,  i1>;
def: T_cmp32_rr_pat<C2_cmpgt,  setgt,  i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;

// Less-than comparisons go through RevCmp and reuse the greater-than
// instructions.
def: T_cmp32_rr_pat<C2_cmpgt,  RevCmp<setlt>,  i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
94
95def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
96 (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
97
98def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)),
99 (i32 (A2_addi I32:$Rs, imm:$s16))>;
100
101def: Pat<(or (i32 IntRegs:$Rs), s32_0ImmPred:$s10),
102 (A2_orir IntRegs:$Rs, imm:$s10)>;
103def: Pat<(and (i32 IntRegs:$Rs), s32_0ImmPred:$s10),
104 (A2_andir IntRegs:$Rs, imm:$s10)>;
105
106def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
107 (A2_subri imm:$s10, IntRegs:$Rs)>;
108
109// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
110def: Pat<(not (i32 IntRegs:$src1)),
111 (A2_subri -1, IntRegs:$src1)>;
112
113def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
114def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
115
116def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)),
117 (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
118
119def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)),
120 (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
121
122def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)),
123 (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
124
125def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
126def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
127def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
128def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
129
130class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
131 : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
132 (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
133
134def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
135def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
136def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
137def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
138def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
139def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
140def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
141def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
142
143// Add halfword.
144def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
145 (A2_addh_l16_ll I32:$src1, I32:$src2)>;
146
147def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
148 (A2_addh_l16_hl I32:$src1, I32:$src2)>;
149
150def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
151 (A2_addh_h16_ll I32:$src1, I32:$src2)>;
152
153// Subtract halfword.
154def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
155 (A2_subh_l16_ll I32:$src1, I32:$src2)>;
156
157def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
158 (A2_subh_h16_ll I32:$src1, I32:$src2)>;
159
160// Here, depending on the operand being selected, we'll either generate a
161// min or max instruction.
162// Ex:
163// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected
164// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'.
165// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value
166// is selected and the corresponding HexagonInst is passed in 'SwapInst'.
167
// Emits the two select-of-compare patterns for one comparison Op on
// operands of type VT in register class RC.
multiclass T_MinMax_pats<PatFrag Op, RegisterClass RC, ValueType VT,
                         InstHexagon Inst, InstHexagon SwapInst> {
  // The select picks the operands in the same order as they are compared:
  // use Inst.
  def: Pat<(select (i1 (Op (VT RC:$Rs), (VT RC:$Rt))),
                   (VT RC:$Rs),
                   (VT RC:$Rt)),
           (Inst RC:$Rs, RC:$Rt)>;
  // The select picks the operands in the opposite order: use SwapInst.
  def: Pat<(select (i1 (Op (VT RC:$Rs), (VT RC:$Rt))),
                   (VT RC:$Rt),
                   (VT RC:$Rs)),
           (SwapInst RC:$Rs, RC:$Rt)>;
}
177
178
179multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
180 defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
181
182 def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
183 (i32 PositiveHalfWord:$src2))),
184 (i32 PositiveHalfWord:$src1),
185 (i32 PositiveHalfWord:$src2))), i16),
186 (Inst IntRegs:$src1, IntRegs:$src2)>;
187
188 def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
189 (i32 PositiveHalfWord:$src2))),
190 (i32 PositiveHalfWord:$src2),
191 (i32 PositiveHalfWord:$src1))), i16),
192 (SwapInst IntRegs:$src1, IntRegs:$src2)>;
193}
194
195let AddedComplexity = 200 in {
196 defm: MinMax_pats<setge, A2_max, A2_min>;
197 defm: MinMax_pats<setgt, A2_max, A2_min>;
198 defm: MinMax_pats<setle, A2_min, A2_max>;
199 defm: MinMax_pats<setlt, A2_min, A2_max>;
200 defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
201 defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
202 defm: MinMax_pats<setule, A2_minu, A2_maxu>;
203 defm: MinMax_pats<setult, A2_minu, A2_maxu>;
204}
205
// Register-register compare of two i64 values, producing an i1.
//   MI    - compare instruction that is selected,
//   CmpOp - comparison fragment being matched.
class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
  : Pat<(i1 (CmpOp I64:$Rss, I64:$Rtt)),
        (i1 (MI I64:$Rss, I64:$Rtt))>;

def: T_cmp64_rr_pat<C2_cmpeqp,  seteq>;
def: T_cmp64_rr_pat<C2_cmpgtp,  setgt>;
def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
// Less-than comparisons go through RevCmp and reuse the greater-than
// instructions.
def: T_cmp64_rr_pat<C2_cmpgtp,  RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
215
216def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
217def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
218
219def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
220def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
221def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
222
// Logical operations on i1 values held in predicate registers.
def: Pat<(i1 (not I1:$Ps)),               (C2_not  I1:$Ps)>;

def: Pat<(i1 (and I1:$Ps, I1:$Pt)),       (C2_and  I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, I1:$Pt)),       (C2_or   I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (xor I1:$Ps, I1:$Pt)),       (C2_xor  I1:$Ps, I1:$Pt)>;
// Forms with the second operand negated.
def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, (not I1:$Pt))), (C2_orn  I1:$Ps, I1:$Pt)>;
231
232def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
233 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
234def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
235
236def: Pat<(br bb:$dst),
237 (J2_jump brtarget:$dst)>;
238def: Pat<(retflag),
239 (PS_jmpret (i32 R31))>;
240def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
241 (J2_jumpt PredRegs:$src1, bb:$offset)>;
242
243def: Pat<(eh_return),
244 (EH_RETURN_JMPR (i32 R31))>;
245def: Pat<(brind (i32 IntRegs:$dst)),
246 (J2_jumpr IntRegs:$dst)>;
247
// Patterns to select load-indexed (i.e. load from base+offset).
//   Load    - load fragment being matched,
//   VT      - type of the loaded value,
//   ImmPred - leaf restricting the accepted offsets,
//   MI      - base+offset load instruction that is selected.
// The address forms handled are:
//   - frameindex,
//   - frameindex + offset (written as add, or as an or accepted by orisadd),
//   - register + offset (written as add, or as an or accepted by orisadd),
//   - plain register (offset 0).
// The register + orisadd form mirrors what Storex_add_pat does for stores,
// so that loads and stores accept the same set of address expressions.
multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
                     InstHexagon MI> {
  def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
  def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (VT (MI AddrFI:$fi, imm:$Off))>;
  def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
           (VT (MI AddrFI:$fi, imm:$Off))>;
  def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
           (VT (MI IntRegs:$Rs, imm:$Off))>;
  def: Pat<(VT (Load (orisadd (i32 IntRegs:$Rs), ImmPred:$Off))),
           (VT (MI IntRegs:$Rs, imm:$Off))>;
  def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
}
260
261let AddedComplexity = 20 in {
262 defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
263 defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
264 defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
265 defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
266 defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
267 defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
268
269 defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
270 defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
271 defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
272 defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
273 defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
274 defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
275 defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
276 defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
277 // No sextloadi1.
278}
279
280// Sign-extending loads of i1 need to replicate the lowest bit throughout
281// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
282// do the trick.
283let AddedComplexity = 20 in
284def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
285 (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
286
287def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
288def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
289def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
290
291def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8),
292 (M2_mpysip IntRegs:$Rs, imm:$u8)>;
293def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)),
294 (M2_mpysin IntRegs:$Rs, imm:$u8)>;
295def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2),
296 (M2_mpysmi IntRegs:$src1, imm:$src2)>;
297def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
298 (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
299def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
300 (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
301def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
302 (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
303def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
304 (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
305
306class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
307 PatLeaf ImmPred>
308 : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
309 (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
310
311class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
312 : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
313 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
314
315def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
316def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;
317
318def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
319def : T_MType_acc_pat2 <M2_nacci, add, sub>;
320
321def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
322def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
323def: T_MType_acc_pat2 <M4_or_and, and, or>;
324def: T_MType_acc_pat2 <M4_and_and, and, and>;
325def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
326def: T_MType_acc_pat2 <M4_or_or, or, or>;
327def: T_MType_acc_pat2 <M4_and_or, or, and>;
328def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
329
330class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
331 : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
332 (not IntRegs:$src3)))),
333 (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
334
335def: T_MType_acc_pat3 <M4_or_andn, and, or>;
336def: T_MType_acc_pat3 <M4_and_andn, and, and>;
337def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
338
339def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
340 (i64 (anyext (i32 IntRegs:$src2))))),
341 (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
342
343def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
344 (i64 (sext (i32 IntRegs:$src2))))),
345 (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
346
347def: Pat<(i64 (mul (is_sext_i32:$src1),
348 (is_sext_i32:$src2))),
349 (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
350
351// Multiply and accumulate, use full result.
352// Rxx[+-]=mpy(Rs,Rt)
353
354def: Pat<(i64 (add (i64 DoubleRegs:$src1),
355 (mul (i64 (sext (i32 IntRegs:$src2))),
356 (i64 (sext (i32 IntRegs:$src3)))))),
357 (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
358
359def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
360 (mul (i64 (sext (i32 IntRegs:$src2))),
361 (i64 (sext (i32 IntRegs:$src3)))))),
362 (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
363
364def: Pat<(i64 (add (i64 DoubleRegs:$src1),
365 (mul (i64 (anyext (i32 IntRegs:$src2))),
366 (i64 (anyext (i32 IntRegs:$src3)))))),
367 (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
368
369def: Pat<(i64 (add (i64 DoubleRegs:$src1),
370 (mul (i64 (zext (i32 IntRegs:$src2))),
371 (i64 (zext (i32 IntRegs:$src3)))))),
372 (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
373
374def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
375 (mul (i64 (anyext (i32 IntRegs:$src2))),
376 (i64 (anyext (i32 IntRegs:$src3)))))),
377 (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
378
379def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
380 (mul (i64 (zext (i32 IntRegs:$src2))),
381 (i64 (zext (i32 IntRegs:$src3)))))),
382 (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
383
// Patterns for the post-increment stores (the *_pi instructions).
//   Store  - post-indexed store fragment being matched,
//   Value  - leaf describing the value being stored,
//   Offset - leaf restricting the accepted increment,
//   MI     - instruction that is selected.
// The DAG operands come as (value, base, increment); the instruction takes
// them as (base, increment, value).
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
                  InstHexagon MI>
  : Pat<(Store Value:$Val, I32:$Base, Offset:$Inc),
        (MI I32:$Base, imm:$Inc, Value:$Val)>;

def: Storepi_pat<post_truncsti8,  I32, s4_0ImmPred, S2_storerb_pi>;
def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
def: Storepi_pat<post_store,      I32, s4_2ImmPred, S2_storeri_pi>;
def: Storepi_pat<post_store,      I64, s4_3ImmPred, S2_storerd_pi>;
393
394// Patterns for generating stores, where the address takes different forms:
395// - frameindex,
396// - frameindex + offset,
397// - base + offset,
398// - simple (base address without offset).
399// These would usually be used together (via Storex_pat defined below), but
400// in some cases one may want to apply different properties (such as
401// AddedComplexity) to the individual patterns.
402class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
403 : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
404multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
405 InstHexagon MI> {
406 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
407 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
408 def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
409 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
410}
411multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
412 InstHexagon MI> {
413 def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
414 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
415 def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
416 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
417}
418class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
419 : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
420 (MI IntRegs:$Rs, 0, Value:$Rt)>;
421
422// Patterns for generating stores, where the address takes different forms,
423// and where the value being stored is transformed through the value modifier
424// ValueMod. The address forms are same as above.
425class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
426 InstHexagon MI>
427 : Pat<(Store Value:$Rs, AddrFI:$fi),
428 (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
429multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
430 PatFrag ValueMod, InstHexagon MI> {
431 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
432 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
433 def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
434 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
435}
436multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
437 PatFrag ValueMod, InstHexagon MI> {
438 def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
439 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
440 def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
441 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
442}
443class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
444 InstHexagon MI>
445 : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
446 (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
447
448multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
449 InstHexagon MI> {
450 def: Storex_fi_pat <Store, Value, MI>;
451 defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
452 defm: Storex_add_pat <Store, Value, ImmPred, MI>;
453}
454
455multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
456 PatFrag ValueMod, InstHexagon MI> {
457 def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
458 defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
459 defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
460}
461
462// Regular stores in the DAG have two operands: value and address.
463// Atomic stores also have two, but they are reversed: address, value.
464// To use atomic stores with the patterns, they need to have their operands
465// swapped. This relies on the knowledge that the F.Fragment uses names
466// "ptr" and "val".
467class SwapSt<PatFrag F>
468 : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
469 F.OperandTransform>;
470
471let AddedComplexity = 20 in {
472 defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
473 defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
474 defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
475 defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
476
477 defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
478 defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
479 defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
480 defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
481}
482
483// Simple patterns should be tried with the least priority.
484def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
485def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
486def: Storex_simple_pat<store, I32, S2_storeri_io>;
487def: Storex_simple_pat<store, I64, S2_storerd_io>;
488
489def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
490def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
491def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
492def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
493
494let AddedComplexity = 20 in {
495 defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
496 defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
497 defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
498}
499
500def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
501def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
502def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
503
504def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
505
// Absolute value written as a select: (Rs < 0) ? (0 - Rs) : Rs.
def: Pat<(i32 (select (i1 (setlt I32:$Rs, 0)),
                      (i32 (sub 0, I32:$Rs)),
                      I32:$Rs)),
         (A2_abs I32:$Rs)>;

// Absolute value written without a branch: with S = Rs >> 31 (arithmetic
// shift), match (S + Rs) ^ S.
let AddedComplexity = 50 in
def: Pat<(i32 (xor (add (sra I32:$Rs, (i32 31)), I32:$Rs),
                   (sra I32:$Rs, (i32 31)))),
         (A2_abs I32:$Rs)>;
516
517def: Pat<(sra (i32 IntRegs:$src), u5_0ImmPred:$u5),
518 (S2_asr_i_r IntRegs:$src, imm:$u5)>;
519def: Pat<(srl (i32 IntRegs:$src), u5_0ImmPred:$u5),
520 (S2_lsr_i_r IntRegs:$src, imm:$u5)>;
521def: Pat<(shl (i32 IntRegs:$src), u5_0ImmPred:$u5),
522 (S2_asl_i_r IntRegs:$src, imm:$u5)>;
523
524def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)),
525 (i32 1))),
526 (i32 1))),
527 (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;
528
529def : Pat<(not (i64 DoubleRegs:$src1)),
530 (A2_notp DoubleRegs:$src1)>;
531
532// Count leading zeros.
533def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
534def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
535
536// Count trailing zeros: 32-bit.
537def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
538
539// Count leading ones.
540def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
541def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
542
543// Count trailing ones: 32-bit.
544def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
545
546def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5_0ImmPred:$u5)))),
547 (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
548def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
549 (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
550def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
551 (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
552def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
553 (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
554def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
555 (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
556def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
557 (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
558
559let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
560 def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
561 (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
562 def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
563 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
564 def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
565 (S2_tstbit_i IntRegs:$Rs, 0)>;
566 def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
567 (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
568}
569
570let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
571 def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6_0ImmPred:$u6), 0)),
572 (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
573 def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
574 (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
575}
576
577let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
578def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
579 (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
580
// Match a 32-bit value assembled from four byte loads at $b, $b+1, $b+2 and
// $b+3, and select a single word load from $b followed by A2_swiz.
// The byte at $b ends up in bits 7:0 of the matched value, $b+1 in bits
// 15:8, $b+2 in bits 23:16 and $b+3 in bits 31:24.
// NOTE(review): on a little-endian target this layout looks identical to
// what the word load alone produces; if A2_swiz reverses the bytes (confirm
// against the instruction definition), the selected code would not compute
// the matched value. The word load presumably also has alignment
// requirements that the four byte loads do not - verify before relying on
// this pattern.
580def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
581 (i32 8)),
582 (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
583 (i32 16)),
584 (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
585 (zextloadi8 (i32 IntRegs:$b))),
586 (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
588
589// Patterns for loads of i1:
590def: Pat<(i1 (load AddrFI:$fi)),
591 (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
592def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32_0ImmPred:$Off))),
593 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
594def: Pat<(i1 (load (i32 IntRegs:$Rs))),
595 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
596
// Output fragment turning an i1 value into an i32: C2_muxii is given the
// immediates 1 and 0 to choose from.
def I1toI32: OutPatFrag<(ops node:$Pu), (C2_muxii (i1 $Pu), 1, 0)>;

// Output fragment turning an i32 value into an i1 via C2_tfrrp.
def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>;
602
603defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
604def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
605
606def: Pat<(sra (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
607 (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
608def: Pat<(srl (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
609 (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
610def: Pat<(shl (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
611 (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
612
613let AddedComplexity = 100 in
614def: Pat<(add (i32 IntRegs:$Rt), (shl (i32 IntRegs:$Rs), u3_0ImmPred:$u3)),
615 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
616
617def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
618def: Pat<(HexagonBARRIER), (Y2_barrier)>;
619
620def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
621 (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
622
623
624// Support for generating global address.
625// Taken from X86InstrInfo.td.
626def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
627 SDTCisVT<1, i32>,
628 SDTCisPtrTy<0>]>;
629def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
630def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
631
632// Map TLS addresses to A2_tfrsi.
633def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
634def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
635
636def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
637def: Pat<(i1 0), (PS_false)>;
638def: Pat<(i1 1), (PS_true)>;
639
640// Pseudo instructions.
641def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
642def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
643 SDTCisVT<1, i32> ]>;
644
645def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
646 [SDNPHasChain, SDNPOutGlue]>;
647def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
648 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
649
650def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
651
652// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
653// Optional Flag and Variable Arguments.
654// Its 1 Operand has pointer type.
655def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
656 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
657
658
659def: Pat<(callseq_start timm:$amt),
660 (ADJCALLSTACKDOWN imm:$amt)>;
661def: Pat<(callseq_end timm:$amt1, timm:$amt2),
662 (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
663
664//Tail calls.
665def: Pat<(HexagonTCRet tglobaladdr:$dst),
666 (PS_tailcall_i tglobaladdr:$dst)>;
667def: Pat<(HexagonTCRet texternalsym:$dst),
668 (PS_tailcall_i texternalsym:$dst)>;
669def: Pat<(HexagonTCRet I32:$dst),
670 (PS_tailcall_r I32:$dst)>;
671
672// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
673def: Pat<(and (i32 IntRegs:$src1), 65535),
674 (A2_zxth IntRegs:$src1)>;
675
676// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
677def: Pat<(and (i32 IntRegs:$src1), 255),
678 (A2_zxtb IntRegs:$src1)>;
679
680// Map Add(p1, true) to p1 = not(p1).
681// Add(p1, false) should never be produced;
682// if it is, it has to be mapped to a NOOP.
683def: Pat<(add (i1 PredRegs:$src1), -1),
684 (C2_not PredRegs:$src1)>;
685
686// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
687def: Pat<(select (not (i1 PredRegs:$src1)), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
688 (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
689
690// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
691// => r0 = C2_muxir(p0, r1, #i)
692def: Pat<(select (not (i1 PredRegs:$src1)), s32_0ImmPred:$src2,
693 (i32 IntRegs:$src3)),
694 (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
695
696// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
697// => r0 = C2_muxri (p0, #i, r1)
698def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32_0ImmPred:$src3),
699 (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
700
701// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
702def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
703 (J2_jumpf PredRegs:$src1, bb:$offset)>;
704
705// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
706def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
707 (A2_sxtw (LoReg DoubleRegs:$src1))>;
708
709// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
710def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
711 (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
712
713// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
714def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
715 (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
716
717// We want to prevent emitting pnot's as much as possible.
718// Map brcond with an unsupported setcc to a J2_jumpf.
719def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
720 bb:$offset),
721 (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
722 bb:$offset)>;
723
724def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10_0ImmPred:$src2)),
725 bb:$offset),
726 (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10_0ImmPred:$src2), bb:$offset)>;
727
728def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
729 (J2_jumpf PredRegs:$src1, bb:$offset)>;
730
731def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
732 (J2_jumpt PredRegs:$src1, bb:$offset)>;
733
734// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
735def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8_0ImmPred:$src2)), bb:$offset),
736 (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)),
737 bb:$offset)>;
738
// 64-bit select. There is no single 64-bit mux used here, so the result is
// built from two 32-bit muxes - one over the high halves and one over the
// low halves of the two inputs - joined with A2_combinew.
def: Pat<(select I1:$Pu, I64:$Rss, I64:$Rtt),
         (A2_combinew
            (C2_mux I1:$Pu, (HiReg I64:$Rss), (HiReg I64:$Rtt)),
            (C2_mux I1:$Pu, (LoReg I64:$Rss), (LoReg I64:$Rtt)))>;
747
// 1-bit select, expanded into predicate logic.
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
def: Pat<(select I1:$Pu, I1:$Ps, I1:$Pt),
         (C2_or (C2_and I1:$Pu, I1:$Ps),
                (C2_and (C2_not I1:$Pu), I1:$Pt))>;
753
// Truncate a 64-bit register value to 32 bits: take the low subregister.
def: Pat<(i32 (trunc (i64 DoubleRegs:$Rss))),
         (LoReg DoubleRegs:$Rss)>;

// Truncate a 64-bit register value to i1: transfer the low subregister into
// a predicate register with C2_tfrrp.
def: Pat<(i1 (trunc (i64 DoubleRegs:$Rss))),
         (C2_tfrrp (LoReg DoubleRegs:$Rss))>;
761
// Signed "less than or equal" is selected as the negation of "greater than".

// Rs <= #s32 -> !(Rs > #s32).
let AddedComplexity = 30 in {
  def: Pat<(i1 (setle (i32 IntRegs:$Rs), s32_0ImmPred:$s32)),
           (C2_not (C2_cmpgti IntRegs:$Rs, s32_0ImmPred:$s32))>;
}

// Rs <= Rt -> !(Rs > Rt).
def: Pat<(i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
         (i1 (C2_not (C2_cmpgt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))>;

// Rss <= Rtt -> !(Rss > Rtt).
def: Pat<(i1 (setle (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
         (C2_not (C2_cmpgtp DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
774
// "Not equal" is selected as the negation of "equal".
// Hexagon_TODO: We should improve on this.

// Rs != #s32 -> !(Rs == #s32).
let AddedComplexity = 30 in {
  def: Pat<(i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$s32)),
           (C2_not (C2_cmpeqi IntRegs:$Rs, s32_0ImmPred:$s32))>;
}

// On predicate registers, setne is the same as xor, so use C2_xor directly.
def: Pat<(i1 (setne (i1 PredRegs:$Ps), (i1 PredRegs:$Pt))),
         (C2_xor PredRegs:$Ps, PredRegs:$Pt)>;

// Rss != Rtt -> !cmpeq(Rss, Rtt).
def: Pat<(i1 (setne (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
         (C2_not (C2_cmpeqp DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
790
// Signed "greater than or equal".

// Rs >= Rt -> !(Rt > Rs). Note the swapped operands of the compare.
def: Pat<(i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
         (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$Rt), (i32 IntRegs:$Rs)))))>;

// Rs >= Imm -> Rs > Imm-1. DEC_CONST_SIGNED produces Imm-1.
let AddedComplexity = 30 in {
  def: Pat<(i1 (setge (i32 IntRegs:$Rs), s32_0ImmPred:$s32)),
           (C2_cmpgti IntRegs:$Rs, (DEC_CONST_SIGNED s32_0ImmPred:$s32))>;
}

// Rss >= Rtt -> !(Rtt > Rss). Note the swapped operands of the compare.
def: Pat<(i1 (setge (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
         (C2_not (C2_cmpgtp DoubleRegs:$Rtt, DoubleRegs:$Rss))>;
805
// Rs < Imm:
//   cmplt(Rs, Imm) == !cmpge(Rs, Imm) == !cmpgt(Rs, Imm-1).
// DEC_CONST_SIGNED produces Imm-1.
let AddedComplexity = 30 in {
  def: Pat<(i1 (setlt (i32 IntRegs:$Rs), s32_0ImmPred:$s32)),
           (C2_not (C2_cmpgti IntRegs:$Rs,
                              (DEC_CONST_SIGNED s32_0ImmPred:$s32)))>;
}
813
// Unsigned 32-bit comparisons against an immediate.

// Rs >=u 0 holds for every Rs, so emit a compare that is always true:
// cmpeq(Rs, Rs).
def: Pat<(i1 (setuge (i32 IntRegs:$Rs), 0)),
         (C2_cmpeq IntRegs:$Rs, IntRegs:$Rs)>;

// Rs >=u Imm -> Rs >u Imm-1. DEC_CONST_UNSIGNED produces Imm-1.
def: Pat<(i1 (setuge (i32 IntRegs:$Rs), u32_0ImmPred:$u32)),
         (C2_cmpgtui IntRegs:$Rs, (DEC_CONST_UNSIGNED u32_0ImmPred:$u32))>;

// Rs >u Imm maps directly onto cmpgtui.
def: Pat<(i1 (setugt (i32 IntRegs:$Rs), u32_0ImmPred:$u32)),
         (C2_cmpgtui IntRegs:$Rs, u32_0ImmPred:$u32)>;
825
// Unsigned 64-bit comparisons, both built from cmpgtup.

// Rss >=u Rtt -> !(Rtt >u Rss). Note the swapped operands of the compare.
def: Pat<(i1 (setuge (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
         (C2_not (C2_cmpgtup DoubleRegs:$Rtt, DoubleRegs:$Rss))>;

// Rss <=u Rtt -> !(Rss >u Rtt).
def: Pat<(i1 (setule (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
         (C2_not (C2_cmpgtup DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
835
// Sign extends.
// i1 -> i32: true becomes -1 (all ones), false becomes 0.
def: Pat<(i32 (sext (i1 PredRegs:$src1))),
         (C2_muxii PredRegs:$src1, -1, 0)>;

// i1 -> i64: true becomes -1 (all ones in both words), false becomes 0.
// Both halves must be selected by the predicate. A constant -1 in the high
// word would turn a false predicate into 0xFFFFFFFF00000000 instead of 0.
def: Pat<(i64 (sext (i1 PredRegs:$src1))),
         (A2_combinew (C2_muxii PredRegs:$src1, -1, 0),
                      (C2_muxii PredRegs:$src1, -1, 0))>;
844
// Zero extends.
// i1 -> i32: true becomes 1, false becomes 0.
def: Pat<(i32 (zext (i1 PredRegs:$Pu))),
         (C2_muxii PredRegs:$Pu, 1, 0)>;

// Any-extend i1 -> i32: Rd = mux(Pu, #1, #0), same as the zero extend.
def: Pat<(i32 (anyext (i1 PredRegs:$Pu))),
         (C2_muxii PredRegs:$Pu, 1, 0)>;

// Any-extend i1 -> i64: Rdd = sxtw(mux(Pu, #1, #0)).
def: Pat<(i64 (anyext (i1 PredRegs:$Pu))),
         (A2_sxtw (C2_muxii PredRegs:$Pu, 1, 0))>;
857
// Output fragment that clears the sign bit of a 64-bit register: bit 31 of
// the high word is cleared with S2_clrbit_i and the low word is passed
// through unchanged.
def ClearSign : OutPatFrag<(ops node:$Rss),
  (A2_combinew
     (S2_clrbit_i (HiReg $Rss), 31),
     (LoReg $Rss))>;
861
// Output fragment computing the upper 64 bits of the unsigned 128-bit
// product Rss * Rtt from 32x32->64 multiplies (M2_dpmpyuu_s0) and
// multiply-accumulates (M2_dpmpyuu_acc_s0).
//
// Reading the expression from the inside out:
//   t1     = ((Rss.lo * Rtt.lo) >> 32) + Rss.hi * Rtt.lo
//   t2     = t1 + zext(low word of (Rss.lo * Rtt.hi))
//   t3     = (t2 >> 32) + Rss.hi * Rtt.hi
//   result = t3 + ((Rss.lo * Rtt.hi) >> 32)
def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
  (A2_addp
    (M2_dpmpyuu_acc_s0
      (S2_lsr_i_p
        (A2_addp
          (M2_dpmpyuu_acc_s0
            (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
            (HiReg $Rss),
            (LoReg $Rtt)),
          (A2_combinew
            (A2_tfrsi 0),
            (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
        32),
      (HiReg $Rss),
      (HiReg $Rtt)),
    (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;

// Multiply 64-bit unsigned and use upper result.
def: Pat<(mulhu I64:$Rss, I64:$Rtt),
         (MulHU $Rss, $Rtt)>;
880
// Multiply 64-bit signed and use upper result.
//
// For two signed 64-bit integers A and B, let A' and B' denote A and B
// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
// sign bit of A (and identically for B). With this notation, the signed
// product A*B can be written as:
//   AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
//      = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
//      = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
//      = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
//
// In the pattern below, the arithmetic shift right by 63 turns each sign bit
// into an all-ones or all-zeros mask, so the and-with-mask terms compute
// s(A)B' and s(B)A'. Their sum is subtracted from the unsigned high half.
def: Pat<(mulhs I64:$Rss, I64:$Rtt),
         (A2_subp
           (MulHU $Rss, $Rtt),
           (A2_addp
             (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
             (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
898
// Hexagon specific ISD nodes.

// HexagonISD::ALLOCA: one result and two operands. The result and the first
// operand are constrained to i32; the node carries a chain.
def SDTHexagonALLOCA
  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def HexagonALLOCA
  : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>;

// Select the ALLOCA node as the PS_alloca pseudo-instruction.
def: Pat<(HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)),
         (PS_alloca IntRegs:$Rs, imm:$A)>;
908
// Wrapper nodes for jump-table and constant-pool addresses. Both are
// materialized with a transfer-immediate (A2_tfrsi).
def HexagonJT : SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
def HexagonCP : SDNode<"HexagonISD::CP", SDTIntUnaryOp>;

def: Pat<(HexagonJT tjumptable:$dst),
         (A2_tfrsi imm:$dst)>;
def: Pat<(HexagonCP tconstpool:$dst),
         (A2_tfrsi imm:$dst)>;
914
// Fold an arithmetic shift right by an immediate into the add/sub/and/or
// that consumes it, selecting the S2_asr_i_{r,p}_{acc,nac,and,or}
// instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave the sub/and/or patterns
// without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
926
// Fold a logical shift right by an immediate into the add/sub/and/or/xor
// that consumes it, selecting the S2_lsr_i_{r,p}_{acc,nac,and,or,xacc}
// instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave the sub/and/or patterns
// without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
942
// Fold a shift left by an immediate into the add/sub/and/or/xor that
// consumes it, selecting the S2_asl_i_{r,p}_{acc,nac,and,or,xacc}
// instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave the sub/and/or patterns
// without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)),
           (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)),
           (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
958
// Fold a shift left by a register amount into the add/sub/and/or/xor that
// consumes it, selecting the S2_asl_r_r_* (32-bit) and S2_asl_r_p_* (64-bit)
// instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave every pattern after the
// first "add" without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
970
// Fold an arithmetic shift right by a register amount into the
// add/sub/and/or/xor that consumes it, selecting the S2_asr_r_r_* (32-bit)
// and S2_asr_r_p_* (64-bit) instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave every pattern after the
// first "add" without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
982
// Fold a logical shift right by a register amount into the
// add/sub/and/or/xor that consumes it, selecting the S2_lsr_r_r_* (32-bit)
// and S2_lsr_r_p_* (64-bit) instructions.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave every pattern after the
// first "add" without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
994
// Shift left by a register amount folded into add/sub/and/or/xor, selecting
// the S2_lsl_r_r_* (32-bit) and S2_lsl_r_p_* (64-bit) instructions.
//
// NOTE(review): the source patterns here are identical to the ones that
// select the S2_asl_r_* forms, so presumably only one of the two sets can
// ever be chosen -- TODO confirm which one the selector prefers.
//
// The braces are required: a braceless "let ... in" applies only to the
// single def that follows it, which would leave every pattern after the
// first "add" without the intended AddedComplexity.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
}

// 64-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))),
           (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
1006
// Plain shifts by a register amount.
// NOTE(review): shl is mapped twice for each width (to the asl and to the
// lsl instruction) with identical source patterns -- presumably only one of
// each pair is ever selected; TODO confirm.

// 64-bit value, 32-bit shift amount.
def: Pat<(sra (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt)),
         (S2_asr_r_p DoubleRegs:$Rss, IntRegs:$Rt)>;
def: Pat<(srl (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt)),
         (S2_lsr_r_p DoubleRegs:$Rss, IntRegs:$Rt)>;
def: Pat<(shl (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt)),
         (S2_asl_r_p DoubleRegs:$Rss, IntRegs:$Rt)>;
def: Pat<(shl (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt)),
         (S2_lsl_r_p DoubleRegs:$Rss, IntRegs:$Rt)>;

// 32-bit value, 32-bit shift amount.
def: Pat<(sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
         (S2_asr_r_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
         (S2_lsr_r_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
         (S2_asl_r_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
         (S2_lsl_r_r IntRegs:$Rs, IntRegs:$Rt)>;
1016
// Type profile for HexagonISD::INSERT: one integer result, four operands.
// Operands 1 and 2 have the same type as the result; operands 3 and 4 are
// i32.
def SDTHexagonINSERT
  : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                         SDTCisInt<0>,
                         SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;

// Type profile for HexagonISD::INSERTRP: one integer result, three operands.
// Operands 1 and 2 have the same type as the result; operand 3 is i64.
def SDTHexagonINSERTRP
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                         SDTCisInt<0>,
                         SDTCisVT<3, i64>]>;

def HexagonINSERT   : SDNode<"HexagonISD::INSERT",   SDTHexagonINSERT>;
def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;

// INSERT with two immediate operands: 32-bit and 64-bit forms.
def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
         (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
         (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;

// INSERTRP with a 64-bit register as the last operand: 32-bit and 64-bit
// forms.
def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
         (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
         (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
1035
// Recognize a 32-bit value assembled from four single-byte loads at $b,
// $b+1, $b+2 and $b+3 (combined with INSERT, shifts and ors) and replace it
// with one word load from $b followed by A2_swiz.
let AddedComplexity = 100 in {
  def: Pat<(or (or (shl (HexagonINSERT
                           (i32 (zextloadi8 (add I32:$b, 2))),
                           (i32 (extloadi8 (add I32:$b, 3))),
                           24, 8),
                        (i32 16)),
                   (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
               (zextloadi8 I32:$b)),
           (A2_swiz (L2_loadri_io I32:$b, 0))>;
}
1044
// Type profile for HexagonISD::EXTRACTU: one integer result, three operands.
// Operand 1 has the same type as the result; operands 2 and 3 are i32.
def SDTHexagonEXTRACTU
  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                         SDTCisInt<0>, SDTCisInt<1>,
                         SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;

// Type profile for HexagonISD::EXTRACTURP: one integer result, two operands.
// Operand 1 has the same type as the result; operand 2 is i64.
def SDTHexagonEXTRACTURP
  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
                         SDTCisInt<0>, SDTCisInt<1>,
                         SDTCisVT<2, i64>]>;

def HexagonEXTRACTU   : SDNode<"HexagonISD::EXTRACTU",   SDTHexagonEXTRACTU>;
def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;

// EXTRACTU with two immediate operands: 32-bit and 64-bit forms.
def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
         (S2_extractu I32:$Rs, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
def: Pat<(HexagonEXTRACTU I64:$Rss, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
         (S2_extractup I64:$Rss, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;

// EXTRACTURP with a 64-bit register as the last operand: 32-bit and 64-bit
// forms.
def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rtt),
         (S2_extractu_rp I32:$Rs, I64:$Rtt)>;
def: Pat<(HexagonEXTRACTURP I64:$Rss, I64:$Rtt),
         (S2_extractup_rp I64:$Rss, I64:$Rtt)>;
1063
// Multiply by a negated immediate: select M2_mpysin (Rd = -mpyi(Rs, #u8)),
// passing the immediate matched under the ineg.
def: Pat<(mul (i32 IntRegs:$Rs), (ineg n8_0ImmPred:$u8)),
         (M2_mpysin IntRegs:$Rs, u8_0ImmPred:$u8)>;
1067
// 64-bit instantiation of T_MinMax_pats: fixes the register class to
// DoubleRegs and the value type to i64, and forwards the comparison
// together with the instruction and the "swapped" instruction.
multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
  defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
}

// Add a sign-extended 32-bit register to a 64-bit register with A2_addsp.
def: Pat<(add (i64 (sext (i32 IntRegs:$Rs))), (i64 DoubleRegs:$Rtt)),
         (A2_addsp IntRegs:$Rs, DoubleRegs:$Rtt)>;

// 64-bit min/max patterns, one instantiation per comparison.
let AddedComplexity = 200 in {
  // Signed comparisons.
  defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
  defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
  defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
  defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
  // Unsigned comparisons.
  defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
  defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
  defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
  defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
}
1085
// Call nodes. Both carry a chain, optional incoming glue and outgoing glue,
// and take a variable number of operands.
def callv3
  : SDNode<"HexagonISD::CALL", SDT_SPCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

def callv3nr
  : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;

// HexagonISD::CALL: register target -> J2_callr, symbolic target -> J2_call.
def: Pat<(callv3 I32:$dst),            (J2_callr I32:$dst)>;
def: Pat<(callv3 tglobaladdr:$dst),    (J2_call tglobaladdr:$dst)>;
def: Pat<(callv3 texternalsym:$dst),   (J2_call texternalsym:$dst)>;
def: Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>;

// HexagonISD::CALLnr: register target -> PS_callr_nr, symbolic target ->
// PS_call_nr.
def: Pat<(callv3nr I32:$dst),          (PS_callr_nr I32:$dst)>;
def: Pat<(callv3nr tglobaladdr:$dst),  (PS_call_nr tglobaladdr:$dst)>;
def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>;
1109
1110
// Typed (i32) leaves wrapping the AddrGA and AddrGP operands.
def addrga : PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp : PatLeaf<(i32 AddrGP:$Addr)>;
1113
// Transform an immediate mask into the position [0-31] of the bit to set,
// returned as an SDNode. The value is narrowed to 32 bits before being
// handed to XformMskToBitPosU5Imm.
def BITPOS32 : SDNodeXForm<imm, [{
  const int32_t Mask = N->getSExtValue();
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;
1120
1121
1122// Pats for instruction selection.
1123
// Wraps one of the usual comparison patfrags in a zext to i32, so that the
// comparison result is produced in an integer register.
// The operand names must stay "lhs" and "rhs": the seteq/setne frags use
// those names, and the embedded fragment body would not match otherwise.
class CmpInReg<PatFrag Op>
  : PatFrag<(ops node:$lhs, node:$rhs),
            (i32 (zext (i1 Op.Fragment)))>;

// Register-register compares whose result goes to an integer register.
def: T_cmp32_rr_pat<A4_rcmpeq,  CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;

// Register-register compares whose result goes to a predicate register.
def: T_cmp32_rr_pat<C4_cmpneq,  setne,  i1>;
def: T_cmp32_rr_pat<C4_cmplte,  setle,  i1>;
def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;

// The same "less than or equal" instructions, reached through RevCmp from
// the "greater than or equal" comparisons.
def: T_cmp32_rr_pat<C4_cmplte,  RevCmp<setge>,  i1>;
def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
1139
// Equality of the low byte / low halfword of two registers, recognized as
// ((Rs ^ Rt) & mask) ==/!= 0 with mask 255 (byte) or 65535 (halfword).
// The "not equal" forms negate the result of the "equal" compare.
let AddedComplexity = 100 in {
  // Low byte.
  def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 255),
                      0)),
           (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 255),
                      0)),
           (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;

  // Low halfword.
  def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 65535),
                      0)),
           (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 65535),
                      0)),
           (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}
1154
// zext(Rs == #imm) and zext(Rs != #imm): compare against an immediate with
// the result produced directly in an integer register.
def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32_0ImmPred:$imm)))),
         (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$imm)>;
def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$imm)))),
         (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$imm)>;

// Preserve the S2_tstbit_r generation:
// zext(((1 << Rt) & Rs) != 0) -> mux(tstbit(Rs, Rt), #1, #0).
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$Rt))),
                                         (i32 IntRegs:$Rs))),
                               0)))),
         (C2_muxii (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt), 1, 0)>;
1164
// Combines involving one immediate must have a greater complexity than the
// combine of two registers.
let AddedComplexity = 50 in {
  // Register in the high position, immediate in the low position.
  def: Pat<(HexagonCOMBINE IntRegs:$Rs, s32_0ImmPred:$imm),
           (A4_combineri IntRegs:$Rs, s32_0ImmPred:$imm)>;

  // Immediate in the high position, register in the low position.
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$imm, IntRegs:$Rs),
           (A4_combineir s32_0ImmPred:$imm, IntRegs:$Rs)>;
}

// Combines of two immediates must have a greater complexity than any
// combine involving a register.
let AddedComplexity = 75 in {
  def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6),
           (A4_combineii imm:$s8, imm:$u6)>;
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8),
           (A2_combineii imm:$s8, imm:$S8)>;
}
1183
1184
// Output fragments that widen a 32-bit value to 64 bits.
// Zext64 combines a zero high word with the value; Sext64 uses A2_sxtw.
def Zext64 : OutPatFrag<(ops node:$Rs),
                        (i64 (A4_combineir 0, (i32 $Rs)))>;
def Sext64 : OutPatFrag<(ops node:$Rs),
                        (i64 (A2_sxtw (i32 $Rs)))>;
1189
// Indexed loads whose loaded value is post-processed by ValueMod (here: a
// widening to 64 bits). One pattern per address form:
//  - frameindex,
//  - frameindex + offset,
//  - base register + offset,
//  - base register (without offset).
multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
                      PatLeaf ImmPred, InstHexagon MI> {
  // Frame index, zero offset.
  def: Pat<(VT (Load AddrFI:$fi)),
           (VT (ValueMod (MI AddrFI:$fi, 0)))>;
  // Frame index plus immediate offset.
  def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
           (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
  // Base register plus immediate offset.
  def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
           (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
  // Base register, zero offset.
  def: Pat<(VT (Load (i32 IntRegs:$Rs))),
           (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
}

// Any-extending and zero-extending loads to i64 use the unsigned load
// instructions followed by Zext64.
defm: Loadxm_pat<extloadi1,   i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi8,   i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi16,  i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
defm: Loadxm_pat<zextloadi1,  i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi8,  i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
// Sign-extending loads to i64 use the signed load instructions followed by
// Sext64.
defm: Loadxm_pat<sextloadi8,  i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;

// Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
def: Pat<(i64 (anyext (i32 IntRegs:$Rs))),
         (Zext64 IntRegs:$Rs)>;
1217
// Loads from "register [shifted left by #u2] + constant address", where the
// constant address is a CONST32-wrapped global address, constant-pool entry
// or jump table. Each address kind gets a shifted and an unshifted form; the
// unshifted form passes 0 as the shift amount.
multiclass T_LoadAbsReg_Pat<PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
  // Global address.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tglobaladdr:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tglobaladdr:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tglobaladdr:$Addr)))),
           (MI IntRegs:$Rt, 0, tglobaladdr:$Addr)>;

  // Constant pool.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tconstpool:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tconstpool:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tconstpool:$Addr)))),
           (MI IntRegs:$Rt, 0, tconstpool:$Addr)>;

  // Jump table.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tjumptable:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tjumptable:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tjumptable:$Addr)))),
           (MI IntRegs:$Rt, 0, tjumptable:$Addr)>;
}

let AddedComplexity = 60 in {
  // Byte loads.
  defm: T_LoadAbsReg_Pat<sextloadi8, L4_loadrb_ur>;
  defm: T_LoadAbsReg_Pat<zextloadi8, L4_loadrub_ur>;
  defm: T_LoadAbsReg_Pat<extloadi8,  L4_loadrub_ur>;

  // Halfword loads.
  defm: T_LoadAbsReg_Pat<sextloadi16, L4_loadrh_ur>;
  defm: T_LoadAbsReg_Pat<zextloadi16, L4_loadruh_ur>;
  defm: T_LoadAbsReg_Pat<extloadi16,  L4_loadruh_ur>;

  // Word and doubleword loads.
  defm: T_LoadAbsReg_Pat<load, L4_loadri_ur>;
  defm: T_LoadAbsReg_Pat<load, L4_loadrd_ur, i64>;
}
1253
// Load from base register + (index register << #u2). The immediate is the
// left-shift amount applied to the second register operand.
class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add (i32 IntRegs:$Rs),
                       (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2))))),
        (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;

let AddedComplexity = 40 in {
  // Byte loads.
  def: Loadxs_pat<extloadi8,   i32, L4_loadrub_rr>;
  def: Loadxs_pat<zextloadi8,  i32, L4_loadrub_rr>;
  def: Loadxs_pat<sextloadi8,  i32, L4_loadrb_rr>;
  // Halfword loads.
  def: Loadxs_pat<extloadi16,  i32, L4_loadruh_rr>;
  def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
  // Word and doubleword loads.
  def: Loadxs_pat<load,        i32, L4_loadri_rr>;
  def: Loadxs_pat<load,        i64, L4_loadrd_rr>;
}
1272
// Load from base register + index register, with no shift: the shift-amount
// operand of the instruction is set to 0.
class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
        (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;

let AddedComplexity = 20 in {
  // Byte loads.
  def: Loadxs_simple_pat<extloadi8,   i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<zextloadi8,  i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<sextloadi8,  i32, L4_loadrb_rr>;
  // Halfword loads.
  def: Loadxs_simple_pat<extloadi16,  i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
  // Word and doubleword loads.
  def: Loadxs_simple_pat<load,        i32, L4_loadri_rr>;
  def: Loadxs_simple_pat<load,        i64, L4_loadrd_rr>;
}
1289
// zext i1 -> i64: materialize 1 or 0 with a mux, then widen with Zext64.
def: Pat<(i64 (zext (i1 PredRegs:$Pu))),
         (Zext64 (C2_muxii PredRegs:$Pu, 1, 0))>;

// zext i32 -> i64: widen with Zext64.
def: Pat<(i64 (zext (i32 IntRegs:$Rs))),
         (Zext64 IntRegs:$Rs)>;
1297
// Stores to "register [shifted left by #u2] + constant", where the constant
// is either a plain unsigned immediate or a CONST32-wrapped global address.
// The unshifted global-address form passes 0 as the shift amount.
let AddedComplexity = 40 in
multiclass T_StoreAbsReg_Pats<InstHexagon MI, RegisterClass RC, ValueType VT,
                              PatFrag stOp> {
  // (Rt << #u2) + #u32
  def: Pat<(stOp (VT RC:$Val),
                 (add (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2),
                      u32_0ImmPred:$Addr)),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, u32_0ImmPred:$Addr, RC:$Val)>;

  // (Rt << #u2) + global address
  def: Pat<(stOp (VT RC:$Val),
                 (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                      (HexagonCONST32 tglobaladdr:$Addr))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tglobaladdr:$Addr, RC:$Val)>;

  // Rt + global address
  def: Pat<(stOp (VT RC:$Val),
                 (add IntRegs:$Rt, (HexagonCONST32 tglobaladdr:$Addr))),
           (MI IntRegs:$Rt, 0, tglobaladdr:$Addr, RC:$Val)>;
}

defm: T_StoreAbsReg_Pats<S4_storerd_ur, DoubleRegs, i64, store>;
defm: T_StoreAbsReg_Pats<S4_storeri_ur, IntRegs,    i32, store>;
defm: T_StoreAbsReg_Pats<S4_storerb_ur, IntRegs,    i32, truncstorei8>;
defm: T_StoreAbsReg_Pats<S4_storerh_ur, IntRegs,    i32, truncstorei16>;
1320
// Store to base register + (index register << #u2).
class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Ru,
               (add (i32 IntRegs:$Rs),
                    (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2)))),
        (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;

let AddedComplexity = 40 in {
  def: Storexs_pat<truncstorei8,  I32, S4_storerb_rr>;
  def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Storexs_pat<store,         I32, S4_storeri_rr>;
  def: Storexs_pat<store,         I64, S4_storerd_rr>;
}
1332
// Matches an immediate that is a multiple of 4 (isShiftedInt<30,2>) but not
// a multiple of 8 (isShiftedInt<29,3>).
def s30_2ProperPred : PatLeaf<(i32 imm), [{
  const int64_t Off = (int64_t)N->getSExtValue();
  if (!isShiftedInt<30,2>(Off))
    return false;
  return !isShiftedInt<29,3>(Off);
}]>;

// Clears the low three bits of the immediate, i.e. rounds it down to a
// multiple of 8.
def RoundTo8 : SDNodeXForm<imm, [{
  const int32_t Off = N->getSExtValue();
  const int32_t Rounded = Off & -8;
  return CurDAG->getTargetConstant(Rounded, SDLoc(N), MVT::i32);
}]>;

// 64-bit store at an offset that is a multiple of 4 but not of 8: add 4 to
// the base register and use the offset rounded down to a multiple of 8.
// Since Off is 4 modulo 8, (Rs + 4) + (Off & -8) equals Rs + Off.
let AddedComplexity = 40 in {
  def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
           (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
}
1345
// Store to base register + index register, with no shift: the shift-amount
// operand of the instruction is set to 0.
class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
        (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;

let AddedComplexity = 20 in {
  def: Store_rr_pat<truncstorei8,  I32, S4_storerb_rr>;
  def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Store_rr_pat<store,         I32, S4_storeri_rr>;
  def: Store_rr_pat<store,         I64, S4_storerd_rr>;
}
1356
1357
// Immediate transforms that narrow the constant to a signed byte, halfword
// or word and re-emit it as an i32 target constant.

def IMM_BYTE : SDNodeXForm<imm, [{
  // -1 etc is represented as 255 etc; narrowing to a signed byte restores
  // the desired signed value.
  const int8_t Byte = N->getSExtValue();
  return CurDAG->getTargetConstant(Byte, SDLoc(N), MVT::i32);
}]>;

def IMM_HALF : SDNodeXForm<imm, [{
  // -1 etc is represented as 65535 etc; narrowing to a signed halfword
  // restores the desired signed value.
  const int16_t Half = N->getSExtValue();
  return CurDAG->getTargetConstant(Half, SDLoc(N), MVT::i32);
}]>;

def IMM_WORD : SDNodeXForm<imm, [{
  // -1 etc can be represented as 4294967295 etc. Currently, it's not doing
  // this, but some optimization might convert -1 to a large +ve number.
  // Narrowing to a signed word restores the desired signed value.
  const int32_t Word = N->getSExtValue();
  return CurDAG->getTargetConstant(Word, SDLoc(N), MVT::i32);
}]>;

// Output-pattern wrappers around the transforms above.
def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
1384
1385// Emit store-immediate, but only when the stored value will not be constant-
1386// extended. The reason for that is that there is no pass that can optimize
1387// constant extenders in store-immediate instructions. In some cases we can
1388// end up with a number of such stores, all of which store the same extended
1389// value (e.g. after unrolling a loop that initializes floating point array).
1390
1391// Predicates to determine if the 16-bit immediate is expressible as a sign-
1392// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
1393// beyond 0..15, so we don't care what is in there.
1394
1395def i16in8ImmPred: PatLeaf<(i32 imm), [{
1396 const int16_t Half = static_cast<int16_t>(N->getSExtValue());
1397 return static_cast<int8_t>(Half) == Half;
1398}]>;
1399
1400// True when the immediate, taken as a 32-bit value, survives a round trip
1401// through a signed 8-bit value unchanged.
1402def i32in8ImmPred: PatLeaf<(i32 imm), [{
1403 const int32_t Word = static_cast<int32_t>(N->getSExtValue());
1404 return static_cast<int8_t>(Word) == Word;
1405}]>;
1406
1407
// Store-immediate patterns for base+offset addresses. The halfword and word
// forms restrict the stored value with i16in8ImmPred / i32in8ImmPred, while
// the byte form accepts s32_0ImmPred. The frame-index variants
// (Storexm_fi_pat, Storexm_fi_add_pat) are commented out and therefore not
// generated.
1408let AddedComplexity = 40 in {
1409 // Even though the offset is not extendable in the store-immediate, we
1410 // can still generate the fi# in the base address. If the final offset
1411 // is not valid for the instruction, we will replace it with a scratch
1412 // register.
1413// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
1414// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
1415// S4_storeirh_io>;
1416// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
1417
1418// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
1419// S4_storeirb_io>;
1420// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
1421// ToImmHalf, S4_storeirh_io>;
1422// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
1423// S4_storeiri_io>;
1424
1425 defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
1426 S4_storeirb_io>;
1427 defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
1428 S4_storeirh_io>;
1429 defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
1430 S4_storeiri_io>;
1431}
1432
// Store-immediate patterns instantiated from Storexm_simple_pat, at the
// default complexity.
// NOTE(review): all three accept any s32_0ImmPred value, whereas the
// Storexm_add_pat halfword/word forms use i16in8ImmPred / i32in8ImmPred to
// avoid constant-extended stored values. Confirm that allowing extended
// immediates here is intentional.
1433def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
1434def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>;
1435def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>;
1436
1437// op(Ps, op(Pt, Pu))
// Two nested predicate-register (i1) logical operations selected as one
// three-operand instruction MI.
1438class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
1439 : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
1440 (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
1441
1442// op(Ps, op(Pt, ~Pu))
// Same shape with the innermost operand negated; the "not" is absorbed by
// MI, which receives the un-negated Pu.
1443class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
1444 : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
1445 (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
1446
// All and/or combinations without negation.
1447def: LogLog_pat<and, and, C4_and_and>;
1448def: LogLog_pat<and, or, C4_and_or>;
1449def: LogLog_pat<or, and, C4_or_and>;
1450def: LogLog_pat<or, or, C4_or_or>;
1451
// All and/or combinations with the last operand negated.
1452def: LogLogNot_pat<and, and, C4_and_andn>;
1453def: LogLogNot_pat<and, or, C4_and_orn>;
1454def: LogLogNot_pat<or, and, C4_or_andn>;
1455def: LogLogNot_pat<or, or, C4_or_orn>;
1456
1457//===----------------------------------------------------------------------===//
1458// PIC: Support for PIC compilations. The patterns and SD nodes defined
1459// below are needed to support code generation for PIC
1460//===----------------------------------------------------------------------===//
1461
// Type profiles: AT_GOT has one i32 result and three operands (the first
// two constrained to i32); AT_PCREL has one i32 result and one i32 operand.
1462def SDT_HexagonAtGot
1463 : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
1464def SDT_HexagonAtPcrel
1465 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
1466
1467// AT_GOT address-of-GOT, address-of-global, offset-in-global
1468def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
1469// AT_PCREL address-of-global
1470def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
1471
// AT_GOT with a zero offset: a single word load from $got, with $addr used
// as the load's immediate operand.
1472def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
1473 (L2_loadri_io I32:$got, imm:$addr)>;
// AT_GOT with an offset satisfying s30_2ImmPred: the same load, followed by
// an A2_addi of the offset.
1474def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
1475 (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
// AT_PCREL: selected as C4_addipc with $addr as its immediate.
1476def: Pat<(HexagonAtPcrel I32:$addr),
1477 (C4_addipc imm:$addr)>;
1478
// 64-bit "Rs op ~Rt" for op in {and, or}: the complement of the second
// operand is folded into A4_andnp / A4_ornp.
1479def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
1480 (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
1481def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
1482 (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
1483
// Rs + (Ru + #imm) selected as a single S4_addaddi.
1484def: Pat<(add (i32 IntRegs:$Rs), (add (i32 IntRegs:$Ru), s32_0ImmPred:$s6)),
1485 (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
1486
// The three patterns below are algebraically equivalent spellings of
// src1 + src2 - src3 (src2 an immediate) and all select S4_subaddi with the
// same operand order.
1487// Rd=add(Rs,sub(#s6,Ru))
1488def: Pat<(add (i32 IntRegs:$src1), (sub s32_0ImmPred:$src2,
1489 (i32 IntRegs:$src3))),
1490 (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
1491
1492// Rd=sub(add(Rs,#s6),Ru)
1493def: Pat<(sub (add (i32 IntRegs:$src1), s32_0ImmPred:$src2),
1494 (i32 IntRegs:$src3)),
1495 (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
1496
1497// Rd=add(sub(Rs,Ru),#s6)
1498def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
1499 (s32_0ImmPred:$src2)),
1500 (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
1501
// 64-bit dst2 ^ (Rss ^ Rtt) selected as M4_xor_xacc.
1502def: Pat<(xor (i64 DoubleRegs:$dst2),
1503 (xor (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
1504 (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
// Ru | (x & #imm) selected as S4_or_andix.
// NOTE(review): the next pattern matches the same DAG shape and selects
// S4_or_andi; which one wins is decided by the selector, not visible here.
1505def: Pat<(or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)),
1506 (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>;
1507
// src1 | (Rs & #imm) selected as S4_or_andi.
1508def: Pat<(or (i32 IntRegs:$src1), (and (i32 IntRegs:$Rs), s32_0ImmPred:$s10)),
1509 (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
1510
// src1 | (Rs | #imm) selected as S4_or_ori.
1511def: Pat<(or (i32 IntRegs:$src1), (or (i32 IntRegs:$Rs), s32_0ImmPred:$s10)),
1512 (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
1513
1514
1515
1516// Count trailing zeros: 64-bit.
// The i32 result is taken directly from S2_ct0p, absorbing the trunc.
1517def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
1518
1519// Count trailing ones: 64-bit.
// Expressed in the DAG as cttz of the complemented input.
1520def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
1521
1522// Define leading/trailing patterns that require zero-extensions to 64 bits.
// Each instruction's result is wrapped in Zext64 to form the i64 value; the
// "(not ...)" inputs select the count-ones variants.
1523def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
1524def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
1525def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
1526def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
1527
1528
// ((1 << n) & Rs) == 0, with n either an immediate satisfying u5_0ImmPred
// or a register, selected as S4_ntstbit_i / S4_ntstbit_r.
1529let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
1530 def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
1531 (S4_ntstbit_i (i32 IntRegs:$Rs), u5_0ImmPred:$u5)>;
1532 def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
1533 (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
1534}
1535
1536// Add extra complexity to prefer these instructions over bitsset/bitsclr.
1537// The reason is that tstbit/ntstbit can be folded into a compound instruction:
1538// if ([!]tstbit(...)) jump ...
// (Rs & mask) != 0 with mask satisfying Set5ImmPred; BITPOS32 transforms
// the mask into the immediate operand given to S2_tstbit_i.
1539let AddedComplexity = 100 in
1540def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
1541 (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
1542
// The negated form, (Rs & mask) == 0, selects S4_ntstbit_i.
1543let AddedComplexity = 100 in
1544def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
1545 (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
1546
1547// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
1548// represented as a compare against "value & 0xFF", which is an exact match
1549// for cmpb (same for cmph). The patterns below do not contain any additional
1550// complexity that would make them preferable, and if they were actually used
1551// instead of cmpb/cmph, they would result in a compare against register that
1552// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
// (Rs & #u6) != 0 -> C4_nbitsclri.
1553def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
1554 (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>;
// (Rs & Rt) != 0 -> C4_nbitsclr.
1555def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
1556 (C4_nbitsclr I32:$Rs, I32:$Rt)>;
// (Rs & Rt) != Rt -> C4_nbitsset.
1557def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
1558 (C4_nbitsset I32:$Rs, I32:$Rt)>;
1559
1560
// Multiply-then-add-immediate: (Rs * #U6) + #u6 and (Rs * Rt) + #u6. The
// added immediate is the first operand of the selected instruction.
1561def: Pat<(add (mul (i32 IntRegs:$Rs), u6_0ImmPred:$U6), u32_0ImmPred:$u6),
1562 (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
1563def: Pat<(add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32_0ImmPred:$u6),
1564 (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
1565
// src1 + (src3 * #imm). The u6_2ImmPred form selects M4_mpyri_addr_u2 (note
// the immediate precedes the register there); the general u32_0ImmPred form
// selects M4_mpyri_addr.
1566def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u6_2ImmPred:$src2)),
1567 (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>;
1568def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u32_0ImmPred:$src2)),
1569 (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>;
1570
// Ru + (x * Rs), all registers.
1571def: Pat<(add (i32 IntRegs:$Ru), (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))),
1572 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>;
1573
// setgt on v8i8 operands selects A4_vcmpbgt (via T_vcmp_pat, defined
// outside this section).
1574def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
1575
// Op((Rx shift #U5), #u8): the shifted value is the left operand of Op and
// the immediate the right one. MI takes the immediate first.
1576class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
1577 : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8),
1578 (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
1579
// add/and combined with a left or logical-right shift.
1580let AddedComplexity = 200 in {
1581 def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
1582 def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
1583 def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
1584 def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
1585}
1586
// The "or" forms use a lower complexity (30) than the add/and forms (200).
1587let AddedComplexity = 30 in {
1588 def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
1589 def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
1590}
1591
// Op(#u8, (Rx shift #U5)): here the immediate is the left operand of Op.
// It is instantiated below only for sub.
1592class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
1593 : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)),
1594 (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
1595
1596def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
1597def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
1598
// The same add/sub-with-shift instructions with an addrga operand in the
// immediate position.
1599let AddedComplexity = 200 in {
1600 def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
1601 (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
1602 def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
1603 (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
1604 def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
1605 (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
1606 def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
1607 (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
1608}
1609
// Left shift of an immediate satisfying s6_0ImmPred by a register amount.
1610def: Pat<(shl s6_0ImmPred:$s6, (i32 IntRegs:$Rt)),
1611 (S4_lsli imm:$s6, IntRegs:$Rt)>;
1612
1613
1614//===----------------------------------------------------------------------===//
1615// MEMOP
1616//===----------------------------------------------------------------------===//
1617
// True when the immediate, narrowed to a signed 8-bit value, lies in
// [-31, -1].
1618def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
1619 const int8_t Byte = static_cast<int8_t>(N->getSExtValue());
1620 return Byte < 0 && Byte >= -31;
1621}]>;
1622
// True when the immediate, narrowed to a signed 16-bit value, lies in
// [-31, -1].
1623def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
1624 const int16_t Half = static_cast<int16_t>(N->getSExtValue());
1625 return Half < 0 && Half >= -31;
1626}]>;
1627
// True when ImmIsSingleBit accepts the low 8 bits of the complemented
// immediate.
1628def Clr5Imm8Pred : PatLeaf<(i32 imm), [{
1629 uint32_t Inverted = static_cast<uint8_t>(~N->getZExtValue());
1630 return ImmIsSingleBit(Inverted);
1631}]>;
1632
// True when ImmIsSingleBit accepts the low 16 bits of the complemented
// immediate.
1633def Clr5Imm16Pred : PatLeaf<(i32 imm), [{
1634 uint32_t Inverted = static_cast<uint16_t>(~N->getZExtValue());
1635 return ImmIsSingleBit(Inverted);
1636}]>;
1637
// Set5Imm{8,16,32}: pass the immediate, truncated to 8/16/32 bits, to
// XformMskToBitPosU5Imm.
1638def Set5Imm8 : SDNodeXForm<imm, [{
1639 uint32_t Mask = static_cast<uint8_t>(N->getZExtValue());
1640 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1641}]>;
1642
1643def Set5Imm16 : SDNodeXForm<imm, [{
1644 uint32_t Mask = static_cast<uint16_t>(N->getZExtValue());
1645 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1646}]>;
1647
1648def Set5Imm32 : SDNodeXForm<imm, [{
1649 uint32_t Mask = static_cast<uint32_t>(N->getZExtValue());
1650 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1651}]>;
1652
// Clr5Imm{8,16,32}: same as above, but the immediate is complemented before
// being truncated.
1653def Clr5Imm8 : SDNodeXForm<imm, [{
1654 uint32_t Mask = static_cast<uint8_t>(~N->getZExtValue());
1655 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1656}]>;
1657
1658def Clr5Imm16 : SDNodeXForm<imm, [{
1659 uint32_t Mask = static_cast<uint16_t>(~N->getZExtValue());
1660 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1661}]>;
1662
1663def Clr5Imm32 : SDNodeXForm<imm, [{
1664 int32_t Mask = static_cast<int32_t>(~N->getZExtValue());
1665 return XformMskToBitPosU5Imm(Mask, SDLoc(N));
1666}]>;
1667
// NegImm{8,16,32}: produce an i32 target constant holding the negation of
// the immediate, after narrowing it to a signed 8/16-bit value (no
// narrowing for the 32-bit form).
1668def NegImm8 : SDNodeXForm<imm, [{
1669 const int8_t Byte = static_cast<int8_t>(N->getSExtValue());
1670 return CurDAG->getTargetConstant(-Byte, SDLoc(N), MVT::i32);
1671}]>;
1672
1673def NegImm16 : SDNodeXForm<imm, [{
1674 const int16_t Half = static_cast<int16_t>(N->getSExtValue());
1675 return CurDAG->getTargetConstant(-Half, SDLoc(N), MVT::i32);
1676}]>;
1677
1678def NegImm32 : SDNodeXForm<imm, [{
1679 const int64_t Val = N->getSExtValue();
 return CurDAG->getTargetConstant(-Val, SDLoc(N), MVT::i32);
1680}]>;
1681
// Identity transform: returns the matched node itself.
1682def IdImm : SDNodeXForm<imm, [{
 return SDValue(N, 0);
}]>;
1683
// Read-modify-write through a bare address: Store(Oper(Load(addr), A), addr)
// where the load and the store use the same address and A is a register.
// MI receives the address, a literal 0 offset, and A. Two patterns are
// produced, one for a register address and one for a frame index.
1684multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
1685 InstHexagon MI> {
1686 // Addr: i32
1687 def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
1688 (MI I32:$Rs, 0, I32:$A)>;
1689 // Addr: fi
1690 def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
1691 (MI AddrFI:$Rs, 0, I32:$A)>;
1692}
1693
// Read-modify-write through base + offset, with a register operand A. The
// offset must satisfy ImmPred, and the load and store must use the same
// base and offset. For each base kind (register, frame index) the address
// is matched both as "add" and as "orisadd" (an "or" for which the orIsAdd
// check succeeds).
1694multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
1695 SDNode Oper, InstHexagon MI> {
1696 // Addr: i32
1697 def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
1698 (add I32:$Rs, ImmPred:$Off)),
1699 (MI I32:$Rs, imm:$Off, I32:$A)>;
1700 def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A),
1701 (orisadd I32:$Rs, ImmPred:$Off)),
1702 (MI I32:$Rs, imm:$Off, I32:$A)>;
1703 // Addr: fi
1704 def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
1705 (add AddrFI:$Rs, ImmPred:$Off)),
1706 (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
1707 def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
1708 (orisadd AddrFI:$Rs, ImmPred:$Off)),
1709 (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
1710}
1711
// Convenience wrapper: emits both the zero-offset (Memopxr_simple_pat) and
// the base+offset (Memopxr_add_pat) patterns for one Load/Store/Oper/MI
// combination.
1712multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
1713 SDNode Oper, InstHexagon MI> {
1714 defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
1715 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
1716}
1717
// Register-operand memops for add, sub, and, or. Each operation is
// instantiated for byte and halfword accesses with all three load
// extension kinds (anyext, sext, zext) mapping to the same instruction,
// and once for word accesses. The offset predicate varies with access size
// (u6_0 / u6_1 / u6_2).
1718let AddedComplexity = 180 in {
1719 // add reg
1720 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
1721 /*anyext*/ L4_add_memopb_io>;
1722 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
1723 /*sext*/ L4_add_memopb_io>;
1724 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
1725 /*zext*/ L4_add_memopb_io>;
1726 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
1727 /*anyext*/ L4_add_memoph_io>;
1728 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
1729 /*sext*/ L4_add_memoph_io>;
1730 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
1731 /*zext*/ L4_add_memoph_io>;
1732 defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
1733
1734 // sub reg
1735 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
1736 /*anyext*/ L4_sub_memopb_io>;
1737 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
1738 /*sext*/ L4_sub_memopb_io>;
1739 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
1740 /*zext*/ L4_sub_memopb_io>;
1741 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
1742 /*anyext*/ L4_sub_memoph_io>;
1743 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
1744 /*sext*/ L4_sub_memoph_io>;
1745 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
1746 /*zext*/ L4_sub_memoph_io>;
1747 defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
1748
1749 // and reg
1750 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
1751 /*anyext*/ L4_and_memopb_io>;
1752 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
1753 /*sext*/ L4_and_memopb_io>;
1754 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
1755 /*zext*/ L4_and_memopb_io>;
1756 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
1757 /*anyext*/ L4_and_memoph_io>;
1758 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
1759 /*sext*/ L4_and_memoph_io>;
1760 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
1761 /*zext*/ L4_and_memoph_io>;
1762 defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
1763
1764 // or reg
1765 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
1766 /*anyext*/ L4_or_memopb_io>;
1767 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
1768 /*sext*/ L4_or_memopb_io>;
1769 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
1770 /*zext*/ L4_or_memopb_io>;
1771 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
1772 /*anyext*/ L4_or_memoph_io>;
1773 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
1774 /*sext*/ L4_or_memoph_io>;
1775 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
1776 /*zext*/ L4_or_memoph_io>;
1777 defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
1778}
1779
1780
// Immediate-operand counterpart of Memopxr_simple_pat. The operand A must
// satisfy the Arg predicate; ArgMod is a node transform applied to it
// before it is handed to MI. The address is a bare register or frame index
// and MI gets a literal 0 offset.
1781multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
1782 PatFrag Arg, SDNodeXForm ArgMod,
1783 InstHexagon MI> {
1784 // Addr: i32
1785 def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
1786 (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
1787 // Addr: fi
1788 def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
1789 (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
1790}
1791
// Immediate-operand counterpart of Memopxr_add_pat: base + offset address
// (offset satisfying ImmPred, matched as "add" or "orisadd", base either a
// register or a frame index), with the operand A constrained by Arg and
// transformed by ArgMod before being passed to MI.
1792multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
1793 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
1794 InstHexagon MI> {
1795 // Addr: i32
1796 def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
1797 (add I32:$Rs, ImmPred:$Off)),
1798 (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
1799 def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A),
1800 (orisadd I32:$Rs, ImmPred:$Off)),
1801 (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
1802 // Addr: fi
1803 def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
1804 (add AddrFI:$Rs, ImmPred:$Off)),
1805 (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
1806 def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
1807 (orisadd AddrFI:$Rs, ImmPred:$Off)),
1808 (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
1809}
1810
// Convenience wrapper: emits both the zero-offset (Memopxi_simple_pat) and
// the base+offset (Memopxi_add_pat) patterns for one combination of
// Load/Store/Oper/Arg/ArgMod/MI.
1811multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
1812 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
1813 InstHexagon MI> {
1814 defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>;
1815 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
1816}
1817
1818
1819let AddedComplexity = 200 in {
1820 // add imm
 // Selects L4_iadd_memop{b,h,w}_io in two situations:
 //  - "add" with an immediate satisfying u5_0ImmPred, passed through
 //    unchanged (IdImm);
 //  - "sub" with an immediate satisfying m5_0Imm*Pred, passed through
 //    NegImm* so that the instruction receives the negated value.
 // Byte and halfword accesses are instantiated once per load extension
 // kind (anyext, sext, zext), all mapping to the same instruction. The
 // halfword "add" group previously listed extloadi16 three times, so the
 // sextloadi16 and zextloadi16 forms were never matched.
1821 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1822 /*anyext*/ IdImm, L4_iadd_memopb_io>;
1823 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1824 /*sext*/ IdImm, L4_iadd_memopb_io>;
1825 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1826 /*zext*/ IdImm, L4_iadd_memopb_io>;
1827 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1828 /*anyext*/ IdImm, L4_iadd_memoph_io>;
1829 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1830 /*sext*/ IdImm, L4_iadd_memoph_io>;
1831 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1832 /*zext*/ IdImm, L4_iadd_memoph_io>;
1833 defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
1834 L4_iadd_memopw_io>;
1835 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1836 /*anyext*/ NegImm8, L4_iadd_memopb_io>;
1837 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1838 /*sext*/ NegImm8, L4_iadd_memopb_io>;
1839 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1840 /*zext*/ NegImm8, L4_iadd_memopb_io>;
1841 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1842 /*anyext*/ NegImm16, L4_iadd_memoph_io>;
1843 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1844 /*sext*/ NegImm16, L4_iadd_memoph_io>;
1845 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1846 /*zext*/ NegImm16, L4_iadd_memoph_io>;
1847 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
1848 L4_iadd_memopw_io>;
1849
1850 // sub imm
 // Selects L4_isub_memop{b,h,w}_io in two situations:
 //  - "sub" with an immediate satisfying u5_0ImmPred, passed through
 //    unchanged (IdImm);
 //  - "add" with an immediate satisfying m5_0Imm*Pred, passed through
 //    NegImm* so that the instruction receives the negated value.
1851 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1852 /*anyext*/ IdImm, L4_isub_memopb_io>;
1853 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1854 /*sext*/ IdImm, L4_isub_memopb_io>;
1855 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1856 /*zext*/ IdImm, L4_isub_memopb_io>;
1857 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1858 /*anyext*/ IdImm, L4_isub_memoph_io>;
1859 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1860 /*sext*/ IdImm, L4_isub_memoph_io>;
1861 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1862 /*zext*/ IdImm, L4_isub_memoph_io>;
1863 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
1864 L4_isub_memopw_io>;
1865 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1866 /*anyext*/ NegImm8, L4_isub_memopb_io>;
1867 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1868 /*sext*/ NegImm8, L4_isub_memopb_io>;
1869 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1870 /*zext*/ NegImm8, L4_isub_memopb_io>;
1871 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1872 /*anyext*/ NegImm16, L4_isub_memoph_io>;
1873 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1874 /*sext*/ NegImm16, L4_isub_memoph_io>;
1875 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1876 /*zext*/ NegImm16, L4_isub_memoph_io>;
1877 defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
1878 L4_isub_memopw_io>;
1879
1880 // clrbit imm
 // "and" with an immediate accepted by Clr5Imm*Pred selects
 // L4_iand_memop{b,h,w}_io; the Clr5Imm* transform converts the immediate
 // into the operand the instruction takes.
1881 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1882 /*anyext*/ Clr5Imm8, L4_iand_memopb_io>;
1883 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1884 /*sext*/ Clr5Imm8, L4_iand_memopb_io>;
1885 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1886 /*zext*/ Clr5Imm8, L4_iand_memopb_io>;
1887 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1888 /*anyext*/ Clr5Imm16, L4_iand_memoph_io>;
1889 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1890 /*sext*/ Clr5Imm16, L4_iand_memoph_io>;
1891 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1892 /*zext*/ Clr5Imm16, L4_iand_memoph_io>;
1893 defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32,
1894 L4_iand_memopw_io>;
1895
1896 // setbit imm
 // "or" with an immediate accepted by Set5ImmPred selects
 // L4_ior_memop{b,h,w}_io; the Set5Imm* transform converts the immediate
 // into the operand the instruction takes. The same predicate is used for
 // all three access sizes; only the transform differs.
1897 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1898 /*anyext*/ Set5Imm8, L4_ior_memopb_io>;
1899 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1900 /*sext*/ Set5Imm8, L4_ior_memopb_io>;
1901 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1902 /*zext*/ Set5Imm8, L4_ior_memopb_io>;
1903 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1904 /*anyext*/ Set5Imm16, L4_ior_memoph_io>;
1905 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1906 /*sext*/ Set5Imm16, L4_ior_memoph_io>;
1907 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1908 /*zext*/ Set5Imm16, L4_ior_memoph_io>;
1909 defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32,
1910 L4_ior_memopw_io>;
1911}
1912
// Register-vs-immediate compares: setne, setle (signed) and setule
// (unsigned, immediate restricted by u9_0ImmPred).
1913def : T_CMP_pat <C4_cmpneqi, setne, s32_0ImmPred>;
1914def : T_CMP_pat <C4_cmpltei, setle, s32_0ImmPred>;
1915def : T_CMP_pat <C4_cmplteui, setule, u9_0ImmPred>;
1916
1917// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// Implemented as C4_cmpltei against the immediate transformed by
// DEC_CONST_SIGNED.
1918def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
1919 (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
1920
1921// rs != rt -> !(rs == rt).
// NOTE(review): the second operand here is an immediate, not a register,
// and this appears to match the same DAG as the T_CMP_pat <C4_cmpneqi, ...>
// instantiation above — confirm whether it is redundant.
1922def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
1923 (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
1924
1925// Node transform used to turn an immediate C into C-1.
1926def DEC_CONST_BYTE : SDNodeXForm<imm, [{
1927 // Narrow the immediate to 32 bits and let XformU7ToU7M1Imm build the node.
1928 int32_t Val = static_cast<int32_t>(N->getSExtValue());
1929 return XformU7ToU7M1Imm(Val, SDLoc(N));
1930}]>;
1931
1932// For the sequence
1933// zext( setult ( and(Rs, 255), u8))
1934// Use the isdigit transformation below
1935
1936// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
1937// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
1938// The isdigit transformation relies on two 'clever' aspects:
1939// 1) The data type is unsigned which allows us to eliminate a zero test after
1940// biasing the expression by 48. We are depending on the representation of
1941// the unsigned types, and semantics.
1942// 2) The front end has converted <= 9 into < 10 on entry to LLVM
1943//
1944// For the C code:
1945// retval = ((c>='0') & (c<='9')) ? 1 : 0;
1946// The code is transformed upstream of llvm into
1947// retval = (c-48) < 10 ? 1 : 0;
// zext((Rs & 255) <u C), with C satisfying u7_0StrictPosImmPred, becomes a
// byte "greater than unsigned" compare against the immediate transformed by
// DEC_CONST_BYTE. C2_muxii then yields 0 when that compare is true and 1
// otherwise, which inverts "greater than C-1" into "less than C".
1948let AddedComplexity = 139 in
1949def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
1950 u7_0StrictPosImmPred:$src2)))),
1951 (C2_muxii (A4_cmpbgtui IntRegs:$src1,
1952 (DEC_CONST_BYTE u7_0StrictPosImmPred:$src2)),
1953 0, 1)>;
1954
// Load of type VT from an address matched by Addr; MI takes the address as
// its only operand.
1955class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
1956 : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
1957
// As Loada_pat, with the instruction's result wrapped in ValueMod.
1958class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
1959 InstHexagon MI>
1960 : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
1961
// Store of Value to an address matched by Addr. Note the operand order
// swap: the DAG node has (value, address), MI takes (address, value).
1962class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
1963 : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
1964
// As Storea_pat, with the stored value passed through ValueMod first.
1965class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
1966 InstHexagon MI>
1967 : Pat<(Store Value:$val, Addr:$addr),
1968 (MI Addr:$addr, (ValueMod Value:$val))>;
1969
// Stores to an addrga address using the PS_store*abs instructions. The
// Stoream_pat forms handle truncating stores of an i64 by storing its low
// subregister (LoReg).
// NOTE(review): the Storea_pat lines and the truncstorei32 Stoream_pat line
// are repeated later in this file under AddedComplexity = 100.
1970let AddedComplexity = 30 in {
1971 def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
1972 def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
1973 def: Storea_pat<store, I32, addrga, PS_storeriabs>;
1974 def: Storea_pat<store, I64, addrga, PS_storerdabs>;
1975
1976 def: Stoream_pat<truncstorei8, I64, addrga, LoReg, PS_storerbabs>;
1977 def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
1978 def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
1979}
1980
// Atomic stores of 8/16/32/64 bits to an addrgp address, selected as the
// ordinary S2_store*gp instructions.
// NOTE(review): SwapSt is defined outside this section; presumably it
// swaps the operand order of the atomic_store node so it fits Storea_pat's
// (value, address) shape — confirm.
1981def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
1982def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
1983def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
1984def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
1985
// Non-atomic stores to an addrgp address, selected as S2_store*gp.
1986let AddedComplexity = 100 in {
1987 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
1988 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
1989 def: Storea_pat<store, I32, addrgp, S2_storerigp>;
1990 def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
1991
1992 // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
1993 // to "r0 = 1; memw(#foo) = r0"
 // NOTE(review): the selected instruction is S2_storerbgp, whose name
 // suggests a byte store rather than the "memw" shown above — confirm.
 // The inner "let" repeats the value already set by the enclosing one.
1994 let AddedComplexity = 100 in
1995 def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
1996 (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
1997}
1998
// Load of type VT (i32 unless overridden) from a HexagonCONST32-wrapped
// global address; MI takes the global address directly.
1999class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
2000 : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
2001 (VT (MI tglobaladdr:$absaddr))>;
2002
// Any-extending loads share the instruction used for zero-extending loads;
// zextloadi1 uses the unsigned-byte load.
2003let AddedComplexity = 30 in {
2004 def: LoadAbs_pats <load, PS_loadriabs>;
2005 def: LoadAbs_pats <zextloadi1, PS_loadrubabs>;
2006 def: LoadAbs_pats <sextloadi8, PS_loadrbabs>;
2007 def: LoadAbs_pats <extloadi8, PS_loadrubabs>;
2008 def: LoadAbs_pats <zextloadi8, PS_loadrubabs>;
2009 def: LoadAbs_pats <sextloadi16, PS_loadrhabs>;
2010 def: LoadAbs_pats <extloadi16, PS_loadruhabs>;
2011 def: LoadAbs_pats <zextloadi16, PS_loadruhabs>;
2012 def: LoadAbs_pats <load, PS_loadrdabs, i64>;
2013}
2014
// zextloadi1 producing an i64: unsigned-byte load wrapped in Zext64.
2015let AddedComplexity = 30 in
2016def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
2017 (Zext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
2018
// Atomic loads of 8/16/32/64 bits from an addrgp address, selected as the
// ordinary L2_load*gp instructions (unsigned variants for 8 and 16 bits).
2019def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
2020def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
2021def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
2022def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
2023
2024// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
2025def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
2026def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
2027
// The reverse direction: an i1 value is passed through I1toI32 and then
// stored with the byte-store instruction.
2028def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
2029def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
2030
2031// Map from load(globaladdress) -> mem[u][bhwd](#foo)
// The address must be wrapped in HexagonCONST32_GP; VT defaults to i32.
2032class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
2033 : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
2034 (VT (MI tglobaladdr:$global))>;
2035
// Any-extending loads share the instruction used for zero-extending loads.
2036let AddedComplexity = 100 in {
2037 def: LoadGP_pats <extloadi8, L2_loadrubgp>;
2038 def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
2039 def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
2040 def: LoadGP_pats <extloadi16, L2_loadruhgp>;
2041 def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
2042 def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
2043 def: LoadGP_pats <load, L2_loadrigp>;
2044 def: LoadGP_pats <load, L2_loadrdgp, i64>;
2045}
2046
2047// When the Interprocedural Global Variable optimizer realizes that a certain
2048// global variable takes only two constant values, it shrinks the global to
2049// a boolean. Catch those loads here in the following patterns.
2050let AddedComplexity = 100 in {
2051 def: LoadGP_pats <extloadi1, L2_loadrubgp>;
2052 def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
2053}
2054
2055// Transfer global address into a register
// Covers a HexagonCONST32-wrapped global address and HexagonCONST32_GP-
// wrapped block and global addresses; each is materialized with A2_tfrsi.
2056def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
2057def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>;
2058def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
2059
// Stores whose address is a constant satisfying u32_0ImmPred, selected as
// PS_store*abs. No i64 form is defined here.
2060let AddedComplexity = 30 in {
2061 def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
2062 def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
2063 def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
2064}
2065
// Loads whose address is a constant satisfying u32_0ImmPred, selected as
// PS_load*abs. Only sign- and zero-extending sub-word loads are listed.
2066let AddedComplexity = 30 in {
2067 def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>;
2068 def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>;
2069 def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
2070 def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
2071 def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
2072}
2073
2074// Indexed store word - global address.
2075// memw(Rs+#u6:2)=#S8
// Instantiates Storex_add_pat (defined outside this section) for "store"
// with addrga and u6_2ImmPred as its two predicate arguments, selecting
// S4_storeiri_io.
2076let AddedComplexity = 100 in
2077defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
2078
// Loads from a global address that has only one use in the current basic
// block (address matched by addrga). Any-extending loads share the
// instruction used for the zero-extending form.
let AddedComplexity = 100 in {
  // 8-bit -> i32.
  def: Loada_pat<extloadi8,   i32, addrga, PS_loadrubabs>;
  def: Loada_pat<sextloadi8,  i32, addrga, PS_loadrbabs>;
  def: Loada_pat<zextloadi8,  i32, addrga, PS_loadrubabs>;

  // 16-bit -> i32.
  def: Loada_pat<extloadi16,  i32, addrga, PS_loadruhabs>;
  def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
  def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;

  // Full-width i32 and i64.
  def: Loada_pat<load,        i32, addrga, PS_loadriabs>;
  def: Loada_pat<load,        i64, addrga, PS_loadrdabs>;
}
2092
// Stores to a global address that has only one use in the current basic
// block (address matched by addrga).
let AddedComplexity = 100 in {
  def: Storea_pat<truncstorei8,  I32, addrga, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrga, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrga, PS_storerdabs>;

  // Truncating i64 -> i32 store: LoReg is applied to the 64-bit value and the
  // result is stored with the word store.
  def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
}
2102
// i8/i16/i32 -> i64 extending loads from an addrga address. The loaded value
// is widened with Zext64 (any-/zero-extending loads) or Sext64
// (sign-extending loads).
// A complexity of 120 is needed here to override the preceding handling of
// zextload.
let AddedComplexity = 120 in {
  // From 8 bits.
  def: Loadam_pat<extloadi8,   i64, addrga, Zext64, PS_loadrubabs>;
  def: Loadam_pat<sextloadi8,  i64, addrga, Sext64, PS_loadrbabs>;
  def: Loadam_pat<zextloadi8,  i64, addrga, Zext64, PS_loadrubabs>;

  // From 16 bits.
  def: Loadam_pat<extloadi16,  i64, addrga, Zext64, PS_loadruhabs>;
  def: Loadam_pat<sextloadi16, i64, addrga, Sext64, PS_loadrhabs>;
  def: Loadam_pat<zextloadi16, i64, addrga, Zext64, PS_loadruhabs>;

  // From 32 bits.
  def: Loadam_pat<extloadi32,  i64, addrga, Zext64, PS_loadriabs>;
  def: Loadam_pat<sextloadi32, i64, addrga, Sext64, PS_loadriabs>;
  def: Loadam_pat<zextloadi32, i64, addrga, Zext64, PS_loadriabs>;
}
2119
// Loads whose address is matched by addrgp; same instruction choices as the
// addrga load patterns.
let AddedComplexity = 100 in {
  // 8-bit -> i32.
  def: Loada_pat<extloadi8,   i32, addrgp, PS_loadrubabs>;
  def: Loada_pat<sextloadi8,  i32, addrgp, PS_loadrbabs>;
  def: Loada_pat<zextloadi8,  i32, addrgp, PS_loadrubabs>;

  // 16-bit -> i32.
  def: Loada_pat<extloadi16,  i32, addrgp, PS_loadruhabs>;
  def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
  def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;

  // Full-width i32 and i64.
  def: Loada_pat<load,        i32, addrgp, PS_loadriabs>;
  def: Loada_pat<load,        i64, addrgp, PS_loadrdabs>;
}
2132
// Stores whose address is matched by addrgp.
let AddedComplexity = 100 in {
  def: Storea_pat<truncstorei8,  I32, addrgp, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrgp, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrgp, PS_storerdabs>;
}
2139
// Atomic loads from an addrgp address. The 8- and 16-bit forms select the
// same instructions as the zero-extending loads above.
def: Loada_pat<atomic_load_8,  i32, addrgp, PS_loadrubabs>;
def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;
2144
// Atomic stores to an addrgp address. Each atomic_store fragment is wrapped
// in SwapSt before being handed to Storea_pat.
def: Storea_pat<SwapSt<atomic_store_8>,  I32, addrgp, PS_storerbabs>;
def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
2149
// Assemble a 64-bit value from four 32-bit registers and select it as
// Insert4. In the matched expression:
//   $h0 is masked to 16 bits and left in place,
//   $h1 is masked to 16 bits and shifted left by 16,
//   $h2 is masked to 16 bits and shifted left by 32,
//   $h3 is shifted left by 48 (no mask).
def: Pat<(or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$h1),
                                               (i32 65535))))),
                          (i32 16)),
                     (i64 (zext (i32 (and (i32 IntRegs:$h0),
                                          (i32 65535)))))),
                 (shl (i64 (anyext (i32 (and (i32 IntRegs:$h2),
                                             (i32 65535))))),
                      (i32 32))),
             (shl (i64 (anyext (i32 IntRegs:$h3))), (i32 48))),
         (Insert4 IntRegs:$h0, IntRegs:$h1, IntRegs:$h2, IntRegs:$h3)>;
2157
// ISD::PREFETCH is custom-lowered into HexagonISD::DCFETCH because the
// ISD::PREFETCH SDNode carries the MayLoad and MayStore properties, neither
// of which is wanted here. The DCFETCH node has no results and two operands
// (a pointer and an integer), and is chained.
def SDTHexagonDCFETCH
  : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def HexagonDCFETCH
  : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, [SDNPHasChain]>;

// Base register plus explicit offset operand.
def: Pat<(HexagonDCFETCH IntRegs:$Base, u11_3ImmPred:$Off),
         (Y2_dcfetchbo IntRegs:$Base, imm:$Off)>;
// Offset folded into the address as an add, with a zero offset operand.
def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Base, u11_3ImmPred:$Off)),
                         (i32 0)),
         (Y2_dcfetchbo IntRegs:$Base, imm:$Off)>;
2169
// Leaves matching floating-point immediates of type f32 and f64.
def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;

// Transform a floating-point immediate into an integer target constant that
// carries the same bit pattern; the integer type has the same bit width as
// the floating-point value.
def ftoi : SDNodeXForm<fpimm, [{
  APInt Bits = N->getValueAPF().bitcastToAPInt();
  MVT IntTy = MVT::getIntegerVT(Bits.getBitWidth());
  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), IntTy);
}]>;
2178
2179
// ((Rss >> #u6) + 1) >> 1 on a 64-bit value is selected as S2_asr_i_p_rnd
// with the inner shift amount as its immediate.
def: Pat<(sra (i64 (add (i64 (sra I64:$Rss, u6_0ImmPred:$Sh)), 1)),
              (i32 1)),
         (S2_asr_i_p_rnd I64:$Rss, imm:$Sh)>;
2182
// Type profile: one i32 result computed from one i64 operand.
def SDTHexagonI32I64
  : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i64>]>;

// Target node HexagonISD::POPCOUNT and its selection as S5_popcountp.
def HexagonPOPCOUNT : SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;

def: Pat<(HexagonPOPCOUNT I64:$Src),
         (S5_popcountp I64:$Src)>;
2189
// Floating-point loads. f32 values use the word-load instructions and f64
// values the doubleword-load instructions; the AddedComplexity values rank
// the addressing forms against each other.

// Base + immediate offset.
let AddedComplexity = 20 in {
  defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
  defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
}

// T_LoadAbsReg_Pat forms (L4_loadr*_ur).
let AddedComplexity = 60 in {
  defm: T_LoadAbsReg_Pat<load, L4_loadri_ur, f32>;
  defm: T_LoadAbsReg_Pat<load, L4_loadrd_ur, f64>;
}

// Loadxs_pat forms (L4_loadr*_rr).
let AddedComplexity = 40 in {
  def: Loadxs_pat<load, f32, L4_loadri_rr>;
  def: Loadxs_pat<load, f64, L4_loadrd_rr>;
}

// Loadxs_simple_pat forms, same instructions at lower complexity.
let AddedComplexity = 20 in {
  def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
  def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
}
2209
// Floating-point loads from immediate (u32_0ImmPred) and addrga addresses.
let AddedComplexity = 80 in {
  def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
  def: Loada_pat<load, f32, addrga,       PS_loadriabs>;
  def: Loada_pat<load, f64, addrga,       PS_loadrdabs>;
}

// Floating-point loads from HexagonCONST32_GP-wrapped globals.
let AddedComplexity = 100 in {
  def: LoadGP_pats<load, L2_loadrigp, f32>;
  def: LoadGP_pats<load, L2_loadrdgp, f64>;
}
2220
// Floating-point stores, base + immediate offset.
let AddedComplexity = 20 in {
  defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
  defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
}

// Simple patterns should be tried with the least priority, so they carry no
// AddedComplexity.
def: Storex_simple_pat<store, F32, S2_storeri_io>;
def: Storex_simple_pat<store, F64, S2_storerd_io>;
2229
// Floating-point stores, T_StoreAbsReg_Pats forms (S4_storer*_ur).
let AddedComplexity = 60 in {
  defm: T_StoreAbsReg_Pats<S4_storeri_ur, IntRegs,    f32, store>;
  defm: T_StoreAbsReg_Pats<S4_storerd_ur, DoubleRegs, f64, store>;
}

// Floating-point stores, Storexs_pat forms (S4_storer*_rr).
let AddedComplexity = 40 in {
  def: Storexs_pat<store, F32, S4_storeri_rr>;
  def: Storexs_pat<store, F64, S4_storerd_rr>;
}

// Floating-point stores, Store_rr_pat forms: same instructions at lower
// complexity.
let AddedComplexity = 20 in {
  def: Store_rr_pat<store, F32, S4_storeri_rr>;
  def: Store_rr_pat<store, F64, S4_storerd_rr>;
}
2244
// Floating-point stores to an addrga address.
let AddedComplexity = 80 in {
  def: Storea_pat<store, F32, addrga, PS_storeriabs>;
  def: Storea_pat<store, F64, addrga, PS_storerdabs>;
}

// Floating-point stores to an addrgp address. Unlike the integer addrgp
// stores, these select the S2_storer*gp instructions.
let AddedComplexity = 100 in {
  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
}
2254
// Floating-point base + offset and simple store patterns at default
// complexity.
// NOTE(review): the same four instantiations also appear earlier in this
// file (the Storex_pat ones under AddedComplexity = 20); these look
// redundant -- confirm before removing.
defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
def:  Storex_simple_pat<store, F32, S2_storeri_io>;
def:  Storex_simple_pat<store, F64, S2_storerd_io>;
2259
// Single-precision add, subtract and multiply.
def: Pat<(fadd F32:$Rs, F32:$Rt), (F2_sfadd F32:$Rs, F32:$Rt)>;
def: Pat<(fsub F32:$Rs, F32:$Rt), (F2_sfsub F32:$Rs, F32:$Rt)>;
def: Pat<(fmul F32:$Rs, F32:$Rt), (F2_sfmpy F32:$Rs, F32:$Rt)>;
2268
// Single-precision fminnum/fmaxnum (requires HasV5T).
let Predicates = [HasV5T] in {
  def: Pat<(f32 (fminnum F32:$A, F32:$B)),
           (F2_sfmin F32:$A, F32:$B)>;
  def: Pat<(f32 (fmaxnum F32:$A, F32:$B)),
           (F2_sfmax F32:$A, F32:$B)>;
}
2273
// Recognize min/max written as a compare feeding a select of the compared
// operands (requires HasV5T).
let AddedComplexity = 100, Predicates = [HasV5T] in {
  // select(Cmp(A, B), A, B): the select arms are in compare order.
  class SfSel12<PatFrag Cmp, InstHexagon MI>
    : Pat<(select (i1 (Cmp F32:$A, F32:$B)), F32:$A, F32:$B),
          (MI F32:$A, F32:$B)>;
  // select(Cmp(A, B), B, A): the select arms are swapped.
  class SfSel21<PatFrag Cmp, InstHexagon MI>
    : Pat<(select (i1 (Cmp F32:$A, F32:$B)), F32:$B, F32:$A),
          (MI F32:$A, F32:$B)>;

  // Arms in compare order: less-than picks min, greater-than picks max.
  def: SfSel12<setolt, F2_sfmin>;
  def: SfSel12<setole, F2_sfmin>;
  def: SfSel12<setogt, F2_sfmax>;
  def: SfSel12<setoge, F2_sfmax>;
  // Arms swapped: the roles of min and max are exchanged.
  def: SfSel21<setolt, F2_sfmax>;
  def: SfSel21<setole, F2_sfmax>;
  def: SfSel21<setogt, F2_sfmin>;
  def: SfSel21<setoge, F2_sfmin>;
}
2291
// Direct floating-point compares: an i1-producing OpNode on two registers is
// selected as MI on the same operands, in the same order.
class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<(i1 (OpNode F32:$Rs, F32:$Rt)),
        (MI F32:$Rs, F32:$Rt)>;
class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<(i1 (OpNode F64:$Rss, F64:$Rtt)),
        (MI F64:$Rss, F64:$Rtt)>;

// Single precision.
def: T_fcmp32_pat<setoge, F2_sfcmpge>;
def: T_fcmp32_pat<setuo,  F2_sfcmpuo>;
def: T_fcmp32_pat<setoeq, F2_sfcmpeq>;
def: T_fcmp32_pat<setogt, F2_sfcmpgt>;

// Double precision.
def: T_fcmp64_pat<setoge, F2_dfcmpge>;
def: T_fcmp64_pat<setuo,  F2_dfcmpuo>;
def: T_fcmp64_pat<setoeq, F2_dfcmpeq>;
def: T_fcmp64_pat<setogt, F2_dfcmpgt>;
2308
// Compares given as plain condition codes (seteq/setgt/setge): instantiate
// the single- and double-precision pattern together (requires HasV5T).
let Predicates = [HasV5T] in
multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
  // Single precision (IntRegs).
  def: Pat<(i1 (cmpOp F32:$Rs, F32:$Rt)),
           (IntMI F32:$Rs, F32:$Rt)>;
  // Double precision (DoubleRegs).
  def: Pat<(i1 (cmpOp F64:$Rss, F64:$Rtt)),
           (DoubleMI F64:$Rss, F64:$Rtt)>;
}

defm: T_fcmp_pats<seteq, F2_sfcmpeq, F2_dfcmpeq>;
defm: T_fcmp_pats<setgt, F2_sfcmpgt, F2_dfcmpgt>;
defm: T_fcmp_pats<setge, F2_sfcmpge, F2_dfcmpge>;
2322
2323//===----------------------------------------------------------------------===//
2324// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
2325//===----------------------------------------------------------------------===//
2326let Predicates = [HasV5T] in
2327multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
2328 // IntRegs
2329 def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
2330 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2331 (IntMI F32:$src1, F32:$src2))>;
2332
2333 // DoubleRegs
2334 def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
2335 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2336 (DoubleMI F64:$src1, F64:$src2))>;
2337}
2338
2339defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
2340defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
2341defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
2342
2343//===----------------------------------------------------------------------===//
2344// Multiclass to define 'Def Pats' for the following dags:
2345// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
2346// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
2347// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
2348// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
2349//===----------------------------------------------------------------------===//
2350let Predicates = [HasV5T] in
2351multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
2352 InstHexagon DoubleMI> {
2353 // IntRegs
2354 def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
2355 (C2_not (IntMI F32:$src1, F32:$src2))>;
2356 def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
2357 (IntMI F32:$src1, F32:$src2)>;
2358 def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
2359 (IntMI F32:$src1, F32:$src2)>;
2360 def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
2361 (C2_not (IntMI F32:$src1, F32:$src2))>;
2362
2363 // DoubleRegs
2364 def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
2365 (C2_not (DoubleMI F64:$src1, F64:$src2))>;
2366 def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
2367 (DoubleMI F64:$src1, F64:$src2)>;
2368 def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
2369 (DoubleMI F64:$src1, F64:$src2)>;
2370 def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
2371 (C2_not (DoubleMI F64:$src1, F64:$src2))>;
2372}
2373
2374defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
2375defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
2376defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
2377
2378//===----------------------------------------------------------------------===//
2379// Multiclass to define 'Def Pats' for the following dags:
2380// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
2381// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
2382// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
2383// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
2384//===----------------------------------------------------------------------===//
2385let Predicates = [HasV5T] in
2386multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
2387 InstHexagon DoubleMI> {
2388 // IntRegs
2389 def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
2390 (C2_not (IntMI F32:$src2, F32:$src1))>;
2391 def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
2392 (IntMI F32:$src2, F32:$src1)>;
2393 def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
2394 (IntMI F32:$src2, F32:$src1)>;
2395 def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
2396 (C2_not (IntMI F32:$src2, F32:$src1))>;
2397
2398 // DoubleRegs
2399 def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
2400 (C2_not (DoubleMI F64:$src2, F64:$src1))>;
2401 def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
2402 (DoubleMI F64:$src2, F64:$src1)>;
2403 def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
2404 (DoubleMI F64:$src2, F64:$src1)>;
2405 def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
2406 (C2_not (DoubleMI F64:$src2, F64:$src1))>;
2407}
2408
2409defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
2410defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
2411
2412
// Ordered (seto) is the inverse of unordered (setuo); see
// http://llvm.org/docs/LangRef.html#i_fcmp. It is selected as not(cmpuo).
// Immediate operands are first materialized via ftoi into A2_tfrsi (f32) or
// CONST64 (f64).
let Predicates = [HasV5T] in {
  // Single precision.
  def: Pat<(i1 (seto F32:$Rs, F32:$Rt)),
           (C2_not (F2_sfcmpuo F32:$Rt, F32:$Rs))>;
  def: Pat<(i1 (seto F32:$Rs, f32ImmPred:$Imm)),
           (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs))>;
  // Double precision.
  def: Pat<(i1 (seto F64:$Rss, F64:$Rtt)),
           (C2_not (F2_dfcmpuo F64:$Rtt, F64:$Rss))>;
  def: Pat<(i1 (seto F64:$Rss, f64ImmPred:$Imm)),
           (C2_not (F2_dfcmpuo (CONST64 (ftoi $Imm)), F64:$Rss))>;
}
2424
// Ordered lt: a < b is selected as cmpgt(b, a), i.e. with the operands
// exchanged.
let Predicates = [HasV5T] in {
  // Single precision.
  def: Pat<(i1 (setolt F32:$Rs, F32:$Rt)),
           (F2_sfcmpgt F32:$Rt, F32:$Rs)>;
  def: Pat<(i1 (setolt F32:$Rs, f32ImmPred:$Imm)),
           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs)>;
  // Double precision.
  def: Pat<(i1 (setolt F64:$Rss, F64:$Rtt)),
           (F2_dfcmpgt F64:$Rtt, F64:$Rss)>;
  def: Pat<(i1 (setolt F64:$Rss, f64ImmPred:$Imm)),
           (F2_dfcmpgt (CONST64 (ftoi $Imm)), F64:$Rss)>;
}
2436
// Unordered lt: selected as or(cmpuo(a, b), cmpgt(b, a)).
let Predicates = [HasV5T] in {
  // Single precision.
  def: Pat<(i1 (setult F32:$Rs, F32:$Rt)),
           (C2_or (F2_sfcmpuo F32:$Rs, F32:$Rt),
                  (F2_sfcmpgt F32:$Rt, F32:$Rs))>;
  def: Pat<(i1 (setult F32:$Rs, f32ImmPred:$Imm)),
           (C2_or (F2_sfcmpuo F32:$Rs, (f32 (A2_tfrsi (ftoi $Imm)))),
                  (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs))>;
  // Double precision.
  def: Pat<(i1 (setult F64:$Rss, F64:$Rtt)),
           (C2_or (F2_dfcmpuo F64:$Rss, F64:$Rtt),
                  (F2_dfcmpgt F64:$Rtt, F64:$Rss))>;
  def: Pat<(i1 (setult F64:$Rss, f64ImmPred:$Imm)),
           (C2_or (F2_dfcmpuo F64:$Rss, (CONST64 (ftoi $Imm))),
                  (F2_dfcmpgt (CONST64 (ftoi $Imm)), F64:$Rss))>;
}
2452
// Ordered le: a <= b is selected as cmpge(b, a), i.e. with the operands
// exchanged.
let Predicates = [HasV5T] in {
  // Single precision: rs <= rt -> rt >= rs.
  def: Pat<(i1 (setole F32:$Rs, F32:$Rt)),
           (F2_sfcmpge F32:$Rt, F32:$Rs)>;
  def: Pat<(i1 (setole F32:$Rs, f32ImmPred:$Imm)),
           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs)>;

  // Double precision: Rss <= Rtt -> Rtt >= Rss.
  def: Pat<(i1 (setole F64:$Rss, F64:$Rtt)),
           (F2_dfcmpge F64:$Rtt, F64:$Rss)>;
  def: Pat<(i1 (setole F64:$Rss, f64ImmPred:$Imm)),
           (F2_dfcmpge (CONST64 (ftoi $Imm)), F64:$Rss)>;
}
2467
// Unordered le: selected as or(cmpuo(a, b), cmpge(b, a)).
let Predicates = [HasV5T] in {
  // Single precision: rs <= rt -> rt >= rs.
  def: Pat<(i1 (setule F32:$Rs, F32:$Rt)),
           (C2_or (F2_sfcmpuo F32:$Rs, F32:$Rt),
                  (F2_sfcmpge F32:$Rt, F32:$Rs))>;
  def: Pat<(i1 (setule F32:$Rs, f32ImmPred:$Imm)),
           (C2_or (F2_sfcmpuo F32:$Rs, (f32 (A2_tfrsi (ftoi $Imm)))),
                  (F2_sfcmpge (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs))>;
  // Double precision.
  def: Pat<(i1 (setule F64:$Rss, F64:$Rtt)),
           (C2_or (F2_dfcmpuo F64:$Rss, F64:$Rtt),
                  (F2_dfcmpge F64:$Rtt, F64:$Rss))>;
  def: Pat<(i1 (setule F64:$Rss, f64ImmPred:$Imm)),
           (C2_or (F2_dfcmpuo F64:$Rss, (CONST64 (ftoi $Imm))),
                  (F2_dfcmpge (CONST64 (ftoi $Imm)), F64:$Rss))>;
}
2484
// Ordered ne: true only when the operands are ordered (neither is NaN) AND
// unequal. That is the complement of "unordered or equal", so it is selected
// as not(or(cmpuo(a, b), cmpeq(a, b))).
// A bare not(cmpeq) would also be true when either operand is NaN, which is
// the unordered-ne (setune) result, not the ordered one.
// Immediate operands are materialized via ftoi into A2_tfrsi (f32) or
// CONST64 (f64).
let Predicates = [HasV5T] in {
  def: Pat<(i1 (setone F32:$src1, F32:$src2)),
           (C2_not (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
                          (F2_sfcmpeq F32:$src1, F32:$src2)))>;
  def: Pat<(i1 (setone F64:$src1, F64:$src2)),
           (C2_not (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
                          (F2_dfcmpeq F64:$src1, F64:$src2)))>;
  def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
           (C2_not (C2_or (F2_sfcmpuo F32:$src1,
                                      (f32 (A2_tfrsi (ftoi $src2)))),
                          (F2_sfcmpeq F32:$src1,
                                      (f32 (A2_tfrsi (ftoi $src2))))))>;
  def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
           (C2_not (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
                          (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2)))))>;
}
2496
// Unordered ne: selected as or(cmpuo(a, b), not(cmpeq(a, b))).
let Predicates = [HasV5T] in {
  // Register-register forms.
  def: Pat<(i1 (setune F32:$Rs, F32:$Rt)),
           (C2_or (F2_sfcmpuo F32:$Rs, F32:$Rt),
                  (C2_not (F2_sfcmpeq F32:$Rs, F32:$Rt)))>;
  def: Pat<(i1 (setune F64:$Rss, F64:$Rtt)),
           (C2_or (F2_dfcmpuo F64:$Rss, F64:$Rtt),
                  (C2_not (F2_dfcmpeq F64:$Rss, F64:$Rtt)))>;
  // Register-immediate forms.
  def: Pat<(i1 (setune F32:$Rs, f32ImmPred:$Imm)),
           (C2_or (F2_sfcmpuo F32:$Rs, (f32 (A2_tfrsi (ftoi $Imm)))),
                  (C2_not (F2_sfcmpeq F32:$Rs,
                                      (f32 (A2_tfrsi (ftoi $Imm))))))>;
  def: Pat<(i1 (setune F64:$Rss, f64ImmPred:$Imm)),
           (C2_or (F2_dfcmpuo F64:$Rss, (CONST64 (ftoi $Imm))),
                  (C2_not (F2_dfcmpeq F64:$Rss,
                                      (CONST64 (ftoi $Imm)))))>;
}
2514
// Besides set[o|u][comparisons], we also need set[comparisons]: the plain
// setlt/setle/setne condition codes on floating-point operands.
let Predicates = [HasV5T] in {
  // lt: a < b -> cmpgt(b, a).
  def: Pat<(i1 (setlt F32:$Rs, F32:$Rt)),
           (F2_sfcmpgt F32:$Rt, F32:$Rs)>;
  def: Pat<(i1 (setlt F32:$Rs, f32ImmPred:$Imm)),
           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs)>;
  def: Pat<(i1 (setlt F64:$Rss, F64:$Rtt)),
           (F2_dfcmpgt F64:$Rtt, F64:$Rss)>;
  def: Pat<(i1 (setlt F64:$Rss, f64ImmPred:$Imm)),
           (F2_dfcmpgt (CONST64 (ftoi $Imm)), F64:$Rss)>;

  // le, single precision: rs <= rt -> rt >= rs.
  def: Pat<(i1 (setle F32:$Rs, F32:$Rt)),
           (F2_sfcmpge F32:$Rt, F32:$Rs)>;
  def: Pat<(i1 (setle F32:$Rs, f32ImmPred:$Imm)),
           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $Imm))), F32:$Rs)>;

  // le, double precision: Rss <= Rtt -> Rtt >= Rss.
  def: Pat<(i1 (setle F64:$Rss, F64:$Rtt)),
           (F2_dfcmpge F64:$Rtt, F64:$Rss)>;
  def: Pat<(i1 (setle F64:$Rss, f64ImmPred:$Imm)),
           (F2_dfcmpge (CONST64 (ftoi $Imm)), F64:$Rss)>;

  // ne: not(cmpeq(a, b)).
  def: Pat<(i1 (setne F32:$Rs, F32:$Rt)),
           (C2_not (F2_sfcmpeq F32:$Rs, F32:$Rt))>;
  def: Pat<(i1 (setne F64:$Rss, F64:$Rtt)),
           (C2_not (F2_dfcmpeq F64:$Rss, F64:$Rtt))>;
  def: Pat<(i1 (setne F32:$Rs, f32ImmPred:$Imm)),
           (C2_not (F2_sfcmpeq F32:$Rs, (f32 (A2_tfrsi (ftoi $Imm)))))>;
  def: Pat<(i1 (setne F64:$Rss, f64ImmPred:$Imm)),
           (C2_not (F2_dfcmpeq F64:$Rss, (CONST64 (ftoi $Imm))))>;
}
2550
2551
// Floating-point precision changes.
def: Pat<(f64 (fpextend F32:$Src)), (F2_conv_sf2df F32:$Src)>;
def: Pat<(f32 (fpround  F64:$Src)), (F2_conv_df2sf F64:$Src)>;

// Signed integer -> floating point.
def: Pat<(f32 (sint_to_fp I32:$Src)), (F2_conv_w2sf I32:$Src)>;
def: Pat<(f32 (sint_to_fp I64:$Src)), (F2_conv_d2sf I64:$Src)>;
def: Pat<(f64 (sint_to_fp I32:$Src)), (F2_conv_w2df I32:$Src)>;
def: Pat<(f64 (sint_to_fp I64:$Src)), (F2_conv_d2df I64:$Src)>;

// Unsigned integer -> floating point.
def: Pat<(f32 (uint_to_fp I32:$Src)), (F2_conv_uw2sf I32:$Src)>;
def: Pat<(f32 (uint_to_fp I64:$Src)), (F2_conv_ud2sf I64:$Src)>;
def: Pat<(f64 (uint_to_fp I32:$Src)), (F2_conv_uw2df I32:$Src)>;
def: Pat<(f64 (uint_to_fp I64:$Src)), (F2_conv_ud2df I64:$Src)>;

// Floating point -> signed integer; the _chop instruction variants are used.
def: Pat<(i32 (fp_to_sint F32:$Src)), (F2_conv_sf2w_chop F32:$Src)>;
def: Pat<(i32 (fp_to_sint F64:$Src)), (F2_conv_df2w_chop F64:$Src)>;
def: Pat<(i64 (fp_to_sint F32:$Src)), (F2_conv_sf2d_chop F32:$Src)>;
def: Pat<(i64 (fp_to_sint F64:$Src)), (F2_conv_df2d_chop F64:$Src)>;

// Floating point -> unsigned integer; the _chop instruction variants are
// used.
def: Pat<(i32 (fp_to_uint F32:$Src)), (F2_conv_sf2uw_chop F32:$Src)>;
def: Pat<(i32 (fp_to_uint F64:$Src)), (F2_conv_df2uw_chop F64:$Src)>;
def: Pat<(i64 (fp_to_uint F32:$Src)), (F2_conv_sf2ud_chop F32:$Src)>;
def: Pat<(i64 (fp_to_uint F64:$Src)), (F2_conv_df2ud_chop F64:$Src)>;
2574
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]: the operand is
// passed through unchanged under the other type, and no instruction is
// emitted.
let Predicates = [HasV5T] in {
  // 32-bit: f32 <-> i32.
  def: Pat<(i32 (bitconvert F32:$Val)), (I32:$Val)>;
  def: Pat<(f32 (bitconvert I32:$Val)), (F32:$Val)>;
  // 64-bit: f64 <-> i64.
  def: Pat<(i64 (bitconvert F64:$Val)), (I64:$Val)>;
  def: Pat<(f64 (bitconvert I64:$Val)), (F64:$Val)>;
}
2582
// Fused multiply-add: fma(A, B, Acc) is selected as F2_sffma with the addend
// first, followed by the two multiplicands.
def: Pat<(fma F32:$A, F32:$B, F32:$Acc),
         (F2_sffma F32:$Acc, F32:$A, F32:$B)>;

// A negated multiplicand (either one) selects F2_sffms with the un-negated
// operands.
def: Pat<(fma (fneg F32:$A), F32:$B, F32:$Acc),
         (F2_sffms F32:$Acc, F32:$A, F32:$B)>;
def: Pat<(fma F32:$A, (fneg F32:$B), F32:$Acc),
         (F2_sffms F32:$Acc, F32:$A, F32:$B)>;
2591
// Single-precision select on a predicate register (requires HasV5T).
// Floating-point immediates are converted to their integer bit pattern with
// ftoi.
let Predicates = [HasV5T] in {
  // Register when true, immediate when false.
  def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$Imm),
           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $Imm))>;

  // Immediate when true, register when false.
  def: Pat<(select I1:$Pu, f32ImmPred:$Imm, F32:$Rt),
           (C2_muxri I1:$Pu, (ftoi $Imm), F32:$Rt)>;

  // Both arms in registers.
  def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
           (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
}
2603
// select(setult(a, b), x, y), single precision.
// setult is true when a < b OR the operands are unordered; its exact
// complement is the ordered compare a >= b. The select is therefore emitted
// as mux(cmpge(a, b), y, x): the arms are swapped because the predicate is
// inverted. (Using cmpgt(b, a) as the predicate is not equivalent: it is
// false for unordered operands, so one of the two cases would pick the wrong
// arm whichever way the arms are ordered.)
def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
         (C2_mux (F2_sfcmpge F32:$src1, F32:$src2), F32:$src4, F32:$src3)>,
      Requires<[HasV5T]>;

// Plain double-precision select on a predicate register.
def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
         (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
      Requires<[HasV5T]>;

// select(setult(a, b), x, y), double precision: same inverted-predicate
// lowering as the single-precision pattern, vmux(cmpge(a, b), y, x).
def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
         (C2_vmux (F2_dfcmpge F64:$src1, F64:$src2), F64:$src4, F64:$src3)>,
      Requires<[HasV5T]>;
2615
// Fold a predicate negation into the select by exchanging the two arms
// (requires HasV5T).
let Predicates = [HasV5T] in {
  // Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
  //       => r0 = mux(p0, r1, #i)
  def: Pat<(select (not I1:$Pu), f32ImmPred:$Imm, F32:$Rt),
           (C2_muxir I1:$Pu, F32:$Rt, (ftoi $Imm))>;

  // Map from p0 = pnot(p0); r0 = select(p0, r1, #i)
  //       => r0 = mux(p0, #i, r1)
  def: Pat<(select (not I1:$Pu), F32:$Rs, f32ImmPred:$Imm),
           (C2_muxri I1:$Pu, (ftoi $Imm), F32:$Rs)>;
}
2627
// f64 -> i32 signed conversion done as a 64-bit conversion followed by
// extraction of the low word (requires HasV5T).
// NOTE(review): the same source dag, (i32 (fp_to_sint F64)), is also matched
// earlier in this file and selected as F2_conv_df2w_chop -- confirm which of
// the two is intended to win.
let Predicates = [HasV5T] in
def: Pat<(i32 (fp_to_sint F64:$Rss)),
         (LoReg (F2_conv_df2d_chop F64:$Rss))>;
2631
// Single-precision fabs and fneg operate on bit 31 of the register:
// fabs clears it, fneg toggles it (requires HasV5T).
let Predicates = [HasV5T] in {
  def: Pat<(fabs (f32 IntRegs:$Rs)),
           (S2_clrbit_i (f32 IntRegs:$Rs), 31)>;
  def: Pat<(fneg (f32 IntRegs:$Rs)),
           (S2_togglebit_i (f32 IntRegs:$Rs), 31)>;
}
2639
2640
// Load/store fragments split by the result of isAlignedMemNode on the memory
// node: the "aligned" fragments match when it returns true, the "unaligned"
// ones when it returns false.
def alignedload
  : PatFrag<(ops node:$addr), (load $addr), [{
  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def unalignedload
  : PatFrag<(ops node:$addr), (load $addr), [{
  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def alignedstore
  : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;

def unalignedstore
  : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
}]>;
2656
2657
// Vector store patterns.
//   VTSgl - vector type held in VectorRegs (patterns require UseHVXSgl).
//   VTDbl - vector type held in VectorRegs128B (patterns require UseHVXDbl).
// Aligned stores select the vS32b instructions and unaligned stores the
// vS32Ub instructions.
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
  // Stores through a plain register address (offset 0).
  def : Pat<(alignedstore (VTSgl VectorRegs:$Vs), IntRegs:$Rt),
            (V6_vS32b_ai IntRegs:$Rt, 0, (VTSgl VectorRegs:$Vs))>,
        Requires<[UseHVXSgl]>;
  def : Pat<(unalignedstore (VTSgl VectorRegs:$Vs), IntRegs:$Rt),
            (V6_vS32Ub_ai IntRegs:$Rt, 0, (VTSgl VectorRegs:$Vs))>,
        Requires<[UseHVXSgl]>;

  // 128B stores through a plain register address (offset 0).
  def : Pat<(alignedstore (VTDbl VectorRegs128B:$Vs), IntRegs:$Rt),
            (V6_vS32b_ai_128B IntRegs:$Rt, 0, (VTDbl VectorRegs128B:$Vs))>,
        Requires<[UseHVXDbl]>;
  def : Pat<(unalignedstore (VTDbl VectorRegs128B:$Vs), IntRegs:$Rt),
            (V6_vS32Ub_ai_128B IntRegs:$Rt, 0, (VTDbl VectorRegs128B:$Vs))>,
        Requires<[UseHVXDbl]>;

  // Fold Add R+OFF into vector store.
  let AddedComplexity = 10 in {
    def : Pat<(alignedstore (VTSgl VectorRegs:$Vs),
                            (add IntRegs:$Rt, s4_6ImmPred:$Off)),
              (V6_vS32b_ai IntRegs:$Rt, s4_6ImmPred:$Off,
                           (VTSgl VectorRegs:$Vs))>,
          Requires<[UseHVXSgl]>;
    def : Pat<(unalignedstore (VTSgl VectorRegs:$Vs),
                              (add IntRegs:$Rt, s4_6ImmPred:$Off)),
              (V6_vS32Ub_ai IntRegs:$Rt, s4_6ImmPred:$Off,
                            (VTSgl VectorRegs:$Vs))>,
          Requires<[UseHVXSgl]>;

    // Fold Add R+OFF into vector store 128B.
    def : Pat<(alignedstore (VTDbl VectorRegs128B:$Vs),
                            (add IntRegs:$Rt, s4_7ImmPred:$Off)),
              (V6_vS32b_ai_128B IntRegs:$Rt, s4_7ImmPred:$Off,
                                (VTDbl VectorRegs128B:$Vs))>,
          Requires<[UseHVXDbl]>;
    def : Pat<(unalignedstore (VTDbl VectorRegs128B:$Vs),
                              (add IntRegs:$Rt, s4_7ImmPred:$Off)),
              (V6_vS32Ub_ai_128B IntRegs:$Rt, s4_7ImmPred:$Off,
                                 (VTDbl VectorRegs128B:$Vs))>,
          Requires<[UseHVXDbl]>;
  }
}

defm : vS32b_ai_pats<v64i8,  v128i8>;
defm : vS32b_ai_pats<v32i16, v64i16>;
defm : vS32b_ai_pats<v16i32, v32i32>;
defm : vS32b_ai_pats<v8i64,  v16i64>;
2706
2707
// Vector load patterns.
//   VTSgl - vector type loaded under UseHVXSgl.
//   VTDbl - vector type loaded under UseHVXDbl (the _128B instructions).
// Aligned loads select the vL32b instructions and unaligned loads the
// vL32Ub instructions.
multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
  // Loads through a plain register address (offset 0).
  def : Pat<(VTSgl (alignedload IntRegs:$Rt)),
            (V6_vL32b_ai IntRegs:$Rt, 0)>,
        Requires<[UseHVXSgl]>;
  def : Pat<(VTSgl (unalignedload IntRegs:$Rt)),
            (V6_vL32Ub_ai IntRegs:$Rt, 0)>,
        Requires<[UseHVXSgl]>;

  // 128B loads through a plain register address (offset 0).
  def : Pat<(VTDbl (alignedload IntRegs:$Rt)),
            (V6_vL32b_ai_128B IntRegs:$Rt, 0)>,
        Requires<[UseHVXDbl]>;
  def : Pat<(VTDbl (unalignedload IntRegs:$Rt)),
            (V6_vL32Ub_ai_128B IntRegs:$Rt, 0)>,
        Requires<[UseHVXDbl]>;

  // Fold Add R+OFF into vector load.
  let AddedComplexity = 10 in {
    // 128B forms, offset matched by s4_7ImmPred.
    def : Pat<(VTDbl (alignedload (add IntRegs:$Rt, s4_7ImmPred:$Off))),
              (V6_vL32b_ai_128B IntRegs:$Rt, s4_7ImmPred:$Off)>,
          Requires<[UseHVXDbl]>;
    def : Pat<(VTDbl (unalignedload (add IntRegs:$Rt, s4_7ImmPred:$Off))),
              (V6_vL32Ub_ai_128B IntRegs:$Rt, s4_7ImmPred:$Off)>,
          Requires<[UseHVXDbl]>;

    // Single forms, offset matched by s4_6ImmPred.
    def : Pat<(VTSgl (alignedload (add IntRegs:$Rt, s4_6ImmPred:$Off))),
              (V6_vL32b_ai IntRegs:$Rt, s4_6ImmPred:$Off)>,
          Requires<[UseHVXSgl]>;
    def : Pat<(VTSgl (unalignedload (add IntRegs:$Rt, s4_6ImmPred:$Off))),
              (V6_vL32Ub_ai IntRegs:$Rt, s4_6ImmPred:$Off)>,
          Requires<[UseHVXSgl]>;
  }
}

defm : vL32b_ai_pats<v64i8,  v128i8>;
defm : vL32b_ai_pats<v32i16, v64i16>;
defm : vL32b_ai_pats<v16i32, v32i32>;
defm : vL32b_ai_pats<v8i64,  v16i64>;
2747
// Stores of values held in the double-vector register classes (VecDblRegs
// under UseHVXSgl, VecDblRegs128B under UseHVXDbl). Aligned stores select the
// PS_vstorerw pseudo-instructions, unaligned stores the PS_vstorerwu ones;
// all use offset 0.
multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
  def : Pat<(alignedstore (VTSgl VecDblRegs:$Vss), IntRegs:$Rt),
            (PS_vstorerw_ai IntRegs:$Rt, 0, (VTSgl VecDblRegs:$Vss))>,
        Requires<[UseHVXSgl]>;
  def : Pat<(unalignedstore (VTSgl VecDblRegs:$Vss), IntRegs:$Rt),
            (PS_vstorerwu_ai IntRegs:$Rt, 0, (VTSgl VecDblRegs:$Vss))>,
        Requires<[UseHVXSgl]>;

  def : Pat<(alignedstore (VTDbl VecDblRegs128B:$Vss), IntRegs:$Rt),
            (PS_vstorerw_ai_128B IntRegs:$Rt, 0,
                                 (VTDbl VecDblRegs128B:$Vss))>,
        Requires<[UseHVXDbl]>;
  def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$Vss), IntRegs:$Rt),
            (PS_vstorerwu_ai_128B IntRegs:$Rt, 0,
                                  (VTDbl VecDblRegs128B:$Vss))>,
        Requires<[UseHVXDbl]>;
}

defm : STrivv_pats<v128i8, v256i8>;
defm : STrivv_pats<v64i16, v128i16>;
defm : STrivv_pats<v32i32, v64i32>;
defm : STrivv_pats<v16i64, v32i64>;
2770
// Loads of the vector types instantiated below. Aligned loads select the
// PS_vloadrw pseudo-instructions, unaligned loads the PS_vloadrwu ones; all
// use offset 0. VTSgl patterns require UseHVXSgl, VTDbl patterns UseHVXDbl.
multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
  def : Pat<(VTSgl (alignedload I32:$Rt)),
            (PS_vloadrw_ai I32:$Rt, 0)>,
        Requires<[UseHVXSgl]>;
  def : Pat<(VTSgl (unalignedload I32:$Rt)),
            (PS_vloadrwu_ai I32:$Rt, 0)>,
        Requires<[UseHVXSgl]>;

  def : Pat<(VTDbl (alignedload I32:$Rt)),
            (PS_vloadrw_ai_128B I32:$Rt, 0)>,
        Requires<[UseHVXDbl]>;
  def : Pat<(VTDbl (unalignedload I32:$Rt)),
            (PS_vloadrwu_ai_128B I32:$Rt, 0)>,
        Requires<[UseHVXDbl]>;
}

defm : LDrivv_pats<v128i8, v256i8>;
defm : LDrivv_pats<v64i16, v128i16>;
defm : LDrivv_pats<v32i32, v64i32>;
defm : LDrivv_pats<v16i64, v32i64>;
2791
// Scalar-predicate select between whole vectors, selected as the PS_vselect
// (single vector register) and PS_wselect (double vector register)
// pseudo-instructions.

// 64-byte mode.
let Predicates = [HasV60T, UseHVXSgl] in {
  def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
           (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
  def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vss), VecDblRegs:$Vtt),
           (PS_wselect I1:$Pu, VecDblRegs:$Vss, VecDblRegs:$Vtt)>;
}

// 128-byte mode.
let Predicates = [HasV60T, UseHVXDbl] in {
  def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
           (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
  def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vss),
                           VecDblRegs128B:$Vtt),
           (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vss,
                            VecDblRegs128B:$Vtt)>;
}
2804
2805
// VCOMBINE: one result and two operands; both operands have the same type,
// and that type is a sub-vector of the result type.
def SDTHexagonVCOMBINE
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisSubVecOfVec<1, 0>]>;

def HexagonVCOMBINE : SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;

// v16i32 x 2 -> v32i32 (UseHVXSgl).
def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Hi),
                                  (v16i32 VectorRegs:$Lo))),
         (V6_vcombine VectorRegs:$Hi, VectorRegs:$Lo)>,
     Requires<[UseHVXSgl]>;
// v32i32 x 2 -> v64i32 (UseHVXDbl).
def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Hi),
                                  (v32i32 VecDblRegs:$Lo))),
         (V6_vcombine_128B VecDblRegs:$Hi, VecDblRegs:$Lo)>,
     Requires<[UseHVXDbl]>;
2819
// VPACK: one result and three operands; the first two operands have the same
// type and the third is an integer.
def SDTHexagonVPACK
  : SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>, SDTCisInt<3>]>;

def HexagonVPACK : SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;

// 0 as the last argument denotes vpacke. 1 denotes vpacko.

// UseHVXSgl, byte elements.
def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vu),
                              (v64i8 VectorRegs:$Vv), (i32 0))),
         (V6_vpackeb VectorRegs:$Vu, VectorRegs:$Vv)>,
     Requires<[UseHVXSgl]>;
def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vu),
                              (v64i8 VectorRegs:$Vv), (i32 1))),
         (V6_vpackob VectorRegs:$Vu, VectorRegs:$Vv)>,
     Requires<[UseHVXSgl]>;
// UseHVXSgl, halfword elements.
def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vu),
                               (v32i16 VectorRegs:$Vv), (i32 0))),
         (V6_vpackeh VectorRegs:$Vu, VectorRegs:$Vv)>,
     Requires<[UseHVXSgl]>;
def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vu),
                               (v32i16 VectorRegs:$Vv), (i32 1))),
         (V6_vpackoh VectorRegs:$Vu, VectorRegs:$Vv)>,
     Requires<[UseHVXSgl]>;

// UseHVXDbl, byte elements.
def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vu),
                               (v128i8 VecDblRegs:$Vv), (i32 0))),
         (V6_vpackeb_128B VecDblRegs:$Vu, VecDblRegs:$Vv)>,
     Requires<[UseHVXDbl]>;
def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vu),
                               (v128i8 VecDblRegs:$Vv), (i32 1))),
         (V6_vpackob_128B VecDblRegs:$Vu, VecDblRegs:$Vv)>,
     Requires<[UseHVXDbl]>;
// UseHVXDbl, halfword elements.
def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vu),
                               (v64i16 VecDblRegs:$Vv), (i32 0))),
         (V6_vpackeh_128B VecDblRegs:$Vu, VecDblRegs:$Vv)>,
     Requires<[UseHVXDbl]>;
def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vu),
                               (v64i16 VecDblRegs:$Vv), (i32 1))),
         (V6_vpackoh_128B VecDblRegs:$Vu, VecDblRegs:$Vv)>,
     Requires<[UseHVXDbl]>;
2859
// Leaves pairing a short-vector value type with its register class.
// Predicate-register vectors.
def V2I1  : PatLeaf<(v2i1  PredRegs:$R)>;
def V4I1  : PatLeaf<(v4i1  PredRegs:$R)>;
def V8I1  : PatLeaf<(v8i1  PredRegs:$R)>;
// Vectors held in IntRegs.
def V4I8  : PatLeaf<(v4i8  IntRegs:$R)>;
def V2I16 : PatLeaf<(v2i16 IntRegs:$R)>;
// Vectors held in DoubleRegs.
def V8I8  : PatLeaf<(v8i8  DoubleRegs:$R)>;
def V4I16 : PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32 : PatLeaf<(v2i32 DoubleRegs:$R)>;
2868
2869
// A bitconvert between two types that share a register class needs no
// instruction: in either direction the source register is simply re-typed.

// Types a and b both held in IntRegs.
multiclass bitconvert_32<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a IntRegs:$Rs))),
            (b IntRegs:$Rs)>;
  // b -> a
  def : Pat<(a (bitconvert (b IntRegs:$Rs))),
            (a IntRegs:$Rs)>;
}

// Types a and b both held in DoubleRegs.
multiclass bitconvert_64<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a DoubleRegs:$Rss))),
            (b DoubleRegs:$Rss)>;
  // b -> a
  def : Pat<(a (bitconvert (b DoubleRegs:$Rss))),
            (a DoubleRegs:$Rss)>;
}

// Bit convert vector types to integers (and back).
defm : bitconvert_32<v4i8,  i32>;
defm : bitconvert_32<v2i16, i32>;
defm : bitconvert_64<v8i8,  i64>;
defm : bitconvert_64<v4i16, i64>;
defm : bitconvert_64<v2i32, i64>;
2890
// Shift of a 64-bit vector by an immediate amount.
//   MI      - instruction to emit
//   Op      - shift node being matched (sra, srl or shl)
//   VecTy   - vector type of the DoubleRegs operand
//   ImmPred - leaf that the shift amount must satisfy
class VecShiftImm_pat<InstHexagon MI, SDNode Op, ValueType VecTy,
                      PatFrag ImmPred>
  : Pat<(Op (VecTy DoubleRegs:$Rss), ImmPred:$Amt),
        (MI DoubleRegs:$Rss, imm:$Amt)>;

// v4i16 shifts; the amount is constrained by u4_0ImmPred.
def: VecShiftImm_pat<S2_asr_i_vh, sra, v4i16, u4_0ImmPred>;
def: VecShiftImm_pat<S2_lsr_i_vh, srl, v4i16, u4_0ImmPred>;
def: VecShiftImm_pat<S2_asl_i_vh, shl, v4i16, u4_0ImmPred>;

// v2i32 shifts; the amount is constrained by u5_0ImmPred.
def: VecShiftImm_pat<S2_asr_i_vw, sra, v2i32, u5_0ImmPred>;
def: VecShiftImm_pat<S2_lsr_i_vw, srl, v2i32, u5_0ImmPred>;
def: VecShiftImm_pat<S2_asl_i_vw, shl, v2i32, u5_0ImmPred>;
2904
// v2i16 addition held in a 32-bit register maps to A2_svaddh.
def: Pat<(v2i16 (add (v2i16 IntRegs:$Rs), (v2i16 IntRegs:$Rt))),
         (A2_svaddh IntRegs:$Rs, IntRegs:$Rt)>;

// v2i16 subtraction held in a 32-bit register maps to A2_svsubh.
def: Pat<(v2i16 (sub (v2i16 IntRegs:$Rs), (v2i16 IntRegs:$Rt))),
         (A2_svsubh IntRegs:$Rs, IntRegs:$Rt)>;
2910
// Splat nodes: both are unary operations (SDTUnaryOp).
def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;

// Byte splat: the low 8 bits of the 32-bit input register are replicated
// into each of the four bytes of the 32-bit destination register.
def: Pat<(v4i8 (HexagonVSPLATB I32:$Src)),
         (S2_vsplatrb I32:$Src)>;

// Halfword splat: the low 16 bits of the 32-bit input register are
// replicated into each of the four halfwords of the 64-bit destination
// register.
def: Pat<(v4i16 (HexagonVSPLATH I32:$Src)),
         (S2_vsplatrh I32:$Src)>;
2921
2922
// Two-operand vector operation where both inputs are described by the same
// typed leaf.
//   MI   - instruction to emit
//   Op   - binary node being matched
//   Type - typed register leaf used for both operands
class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
  : Pat<(Op Type:$Lhs, Type:$Rhs),
        (MI Type:$Lhs, Type:$Rhs)>;

// Element-wise add and subtract on 64-bit vectors.
def: VArith_pat<A2_vaddub, add, V8I8>;
def: VArith_pat<A2_vaddh,  add, V4I16>;
def: VArith_pat<A2_vaddw,  add, V2I32>;
def: VArith_pat<A2_vsubub, sub, V8I8>;
def: VArith_pat<A2_vsubh,  sub, V4I16>;
def: VArith_pat<A2_vsubw,  sub, V2I32>;

// Bitwise logic on v2i16 reuses the scalar 32-bit instructions.
def: VArith_pat<A2_and, and, V2I16>;
def: VArith_pat<A2_xor, xor, V2I16>;
def: VArith_pat<A2_or,  or,  V2I16>;

// Bitwise logic on every 64-bit vector type reuses the 64-bit scalar
// instructions.
def: VArith_pat<A2_andp, and, V8I8>;
def: VArith_pat<A2_andp, and, V4I16>;
def: VArith_pat<A2_andp, and, V2I32>;
def: VArith_pat<A2_orp,  or,  V8I8>;
def: VArith_pat<A2_orp,  or,  V4I16>;
def: VArith_pat<A2_orp,  or,  V2I32>;
def: VArith_pat<A2_xorp, xor, V8I8>;
def: VArith_pat<A2_xorp, xor, V4I16>;
def: VArith_pat<A2_xorp, xor, V2I32>;
2947
// v2i32 shifts whose per-lane amount is the same immediate in both words,
// expressed as a HexagonCOMBINE of that immediate with itself. These select
// the shift-by-immediate instructions.
def: Pat<(v2i32 (sra V2I32:$Rss, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$S),
                                                      (i32 u5_0ImmPred:$S))))),
         (S2_asr_i_vw V2I32:$Rss, imm:$S)>;
def: Pat<(v2i32 (srl V2I32:$Rss, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$S),
                                                      (i32 u5_0ImmPred:$S))))),
         (S2_lsr_i_vw V2I32:$Rss, imm:$S)>;
def: Pat<(v2i32 (shl V2I32:$Rss, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$S),
                                                      (i32 u5_0ImmPred:$S))))),
         (S2_asl_i_vw V2I32:$Rss, imm:$S)>;

// v4i16 shifts whose per-lane amount is an immediate splatted with
// HexagonVSPLATH.
def: Pat<(v4i16 (sra V4I16:$Rss,
                     (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$S)))))),
         (S2_asr_i_vh V4I16:$Rss, imm:$S)>;
def: Pat<(v4i16 (srl V4I16:$Rss,
                     (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$S)))))),
         (S2_lsr_i_vh V4I16:$Rss, imm:$S)>;
def: Pat<(v4i16 (shl V4I16:$Rss,
                     (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$S)))))),
         (S2_asl_i_vh V4I16:$Rss, imm:$S)>;
2964
2965
// Type profile shared by the vector shift nodes: one result and two
// operands. The result and the first operand have type VecTy; the second
// operand is an integer.
class SDTHexagonVecShift<ValueType VecTy>
  : SDTypeProfile<1, 2,
                  [SDTCisSameAs<0, 1>, SDTCisVT<0, VecTy>, SDTCisInt<2>]>;

def SDTHexagon_v2i32_v2i32_i32 : SDTHexagonVecShift<v2i32>;
def SDTHexagon_v4i16_v4i16_i32 : SDTHexagonVecShift<v4i16>;

// Shift nodes; the W variants use the v2i32 profile and the H variants the
// v4i16 profile.
def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;

// Immediate shift amounts select the shift-by-immediate instructions.
def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rss, u5_0ImmPred:$S)),
         (S2_asr_i_vw V2I32:$Rss, imm:$S)>;
def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rss, u4_0ImmPred:$S)),
         (S2_asr_i_vh V4I16:$Rss, imm:$S)>;
def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rss, u5_0ImmPred:$S)),
         (S2_lsr_i_vw V2I32:$Rss, imm:$S)>;
def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rss, u4_0ImmPred:$S)),
         (S2_lsr_i_vh V4I16:$Rss, imm:$S)>;
def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rss, u5_0ImmPred:$S)),
         (S2_asl_i_vw V2I32:$Rss, imm:$S)>;
def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rss, u4_0ImmPred:$S)),
         (S2_asl_i_vh V4I16:$Rss, imm:$S)>;
2990
// Vector shift where the amount comes from a 32-bit register.
//   MI    - instruction to emit
//   Op    - Hexagon vector shift node being matched
//   Value - typed leaf describing the vector operand
class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat<(Op Value:$Rss, I32:$Amt),
        (MI Value:$Rss, I32:$Amt)>;

def: vshift_rr_pat<S2_asr_r_vw, HexagonVSRAW, V2I32>;
def: vshift_rr_pat<S2_asr_r_vh, HexagonVSRAH, V4I16>;
def: vshift_rr_pat<S2_lsr_r_vw, HexagonVSRLW, V2I32>;
def: vshift_rr_pat<S2_lsr_r_vh, HexagonVSRLH, V4I16>;
def: vshift_rr_pat<S2_asl_r_vw, HexagonVSHLW, V2I32>;
def: vshift_rr_pat<S2_asl_r_vh, HexagonVSHLH, V4I16>;
3001
3002
// Type profile shared by the vector compare nodes: one i1 result and two
// operands, both of type VecTy.
class SDTHexagonVecCompare<ValueType VecTy>
  : SDTypeProfile<1, 2,
                  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, VecTy>]>;

def SDTHexagonVecCompare_v8i8  : SDTHexagonVecCompare<v8i8>;
def SDTHexagonVecCompare_v4i16 : SDTHexagonVecCompare<v4i16>;
def SDTHexagonVecCompare_v2i32 : SDTHexagonVecCompare<v2i32>;

// Compare nodes using the v8i8 profile.
def HexagonVCMPBEQ:  SDNode<"HexagonISD::VCMPBEQ",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGT:  SDNode<"HexagonISD::VCMPBGT",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
// Compare nodes using the v4i16 profile.
def HexagonVCMPHEQ:  SDNode<"HexagonISD::VCMPHEQ",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGT:  SDNode<"HexagonISD::VCMPHGT",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
// Compare nodes using the v2i32 profile.
def HexagonVCMPWEQ:  SDNode<"HexagonISD::VCMPWEQ",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGT:  SDNode<"HexagonISD::VCMPWGT",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
3019
3020
// Hexagon vector compare node with an i1 result, selected as the matching
// compare instruction.
//   MI    - compare instruction to emit
//   Op    - Hexagon vector compare node being matched
//   Value - typed leaf describing both vector operands
class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat<(i1 (Op Value:$Rss, Value:$Rtt)),
        (MI Value:$Rss, Value:$Rtt)>;

// v8i8 operands.
def: vcmp_i1_pat<A2_vcmpbeq,  HexagonVCMPBEQ,  V8I8>;
def: vcmp_i1_pat<A4_vcmpbgt,  HexagonVCMPBGT,  V8I8>;
def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;

// v4i16 operands.
def: vcmp_i1_pat<A2_vcmpheq,  HexagonVCMPHEQ,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgt,  HexagonVCMPHGT,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;

// v2i32 operands.
def: vcmp_i1_pat<A2_vcmpweq,  HexagonVCMPWEQ,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgt,  HexagonVCMPWGT,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
3036
3037
// Generic setcc on a 64-bit vector producing a vector-of-i1 result, selected
// as the matching compare instruction.
//   MI    - compare instruction to emit
//   Op    - comparison fragment being matched (seteq, setgt, setugt)
//   InVal - typed leaf describing both vector operands
//   OutTy - vector-of-i1 result type
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
         (MI InVal:$Rs, InVal:$Rt)>;

// v8i8 -> v8i1. These direct forms complement the swapped-operand
// setlt/setult patterns that exist for the same types.
def: vcmp_vi1_pat<A2_vcmpbeq,  seteq,  V8I8,  v8i1>;
def: vcmp_vi1_pat<A4_vcmpbgt,  setgt,  V8I8,  v8i1>;
def: vcmp_vi1_pat<A2_vcmpbgtu, setugt, V8I8,  v8i1>;

// v2i32 -> v2i1.
def: vcmp_vi1_pat<A2_vcmpweq,  seteq,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgt,  setgt,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;

// v4i16 -> v4i1.
def: vcmp_vi1_pat<A2_vcmpheq,  seteq,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgt,  setgt,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
3049
// v2i32 multiply is selected as PS_vmulw.
def: Pat<(mul V2I32:$Rss, V2I32:$Rtt),
         (PS_vmulw DoubleRegs:$Rss, DoubleRegs:$Rtt)>;

// v2i32 multiply whose product is added to a third vector is selected as
// PS_vmulw_acc, with the addend as the first operand.
def: Pat<(add V2I32:$Rxx, (mul V2I32:$Rss, V2I32:$Rtt)),
         (PS_vmulw_acc DoubleRegs:$Rxx, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
3054
3055
// v4i8 addition: Hexagon has no instruction for this, so both operands are
// widened with Zext64, added with the v8i8 instruction, and only the low
// half of the result is kept.
def: Pat<(v4i8 (add (v4i8 IntRegs:$Ra), (v4i8 IntRegs:$Rb))),
         (LoReg (A2_vaddub (Zext64 $Ra), (Zext64 $Rb)))>;

// v4i8 subtraction: same scheme as the addition, using the v8i8 subtract
// and keeping only the low half of the result.
def: Pat<(v4i8 (sub (v4i8 IntRegs:$Ra), (v4i8 IntRegs:$Rb))),
         (LoReg (A2_vsubub (Zext64 $Ra), (Zext64 $Rb)))>;
3065
// There is no 32-bit vector mux. A select between two 32-bit vectors widens
// both with Zext64, uses the 64-bit C2_vmux, and keeps the low half of the
// result.
def: Pat<(v4i8 (select I1:$Pu, V4I8:$Ra, V4I8:$Rb)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Ra), (Zext64 $Rb)))>;
def: Pat<(v2i16 (select I1:$Pu, V2I16:$Ra, V2I16:$Rb)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Ra), (Zext64 $Rb)))>;
3073
// 64-bit vector mux: a vselect with a vector-of-i1 condition is selected
// directly as C2_vmux for each 64-bit vector type.
def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rss, V8I8:$Rtt)),
         (C2_vmux V8I1:$Pu, V8I8:$Rss, V8I8:$Rtt)>;
def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rss, V4I16:$Rtt)),
         (C2_vmux V4I1:$Pu, V4I16:$Rss, V4I16:$Rtt)>;
def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rss, V2I32:$Rtt)),
         (C2_vmux V2I1:$Pu, V2I32:$Rss, V2I32:$Rtt)>;
3083
// There is no 32-bit vector compare. A compare of two 32-bit vectors with an
// i1 result widens both operands with Zext64 and uses the 64-bit compare
// instruction.
//   MI    - 64-bit compare instruction to emit
//   Op    - comparison fragment being matched (seteq, setgt, setugt)
//   Value - typed leaf describing both 32-bit vector operands
class vcmp32_i1_pat<InstHexagon MI, PatFrag Op, PatFrag Value>
  : Pat<(i1 (Op Value:$Ra, Value:$Rb)),
        (MI (Zext64 $Ra), (Zext64 $Rb))>;

// v4i8 operands use the byte compares.
def: vcmp32_i1_pat<A2_vcmpbeq,  seteq,  V4I8>;
def: vcmp32_i1_pat<A4_vcmpbgt,  setgt,  V4I8>;
def: vcmp32_i1_pat<A2_vcmpbgtu, setugt, V4I8>;

// v2i16 operands use the halfword compares.
def: vcmp32_i1_pat<A2_vcmpheq,  seteq,  V2I16>;
def: vcmp32_i1_pat<A2_vcmphgt,  setgt,  V2I16>;
def: vcmp32_i1_pat<A2_vcmphgtu, setugt, V2I16>;
3100
3101
// Compare implemented by the opposite-direction instruction with its
// operands swapped.
//   InvMI - instruction to emit, applied to ($Rt, $Rs)
//   CmpOp - comparison fragment being matched on ($Rs, $Rt)
//   Value - typed leaf describing both vector operands
//   CmpTy - result type of the comparison
class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
                    ValueType CmpTy>
  : Pat<(CmpTy (CmpOp Value:$Lhs, Value:$Rhs)),
        (InvMI Value:$Rhs, Value:$Lhs)>;

// Map a less-than compare to the corresponding greater-than instruction
// with the order of operands reversed, e.g. x < y --> cmp.gt(y,x).

// Signed: setlt.
def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8,  i1>;
def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;

// Unsigned: setult.
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  i1>;
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
3122
// Map a v2i32 "not equal" to the negated word-wise equality compare:
// rs != rt -> !(rs == rt).
// The compare must be the word form (A2_vcmpweq), matching the v2i32
// operands and v2i1 result; the byte form would compare the operands as
// eight separate bytes.
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
3127
3128
// Truncate v4i16 -> v4i8: from vector B copy all 'E'ven 'B'yte elements:
//   A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rss)),
         (S2_vtrunehb V4I16:$Rss)>;

// Not used by any pattern here, listed for reference:
//
// S2_vtrunohb - from vector B copy all 'O'dd 'B'yte elements:
//   A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
//
// S2_vtruneh - from vectors B and C copy all 'E'ven 'H'alf-word elements:
//   A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];

// Truncate v2i32 -> v2i16: S2_packhl is applied to the high and low words of
// the source, and the low word of its result is the truncated vector.
def: Pat<(v2i16 (trunc V2I32:$Rss)),
         (LoReg (S2_packhl (HiReg $Rss), (LoReg $Rss)))>;
3144
3145
// Vector sign-extension nodes; both are unary operations (SDTUnaryOp).
def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;

// Each node takes a 32-bit register and produces a 64-bit result.
def: Pat<(i64 (HexagonVSXTBH I32:$Src)),
         (S2_vsxtbh I32:$Src)>;
// NOTE(review): VSXTBW is selected as S2_vsxthw, whose name suggests a
// halfword rather than byte source - confirm this pairing is intended.
def: Pat<(i64 (HexagonVSXTBW I32:$Src)),
         (S2_vsxthw I32:$Src)>;
3151
// Zero extension of a 32-bit vector to a 64-bit vector.
def: Pat<(v4i16 (zext V4I8:$Src)),    (S2_vzxtbh V4I8:$Src)>;
def: Pat<(v2i32 (zext V2I16:$Src)),   (S2_vzxthw V2I16:$Src)>;
// Any-extension is selected as the zero-extending instructions.
def: Pat<(v4i16 (anyext V4I8:$Src)),  (S2_vzxtbh V4I8:$Src)>;
def: Pat<(v2i32 (anyext V2I16:$Src)), (S2_vzxthw V2I16:$Src)>;
// Sign extension of a 32-bit vector to a 64-bit vector.
def: Pat<(v4i16 (sext V4I8:$Src)),    (S2_vsxtbh V4I8:$Src)>;
def: Pat<(v2i32 (sext V2I16:$Src)),   (S2_vsxthw V2I16:$Src)>;
3158
// In-register sign extension of v2i32 lanes. Each word is extended
// separately with the scalar instruction and the two results are
// recombined with A2_combinew (high word first).

// Sign extends a v2i8 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rss, v2i8)),
         (A2_combinew (A2_sxtb (HiReg $Rss)), (A2_sxtb (LoReg $Rss)))>;

// Sign extends a v2i16 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rss, v2i16)),
         (A2_combinew (A2_sxth (HiReg $Rss)), (A2_sxth (LoReg $Rss)))>;
3166
3167
// Halfword multiply helper: multiplies two v2i16 values and returns a v2i32.
// The saturating multiply is used because Hexagon does not provide a
// non-saturating vector multiply; saturation does not affect the result,
// which is produced at double the precision of the operands.
def vmpyh: OutPatFrag<(ops node:$A, node:$B),
                      (M2_vmpy2s_s0 (i32 $A), (i32 $B))>;

// Multiplies two v2i16 vectors. Hexagon has no multiply with the C semantics
// for this type, so the v2i32 product from vmpyh is truncated back to v2i16
// to simulate the wrap-around semantics for unsigned in C. The other
// S2_vtrunewh input is a zero pair, and only the low word of the result is
// used.
def: Pat<(v2i16 (mul V2I16:$Ra, V2I16:$Rb)),
         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
                             (v2i32 (vmpyh V2I16:$Ra, V2I16:$Rb))))>;

// Multiplies two v4i16 vectors: the high and the low halves are multiplied
// separately with vmpyh and the two products are truncated and merged by
// S2_vtrunewh.
def: Pat<(v4i16 (mul V4I16:$Raa, V4I16:$Rbb)),
         (S2_vtrunewh (vmpyh (HiReg $Raa), (HiReg $Rbb)),
                      (vmpyh (LoReg $Raa), (LoReg $Rbb)))>;
3189
// Byte multiply helper that does not use M5_vmpybsu: both operands are
// sign-extended with S2_vsxtbh, the high and low halves are multiplied with
// vmpyh, and the products are truncated and merged by S2_vtrunewh.
def VMPYB_no_V5: OutPatFrag<(ops node:$A, node:$B),
  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $A)), (HiReg (S2_vsxtbh $B))),
               (vmpyh (LoReg (S2_vsxtbh $A)), (LoReg (S2_vsxtbh $B))))>;

// Multiplies two v4i8 vectors. With HasV5T the product comes from
// M5_vmpybsu; the unpredicated pattern uses VMPYB_no_V5. Either way the
// result is narrowed with S2_vtrunehb.
let Predicates = [HasV5T] in
def: Pat<(v4i8 (mul V4I8:$Ra, V4I8:$Rb)),
         (S2_vtrunehb (M5_vmpybsu V4I8:$Ra, V4I8:$Rb))>;

def: Pat<(v4i8 (mul V4I8:$Ra, V4I8:$Rb)),
         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Ra, V4I8:$Rb))>;

// Multiplies two v8i8 vectors. The high and low words are handled separately
// in the same way as the v4i8 case, and the two halves are joined with
// A2_combinew.
let Predicates = [HasV5T] in
def: Pat<(v8i8 (mul V8I8:$Raa, V8I8:$Rbb)),
         (A2_combinew
            (S2_vtrunehb (M5_vmpybsu (HiReg $Raa), (HiReg $Rbb))),
            (S2_vtrunehb (M5_vmpybsu (LoReg $Raa), (LoReg $Rbb))))>;

def: Pat<(v8i8 (mul V8I8:$Raa, V8I8:$Rbb)),
         (A2_combinew
            (S2_vtrunehb (VMPYB_no_V5 (HiReg $Raa), (HiReg $Rbb))),
            (S2_vtrunehb (VMPYB_no_V5 (LoReg $Raa), (LoReg $Rbb))))>;
3211
// Type profile for the shuffle nodes: one result and two operands, all of
// type i64.
def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;

def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;

// Shuffle node on two 64-bit registers, selected as the instruction MI with
// the operands in the same order.
class ShufflePat<InstHexagon MI, SDNode Op>
  : Pat<(i64 (Op DoubleRegs:$Rss, DoubleRegs:$Rtt)),
        (i64 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;

// Shuffles even bytes for i=0..3:
//   A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;

// Shuffles odd bytes for i=0..3:
//   A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;

// Shuffles even halfwords for i=0,1:
//   A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;

// Shuffles odd halfwords for i=0,1:
//   A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
3235
3236
// Truncating store whose in-memory type is v4i8 (from v4i16).
def truncstorev4i8: PatFrag<(ops node:$value, node:$addr),
                            (truncstore node:$value, node:$addr), [{
  const StoreSDNode *St = cast<StoreSDNode>(N);
  return St->getMemoryVT() == MVT::v4i8;
}]>;

// Truncating store whose in-memory type is v2i16 (from v2i32).
def truncstorev2i16: PatFrag<(ops node:$value, node:$addr),
                             (truncstore node:$value, node:$addr), [{
  const StoreSDNode *St = cast<StoreSDNode>(N);
  return St->getMemoryVT() == MVT::v2i16;
}]>;

// v2i32 -> v2i16 store: narrow the value with S2_packhl (keeping the low
// word of its result) and store that word at offset 0.
def: Pat<(truncstorev2i16 V2I32:$Rss, I32:$Addr),
         (S2_storeri_io I32:$Addr, 0,
                        (LoReg (S2_packhl (HiReg $Rss), (LoReg $Rss))))>;

// v4i16 -> v4i8 store: narrow the value with S2_vtrunehb and store the
// resulting word at offset 0.
def: Pat<(truncstorev4i8 V4I16:$Rss, I32:$Addr),
         (S2_storeri_io I32:$Addr, 0, (S2_vtrunehb V4I16:$Rss))>;
3253
3254
// Zero-extending load whose in-memory type is v2i8.
def zextloadv2i8: PatFrag<(ops node:$addr), (zextload node:$addr), [{
  const LoadSDNode *Ld = cast<LoadSDNode>(N);
  return Ld->getMemoryVT() == MVT::v2i8;
}]>;

// Sign-extending load whose in-memory type is v2i8.
def sextloadv2i8: PatFrag<(ops node:$addr), (sextload node:$addr), [{
  const LoadSDNode *Ld = cast<LoadSDNode>(N);
  return Ld->getMemoryVT() == MVT::v2i8;
}]>;

// v2i8 -> v2i16: load a halfword from offset 0, extend its bytes to
// halfwords, and keep the low word of the result.
def: Pat<(v2i16 (zextloadv2i8 I32:$Addr)),
         (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Addr, 0))))>;

def: Pat<(v2i16 (sextloadv2i8 I32:$Addr)),
         (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Addr, 0))))>;

// v2i8 -> v2i32: same as above, followed by a halfword-to-word extension of
// the low word.
def: Pat<(v2i32 (zextloadv2i8 I32:$Addr)),
         (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Addr, 0)))))>;

def: Pat<(v2i32 (sextloadv2i8 I32:$Addr)),
         (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Addr, 0)))))>;
3273