// blob: 232bb2bf15bd8ec7ba8f4713e61e5c544b2d330a [file] [log] [blame]
// Pattern fragments that combine the value type and the register class
// into a single parameter.
// The pat frags in the definitions below need to have a named register,
// otherwise i32 will be assumed regardless of the register class. The
// name of the register does not matter.
def I1  : PatLeaf<(i1 PredRegs:$R)>;
def I32 : PatLeaf<(i32 IntRegs:$R)>;
def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
def F32 : PatLeaf<(f32 IntRegs:$R)>;
def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
11
// Output pattern fragments that extract the low and high subregisters
// (subreg_loreg / subreg_hireg) from a 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs),
                      (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
def HiReg: OutPatFrag<(ops node:$Rs),
                      (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
18
// An 'or' node that is matched only when the C++ helper orIsAdd(N) returns
// true. The address patterns below use it as an alternative to 'add'.
def orisadd: PatFrag<(ops node:$Addr, node:$off),
                     (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
21
def Set5ImmPred : PatLeaf<(i32 imm), [{
  // Set5ImmPred predicate - True if the number is in the series of values.
  // [ 2^0, 2^1, ... 2^31 ]
  // For use in setbit immediate.
  uint32_t v = N->getZExtValue();
  // Constrain to 32 bits, and then check for single bit.
  return isPowerOf2_32(v);
}]>;
30
def Clr5ImmPred : PatLeaf<(i32 imm), [{
  // Clr5ImmPred predicate - True if the bitwise complement of the number
  // is in the series of values
  // [ 2^0, 2^1, ... 2^31 ]
  // i.e. the number has exactly one bit clear in its low 32 bits.
  // For use in clrbit immediate.
  // Note: we are bit NOTing the value.
  uint32_t v = ~N->getZExtValue();
  // Constrain to 32 bits, and then check for single bit.
  return isPowerOf2_32(v);
}]>;
41
// SDNode for converting immediate C to C-1.
// The immediate is read sign-extended and handed to XformSToSM1Imm.
def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
  // Return the byte immediate const-1 as an SDNode.
  int32_t imm = N->getSExtValue();
  return XformSToSM1Imm(imm, SDLoc(N));
}]>;

// SDNode for converting immediate C to C-2.
// The immediate is read sign-extended and handed to XformSToSM2Imm.
def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
  // Return the byte immediate const-2 as an SDNode.
  int32_t imm = N->getSExtValue();
  return XformSToSM2Imm(imm, SDLoc(N));
}]>;

// SDNode for converting immediate C to C-3.
// The immediate is read sign-extended and handed to XformSToSM3Imm.
def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
  // Return the byte immediate const-3 as an SDNode.
  int32_t imm = N->getSExtValue();
  return XformSToSM3Imm(imm, SDLoc(N));
}]>;

// SDNode for converting immediate C to C-1 (unsigned variant).
// The immediate is read zero-extended and handed to XformUToUM1Imm.
def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
  // Return the byte immediate const-1 as an SDNode.
  uint32_t imm = N->getZExtValue();
  return XformUToUM1Imm(imm, SDLoc(N));
}]>;
69
// Compare of a 32-bit register against an immediate accepted by ImmPred,
// producing an i1 result, selected to the instruction MI.
class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
  : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)),
        (MI IntRegs:$src1, ImmPred:$src2)>;

def : T_CMP_pat <C2_cmpeqi,  seteq,  s10_0ImmPred>;
def : T_CMP_pat <C2_cmpgti,  setgt,  s10_0ImmPred>;
def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>;
77
// Type profile with one result and two operands: the result is i64,
// operand 1 is i32, and operand 2 has the same type as operand 1.
def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;

// Target-specific nodes that share the (i64 <- i32, i32) profile above.
def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
def HexagonPACKHL  : SDNode<"HexagonISD::PACKHL",  SDTHexagonI64I32I32>;
83
// Pats for instruction selection.
// Binary operation on two 32-bit registers producing a value of type ResT.
class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
  : Pat<(ResT (Op I32:$Rs, I32:$Rt)),
        (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;

def: BinOp32_pat<add, A2_add, i32>;
def: BinOp32_pat<and, A2_and, i32>;
def: BinOp32_pat<or,  A2_or,  i32>;
def: BinOp32_pat<sub, A2_sub, i32>;
def: BinOp32_pat<xor, A2_xor, i32>;

// Two 32-bit inputs producing a 64-bit result.
def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
def: BinOp32_pat<HexagonPACKHL,  S2_packhl,   i64>;
97
// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
// that reverse the order of the operands. The operand list is declared as
// ($rhs, $lhs) while the fragment body of F is reused unchanged.
class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
101
// Pats for compares. They use PatFrags as operands, not SDNodes,
// since seteq/setgt/etc. are defined as PatFrags.
class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
  : Pat<(VT (Op I32:$Rs, I32:$Rt)),
        (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;

def: T_cmp32_rr_pat<C2_cmpeq,  seteq,  i1>;
def: T_cmp32_rr_pat<C2_cmpgt,  setgt,  i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;

// Less-than forms reuse the greater-than instructions through RevCmp.
def: T_cmp32_rr_pat<C2_cmpgt,  RevCmp<setlt>,  i1>;
def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
114
// Register/register select on a predicate.
def: Pat<(i32 (select I1:$Pu, I32:$Rs, I32:$Rt)),
         (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;

// Register/immediate add.
def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)),
         (i32 (A2_addi I32:$Rs, imm:$s16))>;

// Register/immediate or and and.
def: Pat<(or I32:$Rs, s32_0ImmPred:$s10),
         (A2_orir IntRegs:$Rs, imm:$s10)>;
def: Pat<(and I32:$Rs, s32_0ImmPred:$s10),
         (A2_andir IntRegs:$Rs, imm:$s10)>;

// Immediate minus register.
def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
         (A2_subri imm:$s10, IntRegs:$Rs)>;

// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
def: Pat<(not I32:$src1),
         (A2_subri -1, IntRegs:$src1)>;

// Materialize immediates accepted by the respective predicates.
def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
135
// Selects where one or both of the selected values are immediates.
def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)),
          (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;

def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)),
          (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;

def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)),
          (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;

// Shifts by exactly 16 and in-register sign extensions from i8/i16.
def: Pat<(shl I32:$src1, (i32 16)),   (A2_aslh I32:$src1)>;
def: Pat<(sra I32:$src1, (i32 16)),   (A2_asrh I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i8),  (A2_sxtb I32:$src1)>;
def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
148def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
149
// Compare of two values of vector type T held in 64-bit register pairs,
// producing an i1 result via MI.
class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
  : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
        (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;

def: T_vcmp_pat<A2_vcmpbeq,  seteq,  v8i8>;
def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
def: T_vcmp_pat<A2_vcmpheq,  seteq,  v4i16>;
def: T_vcmp_pat<A2_vcmphgt,  setgt,  v4i16>;
def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
def: T_vcmp_pat<A2_vcmpweq,  seteq,  v2i32>;
def: T_vcmp_pat<A2_vcmpwgt,  setgt,  v2i32>;
def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
162
// Add halfword.
def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
         (A2_addh_l16_ll I32:$src1, I32:$src2)>;

def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
         (A2_addh_l16_hl I32:$src1, I32:$src2)>;

def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
         (A2_addh_h16_ll I32:$src1, I32:$src2)>;

// Subtract halfword.
def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
         (A2_subh_l16_ll I32:$src1, I32:$src2)>;

def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
         (A2_subh_h16_ll I32:$src1, I32:$src2)>;
179
// Here, depending on the operand being selected, we'll either generate a
// min or max instruction.
// Ex:
// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected
// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'.
// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value
// is selected and the corresponding HexagonInst is passed in 'SwapInst'.

multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
                          InstHexagon Inst, InstHexagon SwapInst> {
  // select(Op(a, b), a, b) -> Inst
  def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
                   (VT RC:$src1), (VT RC:$src2)),
           (Inst RC:$src1, RC:$src2)>;
  // select(Op(a, b), b, a) -> SwapInst
  def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
                   (VT RC:$src2), (VT RC:$src1)),
           (SwapInst RC:$src1, RC:$src2)>;
}
197
// 32-bit register operand accepted only when the C++ helper
// isPositiveHalfWord(N) returns true.
def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), [{
  return isPositiveHalfWord(N);
}]>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000201
// 32-bit min/max selection. In addition to the generic T_MinMax_pats forms,
// this also matches a select wrapped in sext_inreg-from-i16 when both
// operands satisfy PositiveHalfWord, mapping it to the same instruction.
multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
  defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;

  def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
                                            (i32 PositiveHalfWord:$src2))),
                                    (i32 PositiveHalfWord:$src1),
                                    (i32 PositiveHalfWord:$src2))), i16),
           (Inst IntRegs:$src1, IntRegs:$src2)>;

  def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
                                            (i32 PositiveHalfWord:$src2))),
                                    (i32 PositiveHalfWord:$src2),
                                    (i32 PositiveHalfWord:$src1))), i16),
           (SwapInst IntRegs:$src1, IntRegs:$src2)>;
}

// Raised complexity so these are preferred over the separate
// compare + select patterns.
let AddedComplexity = 200 in {
  defm: MinMax_pats<setge,  A2_max,  A2_min>;
  defm: MinMax_pats<setgt,  A2_max,  A2_min>;
  defm: MinMax_pats<setle,  A2_min,  A2_max>;
  defm: MinMax_pats<setlt,  A2_min,  A2_max>;
  defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
  defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
  defm: MinMax_pats<setule, A2_minu, A2_maxu>;
  defm: MinMax_pats<setult, A2_minu, A2_maxu>;
}
228
// Compare of two 64-bit register pairs producing an i1 result.
class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
  : Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)),
        (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;

def: T_cmp64_rr_pat<C2_cmpeqp,  seteq>;
def: T_cmp64_rr_pat<C2_cmpgtp,  setgt>;
def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
// Less-than forms reuse the greater-than instructions through RevCmp.
def: T_cmp64_rr_pat<C2_cmpgtp,  RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
238
// 64-bit add and subtract.
def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;

// 64-bit bitwise logic.
def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (or  I64:$Rs, I64:$Rt)), (A2_orp  I64:$Rs, I64:$Rt)>;
def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;

// Logic on predicate (i1) values.
def: Pat<(i1 (not I1:$Ps)),
         (C2_not PredRegs:$Ps)>;

def: Pat<(i1 (and I1:$Ps, I1:$Pt)),       (C2_and  I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, I1:$Pt)),       (C2_or   I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (xor I1:$Ps, I1:$Pt)),       (C2_xor  I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
def: Pat<(i1 (or  I1:$Ps, (not I1:$Pt))), (C2_orn  I1:$Ps, I1:$Pt)>;
254
// Target-specific return nodes.
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
                     [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;

// Unconditional branch, return, and conditional branch.
def: Pat<(br bb:$dst),
         (J2_jump brtarget:$dst)>;
def: Pat<(retflag),
         (PS_jmpret (i32 R31))>;
def: Pat<(brcond I1:$src1, bb:$offset),
         (J2_jumpt PredRegs:$src1, bb:$offset)>;

// Exception-handling return and indirect branch.
def: Pat<(eh_return),
         (EH_RETURN_JMPR (i32 R31))>;
def: Pat<(brind I32:$dst),
         (J2_jumpr IntRegs:$dst)>;
270
// Patterns to select load-indexed (i.e. load from base+offset).
// Address forms covered, in order:
//  - frameindex,
//  - frameindex + offset (as 'add' or as 'orisadd'),
//  - register + offset,
//  - register only (offset 0).
multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
                     InstHexagon MI> {
  def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
  def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
           (VT (MI AddrFI:$fi, imm:$Off))>;
  def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
           (VT (MI AddrFI:$fi, imm:$Off))>;
  def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
           (VT (MI IntRegs:$Rs, imm:$Off))>;
  def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>;
}

let AddedComplexity = 20 in {
  defm: Loadx_pat<load,           i32, s30_2ImmPred, L2_loadri_io>;
  defm: Loadx_pat<load,           i64, s29_3ImmPred, L2_loadrd_io>;
  defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
  defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
  defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
  defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;

  defm: Loadx_pat<extloadi1,      i32, s32_0ImmPred, L2_loadrub_io>;
  defm: Loadx_pat<extloadi8,      i32, s32_0ImmPred, L2_loadrub_io>;
  defm: Loadx_pat<extloadi16,     i32, s31_1ImmPred, L2_loadruh_io>;
  defm: Loadx_pat<sextloadi8,     i32, s32_0ImmPred, L2_loadrb_io>;
  defm: Loadx_pat<sextloadi16,    i32, s31_1ImmPred, L2_loadrh_io>;
  defm: Loadx_pat<zextloadi1,     i32, s32_0ImmPred, L2_loadrub_io>;
  defm: Loadx_pat<zextloadi8,     i32, s32_0ImmPred, L2_loadrub_io>;
  defm: Loadx_pat<zextloadi16,    i32, s31_1ImmPred, L2_loadruh_io>;
  // No sextloadi1.
}
302
// Sign-extending loads of i1 need to replicate the lowest bit throughout
// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
// do the trick.
let AddedComplexity = 20 in
def: Pat<(i32 (sextloadi1 I32:$Rs)),
         (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
309
// 32-bit multiply: low word, and signed/unsigned high word.
def: Pat<(i32 (mul   I32:$src1, I32:$src2)), (M2_mpyi    I32:$src1, I32:$src2)>;
def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up  I32:$src1, I32:$src2)>;
def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;

// Multiply by immediate, and negated multiply by immediate.
def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8),
         (M2_mpysip IntRegs:$Rs, imm:$u8)>;
def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)),
         (M2_mpysin IntRegs:$Rs, imm:$u8)>;
def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2),
         (M2_mpysmi IntRegs:$src1, imm:$src2)>;
// Multiply-accumulate and add-accumulate; $src1 is the accumulator input.
def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
         (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
         (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
         (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
         (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
328
// Accumulating patterns of the shape secOp($src1, firstOp($src2, X)).
// pat1: X is an immediate accepted by ImmPred.
class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
                        PatLeaf ImmPred>
  : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
         (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;

// pat2: X is a register.
class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
  : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
         (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;

def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;

def: T_MType_acc_pat2 <M4_or_xor,  xor, or>;
def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
def: T_MType_acc_pat2 <M4_or_and,  and, or>;
def: T_MType_acc_pat2 <M4_and_and, and, and>;
def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
def: T_MType_acc_pat2 <M4_or_or,   or,  or>;
def: T_MType_acc_pat2 <M4_and_or,  or,  and>;
def: T_MType_acc_pat2 <M4_xor_or,  or,  xor>;

// pat3: X is the bitwise complement of a register.
class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
  : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
                                              (not IntRegs:$src3)))),
         (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;

def: T_MType_acc_pat3 <M4_or_andn,  and, or>;
def: T_MType_acc_pat3 <M4_and_andn, and, and>;
def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
361
// Any-, sign- and zero-extension of the operand to i64.
def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>;
def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
365
// Return true for a 32 to 64-bit sign-extended load, i.e. when the node is
// a load with extension type SEXTLOAD whose scalar memory type is i32.
def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::SEXTLOAD &&
         LD->getMemoryVT().getScalarType() == MVT::i32;
}]>;
374
// 32x32 -> 64-bit multiplies of extended operands.
def: Pat<(i64 (mul (Aext64 I32:$src1), (Aext64 I32:$src2))),
         (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;

def: Pat<(i64 (mul (Sext64 I32:$src1), (Sext64 I32:$src2))),
         (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;

// Both operands are sign-extending 32-bit loads: multiply their low words.
def: Pat<(i64 (mul Sext64Ld:$src1, Sext64Ld:$src2)),
         (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;

// Multiply and accumulate, use full result.
// Rxx[+-]=mpy(Rs,Rt)

def: Pat<(i64 (add I64:$src1,
                   (mul (Sext64 I32:$src2),
                        (Sext64 I32:$src3)))),
         (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def: Pat<(i64 (sub I64:$src1,
                   (mul (Sext64 I32:$src2),
                        (Sext64 I32:$src3)))),
         (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def: Pat<(i64 (add I64:$src1,
                   (mul (Aext64 I32:$src2),
                        (Aext64 I32:$src3)))),
         (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def: Pat<(i64 (add I64:$src1,
                   (mul (Zext64 I32:$src2),
                        (Zext64 I32:$src3)))),
         (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def: Pat<(i64 (sub I64:$src1,
                   (mul (Aext64 I32:$src2),
                        (Aext64 I32:$src3)))),
         (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;

def: Pat<(i64 (sub I64:$src1,
                   (mul (Zext64 I32:$src2),
                        (Zext64 I32:$src3)))),
         (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
416
// Post-increment stores. The DAG operands are (value, base, offset); the
// instruction operands are (base, offset, value).
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
                  InstHexagon MI>
  : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
        (MI I32:$src2, imm:$offset, Value:$src1)>;

def: Storepi_pat<post_truncsti8,  I32, s4_0ImmPred, S2_storerb_pi>;
def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
def: Storepi_pat<post_store,      I32, s4_2ImmPred, S2_storeri_pi>;
def: Storepi_pat<post_store,      I64, s4_3ImmPred, S2_storerd_pi>;
426
// Patterns for generating stores, where the address takes different forms:
// - frameindex,
// - frameindex + offset,
// - base + offset,
// - simple (base address without offset).
// These would usually be used together (via Storex_pat defined below), but
// in some cases one may want to apply different properties (such as
// AddedComplexity) to the individual patterns.

// Address is a frameindex; the offset operand is 0.
class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
// Address is frameindex + offset, expressed as 'add' or as 'orisadd'.
multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
                             InstHexagon MI> {
  def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
  def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
}
// Address is register + offset, expressed as 'add' or as 'orisadd'.
multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
                          InstHexagon MI> {
  def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
           (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
  def: Pat<(Store Value:$Rt, (orisadd I32:$Rs, ImmPred:$Off)),
           (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
}
// Address is a plain register; the offset operand is 0.
class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Rt, I32:$Rs),
        (MI IntRegs:$Rs, 0, Value:$Rt)>;
454
// Patterns for generating stores, where the address takes different forms,
// and where the value being stored is transformed through the value modifier
// ValueMod. The address forms are same as above.

// Address is a frameindex; the offset operand is 0.
class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
                     InstHexagon MI>
  : Pat<(Store Value:$Rs, AddrFI:$fi),
        (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
// Address is frameindex + offset, expressed as 'add' or as 'orisadd'.
multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
                              PatFrag ValueMod, InstHexagon MI> {
  def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
  def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
           (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
}
// Address is register + offset, expressed as 'add' or as 'orisadd'.
multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
                           PatFrag ValueMod, InstHexagon MI> {
  def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
           (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
  def: Pat<(Store Value:$Rt, (orisadd I32:$Rs, ImmPred:$Off)),
           (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
}
// Address is a plain register; the offset operand is 0.
class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
                         InstHexagon MI>
  : Pat<(Store Value:$Rt, I32:$Rs),
        (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
480
// Bundle of the frameindex, frameindex+offset and base+offset store
// patterns. The simple (base only) form is not included here.
multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
                      InstHexagon MI> {
  def:  Storex_fi_pat     <Store, Value, MI>;
  defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
  defm: Storex_add_pat    <Store, Value, ImmPred, MI>;
}

// Same bundle, with the stored value passed through ValueMod.
multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
                       PatFrag ValueMod, InstHexagon MI> {
  def:  Storexm_fi_pat     <Store, Value, ValueMod, MI>;
  defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
  defm: Storexm_add_pat    <Store, Value, ImmPred, ValueMod, MI>;
}
494
// Regular stores in the DAG have two operands: value and address.
// Atomic stores also have two, but they are reversed: address, value.
// To use atomic stores with the patterns, they need to have their operands
// swapped. This relies on the knowledge that the F.Fragment uses names
// "ptr" and "val".
// The fragment, predicate code and operand transform of F are reused as-is;
// only the declared operand order changes to (val, ptr).
class SwapSt<PatFrag F>
  : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
            F.OperandTransform>;
503
// Stores with a frameindex or base+offset address.
let AddedComplexity = 20 in {
  defm: Storex_pat<truncstorei8,    I32, s32_0ImmPred, S2_storerb_io>;
  defm: Storex_pat<truncstorei16,   I32, s31_1ImmPred, S2_storerh_io>;
  defm: Storex_pat<store,           I32, s30_2ImmPred, S2_storeri_io>;
  defm: Storex_pat<store,           I64, s29_3ImmPred, S2_storerd_io>;

  defm: Storex_pat<SwapSt<atomic_store_8>,  I32, s32_0ImmPred, S2_storerb_io>;
  defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
  defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
  defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
}

// Simple patterns should be tried with the least priority.
def: Storex_simple_pat<truncstorei8,    I32, S2_storerb_io>;
def: Storex_simple_pat<truncstorei16,   I32, S2_storerh_io>;
def: Storex_simple_pat<store,           I32, S2_storeri_io>;
def: Storex_simple_pat<store,           I64, S2_storerd_io>;

def: Storex_simple_pat<SwapSt<atomic_store_8>,  I32, S2_storerb_io>;
def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;

// Truncating stores of a 64-bit value: store from its low subregister.
let AddedComplexity = 20 in {
  defm: Storexm_pat<truncstorei8,  I64, s32_0ImmPred, LoReg, S2_storerb_io>;
  defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
  defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
}

def: Storexm_simple_pat<truncstorei8,  I64, LoReg, S2_storerb_io>;
def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
536
// Sign-extend a 32-bit register to 64 bits.
def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>;

// Absolute value written as: (x < 0) ? (0 - x) : x.
def: Pat<(i32 (select (i1 (setlt I32:$src, 0)),
                      (i32 (sub 0, I32:$src)),
                      I32:$src)),
         (A2_abs IntRegs:$src)>;

// Absolute value written as: ((x >> 31) + x) ^ (x >> 31).
let AddedComplexity = 50 in
def: Pat<(i32 (xor (add (sra I32:$src, (i32 31)),
                        I32:$src),
                   (sra I32:$src, (i32 31)))),
         (A2_abs IntRegs:$src)>;
549
// 32-bit shifts by an immediate accepted by u5_0ImmPred.
def: Pat<(sra I32:$src, u5_0ImmPred:$u5),
         (S2_asr_i_r IntRegs:$src, imm:$u5)>;
def: Pat<(srl I32:$src, u5_0ImmPred:$u5),
         (S2_lsr_i_r IntRegs:$src, imm:$u5)>;
def: Pat<(shl I32:$src, u5_0ImmPred:$u5),
         (S2_asl_i_r IntRegs:$src, imm:$u5)>;

// ((x >> n) + 1) >> 1, with arithmetic shifts.
def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)),
                             (i32 1))),
                   (i32 1))),
         (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;

// 64-bit bitwise not.
def : Pat<(not I64:$src1),
          (A2_notp DoubleRegs:$src1)>;
564
// Count leading zeros.
def: Pat<(i32 (ctlz I32:$Rs)),                (S2_cl0 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz I64:$Rss))),       (S2_cl0p I64:$Rss)>;

// Count trailing zeros: 32-bit.
def: Pat<(i32 (cttz I32:$Rs)),                (S2_ct0 I32:$Rs)>;

// Count leading ones (leading zeros of the complement).
def: Pat<(i32 (ctlz (not I32:$Rs))),          (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;

// Count trailing ones: 32-bit (trailing zeros of the complement).
def: Pat<(i32 (cttz (not I32:$Rs))),          (S2_ct1 I32:$Rs)>;
578
// Clear, set and toggle a single bit; the bit index is an immediate.
def: Pat<(i32 (and I32:$Rs, (not (shl 1, u5_0ImmPred:$u5)))),
         (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
def: Pat<(i32 (or I32:$Rs, (shl 1, u5_0ImmPred:$u5))),
         (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
def: Pat<(i32 (xor I32:$Rs, (shl 1, u5_0ImmPred:$u5))),
         (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
// Same operations with the bit index in a register.
def: Pat<(i32 (and I32:$Rs, (not (shl 1, I32:$Rt)))),
         (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(i32 (or I32:$Rs, (shl 1, I32:$Rt))),
         (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(i32 (xor I32:$Rs, (shl 1, I32:$Rt))),
         (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
591
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
  // Test a single bit: ((1 << n) & Rs) != 0.
  def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
           (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
           (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
  // Truncation to i1 is selected as a test of bit 0.
  def: Pat<(i1 (trunc I32:$Rs)),
           (S2_tstbit_i IntRegs:$Rs, 0)>;
  def: Pat<(i1 (trunc I64:$Rs)),
           (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}

let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
  // (Rs & mask) == 0.
  def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
           (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)),
           (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
}

// (Rs & Rt) == Rt.
let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
         (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
613
// Four single-byte loads from $b+0..$b+3 assembled into a word with the
// byte at $b+0 in the low position and the byte at $b+3 shifted up by 24:
// selected as one word load followed by A2_swiz.
def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
                               (i32 8)),
                          (i32 (zextloadi8 (add I32:$b, 2)))),
                      (i32 16)),
                 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
             (zextloadi8 I32:$b)),
         (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
621
// Patterns for loads of i1:
// load an unsigned byte, then transfer it to a predicate register.
def: Pat<(i1 (load AddrFI:$fi)),
         (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))),
         (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
def: Pat<(i1 (load I32:$Rs)),
         (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;

// Convert a predicate to a 32-bit value: 1 if set, 0 otherwise.
def I1toI32: OutPatFrag<(ops node:$Rs),
                        (C2_muxii (i1 $Rs), 1, 0)>;

// Convert a 32-bit value to a predicate via C2_tfrrp.
def I32toI1: OutPatFrag<(ops node:$Rs),
                        (i1 (C2_tfrrp (i32 $Rs)))>;

// Stores of i1: convert to a 32-bit 0/1 value and store it as a byte.
defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
638
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000639def: Pat<(sra I64:$src, u6_0ImmPred:$u6),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000640 (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000641def: Pat<(srl I64:$src, u6_0ImmPred:$u6),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000642 (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000643def: Pat<(shl I64:$src, u6_0ImmPred:$u6),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000644 (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
645
646let AddedComplexity = 100 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000647def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000648 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
649
650def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
651def: Pat<(HexagonBARRIER), (Y2_barrier)>;
652
653def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
654 (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
655
656
657// Support for generating global address.
658// Taken from X86InstrInfo.td.
659def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
660 SDTCisVT<1, i32>,
661 SDTCisPtrTy<0>]>;
662def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
663def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
664
665// Map TLS addresses to A2_tfrsi.
666def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
667def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
668
669def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
670def: Pat<(i1 0), (PS_false)>;
671def: Pat<(i1 1), (PS_true)>;
672
673// Pseudo instructions.
674def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
675def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
676 SDTCisVT<1, i32> ]>;
677
678def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
679 [SDNPHasChain, SDNPOutGlue]>;
680def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
681 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
682
683def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
684
685// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
686// Optional Flag and Variable Arguments.
687// Its 1 Operand has pointer type.
688def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
689 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
690
691
692def: Pat<(callseq_start timm:$amt),
693 (ADJCALLSTACKDOWN imm:$amt)>;
694def: Pat<(callseq_end timm:$amt1, timm:$amt2),
695 (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
696
697//Tail calls.
698def: Pat<(HexagonTCRet tglobaladdr:$dst),
699 (PS_tailcall_i tglobaladdr:$dst)>;
700def: Pat<(HexagonTCRet texternalsym:$dst),
701 (PS_tailcall_i texternalsym:$dst)>;
702def: Pat<(HexagonTCRet I32:$dst),
703 (PS_tailcall_r I32:$dst)>;
704
705// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000706def: Pat<(and I32:$src1, 65535),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000707 (A2_zxth IntRegs:$src1)>;
708
709// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000710def: Pat<(and I32:$src1, 255),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000711 (A2_zxtb IntRegs:$src1)>;
712
713// Map Add(p1, true) to p1 = not(p1).
714// Add(p1, false) should never be produced;
715// if it is, it has to be mapped to a no-op.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000716def: Pat<(add I1:$src1, -1),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000717 (C2_not PredRegs:$src1)>;
718
719// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000720def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000721 (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
722
723// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
724// => r0 = C2_muxir(p0, r1, #i)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000725def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2,
726 I32:$src3),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000727 (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
728
729// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
730// => r0 = C2_muxri (p0, #i, r1)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000731def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000732 (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
733
734// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000735def: Pat<(brcond (not I1:$src1), bb:$offset),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000736 (J2_jumpf PredRegs:$src1, bb:$offset)>;
737
738// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000739def: Pat<(i64 (sext_inreg I64:$src1, i32)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000740 (A2_sxtw (LoReg DoubleRegs:$src1))>;
741
742// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000743def: Pat<(i64 (sext_inreg I64:$src1, i16)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000744 (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
745
746// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000747def: Pat<(i64 (sext_inreg I64:$src1, i8)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000748 (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
749
750// We want to prevent emitting pnot's as much as possible.
751// Map brcond with an unsupported setcc to a J2_jumpf.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000752def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000753 bb:$offset),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000754 (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000755 bb:$offset)>;
756
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000757def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000758 bb:$offset),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000759 (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000760
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000761def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000762 (J2_jumpf PredRegs:$src1, bb:$offset)>;
763
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000764def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000765 (J2_jumpt PredRegs:$src1, bb:$offset)>;
766
767// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000768def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000769 (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)),
770 bb:$offset)>;
771
772// Map from a 64-bit select to an emulated 64-bit mux.
773// Hexagon does not support 64-bit MUXes; so emulate with combines.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000774def: Pat<(select I1:$src1, I64:$src2,
775 I64:$src3),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000776 (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
777 (HiReg DoubleRegs:$src3)),
778 (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
779 (LoReg DoubleRegs:$src3)))>;
780
781// Map from a 1-bit select to logical ops.
782// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000783def: Pat<(select I1:$src1, I1:$src2, I1:$src3),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000784 (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
785 (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
786
787// Map for truncating 64-bit register values to 32-bit values.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000788def: Pat<(i32 (trunc I64:$src)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000789 (LoReg DoubleRegs:$src)>;
790
791// Map for truncating 64-bit register values to i1 (predicate) values.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000792def: Pat<(i1 (trunc I64:$src)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000793 (C2_tfrrp (LoReg DoubleRegs:$src))>;
794
795// rs <= rt -> !(rs > rt).
796let AddedComplexity = 30 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000797def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000798 (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
799
800// rs <= rt -> !(rs > rt).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000801def : Pat<(i1 (setle I32:$src1, I32:$src2)),
802 (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000803
804// Rss <= Rtt -> !(Rss > Rtt).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000805def: Pat<(i1 (setle I64:$src1, I64:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000806 (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
807
808// Map cmpne -> cmpeq.
809// Hexagon_TODO: We should improve on this.
810// rs != rt -> !(rs == rt).
811let AddedComplexity = 30 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000812def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000813 (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
814
815// Convert setne back to xor for hexagon since we compute w/ pred registers.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000816def: Pat<(i1 (setne I1:$src1, I1:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000817 (C2_xor PredRegs:$src1, PredRegs:$src2)>;
818
819// Map cmpne(Rss, Rtt) -> !cmpeq(Rss, Rtt).
820// rs != rt -> !(rs == rt).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000821def: Pat<(i1 (setne I64:$src1, I64:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000822 (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
823
824// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
825// rs >= rt -> !(rt > rs).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000826def : Pat <(i1 (setge I32:$src1, I32:$src2)),
827 (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000828
829// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
830let AddedComplexity = 30 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000831def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000832 (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
833
834// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
835// rss >= rtt -> !(rtt > rss).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000836def: Pat<(i1 (setge I64:$src1, I64:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000837 (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
838
839// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
840// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
841// rs < rt -> !(rs >= rt).
842let AddedComplexity = 30 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000843def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000844 (C2_not (C2_cmpgti IntRegs:$src1,
845 (DEC_CONST_SIGNED s32_0ImmPred:$src2)))>;
846
847// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000848def: Pat<(i1 (setuge I32:$src1, 0)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000849 (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
850
851// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000852def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000853 (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32_0ImmPred:$src2))>;
854
855// Generate cmpgtu(Rs, #u9)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000856def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000857 (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
858
859// Map from Rs >= Rt -> !(Rt > Rs).
860// rs >= rt -> !(rt > rs).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000861def: Pat<(i1 (setuge I64:$src1, I64:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000862 (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
863
864// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
865// Map from (Rs <= Rt) -> !(Rs > Rt).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000866def: Pat<(i1 (setule I64:$src1, I64:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000867 (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
868
869// Sign extends.
870// i1 -> i32
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000871def: Pat<(i32 (sext I1:$src1)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000872 (C2_muxii PredRegs:$src1, -1, 0)>;
873
// i1 -> i64
// Materialize the predicate as a 32-bit 0 / -1 with C2_muxii and then
// sign-extend that word to 64 bits, so that both halves of the result follow
// the predicate (false -> 0, true -> -1). The high word must not be a
// constant -1: that would turn a false predicate into 0xFFFFFFFF00000000.
def: Pat<(i64 (sext I1:$src1)),
         (A2_sxtw (C2_muxii PredRegs:$src1, -1, 0))>;
877
878// Zero extends.
879// i1 -> i32
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000880def: Pat<(i32 (zext I1:$src1)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000881 (C2_muxii PredRegs:$src1, 1, 0)>;
882
883// Map from Rs = Pd to Rs = mux(Pd, #1, #0)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000884def: Pat<(i32 (anyext I1:$src1)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000885 (C2_muxii PredRegs:$src1, 1, 0)>;
886
887// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000888def: Pat<(i64 (anyext I1:$src1)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000889 (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
890
891// Clear the sign bit in a 64-bit register.
892def ClearSign : OutPatFrag<(ops node:$Rss),
893 (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>;
894
// MulHU: output fragment built from 32x32 partial products of the two
// 64-bit operands $Rss and $Rtt (split into words with LoReg/HiReg).
// Reading the expression from the innermost term outwards:
//   1. Lo(Rss)*Lo(Rtt), shifted right by 32;
//   2. accumulated with Hi(Rss)*Lo(Rtt) (M2_dpmpyuu_acc_s0);
//   3. plus the low word of Lo(Rss)*Hi(Rtt), paired with a zero high word
//      by A2_combinew; this sum is then shifted right by 32;
//   4. accumulated with Hi(Rss)*Hi(Rtt) (M2_dpmpyuu_acc_s0);
//   5. plus Lo(Rss)*Hi(Rtt) shifted right by 32.
// The fragment is used as the result of the 64-bit mulhu pattern, i.e. it
// is expected to yield the upper 64 bits of the unsigned 128-bit product.
// NOTE(review): assumes M2_dpmpyuu_s0 / M2_dpmpyuu_acc_s0 are unsigned
// 32x32->64 multiply / multiply-accumulate - confirm against the
// instruction definitions.
895def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
896 (A2_addp
897 (M2_dpmpyuu_acc_s0
898 (S2_lsr_i_p
899 (A2_addp
900 (M2_dpmpyuu_acc_s0
901 (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
902 (HiReg $Rss),
903 (LoReg $Rtt)),
904 (A2_combinew (A2_tfrsi 0),
905 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
906 32),
907 (HiReg $Rss),
908 (HiReg $Rtt)),
909 (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
910
911// Multiply 64-bit unsigned and use upper result.
912def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
913
914// Multiply 64-bit signed and use upper result.
915//
916// For two signed 64-bit integers A and B, let A' and B' denote A and B
917// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
918// sign bit of A (and identically for B). With this notation, the signed
919// product A*B can be written as:
920// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
921// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
922// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
923// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
924
925def : Pat <(mulhs I64:$Rss, I64:$Rtt),
926 (A2_subp
927 (MulHU $Rss, $Rtt),
928 (A2_addp
929 (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
930 (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
931
932// Hexagon specific ISD nodes.
933def SDTHexagonALLOCA : SDTypeProfile<1, 2,
934 [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
935def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
936 [SDNPHasChain]>;
937
938
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000939def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000940 (PS_alloca IntRegs:$Rs, imm:$A)>;
941
942def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
943def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
944
945def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>;
946def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>;
947
// Arithmetic shift right by immediate, fused with add/sub/and/or.
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave the nac/and/or patterns at the
// default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms (shift amount is a 5-bit unsigned immediate).
  def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms (shift amount is a 6-bit unsigned immediate).
  def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000959
// Logical shift right by immediate, fused with add/sub/and/or/xor.
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave the nac/and/or patterns at the
// default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms (shift amount is a 5-bit unsigned immediate).
  def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms (shift amount is a 6-bit unsigned immediate).
  def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000975
// Shift left by immediate, fused with add/sub/and/or/xor.
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave the nac/and/or patterns at the
// default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms (shift amount is a 5-bit unsigned immediate).
  def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms (shift amount is a 6-bit unsigned immediate).
  def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
  def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u6)),
           (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u6)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000991
// Shift left by register (asl), fused with add/sub/and/or (and xor for the
// 64-bit form).
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave every pattern but the first of
// each group at the default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount stays a 32-bit register).
  def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001003
// Arithmetic shift right by register, fused with add/sub/and/or (and xor
// for the 64-bit form).
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave every pattern but the first of
// each group at the default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount stays a 32-bit register).
  def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001015
// Logical shift right by register, fused with add/sub/and/or (and xor for
// the 64-bit form).
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave every pattern but the first of
// each group at the default complexity instead of the intended 100.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount stays a 32-bit register).
  def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001027
// Shift left by register (lsl), fused with add/sub/and/or (and xor for the
// 64-bit form).
// The braces matter: a brace-less `let ... in` applies only to the single
// def that follows it, which would leave every pattern but the first of
// each group at the default complexity instead of the intended 100.
// NOTE(review): every source pattern here is identical to the one of the
// corresponding S2_asl_r_* accumulate pattern defined earlier in this file;
// presumably the earlier definition is the one that gets selected - confirm
// whether these lsl variants are reachable.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount stays a 32-bit register).
  def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001039
// Plain 64-bit shifts by a 32-bit register amount: sra -> S2_asr_r_p,
// srl -> S2_lsr_r_p, shl -> S2_asl_r_p / S2_lsl_r_p.
// NOTE(review): the two shl patterns have an identical source pattern, so
// presumably only one of them (likely the first) is ever selected - confirm
// whether the S2_lsl_r_p mapping is intentional.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001040def: Pat<(sra I64:$src1, I32:$src2), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
1041def: Pat<(srl I64:$src1, I32:$src2), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
1042def: Pat<(shl I64:$src1, I32:$src2), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
1043def: Pat<(shl I64:$src1, I32:$src2), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001044
// Plain 32-bit shifts by a register amount: sra -> S2_asr_r_r,
// srl -> S2_lsr_r_r, shl -> S2_asl_r_r / S2_lsl_r_r.
// NOTE(review): the two shl patterns have an identical source pattern, so
// presumably only one of them (likely the first) is ever selected - confirm
// whether the S2_lsl_r_r mapping is intentional.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001045def: Pat<(sra I32:$src1, I32:$src2), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>;
1046def: Pat<(srl I32:$src1, I32:$src2), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>;
1047def: Pat<(shl I32:$src1, I32:$src2), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>;
1048def: Pat<(shl I32:$src1, I32:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001049
1050def SDTHexagonINSERT:
1051 SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1052 SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
1053def SDTHexagonINSERTRP:
1054 SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1055 SDTCisInt<0>, SDTCisVT<3, i64>]>;
1056
1057def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
1058def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
1059
1060def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
1061 (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
1062def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
1063 (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;
1064def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
1065 (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
1066def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
1067 (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
1068
1069let AddedComplexity = 100 in
1070def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
1071 (i32 (extloadi8 (add I32:$b, 3))),
1072 24, 8),
1073 (i32 16)),
1074 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
1075 (zextloadi8 I32:$b)),
1076 (A2_swiz (L2_loadri_io I32:$b, 0))>;
1077
1078def SDTHexagonEXTRACTU:
1079 SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
1080 SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
1081def SDTHexagonEXTRACTURP:
1082 SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
1083 SDTCisVT<2, i64>]>;
1084
1085def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
1086def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
1087
1088def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
1089 (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>;
1090def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
1091 (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>;
1092def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
1093 (S2_extractu_rp I32:$src1, I64:$src2)>;
1094def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
1095 (S2_extractup_rp I64:$src1, I64:$src2)>;
1096
1097// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001098def: Pat<(mul I32:$src1, (ineg n8_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001099 (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>;
1100
1101multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
1102 defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
1103}
1104
Krzysztof Parzyszek84755102016-11-06 17:56:48 +00001105def: Pat<(add (Sext64 I32:$Rs), I64:$Rt),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001106 (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>;
1107
1108let AddedComplexity = 200 in {
1109 defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
1110 defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
1111 defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
1112 defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
1113 defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
1114 defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
1115 defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
1116 defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
1117}
1118
1119def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall,
1120 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
1121
1122def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
1123 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
1124
1125
1126// Map call instruction
1127def : Pat<(callv3 I32:$dst),
1128 (J2_callr I32:$dst)>;
1129def : Pat<(callv3 tglobaladdr:$dst),
1130 (J2_call tglobaladdr:$dst)>;
1131def : Pat<(callv3 texternalsym:$dst),
1132 (J2_call texternalsym:$dst)>;
1133def : Pat<(callv3 tglobaltlsaddr:$dst),
1134 (J2_call tglobaltlsaddr:$dst)>;
1135
1136def : Pat<(callv3nr I32:$dst),
1137 (PS_callr_nr I32:$dst)>;
1138def : Pat<(callv3nr tglobaladdr:$dst),
1139 (PS_call_nr tglobaladdr:$dst)>;
1140def : Pat<(callv3nr texternalsym:$dst),
1141 (PS_call_nr texternalsym:$dst)>;
1142
1143
1144def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
1145def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
1146
def BITPOS32 : SDNodeXForm<imm, [{
  // Hand the sign-extended 32-bit immediate to XformMskToBitPosU5Imm, which
  // yields the bit position to set [0-31] as an SDNode.
  const int32_t Mask = N->getSExtValue();
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;
1153
1154
1155// Pats for instruction selection.
1156
// Wraps one of the usual comparison patfrags in a zext to i32, so that the
// i1 comparison result is matched as a 32-bit value.
// The seteq/setne frags use "lhs" and "rhs" as their operand names; the same
// names must be used here, or else the embedded frag's body (Op.Fragment)
// won't match the operands.
class CmpInReg<PatFrag Op>
  : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
1162
// Equality comparisons whose result is zero-extended into an i32 value.
def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;

// Comparisons producing an i1 result.
def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
def: T_cmp32_rr_pat<C4_cmplte, setle, i1>;
def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;

// setge/setuge go through RevCmp and reuse the less-than-or-equal
// instructions (RevCmp is defined elsewhere; presumably it swaps operands).
def: T_cmp32_rr_pat<C4_cmplte, RevCmp<setge>, i1>;
def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
1172
// Byte and halfword equality: (Rs ^ Rt) masked with 255 (or 65535) is zero
// exactly when the low byte (or halfword) of the two registers agree. The
// "not equal" forms negate the predicate produced by the "equal" compare.
let AddedComplexity = 100 in {
  def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt),
                       255), 0)),
           (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt),
                       255), 0)),
           (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
  def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt),
                           65535), 0)),
           (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt),
                           65535), 0)),
           (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
}
1187
// Register-vs-immediate (in)equality whose i1 result is zero-extended to
// i32: selected as a single A4_rcmpeqi / A4_rcmpneqi.
def: Pat<(i32 (zext (i1 (seteq I32:$Rs, s32_0ImmPred:$s32)))),
         (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s32)>;
def: Pat<(i32 (zext (i1 (setne I32:$Rs, s32_0ImmPred:$s32)))),
         (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s32)>;
1192
// Preserve the S2_tstbit_r generation: zext(((1 << Rt) & Rs) != 0) becomes a
// bit test whose predicate result picks between the constants 1 and 0.
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, I32:$Rt)),
                                         I32:$Rs)), 0)))),
         (C2_muxii (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt), 1, 0)>;
1197
// The complexity of the combines involving immediates should be greater
// than the complexity of the combine with two registers.
let AddedComplexity = 50 in {
  // combine(register, immediate)
  def: Pat<(HexagonCOMBINE IntRegs:$Rs, s32_0ImmPred:$s32),
           (A4_combineri IntRegs:$Rs, s32_0ImmPred:$s32)>;

  // combine(immediate, register)
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$s32, IntRegs:$Rs),
           (A4_combineir s32_0ImmPred:$s32, IntRegs:$Rs)>;
}
1207
// The complexity of the combine with two immediates should be greater than
// the complexity of a combine involving a register.
let AddedComplexity = 75 in {
  // First immediate fits s8_0ImmPred, second is matched by u32_0ImmPred.
  def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u32),
           (A4_combineii imm:$s8, imm:$u32)>;
  // First immediate is matched by s32_0ImmPred, second fits s8_0ImmPred.
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$s32, s8_0ImmPred:$s8),
           (A2_combineii imm:$s32, imm:$s8)>;
}
1216
1217
// Output fragment widening an i32 value to i64 by combining it with the
// constant 0 (A4_combineir 0, Rs).
def ToZext64: OutPatFrag<(ops node:$Rs),
  (i64 (A4_combineir 0, (i32 $Rs)))>;
// Output fragment widening an i32 value to i64 through A2_sxtw.
def ToSext64: OutPatFrag<(ops node:$Rs),
  (i64 (A2_sxtw (i32 $Rs)))>;
1222
// Patterns to generate indexed loads with different forms of the address:
// - frameindex,
// - frameindex + offset,
// - base + offset,
// - base (without offset).
// The value produced by MI is passed through ValueMod before being typed as
// VT. Address forms without an offset use an immediate of 0; ImmPred decides
// which offsets are accepted.
multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
                      PatLeaf ImmPred, InstHexagon MI> {
  def: Pat<(VT (Load AddrFI:$fi)),
           (VT (ValueMod (MI AddrFI:$fi, 0)))>;
  def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
           (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
  def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
           (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
  def: Pat<(VT (Load I32:$Rs)),
           (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
}
1238
// Extending loads that produce an i64: the load instruction's result is
// widened with ToZext64 for any-/zero-extending loads and with ToSext64 for
// sign-extending loads. The i1 and i8 variants share the byte loads.
defm: Loadxm_pat<extloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
defm: Loadxm_pat<zextloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
defm: Loadxm_pat<sextloadi8, i64, ToSext64, s32_0ImmPred, L2_loadrb_io>;
defm: Loadxm_pat<sextloadi16, i64, ToSext64, s31_1ImmPred, L2_loadrh_io>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001247
// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs), i.e. reuse the
// zero-extension output fragment for any-extension.
def: Pat<(Aext64 I32:$Rs),
         (ToZext64 IntRegs:$Rs)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001250
// Loads whose address is a (possibly shifted) register added to a symbolic
// constant wrapped in HexagonCONST32. For each symbol kind there are two
// patterns: one with the register shifted left by a u2 immediate, and one
// with the plain register, which passes a shift amount of 0 to MI.
multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
  // Global address.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tglobaladdr:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tglobaladdr:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tglobaladdr:$Addr)))),
           (MI IntRegs:$Rt, 0, tglobaladdr:$Addr)>;

  // Constant pool entry.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tconstpool:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tconstpool:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tconstpool:$Addr)))),
           (MI IntRegs:$Rt, 0, tconstpool:$Addr)>;

  // Jump table.
  def: Pat<(VT (ldOp (add (shl IntRegs:$Rt, u2_0ImmPred:$u2),
                          (HexagonCONST32 tjumptable:$Addr)))),
           (MI IntRegs:$Rt, u2_0ImmPred:$u2, tjumptable:$Addr)>;
  def: Pat<(VT (ldOp (add IntRegs:$Rt,
                          (HexagonCONST32 tjumptable:$Addr)))),
           (MI IntRegs:$Rt, 0, tjumptable:$Addr)>;
}
1273
// Instantiate the register + symbol load patterns for byte, halfword, word
// and doubleword loads. Any-extending loads use the same instruction as the
// zero-extending ones; only the doubleword load overrides VT (i64).
let AddedComplexity = 60 in {
defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;

defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;

defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
}
1286
// 'def pats' for load instructions with base + register offset and non-zero
// immediate value. Immediate value is used to left-shift the second
// register operand, i.e. the matched address is Rs + (Rt << #u2), and the
// shift amount is passed to MI as its third operand.
class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add I32:$Rs,
                       (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
        (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
1294
// Base + shifted-register loads for each load kind. Any-extending loads
// are mapped to the same instruction as the zero-extending ones.
let AddedComplexity = 40 in {
  def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
  def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
  def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
  def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
  def: Loadxs_pat<load, i32, L4_loadri_rr>;
  def: Loadxs_pat<load, i64, L4_loadrd_rr>;
}
1305
// 'def pats' for load instruction base + register offset and
// zero immediate value: the matched address is Rs + Rt, and MI receives a
// shift amount of 0.
class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
        (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
1311
// Base + register loads (no shift) for each load kind. The added complexity
// here (20) is lower than that of the shifted-register forms (40).
let AddedComplexity = 20 in {
  def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
  def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
  def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
  def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
}
1322
// zext i1->i64: turn the predicate into the 32-bit constant 1 or 0 with
// C2_muxii, then widen that value with ToZext64.
def: Pat<(i64 (zext I1:$Pu)),
         (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>;

// zext i32->i64
def: Pat<(Zext64 I32:$Rs),
         (ToZext64 IntRegs:$Rs)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001330
// Stores of a VT value held in RC to an address formed from a (possibly
// shifted) register plus either a plain u32 immediate or a global address
// wrapped in HexagonCONST32. The unshifted form passes a shift amount of 0.
let AddedComplexity = 40 in
multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
                               PatFrag stOp> {
  // (Ru << #u2) + #u32
  def: Pat<(stOp (VT RC:$Rv),
                 (add (shl I32:$Ru, u2_0ImmPred:$u2),
                      u32_0ImmPred:$Addr)),
           (MI IntRegs:$Ru, u2_0ImmPred:$u2, u32_0ImmPred:$Addr, RC:$Rv)>;

  // (Ru << #u2) + global address
  def: Pat<(stOp (VT RC:$Rv),
                 (add (shl IntRegs:$Ru, u2_0ImmPred:$u2),
                      (HexagonCONST32 tglobaladdr:$Addr))),
           (MI IntRegs:$Ru, u2_0ImmPred:$u2, tglobaladdr:$Addr, RC:$Rv)>;

  // Ru + global address
  def: Pat<(stOp (VT RC:$Rv),
                 (add IntRegs:$Ru, (HexagonCONST32 tglobaladdr:$Addr))),
           (MI IntRegs:$Ru, 0, tglobaladdr:$Addr, RC:$Rv)>;
}
1348
// Doubleword, word, byte and halfword stores; the byte and halfword forms
// are truncating stores of an i32 value.
defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
1353
// Store of Value to the address Rs + (Rt << #u2); the shift amount is
// passed to MI as an immediate, followed by the stored value.
class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Ru, (add I32:$Rs,
                               (i32 (shl I32:$Rt, u2_0ImmPred:$u2)))),
        (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
1358
// Base + shifted-register stores for byte, halfword, word and doubleword.
let AddedComplexity = 40 in {
  def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
  def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Storexs_pat<store, I32, S4_storeri_rr>;
  def: Storexs_pat<store, I64, S4_storerd_rr>;
}
1365
def s30_2ProperPred : PatLeaf<(i32 imm), [{
  // Accept an immediate that passes isShiftedInt<30,2> (a shifted-by-2
  // value) but does not also pass isShiftedInt<29,3> (a shifted-by-3 value).
  const int64_t Off = (int64_t)N->getSExtValue();
  if (!isShiftedInt<30,2>(Off))
    return false;
  return !isShiftedInt<29,3>(Off);
}]>;
def RoundTo8 : SDNodeXForm<imm, [{
  // Clear the three low bits of the immediate (mask with -8) and return the
  // result as an i32 target constant.
  const int32_t Off = N->getSExtValue();
  const int32_t Aligned = Off & -8;
  return CurDAG->getTargetConstant(Aligned, SDLoc(N), MVT::i32);
}]>;
1374
// i64 store to Rs + Off where Off is accepted by s30_2ProperPred: the base
// is first advanced by 4 with A2_addi, and the store then uses the offset
// with its low three bits cleared (RoundTo8).
let AddedComplexity = 40 in {
  def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
           (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
}
1378
// Store of Value to the address Rs + Rt; MI receives a shift amount of 0.
class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
        (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
1382
// Base + register stores (no shift). The added complexity here (20) is
// lower than that of the shifted-register forms (40).
let AddedComplexity = 20 in {
  def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>;
  def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Store_rr_pat<store, I32, S4_storeri_rr>;
  def: Store_rr_pat<store, I64, S4_storerd_rr>;
}
1389
1390
def IMM_BYTE : SDNodeXForm<imm, [{
  // A value such as -1 may arrive as 255; narrowing to a signed byte
  // recovers the intended signed value before it is re-emitted as an i32
  // target constant.
  const int8_t Byte = static_cast<int8_t>(N->getSExtValue());
  return CurDAG->getTargetConstant(Byte, SDLoc(N), MVT::i32);
}]>;
1397
def IMM_HALF : SDNodeXForm<imm, [{
  // A value such as -1 may arrive as 65535; narrowing to a signed 16-bit
  // integer recovers the intended signed value before it is re-emitted as
  // an i32 target constant.
  const int16_t Half = static_cast<int16_t>(N->getSExtValue());
  return CurDAG->getTargetConstant(Half, SDLoc(N), MVT::i32);
}]>;
1404
def IMM_WORD : SDNodeXForm<imm, [{
  // A value such as -1 could be represented as 4294967295. That does not
  // currently happen, but some optimization might convert -1 to a large
  // positive number; narrowing to a signed 32-bit integer recovers the
  // intended signed value.
  const int32_t Word = static_cast<int32_t>(N->getSExtValue());
  return CurDAG->getTargetConstant(Word, SDLoc(N), MVT::i32);
}]>;
1413
// Output-pattern wrappers that apply IMM_BYTE, IMM_HALF and IMM_WORD to
// their single operand.
def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
1417
// Emit store-immediate, but only when the stored value will not be constant-
// extended. The reason for that is that there is no pass that can optimize
// constant extenders in store-immediate instructions. In some cases we can
// end up with a number of such stores, all of which store the same extended
// value (e.g. after unrolling a loop that initializes floating point array).
1423
// Predicates to determine if the 16-bit immediate is expressible as a sign-
// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
// beyond 0..15, so we don't care what is in there.

def i16in8ImmPred: PatLeaf<(i32 imm), [{
  // Keep only the low 16 bits (as a signed value), then check that
  // narrowing further to a signed byte does not change it.
  const int64_t Val = static_cast<int16_t>(N->getSExtValue());
  const int64_t AsByte = static_cast<int8_t>(Val);
  return Val == AsByte;
}]>;
1432
// Predicates to determine if the 32-bit immediate is expressible as a sign-
// extended 8-bit immediate.
def i32in8ImmPred: PatLeaf<(i32 imm), [{
  // Keep only the low 32 bits (as a signed value), then check that
  // narrowing to a signed byte does not change it.
  const int64_t Val = static_cast<int32_t>(N->getSExtValue());
  const int64_t AsByte = static_cast<int8_t>(Val);
  return Val == AsByte;
}]>;
1439
1440
let AddedComplexity = 40 in {
  // Even though the offset is not extendable in the store-immediate, we
  // can still generate the fi# in the base address. If the final offset
  // is not valid for the instruction, we will replace it with a scratch
  // register.
  // NOTE: the frame-index variants below are commented out, i.e. currently
  // disabled; only the Storexm_add_pat instantiations at the end are live.
//  def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
//  def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
//                       S4_storeirh_io>;
//  def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;

//  defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
//                            S4_storeirb_io>;
//  defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
//                            ToImmHalf, S4_storeirh_io>;
//  defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
//                            S4_storeiri_io>;

  // Store-immediate with an offset. For halfword and word stores the stored
  // value must pass i16in8ImmPred / i32in8ImmPred; for byte stores any value
  // matched by s32_0ImmPred is accepted.
  defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
                        S4_storeirb_io>;
  defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
                        S4_storeirh_io>;
  defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
                        S4_storeiri_io>;
}
1465
// Store-immediate instantiations of Storexm_simple_pat. These are outside
// the AddedComplexity scope and accept any value matched by s32_0ImmPred.
def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>;
def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>;
1469
// op(Ps, op(Pt, Pu))
// Two nested logical operations on i1 values, selected as the single
// three-operand instruction MI.
class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
  : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
        (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
1474
// op(Ps, op(Pt, ~Pu))
// Same as the plain nested form, but the innermost second operand is
// negated in the matched DAG; MI receives the un-negated Pu.
class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
  : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
        (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
1479
// All and/or combinations of the nested predicate operations.
def: LogLog_pat<and, and, C4_and_and>;
def: LogLog_pat<and, or, C4_and_or>;
def: LogLog_pat<or, and, C4_or_and>;
def: LogLog_pat<or, or, C4_or_or>;

// The same combinations with a negated innermost operand.
def: LogLogNot_pat<and, and, C4_and_andn>;
def: LogLogNot_pat<and, or, C4_and_orn>;
def: LogLogNot_pat<or, and, C4_or_andn>;
def: LogLogNot_pat<or, or, C4_or_orn>;
1489
1490//===----------------------------------------------------------------------===//
1491// PIC: Support for PIC compilations. The patterns and SD nodes defined
1492// below are needed to support code generation for PIC
1493//===----------------------------------------------------------------------===//
1494
// Type profiles for the PIC nodes: one result each, with three operands for
// AT_GOT and one operand for AT_PCREL.
// NOTE(review): for AT_GOT only the result and the first two operands are
// constrained to i32 here; the third operand has no explicit constraint.
def SDT_HexagonAtGot
  : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDT_HexagonAtPcrel
  : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;

// AT_GOT address-of-GOT, address-of-global, offset-in-global
def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
// AT_PCREL address-of-global
def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
1504
// AT_GOT with a zero offset: a single L2_loadri_io from $got with $addr as
// the immediate offset.
def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
         (L2_loadri_io I32:$got, imm:$addr)>;
// AT_GOT with an offset accepted by s30_2ImmPred: the same load, followed
// by an A2_addi of the offset to the loaded value.
def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
         (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
// AT_PCREL is selected as C4_addipc.
def: Pat<(HexagonAtPcrel I32:$addr),
         (C4_addipc imm:$addr)>;
1511
// 64-bit and/or with a bitwise-negated second operand.
def: Pat<(i64 (and I64:$Rs, (i64 (not I64:$Rt)))),
         (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
def: Pat<(i64 (or I64:$Rs, (i64 (not I64:$Rt)))),
         (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
1516
// Rd=add(Rs,add(Ru,#s6))
def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$s6)),
         (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;

// The next three DAG shapes all compute Rs + #s6 - Ru and share S4_subaddi.

// Rd=add(Rs,sub(#s6,Ru))
def: Pat<(add I32:$Rs, (sub s32_0ImmPred:$s6, I32:$Ru)),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$s6, IntRegs:$Ru)>;

// Rd=sub(add(Rs,#s6),Ru)
def: Pat<(sub (add I32:$Rs, s32_0ImmPred:$s6), I32:$Ru),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$s6, IntRegs:$Ru)>;

// Rd=add(sub(Rs,Ru),#s6)
def: Pat<(add (sub I32:$Rs, I32:$Ru), (s32_0ImmPred:$s6)),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$s6, IntRegs:$Ru)>;
1534
// 64-bit xor of an accumulator with the xor of two register pairs.
def: Pat<(xor I64:$Rxx, (xor I64:$Rss, I64:$Rtt)),
         (M4_xor_xacc DoubleRegs:$Rxx, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;

// or(Ru, and(Rx, #s10))
// NOTE(review): this source DAG has the same shape as the S4_or_andi
// pattern that follows it.
def: Pat<(or I32:$Ru, (and (i32 IntRegs:$Rx), s32_0ImmPred:$s10)),
         (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>;

// or(Rx, and(Rs, #s10))
def: Pat<(or I32:$Rx, (and I32:$Rs, s32_0ImmPred:$s10)),
         (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;

// or(Rx, or(Rs, #s10))
def: Pat<(or I32:$Rx, (or I32:$Rs, s32_0ImmPred:$s10)),
         (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>;
1546
1547
1548
// Count trailing zeros: 64-bit.
// The i64 count is truncated to i32 in the matched DAG, so the instruction
// result is used directly.
def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;

// Count trailing ones: 64-bit.
// Matched as trailing zeros of the bitwise-negated input.
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;

// Define leading/trailing patterns that require zero-extensions to 64 bits.
// The instruction result is widened to i64 with ToZext64.
def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>;
def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>;
def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>;
def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001560
1561
// ((1 << bit) & Rs) == 0, with the bit index given either as a u5 immediate
// or in a register.
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
  def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
           (S4_ntstbit_i I32:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
           (S4_ntstbit_r I32:$Rs, I32:$Rt)>;
}
1568
// Add extra complexity to prefer these instructions over bitsset/bitsclr.
// The reason is that tstbit/ntstbit can be folded into a compound instruction:
//   if ([!]tstbit(...)) jump ...
// The mask must satisfy Set5ImmPred; BITPOS32 turns it into the bit index.
let AddedComplexity = 100 in {
  // (Rs & mask) != 0
  def: Pat<(i1 (setne (and I32:$Rs, (i32 Set5ImmPred:$Mask)), (i32 0))),
           (S2_tstbit_i I32:$Rs, (BITPOS32 Set5ImmPred:$Mask))>;

  // (Rs & mask) == 0
  def: Pat<(i1 (seteq (and I32:$Rs, (i32 Set5ImmPred:$Mask)), (i32 0))),
           (S4_ntstbit_i I32:$Rs, (BITPOS32 Set5ImmPred:$Mask))>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001579
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
// for cmpb (same for cmph). The patterns below do not contain any additional
// complexity that would make them preferable, and if they were actually used
// instead of cmpb/cmph, they would result in a compare against register that
// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
// (Rs & #u6) != 0
def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
         (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>;
// (Rs & Rt) != 0
def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
         (C4_nbitsclr I32:$Rs, I32:$Rt)>;
// (Rs & Rt) != Rt
def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
         (C4_nbitsset I32:$Rs, I32:$Rt)>;
1592
1593
// Multiply-and-add forms where the addend is an immediate.
def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$Mul), u32_0ImmPred:$Add),
         (M4_mpyri_addi imm:$Add, IntRegs:$Rs, imm:$Mul)>;
def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$Add),
         (M4_mpyrr_addi imm:$Add, IntRegs:$Rs, IntRegs:$Rt)>;

// Multiply-by-immediate added to a register. The two instructions take
// their register and immediate operands in different orders.
def: Pat<(add I32:$Ru, (mul I32:$Rs, u6_2ImmPred:$Mul)),
         (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$Mul, IntRegs:$Rs)>;
def: Pat<(add I32:$Ru, (mul I32:$Rs, u32_0ImmPred:$Mul)),
         (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$Mul)>;

// Register product added to a register.
def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$Rt), I32:$Rs)),
         (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Rt, IntRegs:$Rs)>;
1606
// Vector compare pattern: setgt on v8i8 is instantiated with A4_vcmpbgt.
def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
1608
// Op(ShOp(Rx, #U5), #u8): the shifted register is the first operand of Op
// and the immediate is the second. MI takes the immediate first, then the
// register and the shift amount.
class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
  : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8),
        (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
1612
// add/and of a shifted register with an immediate.
let AddedComplexity = 200 in {
  def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
  def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
  def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
  def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
}

// The "or" forms get a lower added complexity than the add/and forms.
let AddedComplexity = 30 in {
  def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
  def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
}
1624
// Op(#u8, ShOp(Rx, #U5)): same output as T_Shift_CommOp_pat, but the
// immediate is the first operand of Op in the matched DAG. Only sub is
// instantiated with this class here.
class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
  : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)),
        (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;

def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
1631
// add/sub of an address matched by addrga with a shifted register; these
// select the same instructions as the immediate forms.
let AddedComplexity = 200 in {
  def: Pat<(add addrga:$addr, (shl I32:$Rx, u5_0ImmPred:$U5)),
           (S4_addi_asl_ri addrga:$addr, IntRegs:$Rx, u5_0ImmPred:$U5)>;
  def: Pat<(add addrga:$addr, (srl I32:$Rx, u5_0ImmPred:$U5)),
           (S4_addi_lsr_ri addrga:$addr, IntRegs:$Rx, u5_0ImmPred:$U5)>;
  def: Pat<(sub addrga:$addr, (shl I32:$Rx, u5_0ImmPred:$U5)),
           (S4_subi_asl_ri addrga:$addr, IntRegs:$Rx, u5_0ImmPred:$U5)>;
  def: Pat<(sub addrga:$addr, (srl I32:$Rx, u5_0ImmPred:$U5)),
           (S4_subi_lsr_ri addrga:$addr, IntRegs:$Rx, u5_0ImmPred:$U5)>;
}
1642
// Left shift of an s6 immediate by a register amount.
def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt),
         (S4_lsli imm:$s6, IntRegs:$Rt)>;
1645
1646
1647//===----------------------------------------------------------------------===//
1648// MEMOP
1649//===----------------------------------------------------------------------===//
1650
// True when the immediate, narrowed to a signed 8-bit value, lies in the
// range [-31, -1].
def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
  const int8_t Val = static_cast<int8_t>(N->getSExtValue());
  return -32 < Val && Val < 0;
}]>;

// True when the immediate, narrowed to a signed 16-bit value, lies in the
// range [-31, -1].
def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
  const int16_t Val = static_cast<int16_t>(N->getSExtValue());
  return -32 < Val && Val < 0;
}]>;
1660
// Invert the immediate, keep the low 8 bits, and ask ImmIsSingleBit about
// the result.
def Clr5Imm8Pred : PatLeaf<(i32 imm), [{
  const uint32_t Inverted = static_cast<uint8_t>(~N->getZExtValue());
  return ImmIsSingleBit(Inverted);
}]>;

// Invert the immediate, keep the low 16 bits, and ask ImmIsSingleBit about
// the result.
def Clr5Imm16Pred : PatLeaf<(i32 imm), [{
  const uint32_t Inverted = static_cast<uint16_t>(~N->getZExtValue());
  return ImmIsSingleBit(Inverted);
}]>;
1670
// Transforms that truncate the immediate to 8, 16 or 32 bits and pass the
// resulting mask to XformMskToBitPosU5Imm.
def Set5Imm8 : SDNodeXForm<imm, [{
  const uint32_t Mask = static_cast<uint8_t>(N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;

def Set5Imm16 : SDNodeXForm<imm, [{
  const uint32_t Mask = static_cast<uint16_t>(N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;

def Set5Imm32 : SDNodeXForm<imm, [{
  const uint32_t Mask = static_cast<uint32_t>(N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;
1685
// Transforms that invert the immediate, truncate it to 8, 16 or 32 bits, and
// pass the resulting mask to XformMskToBitPosU5Imm.
def Clr5Imm8 : SDNodeXForm<imm, [{
  const uint32_t Mask = static_cast<uint8_t>(~N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;

def Clr5Imm16 : SDNodeXForm<imm, [{
  const uint32_t Mask = static_cast<uint16_t>(~N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;

// Unlike the 8- and 16-bit variants, this one holds the mask in a signed
// 32-bit integer.
def Clr5Imm32 : SDNodeXForm<imm, [{
  const int32_t Mask = static_cast<int32_t>(~N->getZExtValue());
  return XformMskToBitPosU5Imm(Mask, SDLoc(N));
}]>;
1700
// Transforms that negate the immediate and emit it as an i32 target
// constant. The 8- and 16-bit forms first narrow the value to a signed
// integer of that width.
def NegImm8 : SDNodeXForm<imm, [{
  const int8_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(-Val, SDLoc(N), MVT::i32);
}]>;

def NegImm16 : SDNodeXForm<imm, [{
  const int16_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(-Val, SDLoc(N), MVT::i32);
}]>;

def NegImm32 : SDNodeXForm<imm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(-Val, SDLoc(N), MVT::i32);
}]>;
1714
// Identity transform: returns the matched immediate node itself.
def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
1716
// Memory operation with a register operand and no address offset: a value
// is loaded from an address, combined with Rt through Oper, and stored back
// to the same address. MI receives the address, an offset of 0, and Rt.
multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
                              InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load I32:$Rs), I32:$Rt), I32:$Rs),
           (MI I32:$Rs, 0, I32:$Rt)>;
  // Addr: fi
  def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$Rt), AddrFI:$Rs),
           (MI AddrFI:$Rs, 0, I32:$Rt)>;
}
1726
// Memory operation with a register operand and an address of the form
// base + offset. The offset must satisfy ImmPred, and the load and the store
// must use the same base and offset. Each address kind is matched both as an
// "add" and as an "or" that qualifies as an add (orisadd).
multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                           SDNode Oper, InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$Rt),
                  (add I32:$Rs, ImmPred:$Off)),
           (MI I32:$Rs, imm:$Off, I32:$Rt)>;
  def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$Rt),
                  (orisadd I32:$Rs, ImmPred:$Off)),
           (MI I32:$Rs, imm:$Off, I32:$Rt)>;
  // Addr: fi
  def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$Rt),
                  (add AddrFI:$Rs, ImmPred:$Off)),
           (MI AddrFI:$Rs, imm:$Off, I32:$Rt)>;
  def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$Rt),
                  (orisadd AddrFI:$Rs, ImmPred:$Off)),
           (MI AddrFI:$Rs, imm:$Off, I32:$Rt)>;
}
1744
// Convenience wrapper: instantiates both the offset-less and the
// base + offset register memop patterns for one Load/Store/Oper/MI set.
multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                       SDNode Oper, InstHexagon MI> {
  defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
  defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
}
1750
// Register memops for add, sub, and, or. For each operation the byte and
// halfword forms are instantiated once per load extension kind (anyext,
// sext, zext), all mapping to the same instruction; the word form uses the
// plain load/store. The offset predicate follows the access size
// (u6_0 / u6_1 / u6_2).
let AddedComplexity = 180 in {
  // add reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
        /*anyext*/  L4_add_memopb_io>;
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
        /*sext*/    L4_add_memopb_io>;
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
        /*zext*/    L4_add_memopb_io>;
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
        /*anyext*/  L4_add_memoph_io>;
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
        /*sext*/    L4_add_memoph_io>;
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
        /*zext*/    L4_add_memoph_io>;
  defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;

  // sub reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
        /*anyext*/  L4_sub_memopb_io>;
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
        /*sext*/    L4_sub_memopb_io>;
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
        /*zext*/    L4_sub_memopb_io>;
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
        /*anyext*/  L4_sub_memoph_io>;
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
        /*sext*/    L4_sub_memoph_io>;
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
        /*zext*/    L4_sub_memoph_io>;
  defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;

  // and reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
        /*anyext*/  L4_and_memopb_io>;
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
        /*sext*/    L4_and_memopb_io>;
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
        /*zext*/    L4_and_memopb_io>;
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
        /*anyext*/  L4_and_memoph_io>;
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
        /*sext*/    L4_and_memoph_io>;
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
        /*zext*/    L4_and_memoph_io>;
  defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;

  // or reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
        /*anyext*/  L4_or_memopb_io>;
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
        /*sext*/    L4_or_memopb_io>;
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
        /*zext*/    L4_or_memopb_io>;
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
        /*anyext*/  L4_or_memoph_io>;
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
        /*sext*/    L4_or_memoph_io>;
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
        /*zext*/    L4_or_memoph_io>;
  defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
}
1812
1813
// Memory operation with an immediate operand and no address offset: a value
// is loaded from an address, combined through Oper with an immediate
// accepted by Arg, and stored back to the same address. The immediate is
// passed through ArgMod before it reaches MI; the offset operand is 0.
multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
                              PatFrag Arg, SDNodeXForm ArgMod,
                              InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load I32:$Rs), Arg:$Imm), I32:$Rs),
           (MI I32:$Rs, 0, (ArgMod Arg:$Imm))>;
  // Addr: fi
  def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$Imm), AddrFI:$Rs),
           (MI AddrFI:$Rs, 0, (ArgMod Arg:$Imm))>;
}
1824
// Memory operation with an immediate operand and an address of the form
// base + offset. The offset must satisfy ImmPred, the load and the store
// must use the same base and offset, and the immediate accepted by Arg is
// passed through ArgMod. Each address kind is matched both as an "add" and
// as an "or" that qualifies as an add (orisadd).
multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                           SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
                           InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$Imm),
                  (add I32:$Rs, ImmPred:$Off)),
           (MI I32:$Rs, imm:$Off, (ArgMod Arg:$Imm))>;
  def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$Imm),
                  (orisadd I32:$Rs, ImmPred:$Off)),
           (MI I32:$Rs, imm:$Off, (ArgMod Arg:$Imm))>;
  // Addr: fi
  def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$Imm),
                  (add AddrFI:$Rs, ImmPred:$Off)),
           (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$Imm))>;
  def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$Imm),
                  (orisadd AddrFI:$Rs, ImmPred:$Off)),
           (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$Imm))>;
}
1843
// Convenience wrapper: instantiates both the zero-offset patterns
// (Memopxi_simple_pat) and the base+immediate-offset patterns
// (Memopxi_add_pat) for one load/store/operation/instruction combination.
multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                       SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
                       InstHexagon MI> {
  // Address without an offset.
  defm: Memopxi_simple_pat<Load, Store, Oper, Arg, ArgMod, MI>;
  // Address with an immediate offset accepted by ImmPred.
  defm: Memopxi_add_pat<Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
}
1850
1851
1852let AddedComplexity = 200 in {
1853 // add imm
1854 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1855 /*anyext*/ IdImm, L4_iadd_memopb_io>;
1856 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1857 /*sext*/ IdImm, L4_iadd_memopb_io>;
1858 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
1859 /*zext*/ IdImm, L4_iadd_memopb_io>;
1860 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1861 /*anyext*/ IdImm, L4_iadd_memoph_io>;
1862 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1863 /*sext*/ IdImm, L4_iadd_memoph_io>;
1864 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
1865 /*zext*/ IdImm, L4_iadd_memoph_io>;
1866 defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
1867 L4_iadd_memopw_io>;
1868 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1869 /*anyext*/ NegImm8, L4_iadd_memopb_io>;
1870 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1871 /*sext*/ NegImm8, L4_iadd_memopb_io>;
1872 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
1873 /*zext*/ NegImm8, L4_iadd_memopb_io>;
1874 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1875 /*anyext*/ NegImm16, L4_iadd_memoph_io>;
1876 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1877 /*sext*/ NegImm16, L4_iadd_memoph_io>;
1878 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
1879 /*zext*/ NegImm16, L4_iadd_memoph_io>;
1880 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
1881 L4_iadd_memopw_io>;
1882
1883 // sub imm
1884 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1885 /*anyext*/ IdImm, L4_isub_memopb_io>;
1886 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1887 /*sext*/ IdImm, L4_isub_memopb_io>;
1888 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
1889 /*zext*/ IdImm, L4_isub_memopb_io>;
1890 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1891 /*anyext*/ IdImm, L4_isub_memoph_io>;
1892 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1893 /*sext*/ IdImm, L4_isub_memoph_io>;
1894 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
1895 /*zext*/ IdImm, L4_isub_memoph_io>;
1896 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
1897 L4_isub_memopw_io>;
1898 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1899 /*anyext*/ NegImm8, L4_isub_memopb_io>;
1900 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1901 /*sext*/ NegImm8, L4_isub_memopb_io>;
1902 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
1903 /*zext*/ NegImm8, L4_isub_memopb_io>;
1904 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1905 /*anyext*/ NegImm16, L4_isub_memoph_io>;
1906 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1907 /*sext*/ NegImm16, L4_isub_memoph_io>;
1908 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
1909 /*zext*/ NegImm16, L4_isub_memoph_io>;
1910 defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
1911 L4_isub_memopw_io>;
1912
1913 // clrbit imm
1914 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1915 /*anyext*/ Clr5Imm8, L4_iand_memopb_io>;
1916 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1917 /*sext*/ Clr5Imm8, L4_iand_memopb_io>;
1918 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
1919 /*zext*/ Clr5Imm8, L4_iand_memopb_io>;
1920 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1921 /*anyext*/ Clr5Imm16, L4_iand_memoph_io>;
1922 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1923 /*sext*/ Clr5Imm16, L4_iand_memoph_io>;
1924 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
1925 /*zext*/ Clr5Imm16, L4_iand_memoph_io>;
1926 defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32,
1927 L4_iand_memopw_io>;
1928
1929 // setbit imm
1930 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1931 /*anyext*/ Set5Imm8, L4_ior_memopb_io>;
1932 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1933 /*sext*/ Set5Imm8, L4_ior_memopb_io>;
1934 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
1935 /*zext*/ Set5Imm8, L4_ior_memopb_io>;
1936 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1937 /*anyext*/ Set5Imm16, L4_ior_memoph_io>;
1938 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1939 /*sext*/ Set5Imm16, L4_ior_memoph_io>;
1940 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
1941 /*zext*/ Set5Imm16, L4_ior_memoph_io>;
1942 defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32,
1943 L4_ior_memopw_io>;
1944}
1945
1946def : T_CMP_pat <C4_cmpneqi, setne, s32_0ImmPred>;
1947def : T_CMP_pat <C4_cmpltei, setle, s32_0ImmPred>;
1948def : T_CMP_pat <C4_cmplteui, setule, u9_0ImmPred>;
1949
1950// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001951def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001952 (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
1953
1954// rs != rt -> !(rs == rt).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001955def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001956 (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
1957
// Immediate transform used to turn a constant C into C-1.
def DEC_CONST_BYTE : SDNodeXForm<imm, [{
  // Narrow the sign-extended immediate to 32 bits and pass it on to
  // XformU7ToU7M1Imm, which is expected to build the node for const-1
  // (the helper is defined outside this section).
  int32_t Val = N->getSExtValue();
  return XformU7ToU7M1Imm(Val, SDLoc(N));
}]>;
1964
1965// For the sequence
1966// zext( setult ( and(Rs, 255), u8))
1967// Use the isdigit transformation below
1968
// Generate code of the form 'C2_muxii(cmpbgtui(Rs, C-1),0,1)'
1970// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
1971// The isdigit transformation relies on two 'clever' aspects:
1972// 1) The data type is unsigned which allows us to eliminate a zero test after
1973// biasing the expression by 48. We are depending on the representation of
1974// the unsigned types, and semantics.
1975// 2) The front end has converted <= 9 into < 10 on entry to LLVM
1976//
1977// For the C code:
1978// retval = ((c>='0') & (c<='9')) ? 1 : 0;
1979// The code is transformed upstream of llvm into
1980// retval = (c-48) < 10 ? 1 : 0;
1981let AddedComplexity = 139 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00001982def: Pat<(i32 (zext (i1 (setult (i32 (and I32:$src1, 255)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001983 u7_0StrictPosImmPred:$src2)))),
1984 (C2_muxii (A4_cmpbgtui IntRegs:$src1,
1985 (DEC_CONST_BYTE u7_0StrictPosImmPred:$src2)),
1986 0, 1)>;
1987
// Selects MI for a load producing VT from an address matched by Addr.
// The matched address is handed to MI unchanged.
class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
  : Pat<
      (VT (Load Addr:$addr)),
      (MI Addr:$addr)>;
1990
// Like Loada_pat, but the result of MI is additionally wrapped in ValueMod
// (for example an extension or a register-class conversion) to produce VT.
class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
                 InstHexagon MI>
  : Pat<
      (VT (Load Addr:$addr)),
      (ValueMod (MI Addr:$addr))>;
1994
// Selects MI for a store of a value matched by Value to an address matched
// by Addr. Note the operand order flips: the DAG node is (value, address),
// while MI takes (address, value).
class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
  : Pat<
      (Store Value:$val, Addr:$addr),
      (MI Addr:$addr, Value:$val)>;
1997
// Like Storea_pat, but the stored value is first passed through ValueMod
// before being given to MI as its second operand.
class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
                  InstHexagon MI>
  : Pat<
      (Store Value:$val, Addr:$addr),
      (MI Addr:$addr, (ValueMod Value:$val))>;
2002
// Stores to an address matched by addrga, selected to the PS_storer*abs
// instructions.
let AddedComplexity = 30 in {
  // 32-bit and 64-bit register values stored directly.
  def: Storea_pat<truncstorei8,  I32, addrga, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrga, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrga, PS_storerdabs>;

  // Truncating stores of a 64-bit value: only the low subregister (LoReg)
  // is passed to the store instruction.
  def: Stoream_pat<truncstorei8,  I64, addrga, LoReg, PS_storerbabs>;
  def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
  def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
}
2013
2014def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
2015def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
2016def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
2017def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
2018
// Stores to an address matched by addrgp, selected to the S2_storer*gp
// instructions.
let AddedComplexity = 100 in {
  def: Storea_pat<truncstorei8,  I32, addrgp, S2_storerbgp>;
  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
  def: Storea_pat<store,         I32, addrgp, S2_storerigp>;
  def: Storea_pat<store,         I64, addrgp, S2_storerdgp>;

  // Map from "i1 = constant<-1>; memb(CONST32(#foo)) = i1"
  //       to "r0 = 1; memb(#foo) = r0"
  // The i1 'true' is materialized as 1 with A2_tfrsi and written with the
  // byte store S2_storerbgp (the same instruction used for truncstorei8
  // above). The enclosing AddedComplexity of 100 already applies here, so
  // no nested 'let' is needed.
  def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
           (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
}
2031
// Selects MI for a load (ldOp) whose address is a global address wrapped in
// HexagonCONST32. The result type VT defaults to i32.
class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
  : Pat<
      (VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
      (VT (MI tglobaladdr:$absaddr))>;
2035
// Loads from HexagonCONST32 global addresses, selected to the PS_loadr*abs
// instructions. Any-extending and zero-extending loads share the unsigned
// load instruction of the same width; zextloadi1 uses the unsigned byte load.
let AddedComplexity = 30 in {
  def: LoadAbs_pats <load,        PS_loadriabs>;
  def: LoadAbs_pats <zextloadi1,  PS_loadrubabs>;
  def: LoadAbs_pats <sextloadi8,  PS_loadrbabs>;
  def: LoadAbs_pats <extloadi8,   PS_loadrubabs>;
  def: LoadAbs_pats <zextloadi8,  PS_loadrubabs>;
  def: LoadAbs_pats <sextloadi16, PS_loadrhabs>;
  def: LoadAbs_pats <extloadi16,  PS_loadruhabs>;
  def: LoadAbs_pats <zextloadi16, PS_loadruhabs>;
  def: LoadAbs_pats <load,        PS_loadrdabs, i64>;
}
2047
2048let AddedComplexity = 30 in
2049def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00002050 (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002051
2052def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
2053def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
2054def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
2055def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
2056
2057// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
2058def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
2059def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
2060
2061def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
2062def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
2063
// Map from load(globaladdress) -> mem[u][bhwd](#foo)
// Selects MI for a load (ldOp) whose address is a global address wrapped in
// HexagonCONST32_GP. The result type VT defaults to i32.
class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
  : Pat<
      (VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
      (VT (MI tglobaladdr:$global))>;
2068
// Loads from HexagonCONST32_GP global addresses, selected to the L2_loadr*gp
// instructions. Any-extending and zero-extending loads share the unsigned
// load instruction of the same width.
let AddedComplexity = 100 in {
  def: LoadGP_pats <extloadi8,   L2_loadrubgp>;
  def: LoadGP_pats <sextloadi8,  L2_loadrbgp>;
  def: LoadGP_pats <zextloadi8,  L2_loadrubgp>;
  def: LoadGP_pats <extloadi16,  L2_loadruhgp>;
  def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
  def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
  def: LoadGP_pats <load,        L2_loadrigp>;
  def: LoadGP_pats <load,        L2_loadrdgp, i64>;
}
2079
// When the Interprocedural Global Variable optimizer realizes that a certain
// global variable takes only two constant values, it shrinks the global to
// a boolean. Catch those loads here in the following patterns.
2083let AddedComplexity = 100 in {
2084 def: LoadGP_pats <extloadi1, L2_loadrubgp>;
2085 def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
2086}
2087
2088// Transfer global address into a register
2089def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
2090def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>;
2091def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
2092
2093let AddedComplexity = 30 in {
2094 def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
2095 def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
2096 def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
2097}
2098
2099let AddedComplexity = 30 in {
2100 def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>;
2101 def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>;
2102 def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
2103 def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
2104 def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
2105}
2106
2107// Indexed store word - global address.
2108// memw(Rs+#u6:2)=#S8
2109let AddedComplexity = 100 in
2110defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
2111
2112// Load from a global address that has only one use in the current basic block.
2113let AddedComplexity = 100 in {
2114 def: Loada_pat<extloadi8, i32, addrga, PS_loadrubabs>;
2115 def: Loada_pat<sextloadi8, i32, addrga, PS_loadrbabs>;
2116 def: Loada_pat<zextloadi8, i32, addrga, PS_loadrubabs>;
2117
2118 def: Loada_pat<extloadi16, i32, addrga, PS_loadruhabs>;
2119 def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
2120 def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;
2121
2122 def: Loada_pat<load, i32, addrga, PS_loadriabs>;
2123 def: Loada_pat<load, i64, addrga, PS_loadrdabs>;
2124}
2125
2126// Store to a global address that has only one use in the current basic block.
2127let AddedComplexity = 100 in {
2128 def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
2129 def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
2130 def: Storea_pat<store, I32, addrga, PS_storeriabs>;
2131 def: Storea_pat<store, I64, addrga, PS_storerdabs>;
2132
2133 def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
2134}
2135
// i8/i16/i32 -> i64 loads from an address matched by addrga.
// The narrow load is done first and its result is widened with ToZext64
// (any-extending and zero-extending loads) or ToSext64 (sign-extending
// loads). We need a complexity of 120 here to override preceding handling
// of zextload.
let AddedComplexity = 120 in {
  // 8-bit loads.
  def: Loadam_pat<extloadi8,   i64, addrga, ToZext64, PS_loadrubabs>;
  def: Loadam_pat<sextloadi8,  i64, addrga, ToSext64, PS_loadrbabs>;
  def: Loadam_pat<zextloadi8,  i64, addrga, ToZext64, PS_loadrubabs>;

  // 16-bit loads.
  def: Loadam_pat<extloadi16,  i64, addrga, ToZext64, PS_loadruhabs>;
  def: Loadam_pat<sextloadi16, i64, addrga, ToSext64, PS_loadrhabs>;
  def: Loadam_pat<zextloadi16, i64, addrga, ToZext64, PS_loadruhabs>;

  // 32-bit loads.
  def: Loadam_pat<extloadi32,  i64, addrga, ToZext64, PS_loadriabs>;
  def: Loadam_pat<sextloadi32, i64, addrga, ToSext64, PS_loadriabs>;
  def: Loadam_pat<zextloadi32, i64, addrga, ToZext64, PS_loadriabs>;
}
2152
2153let AddedComplexity = 100 in {
2154 def: Loada_pat<extloadi8, i32, addrgp, PS_loadrubabs>;
2155 def: Loada_pat<sextloadi8, i32, addrgp, PS_loadrbabs>;
2156 def: Loada_pat<zextloadi8, i32, addrgp, PS_loadrubabs>;
2157
2158 def: Loada_pat<extloadi16, i32, addrgp, PS_loadruhabs>;
2159 def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
2160 def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;
2161
2162 def: Loada_pat<load, i32, addrgp, PS_loadriabs>;
2163 def: Loada_pat<load, i64, addrgp, PS_loadrdabs>;
2164}
2165
2166let AddedComplexity = 100 in {
2167 def: Storea_pat<truncstorei8, I32, addrgp, PS_storerbabs>;
2168 def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
2169 def: Storea_pat<store, I32, addrgp, PS_storeriabs>;
2170 def: Storea_pat<store, I64, addrgp, PS_storerdabs>;
2171}
2172
2173def: Loada_pat<atomic_load_8, i32, addrgp, PS_loadrubabs>;
2174def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
2175def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
2176def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;
2177
2178def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, PS_storerbabs>;
2179def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
2180def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
2181def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
2182
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00002183def: Pat<(or (or (or (shl (i64 (zext (i32 (and I32:$b, (i32 65535))))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002184 (i32 16)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00002185 (i64 (zext (i32 (and I32:$a, (i32 65535)))))),
2186 (shl (i64 (anyext (i32 (and I32:$c, (i32 65535))))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002187 (i32 32))),
Krzysztof Parzyszek84755102016-11-06 17:56:48 +00002188 (shl (Aext64 I32:$d), (i32 48))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002189 (Insert4 IntRegs:$a, IntRegs:$b, IntRegs:$c, IntRegs:$d)>;
2190
2191// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
2192// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
2193// We don't really want either one here.
2194def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
2195def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
2196 [SDNPHasChain]>;
2197
2198def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
2199 (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
2200def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
2201 (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
2202
2203def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
2204def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
2205
// Transform a floating-point immediate into a target constant holding the
// same bit pattern, using an integer type of the same bit width.
def ftoi : SDNodeXForm<fpimm, [{
  // Reinterpret the FP value's bits as an integer.
  const APInt Bits = N->getValueAPF().bitcastToAPInt();
  // Integer type as wide as the FP value.
  MVT IntTy = MVT::getIntegerVT(Bits.getBitWidth());
  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), IntTy);
}]>;
2211
2212
2213def: Pat<(sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)), (i32 1)),
2214 (S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
2215
2216def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
2217 SDTCisVT<1, i64>]>;
2218
2219def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
2220
2221def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>;
2222
2223let AddedComplexity = 20 in {
2224 defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
2225 defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
2226}
2227
2228let AddedComplexity = 60 in {
2229 defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>;
2230 defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>;
2231}
2232
2233let AddedComplexity = 40 in {
2234 def: Loadxs_pat<load, f32, L4_loadri_rr>;
2235 def: Loadxs_pat<load, f64, L4_loadrd_rr>;
2236}
2237
2238let AddedComplexity = 20 in {
2239 def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
2240 def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
2241}
2242
2243let AddedComplexity = 80 in {
2244 def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
2245 def: Loada_pat<load, f32, addrga, PS_loadriabs>;
2246 def: Loada_pat<load, f64, addrga, PS_loadrdabs>;
2247}
2248
2249let AddedComplexity = 100 in {
2250 def: LoadGP_pats <load, L2_loadrigp, f32>;
2251 def: LoadGP_pats <load, L2_loadrdgp, f64>;
2252}
2253
2254let AddedComplexity = 20 in {
2255 defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
2256 defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
2257}
2258
2259// Simple patterns should be tried with the least priority.
2260def: Storex_simple_pat<store, F32, S2_storeri_io>;
2261def: Storex_simple_pat<store, F64, S2_storerd_io>;
2262
2263let AddedComplexity = 60 in {
2264 defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>;
2265 defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>;
2266}
2267
2268let AddedComplexity = 40 in {
2269 def: Storexs_pat<store, F32, S4_storeri_rr>;
2270 def: Storexs_pat<store, F64, S4_storerd_rr>;
2271}
2272
2273let AddedComplexity = 20 in {
2274 def: Store_rr_pat<store, F32, S4_storeri_rr>;
2275 def: Store_rr_pat<store, F64, S4_storerd_rr>;
2276}
2277
2278let AddedComplexity = 80 in {
2279 def: Storea_pat<store, F32, addrga, PS_storeriabs>;
2280 def: Storea_pat<store, F64, addrga, PS_storerdabs>;
2281}
2282
2283let AddedComplexity = 100 in {
2284 def: Storea_pat<store, F32, addrgp, S2_storerigp>;
2285 def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
2286}
2287
2288defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
2289defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
2290def: Storex_simple_pat<store, F32, S2_storeri_io>;
2291def: Storex_simple_pat<store, F64, S2_storerd_io>;
2292
2293def: Pat<(fadd F32:$src1, F32:$src2),
2294 (F2_sfadd F32:$src1, F32:$src2)>;
2295
2296def: Pat<(fsub F32:$src1, F32:$src2),
2297 (F2_sfsub F32:$src1, F32:$src2)>;
2298
2299def: Pat<(fmul F32:$src1, F32:$src2),
2300 (F2_sfmpy F32:$src1, F32:$src2)>;
2301
2302let Predicates = [HasV5T] in {
2303 def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>;
2304 def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>;
2305}
2306
// Fold "select on a compare of $Rs and $Rt, choosing between those same two
// values" into a single min/max instruction.
let AddedComplexity = 100, Predicates = [HasV5T] in {
  // The select yields $Rs when the compare is true, $Rt otherwise.
  class SfSel12<PatFrag Cmp, InstHexagon MI>
    : Pat<
        (select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
        (MI F32:$Rs, F32:$Rt)>;

  // The select yields $Rt when the compare is true, $Rs otherwise.
  class SfSel21<PatFrag Cmp, InstHexagon MI>
    : Pat<
        (select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs),
        (MI F32:$Rs, F32:$Rt)>;

  // Compare true picks the first operand.
  def: SfSel12<setolt, F2_sfmin>;
  def: SfSel12<setole, F2_sfmin>;
  def: SfSel12<setogt, F2_sfmax>;
  def: SfSel12<setoge, F2_sfmax>;
  // Compare true picks the second operand, so min and max trade places.
  def: SfSel21<setolt, F2_sfmax>;
  def: SfSel21<setole, F2_sfmax>;
  def: SfSel21<setogt, F2_sfmin>;
  def: SfSel21<setoge, F2_sfmin>;
}
2324
// Selects MI for an i1-producing compare OpNode of two F32 operands, with
// the operands passed to MI in the same order.
class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<
      (i1 (OpNode F32:$src1, F32:$src2)),
      (MI F32:$src1, F32:$src2)>;
// Selects MI for an i1-producing compare OpNode of two F64 operands, with
// the operands passed to MI in the same order.
class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<
      (i1 (OpNode F64:$src1, F64:$src2)),
      (MI F64:$src1, F64:$src2)>;
2331
2332def: T_fcmp32_pat<setoge, F2_sfcmpge>;
2333def: T_fcmp32_pat<setuo, F2_sfcmpuo>;
2334def: T_fcmp32_pat<setoeq, F2_sfcmpeq>;
2335def: T_fcmp32_pat<setogt, F2_sfcmpgt>;
2336
2337def: T_fcmp64_pat<setoge, F2_dfcmpge>;
2338def: T_fcmp64_pat<setuo, F2_dfcmpuo>;
2339def: T_fcmp64_pat<setoeq, F2_dfcmpeq>;
2340def: T_fcmp64_pat<setogt, F2_dfcmpgt>;
2341
// Direct mapping of a compare cmpOp onto a compare instruction, for both
// F32 operands (IntMI) and F64 operands (DoubleMI). Operand order is kept.
let Predicates = [HasV5T] in
multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
  // F32 operands.
  def: Pat<
    (i1 (cmpOp F32:$src1, F32:$src2)),
    (IntMI F32:$src1, F32:$src2)>;

  // F64 operands.
  def: Pat<
    (i1 (cmpOp F64:$src1, F64:$src2)),
    (DoubleMI F64:$src1, F64:$src2)>;
}
2351
2352defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
2353defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
2354defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
2355
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
// Each result is the OR (C2_or) of the "unordered" compare (sfcmpuo/dfcmpuo)
// with the supplied compare instruction applied to the same operands.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
  // F32 operands.
  def: Pat<
    (i1 (cmpOp F32:$src1, F32:$src2)),
    (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
           (IntMI F32:$src1, F32:$src2))>;

  // F64 operands.
  def: Pat<
    (i1 (cmpOp F64:$src1, F64:$src2)),
    (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
           (DoubleMI F64:$src1, F64:$src2))>;
}
2371
2372defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
2373defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
2374defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
2375
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for the following dags:
// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
//
// In other words: comparing the i1 result of cmpOp against a constant 0/1
// either keeps the compare instruction as is or wraps it in C2_not.
// Operand order is preserved.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
                         InstHexagon DoubleMI> {
  // F32 operands: (== 0) and (!= 1) negate, (== 1) and (!= 0) pass through.
  def: Pat<
    (i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
    (C2_not (IntMI F32:$src1, F32:$src2))>;
  def: Pat<
    (i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
    (IntMI F32:$src1, F32:$src2)>;
  def: Pat<
    (i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
    (IntMI F32:$src1, F32:$src2)>;
  def: Pat<
    (i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
    (C2_not (IntMI F32:$src1, F32:$src2))>;

  // F64 operands: same four cases.
  def : Pat<
    (i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
    (C2_not (DoubleMI F64:$src1, F64:$src2))>;
  def : Pat<
    (i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
    (DoubleMI F64:$src1, F64:$src2)>;
  def : Pat<
    (i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
    (DoubleMI F64:$src1, F64:$src2)>;
  def : Pat<
    (i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
    (C2_not (DoubleMI F64:$src1, F64:$src2))>;
}
2406
2407defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
2408defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
2409defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
2410
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for the following dags:
// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
//
// The less-than style compare cmpOp is implemented with the supplied
// greater-than style instruction by swapping the operands; comparing the i1
// result against 0/1 then either keeps that instruction or wraps it in
// C2_not.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
                         InstHexagon DoubleMI> {
  // IntRegs
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (C2_not (IntMI F32:$src2, F32:$src1))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (IntMI F32:$src2, F32:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (IntMI F32:$src2, F32:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (C2_not (IntMI F32:$src2, F32:$src1))>;

  // DoubleRegs
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (DoubleMI F64:$src2, F64:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (DoubleMI F64:$src2, F64:$src1)>;
  // This case compares against 1 (not 0): "cmp != 1" is the negated compare,
  // matching the IntRegs patterns above and the table in the header.
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
}
2441
2442defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
2443defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
2444
2445
2446// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp
2447let Predicates = [HasV5T] in {
2448 def: Pat<(i1 (seto F32:$src1, F32:$src2)),
2449 (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
2450 def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
2451 (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
2452 def: Pat<(i1 (seto F64:$src1, F64:$src2)),
2453 (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
2454 def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
2455 (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
2456}
2457
2458// Ordered lt.
2459let Predicates = [HasV5T] in {
2460 def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
2461 (F2_sfcmpgt F32:$src2, F32:$src1)>;
2462 def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
2463 (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2464 def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
2465 (F2_dfcmpgt F64:$src2, F64:$src1)>;
2466 def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
2467 (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
2468}
2469
2470// Unordered lt.
2471let Predicates = [HasV5T] in {
2472 def: Pat<(i1 (setult F32:$src1, F32:$src2)),
2473 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2474 (F2_sfcmpgt F32:$src2, F32:$src1))>;
2475 def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
2476 (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
2477 (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
2478 def: Pat<(i1 (setult F64:$src1, F64:$src2)),
2479 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2480 (F2_dfcmpgt F64:$src2, F64:$src1))>;
2481 def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
2482 (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
2483 (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
2484}
2485
2486// Ordered le.
2487let Predicates = [HasV5T] in {
2488 // rs <= rt -> rt >= rs.
2489 def: Pat<(i1 (setole F32:$src1, F32:$src2)),
2490 (F2_sfcmpge F32:$src2, F32:$src1)>;
2491 def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
2492 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2493
2494 // Rss <= Rtt -> Rtt >= Rss.
2495 def: Pat<(i1 (setole F64:$src1, F64:$src2)),
2496 (F2_dfcmpge F64:$src2, F64:$src1)>;
2497 def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
2498 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
2499}
2500
2501// Unordered le.
2502let Predicates = [HasV5T] in {
2503// rs <= rt -> rt >= rs.
2504 def: Pat<(i1 (setule F32:$src1, F32:$src2)),
2505 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2506 (F2_sfcmpge F32:$src2, F32:$src1))>;
2507 def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
2508 (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
2509 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
2510 def: Pat<(i1 (setule F64:$src1, F64:$src2)),
2511 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2512 (F2_dfcmpge F64:$src2, F64:$src1))>;
2513 def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
2514 (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
2515 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
2516}
2517
2518// Ordered ne.
2519let Predicates = [HasV5T] in {
2520 def: Pat<(i1 (setone F32:$src1, F32:$src2)),
2521 (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
2522 def: Pat<(i1 (setone F64:$src1, F64:$src2)),
2523 (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
2524 def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
2525 (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
2526 def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
2527 (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
2528}
2529
2530// Unordered ne.
2531let Predicates = [HasV5T] in {
2532 def: Pat<(i1 (setune F32:$src1, F32:$src2)),
2533 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2534 (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
2535 def: Pat<(i1 (setune F64:$src1, F64:$src2)),
2536 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2537 (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
2538 def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
2539 (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
2540 (C2_not (F2_sfcmpeq F32:$src1,
2541 (f32 (A2_tfrsi (ftoi $src2))))))>;
2542 def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
2543 (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
2544 (C2_not (F2_dfcmpeq F64:$src1,
2545 (CONST64 (ftoi $src2)))))>;
2546}
2547
// Besides set[o|u][comparisons], we also need set[comparisons].
2549let Predicates = [HasV5T] in {
2550 // lt.
2551 def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
2552 (F2_sfcmpgt F32:$src2, F32:$src1)>;
2553 def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
2554 (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2555 def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
2556 (F2_dfcmpgt F64:$src2, F64:$src1)>;
2557 def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
2558 (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
2559
2560 // le.
2561 // rs <= rt -> rt >= rs.
2562 def: Pat<(i1 (setle F32:$src1, F32:$src2)),
2563 (F2_sfcmpge F32:$src2, F32:$src1)>;
2564 def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
2565 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2566
2567 // Rss <= Rtt -> Rtt >= Rss.
2568 def: Pat<(i1 (setle F64:$src1, F64:$src2)),
2569 (F2_dfcmpge F64:$src2, F64:$src1)>;
2570 def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
2571 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
2572
2573 // ne.
2574 def: Pat<(i1 (setne F32:$src1, F32:$src2)),
2575 (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
2576 def: Pat<(i1 (setne F64:$src1, F64:$src2)),
2577 (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
2578 def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
2579 (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
2580 def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
2581 (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
2582}
2583
2584
2585def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
2586def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
2587
2588def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
2589def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
2590def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
2591def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
2592
2593def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
2594def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
2595def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
2596def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
2597
2598def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
2599def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
2600def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
2601def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
2602
2603def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
2604def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
2605def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
2606def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
2607
// Unlike [fp|sint|uint]_to_[sint|uint|fp], a bitcast does not convert the
// value: the result of each pattern is the source operand itself, viewed
// with the other value type.
let Predicates = [HasV5T] in {
  // 32-bit: f32 <-> i32.
  def: Pat<(i32 (bitconvert F32:$Rs)), (I32:$Rs)>;
  def: Pat<(f32 (bitconvert I32:$Rs)), (F32:$Rs)>;
  // 64-bit: f64 <-> i64.
  def: Pat<(i64 (bitconvert F64:$Rss)), (I64:$Rss)>;
  def: Pat<(f64 (bitconvert I64:$Rss)), (F64:$Rss)>;
}
2615
2616def : Pat <(fma F32:$src2, F32:$src3, F32:$src1),
2617 (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
2618
2619def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1),
2620 (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
2621
2622def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1),
2623 (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
2624
2625def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm),
2626 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>,
2627 Requires<[HasV5T]>;
2628
2629def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt),
2630 (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>,
2631 Requires<[HasV5T]>;
2632
2633def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
2634 (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
2635 Requires<[HasV5T]>;
2636
// select(ult(a, b), x, y): "ult" is true when a < b or when either operand
// is a NaN, i.e. exactly when the ordered compare a >= b is false.  Test
// a >= b and feed the mux with the select values swapped, so that both the
// a < b case and the NaN case yield x.
def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
         (C2_mux (F2_sfcmpge F32:$src1, F32:$src2), F32:$src4, F32:$src3)>,
     Requires<[HasV5T]>;
2640
2641def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
2642 (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
2643 Requires<[HasV5T]>;
2644
// select(ult(a, b), x, y) on f64: "ult" is true when a < b or when either
// operand is a NaN, i.e. exactly when the ordered compare a >= b is false.
// Test a >= b and feed the mux with the select values swapped, so that the
// NaN case yields x as well.
def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
         (C2_vmux (F2_dfcmpge F64:$src1, F64:$src2), F64:$src4, F64:$src3)>,
     Requires<[HasV5T]>;
2648
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
//   => r0 = mux(p0, r1, #i)
2651def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3),
2652 (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>,
2653 Requires<[HasV5T]>;
2654
// Map from p0 = pnot(p0); r0 = select(p0, r1, #i)
//   => r0 = mux(p0, #i, r1)
2657def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3),
2658 (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>,
2659 Requires<[HasV5T]>;
2660
2661def: Pat<(i32 (fp_to_sint F64:$src1)),
2662 (LoReg (F2_conv_df2d_chop F64:$src1))>,
2663 Requires<[HasV5T]>;
2664
// fabs on f32: clear bit 31 of the register holding the value.
def: Pat<(fabs F32:$Rs),
         (S2_clrbit_i F32:$Rs, 31)>,
     Requires<[HasV5T]>;
2668
// fneg on f32: toggle bit 31 of the register holding the value.
def: Pat<(fneg F32:$Rs),
         (S2_togglebit_i F32:$Rs, 31)>,
     Requires<[HasV5T]>;
2672
2673
// Load for which isAlignedMemNode() accepts the memory node.
// A load node is always a MemSDNode, so use cast<> (which asserts on a
// mismatch) instead of passing an unchecked dyn_cast<> result to the helper.
def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;
2677
// Load for which isAlignedMemNode() rejects the memory node.
// A load node is always a MemSDNode, so use cast<> (which asserts on a
// mismatch) instead of passing an unchecked dyn_cast<> result to the helper.
def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
2681
// Store for which isAlignedMemNode() accepts the memory node.
// A store node is always a MemSDNode, so use cast<> (which asserts on a
// mismatch) instead of passing an unchecked dyn_cast<> result to the helper.
def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;
2685
// Store for which isAlignedMemNode() rejects the memory node.
// A store node is always a MemSDNode, so use cast<> (which asserts on a
// mismatch) instead of passing an unchecked dyn_cast<> result to the helper.
def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
2689
2690
2691multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
2692 // Aligned stores
2693 def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
2694 (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
2695 Requires<[UseHVXSgl]>;
2696 def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
2697 (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
2698 Requires<[UseHVXSgl]>;
2699
2700 // 128B Aligned stores
2701 def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
2702 (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
2703 Requires<[UseHVXDbl]>;
2704 def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
2705 (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
2706 Requires<[UseHVXDbl]>;
2707
2708 // Fold Add R+OFF into vector store.
2709 let AddedComplexity = 10 in {
2710 def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
2711 (add IntRegs:$src2, s4_6ImmPred:$offset)),
2712 (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
2713 (VTSgl VectorRegs:$src1))>,
2714 Requires<[UseHVXSgl]>;
2715 def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
2716 (add IntRegs:$src2, s4_6ImmPred:$offset)),
2717 (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
2718 (VTSgl VectorRegs:$src1))>,
2719 Requires<[UseHVXSgl]>;
2720
2721 // Fold Add R+OFF into vector store 128B.
2722 def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
2723 (add IntRegs:$src2, s4_7ImmPred:$offset)),
2724 (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
2725 (VTDbl VectorRegs128B:$src1))>,
2726 Requires<[UseHVXDbl]>;
2727 def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
2728 (add IntRegs:$src2, s4_7ImmPred:$offset)),
2729 (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
2730 (VTDbl VectorRegs128B:$src1))>,
2731 Requires<[UseHVXDbl]>;
2732 }
2733}
2734
2735defm : vS32b_ai_pats <v64i8, v128i8>;
2736defm : vS32b_ai_pats <v32i16, v64i16>;
2737defm : vS32b_ai_pats <v16i32, v32i32>;
2738defm : vS32b_ai_pats <v8i64, v16i64>;
2739
2740
2741multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
2742 // Aligned loads
2743 def : Pat < (VTSgl (alignedload IntRegs:$addr)),
2744 (V6_vL32b_ai IntRegs:$addr, 0) >,
2745 Requires<[UseHVXSgl]>;
2746 def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
2747 (V6_vL32Ub_ai IntRegs:$addr, 0) >,
2748 Requires<[UseHVXSgl]>;
2749
2750 // 128B Load
2751 def : Pat < (VTDbl (alignedload IntRegs:$addr)),
2752 (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
2753 Requires<[UseHVXDbl]>;
2754 def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
2755 (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
2756 Requires<[UseHVXDbl]>;
2757
2758 // Fold Add R+OFF into vector load.
2759 let AddedComplexity = 10 in {
2760 def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
2761 (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
2762 Requires<[UseHVXDbl]>;
2763 def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
2764 (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
2765 Requires<[UseHVXDbl]>;
2766
2767 def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
2768 (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
2769 Requires<[UseHVXSgl]>;
2770 def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
2771 (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
2772 Requires<[UseHVXSgl]>;
2773 }
2774}
2775
2776defm : vL32b_ai_pats <v64i8, v128i8>;
2777defm : vL32b_ai_pats <v32i16, v64i16>;
2778defm : vL32b_ai_pats <v16i32, v32i32>;
2779defm : vL32b_ai_pats <v8i64, v16i64>;
2780
2781multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
2782 def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
2783 (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
2784 Requires<[UseHVXSgl]>;
2785 def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
2786 (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
2787 Requires<[UseHVXSgl]>;
2788
2789 def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
2790 (PS_vstorerw_ai_128B IntRegs:$addr, 0,
2791 (VTDbl VecDblRegs128B:$src1))>,
2792 Requires<[UseHVXDbl]>;
2793 def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
2794 (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
2795 (VTDbl VecDblRegs128B:$src1))>,
2796 Requires<[UseHVXDbl]>;
2797}
2798
2799defm : STrivv_pats <v128i8, v256i8>;
2800defm : STrivv_pats <v64i16, v128i16>;
2801defm : STrivv_pats <v32i32, v64i32>;
2802defm : STrivv_pats <v16i64, v32i64>;
2803
2804multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
2805 def : Pat<(VTSgl (alignedload I32:$addr)),
2806 (PS_vloadrw_ai I32:$addr, 0)>,
2807 Requires<[UseHVXSgl]>;
2808 def : Pat<(VTSgl (unalignedload I32:$addr)),
2809 (PS_vloadrwu_ai I32:$addr, 0)>,
2810 Requires<[UseHVXSgl]>;
2811
2812 def : Pat<(VTDbl (alignedload I32:$addr)),
2813 (PS_vloadrw_ai_128B I32:$addr, 0)>,
2814 Requires<[UseHVXDbl]>;
2815 def : Pat<(VTDbl (unalignedload I32:$addr)),
2816 (PS_vloadrwu_ai_128B I32:$addr, 0)>,
2817 Requires<[UseHVXDbl]>;
2818}
2819
2820defm : LDrivv_pats <v128i8, v256i8>;
2821defm : LDrivv_pats <v64i16, v128i16>;
2822defm : LDrivv_pats <v32i32, v64i32>;
2823defm : LDrivv_pats <v16i64, v32i64>;
2824
2825let Predicates = [HasV60T,UseHVXSgl] in {
2826 def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
2827 (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
2828 def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt),
2829 (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>;
2830}
2831let Predicates = [HasV60T,UseHVXDbl] in {
2832 def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
2833 (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
2834 def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt),
2835 (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>;
2836}
2837
2838
2839def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
2840 SDTCisSubVecOfVec<1, 0>]>;
2841
2842def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
2843
2844def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
2845 (v16i32 VectorRegs:$Vt))),
2846 (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
2847 Requires<[UseHVXSgl]>;
// 128-byte mode: combine two single vectors (v32i32) into a vector pair
// (v64i32).  Under UseHVXDbl a single v32i32 vector is named with
// VectorRegs128B, as in the PS_vselect_128B pattern; VecDblRegs is the
// vector-pair class of the 64-byte mode.
def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VectorRegs128B:$Vs),
                                  (v32i32 VectorRegs128B:$Vt))),
         (V6_vcombine_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
     Requires<[UseHVXDbl]>;
2852
2853def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
2854 SDTCisInt<3>]>;
2855
2856def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;
2857
2858// 0 as the last argument denotes vpacke. 1 denotes vpacko
2859def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
2860 (v64i8 VectorRegs:$Vt), (i32 0))),
2861 (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
2862 Requires<[UseHVXSgl]>;
2863def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
2864 (v64i8 VectorRegs:$Vt), (i32 1))),
2865 (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
2866 Requires<[UseHVXSgl]>;
2867def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
2868 (v32i16 VectorRegs:$Vt), (i32 0))),
2869 (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
2870 Requires<[UseHVXSgl]>;
2871def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
2872 (v32i16 VectorRegs:$Vt), (i32 1))),
2873 (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
2874 Requires<[UseHVXSgl]>;
2875
// 128-byte mode vpacke/vpacko.  The operands are single vectors, which the
// other UseHVXDbl patterns in this file name with VectorRegs128B (see the
// vS32b_ai_pats instantiations for v128i8/v64i16); VecDblRegs is the
// vector-pair class of the 64-byte mode.
// As above, a last operand of 0 selects vpacke and 1 selects vpacko.
def: Pat<(v128i8 (HexagonVPACK (v128i8 VectorRegs128B:$Vs),
                               (v128i8 VectorRegs128B:$Vt), (i32 0))),
         (V6_vpackeb_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
     Requires<[UseHVXDbl]>;
def: Pat<(v128i8 (HexagonVPACK (v128i8 VectorRegs128B:$Vs),
                               (v128i8 VectorRegs128B:$Vt), (i32 1))),
         (V6_vpackob_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
     Requires<[UseHVXDbl]>;
def: Pat<(v64i16 (HexagonVPACK (v64i16 VectorRegs128B:$Vs),
                               (v64i16 VectorRegs128B:$Vt), (i32 0))),
         (V6_vpackeh_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
     Requires<[UseHVXDbl]>;
def: Pat<(v64i16 (HexagonVPACK (v64i16 VectorRegs128B:$Vs),
                               (v64i16 VectorRegs128B:$Vt), (i32 1))),
         (V6_vpackoh_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
     Requires<[UseHVXDbl]>;
2892
2893def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
2894def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
2895def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
2896def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
2897def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
2898def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
2899def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
2900def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
2901
2902
2903multiclass bitconvert_32<ValueType a, ValueType b> {
2904 def : Pat <(b (bitconvert (a IntRegs:$src))),
2905 (b IntRegs:$src)>;
2906 def : Pat <(a (bitconvert (b IntRegs:$src))),
2907 (a IntRegs:$src)>;
2908}
2909
2910multiclass bitconvert_64<ValueType a, ValueType b> {
2911 def : Pat <(b (bitconvert (a DoubleRegs:$src))),
2912 (b DoubleRegs:$src)>;
2913 def : Pat <(a (bitconvert (b DoubleRegs:$src))),
2914 (a DoubleRegs:$src)>;
2915}
2916
2917// Bit convert vector types to integers.
2918defm : bitconvert_32<v4i8, i32>;
2919defm : bitconvert_32<v2i16, i32>;
2920defm : bitconvert_64<v8i8, i64>;
2921defm : bitconvert_64<v4i16, i64>;
2922defm : bitconvert_64<v2i32, i64>;
2923
2924def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2925 (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>;
2926def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2927 (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>;
2928def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2929 (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>;
2930
2931def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2932 (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>;
2933def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2934 (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>;
2935def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2936 (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>;
2937
2938def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
2939 (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
2940
2941def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
2942 (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
2943
2944def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
2945def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
2946
2947// Replicate the low 8-bits from 32-bits input register into each of the
2948// four bytes of 32-bits destination register.
2949def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
2950
2951// Replicate the low 16-bits from 32-bits input register into each of the
2952// four halfwords of 64-bits destination register.
2953def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
2954
2955
2956class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
2957 : Pat <(Op Type:$Rss, Type:$Rtt),
2958 (MI Type:$Rss, Type:$Rtt)>;
2959
2960def: VArith_pat <A2_vaddub, add, V8I8>;
2961def: VArith_pat <A2_vaddh, add, V4I16>;
2962def: VArith_pat <A2_vaddw, add, V2I32>;
2963def: VArith_pat <A2_vsubub, sub, V8I8>;
2964def: VArith_pat <A2_vsubh, sub, V4I16>;
2965def: VArith_pat <A2_vsubw, sub, V2I32>;
2966
2967def: VArith_pat <A2_and, and, V2I16>;
2968def: VArith_pat <A2_xor, xor, V2I16>;
2969def: VArith_pat <A2_or, or, V2I16>;
2970
2971def: VArith_pat <A2_andp, and, V8I8>;
2972def: VArith_pat <A2_andp, and, V4I16>;
2973def: VArith_pat <A2_andp, and, V2I32>;
2974def: VArith_pat <A2_orp, or, V8I8>;
2975def: VArith_pat <A2_orp, or, V4I16>;
2976def: VArith_pat <A2_orp, or, V2I32>;
2977def: VArith_pat <A2_xorp, xor, V8I8>;
2978def: VArith_pat <A2_xorp, xor, V4I16>;
2979def: VArith_pat <A2_xorp, xor, V2I32>;
2980
2981def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2982 (i32 u5_0ImmPred:$c))))),
2983 (S2_asr_i_vw V2I32:$b, imm:$c)>;
2984def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2985 (i32 u5_0ImmPred:$c))))),
2986 (S2_lsr_i_vw V2I32:$b, imm:$c)>;
2987def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2988 (i32 u5_0ImmPred:$c))))),
2989 (S2_asl_i_vw V2I32:$b, imm:$c)>;
2990
2991def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2992 (S2_asr_i_vh V4I16:$b, imm:$c)>;
2993def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2994 (S2_lsr_i_vh V4I16:$b, imm:$c)>;
2995def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2996 (S2_asl_i_vh V4I16:$b, imm:$c)>;
2997
2998
2999def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
3000 [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
3001def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
3002 [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
3003
3004def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
3005def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
3006def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
3007def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
3008def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
3009def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
3010
3011def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
3012 (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
3013def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
3014 (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
3015def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
3016 (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
3017def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
3018 (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
3019def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
3020 (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
3021def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
3022 (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
3023
3024class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
3025 : Pat <(Op Value:$Rs, I32:$Rt),
3026 (MI Value:$Rs, I32:$Rt)>;
3027
3028def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
3029def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
3030def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
3031def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
3032def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
3033def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
3034
3035
3036def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
3037 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
3038def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
3039 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
3040def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
3041 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
3042
3043def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
3044def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
3045def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
3046def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
3047def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
3048def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
3049def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
3050def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
3051def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
3052
3053
3054class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
3055 : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
3056 (MI Value:$Rs, Value:$Rt)>;
3057
3058def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
3059def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
3060def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
3061
3062def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
3063def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
3064def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
3065
3066def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
3067def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
3068def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
3069
3070
3071class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
3072 : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
3073 (MI InVal:$Rs, InVal:$Rt)>;
3074
3075def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
3076def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
3077def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
3078
3079def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
3080def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
3081def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
3082
3083def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
3084 (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>;
3085def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
3086 (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
3087
3088
// v4i8 add: Hexagon has no instruction for this, so both inputs are widened
// with ToZext64, added as v8i8 with A2_vaddub, and only the low word of the
// result is kept.
def: Pat<(v4i8 (add (v4i8 IntRegs:$Ra), (v4i8 IntRegs:$Rb))),
         (LoReg (A2_vaddub (ToZext64 $Ra), (ToZext64 $Rb)))>;
// v4i8 subtract: Hexagon has no instruction for this, so both inputs are
// widened with ToZext64, subtracted as v8i8 with A2_vsubub, and only the
// low word of the result is kept.
def: Pat<(v4i8 (sub (v4i8 IntRegs:$Ra), (v4i8 IntRegs:$Rb))),
         (LoReg (A2_vsubub (ToZext64 $Ra), (ToZext64 $Rb)))>;
//
// No 32-bit vector mux: widen both values with ToZext64, select with the
// 64-bit C2_vmux, and keep the low word of the result.
//
def: Pat<(v4i8 (select I1:$Pu, V4I8:$Ra, V4I8:$Rb)),
         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Ra), (ToZext64 $Rb)))>;
def: Pat<(v2i16 (select I1:$Pu, V2I16:$Ra, V2I16:$Rb)),
         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Ra), (ToZext64 $Rb)))>;
3107//
3108// 64-bit vector mux.
3109//
3110def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
3111 (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
3112def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
3113 (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
3114def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
3115 (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
3116
//
// No 32-bit vector compare: widen both operands with ToZext64 and use the
// 64-bit vector compare instructions.
//

// Byte elements (v4i8).
def: Pat<(i1 (seteq V4I8:$Ra, V4I8:$Rb)),
         (A2_vcmpbeq (ToZext64 $Ra), (ToZext64 $Rb))>;
def: Pat<(i1 (setgt V4I8:$Ra, V4I8:$Rb)),
         (A4_vcmpbgt (ToZext64 $Ra), (ToZext64 $Rb))>;
def: Pat<(i1 (setugt V4I8:$Ra, V4I8:$Rb)),
         (A2_vcmpbgtu (ToZext64 $Ra), (ToZext64 $Rb))>;

// Halfword elements (v2i16).
def: Pat<(i1 (seteq V2I16:$Ra, V2I16:$Rb)),
         (A2_vcmpheq (ToZext64 $Ra), (ToZext64 $Rb))>;
def: Pat<(i1 (setgt V2I16:$Ra, V2I16:$Rb)),
         (A2_vcmphgt (ToZext64 $Ra), (ToZext64 $Rb))>;
def: Pat<(i1 (setugt V2I16:$Ra, V2I16:$Rb)),
         (A2_vcmphgtu (ToZext64 $Ra), (ToZext64 $Rb))>;
3134
3135class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
3136 ValueType CmpTy>
3137 : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
3138 (InvMI Value:$Rt, Value:$Rs)>;
3139
// Map from a compare operation to the corresponding instruction with the
// order of operands reversed, e.g. x < y --> cmp.gt(y,x).
3142def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
3143def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
3144def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
3145def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
3146def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
3147def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
3148
3149def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
3150def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
3151def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
3152def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
3153def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
3154def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
3155
// Map from vcmpne(Rss) -> !vcmpweq(Rss).
// rs != rt -> !(rs == rt).
// The elements are 32-bit words, so the equality test must be the word
// compare A2_vcmpweq (the one used for seteq on V2I32), not the byte
// compare A2_vcmpbeq.
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
3160
3161
3162// Truncate: from vector B copy all 'E'ven 'B'yte elements:
3163// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
3164def: Pat<(v4i8 (trunc V4I16:$Rs)),
3165 (S2_vtrunehb V4I16:$Rs)>;
3166
3167// Truncate: from vector B copy all 'O'dd 'B'yte elements:
3168// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
3169// S2_vtrunohb
3170
3171// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
3172// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
3173// S2_vtruneh
3174
3175def: Pat<(v2i16 (trunc V2I32:$Rs)),
3176 (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
3177
3178
3179def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
3180def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
3181
3182def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
3183def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
3184
3185def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
3186def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
3187def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
3188def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
3189def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
3190def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
3191
3192// Sign extends a v2i8 into a v2i32.
3193def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
3194 (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
3195
3196// Sign extends a v2i16 into a v2i32.
3197def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
3198 (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
3199
3200
3201// Multiplies two v2i16 and returns a v2i32. We are using here the
3202// saturating multiply, as hexagon does not provide a non saturating
3203// vector multiply, and saturation does not impact the result that is
3204// in double precision of the operands.
3205
3206// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
3207// with the C semantics for this one, this pattern uses the half word
3208// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
3209// then truncated to fit this back into a v2i16 and to simulate the
3210// wrap around semantics for unsigned in C.
3211def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
3212 (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
3213
3214def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
3215 (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
3216 (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
3217
3218// Multiplies two v4i16 vectors.
3219def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
3220 (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
3221 (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
3222
3223def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
3224 (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
3225 (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
3226
3227// Multiplies two v4i8 vectors.
3228def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
3229 (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
3230 Requires<[HasV5T]>;
3231
3232def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
3233 (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
3234
3235// Multiplies two v8i8 vectors.
3236def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
3237 (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
3238 (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
3239 Requires<[HasV5T]>;
3240
3241def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
3242 (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
3243 (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
3244
3245def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
3246 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
3247
3248def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
3249def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
3250def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
3251def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
3252
3253class ShufflePat<InstHexagon MI, SDNode Op>
3254 : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
3255 (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
3256
3257// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
3258def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
3259
3260// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
3261def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
3262
3263// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
3264def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
3265
3266// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
3267def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
3268
3269
3270// Truncated store from v4i16 to v4i8.
3271def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
3272 (truncstore node:$val, node:$ptr),
3273 [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
3274
3275// Truncated store from v2i32 to v2i16.
3276def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
3277 (truncstore node:$val, node:$ptr),
3278 [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
3279
3280def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
3281 (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
3282 (LoReg $Rs))))>;
3283
3284def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
3285 (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
3286
3287
3288// Zero and sign extended load from v2i8 into v2i16.
3289def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
3290 [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
3291
3292def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
3293 [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
3294
3295def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
3296 (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
3297
3298def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
3299 (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
3300
3301def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
3302 (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
3303
3304def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
3305 (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
3306