blob: ffe7577add693429afebf0a2c6cf8e99018af63d [file] [log] [blame]
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001// Pattern fragment that combines the value type and the register class
2// into a single parameter.
3// The pat frags in the definitions below need to have a named register,
4// otherwise i32 will be assumed regardless of the register class. The
5// name of the register does not matter.
// Each leaf names a register ($R) so that the declared value type is tied to
// the register class: i1 -> PredRegs, i32/f32 -> IntRegs, i64/f64 -> DoubleRegs.
6def I1 : PatLeaf<(i1 PredRegs:$R)>;
7def I32 : PatLeaf<(i32 IntRegs:$R)>;
8def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
9def F32 : PatLeaf<(f32 IntRegs:$R)>;
10def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
11
12// Pattern fragments to extract the low and high subregisters from a
13// 64-bit value.
// LoReg and HiReg are output-side fragments: each wraps its single operand,
// typed as i64, in EXTRACT_SUBREG with subreg_loreg or subreg_hireg.
14def LoReg: OutPatFrag<(ops node:$Rs),
15 (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
16def HiReg: OutPatFrag<(ops node:$Rs),
17 (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
18
// Matches an `or` of two operands only when the C++ predicate orIsAdd(N)
// accepts the node. The load/store address patterns in this file use it as a
// twin of their `add` forms.
// NOTE(review): presumably orIsAdd checks that the `or` behaves like an add;
// confirm against its definition.
19def orisadd: PatFrag<(ops node:$Addr, node:$off),
20 (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
21
// Immediate predicate: accepts an i32 constant whose zero-extended value,
// held in a uint32_t, is accepted by isPowerOf2_32.
Krzysztof Parzyszekf9142782016-11-06 18:09:56 +000022def IsPow2_32 : PatLeaf<(i32 imm), [{
23 uint32_t V = N->getZExtValue();
24 return isPowerOf2_32(V);
Krzysztof Parzyszek2839b292016-11-05 21:44:50 +000025}]>;
26
// Immediate predicate: accepts an i32 constant whose bitwise complement
// (~V, computed on the uint32_t value) is accepted by isPowerOf2_32.
Krzysztof Parzyszekf9142782016-11-06 18:09:56 +000027def IsNPow2_32 : PatLeaf<(i32 imm), [{
28 uint32_t V = N->getZExtValue();
29 return isPowerOf2_32(~V);
Krzysztof Parzyszek2839b292016-11-05 21:44:50 +000030}]>;
31
// Immediate transform: reads the constant sign-extended into an int32_t and
// produces the i32 target constant V-1.
Krzysztof Parzyszekf9142782016-11-06 18:09:56 +000032def SDEC1 : SDNodeXForm<imm, [{
Krzysztof Parzyszek846597d2016-11-06 18:05:14 +000033 int32_t V = N->getSExtValue();
34 return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000035}]>;
36
// Immediate transform: reads the constant zero-extended into a uint32_t,
// asserts that it is non-zero, and produces the i32 target constant V-1.
Krzysztof Parzyszekf9142782016-11-06 18:09:56 +000037def UDEC1 : SDNodeXForm<imm, [{
Krzysztof Parzyszek846597d2016-11-06 18:05:14 +000038 uint32_t V = N->getZExtValue();
39 assert(V > 0);
40 return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000041}]>;
42
// Immediate transform: produces the i32 target constant Log2_32(V) for the
// zero-extended value V. Inside the code fragment, Log2_32 is a call in the
// embedded C++ and not a reference to this TableGen def of the same name.
Krzysztof Parzyszekf9142782016-11-06 18:09:56 +000043def Log2_32 : SDNodeXForm<imm, [{
Krzysztof Parzyszek846597d2016-11-06 18:05:14 +000044 uint32_t V = N->getZExtValue();
45 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
46}]>;
47
48
// Register-immediate compare: (i1 (OpNode I32:$src1, ImmPred:$src2)) selects
// MI on the same register and immediate. ImmPred is the immediate predicate
// supplied by each instantiation below.
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000049class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +000050 : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000051 (MI IntRegs:$src1, ImmPred:$src2)>;
52
53def : T_CMP_pat <C2_cmpeqi, seteq, s10_0ImmPred>;
54def : T_CMP_pat <C2_cmpgti, setgt, s10_0ImmPred>;
55def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>;
56
// Type profile with one result and two operands: the result is i64,
// operand 1 is i32, and operand 2 has the same type as operand 1.
57def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
58 [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
59
// Both target nodes use the profile above.
60def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
61def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
62
63// Pats for instruction selection.
// Two-register binary operation: (ResT (Op I32:$Rs, I32:$Rt)) selects MI on
// the same two registers. ResT is i32 for the generic ALU nodes and i64 for
// the two Hexagon-specific nodes, which take two i32 inputs.
64class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +000065 : Pat<(ResT (Op I32:$Rs, I32:$Rt)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000066 (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
67
68def: BinOp32_pat<add, A2_add, i32>;
69def: BinOp32_pat<and, A2_and, i32>;
70def: BinOp32_pat<or, A2_or, i32>;
71def: BinOp32_pat<sub, A2_sub, i32>;
72def: BinOp32_pat<xor, A2_xor, i32>;
73
74def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
75def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
76
77// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
78// that reverse the order of the operands.
// The operand list is declared as ($rhs, $lhs) while the body reuses
// F.Fragment unchanged, so the first operand given to RevCmp<F> binds to the
// name $rhs.
// NOTE(review): this assumes F.Fragment names its operands $lhs and $rhs;
// confirm against the definitions of the comparison PatFrags.
79class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
80
81// Pats for compares. They use PatFrags as operands, not SDNodes,
82// since seteq/setgt/etc. are defined as PatFrags.
// Register-register compare: (VT (Op I32:$Rs, I32:$Rt)) selects MI on the
// same two registers. The RevCmp instantiations at the end reuse the
// greater-than instructions for setlt/setult through the operand-reversing
// fragment.
83class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +000084 : Pat<(VT (Op I32:$Rs, I32:$Rt)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000085 (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
86
87def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
88def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
89def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
90
91def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
92def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
93
// i32 select between two registers, controlled by a predicate register.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +000094def: Pat<(i32 (select I1:$Pu, I32:$Rs, I32:$Rt)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +000095 (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
96
// Register-immediate add/or/and, and immediate-minus-register sub. The
// operand names ($s16, $s10) are only labels: the immediate accepted by each
// pattern is whatever s32_0ImmPred accepts.
97def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)),
98 (i32 (A2_addi I32:$Rs, imm:$s16))>;
99
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000100def: Pat<(or I32:$Rs, s32_0ImmPred:$s10),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000101 (A2_orir IntRegs:$Rs, imm:$s10)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000102def: Pat<(and I32:$Rs, s32_0ImmPred:$s10),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000103 (A2_andir IntRegs:$Rs, imm:$s10)>;
104
105def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
106 (A2_subri imm:$s10, IntRegs:$Rs)>;
107
108// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000109def: Pat<(not I32:$src1),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000110 (A2_subri -1, IntRegs:$src1)>;
111
// Constant materialisation: an immediate accepted by s32_0ImmPred selects
// A2_tfrsi, and one accepted by s8_0Imm64Pred selects A2_tfrpi.
112def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
113def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
114
// i32 selects where one or both of the selected values are immediates:
// immediate/register -> C2_muxri, register/immediate -> C2_muxir,
// immediate/immediate -> C2_muxii (second immediate limited by s8_0ImmPred).
115def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)),
116 (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
117
118def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)),
119 (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
120
121def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)),
122 (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
123
// Shifts by the constant 16 and in-register sign extensions from i8/i16
// each select a dedicated single-operand instruction.
124def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
125def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
126def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
127def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
128
// Vector compare: both operands are DoubleRegs values viewed as vector type T
// and the result is i1. Instantiated for v8i8, v4i16 and v2i32; note that
// there is no signed greater-than (setgt) instantiation for v8i8 here.
129class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
130 : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
131 (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
132
133def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
134def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
135def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
136def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
137def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
138def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
139def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
140def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
141
142// Add halfword.
// The patterns below match a 32-bit add/sub whose result is then either
// sign-extended in-register from 16 bits or shifted left by 16. One add
// variant has its first input shifted left by 16 and the sum shifted right
// arithmetically by 16.
143def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
144 (A2_addh_l16_ll I32:$src1, I32:$src2)>;
145
146def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
147 (A2_addh_l16_hl I32:$src1, I32:$src2)>;
148
149def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
150 (A2_addh_h16_ll I32:$src1, I32:$src2)>;
151
152// Subtract halfword.
153def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
154 (A2_subh_l16_ll I32:$src1, I32:$src2)>;
155
156def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
157 (A2_subh_h16_ll I32:$src1, I32:$src2)>;
158
159// Here, depending on the operand being selected, we'll either generate a
160// min or max instruction.
161// Ex:
162// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected
163// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'.
164// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value
165// is selected and the corresponding HexagonInst is passed in 'SwapInst'.
166
// Emits two patterns for the compare Op on values of type VT in class RC.
// When the select returns its operands in compare order ($src1, $src2), Inst
// is chosen; when it returns them swapped ($src2, $src1), SwapInst is chosen.
// Both outputs pass the registers in ($src1, $src2) order.
167multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
168 InstHexagon Inst, InstHexagon SwapInst> {
169 def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
170 (VT RC:$src1), (VT RC:$src2)),
171 (Inst RC:$src1, RC:$src2)>;
172 def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
173 (VT RC:$src2), (VT RC:$src1)),
174 (SwapInst RC:$src1, RC:$src2)>;
175}
176
// Leaf predicate on an i32 IntRegs value: accepts the node when the C++
// helper isPositiveHalfWord(N) returns true. The helper is defined outside
// this file section; see its definition for the exact condition.
Krzysztof Parzyszek2839b292016-11-05 21:44:50 +0000177def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a), [{
178 return isPositiveHalfWord(N);
179}]>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000180
// 32-bit min/max selection. Instantiates T_MinMax_pats for i32 in IntRegs and
// adds two more patterns for the same select shapes wrapped in
// (sext_inreg ..., i16), where every compared/selected operand must satisfy
// PositiveHalfWord. Those wrapped forms select Inst/SwapInst directly, with
// no separate sign-extension in the output.
181multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
182 defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
183
184 def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
185 (i32 PositiveHalfWord:$src2))),
186 (i32 PositiveHalfWord:$src1),
187 (i32 PositiveHalfWord:$src2))), i16),
188 (Inst IntRegs:$src1, IntRegs:$src2)>;
189
190 def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
191 (i32 PositiveHalfWord:$src2))),
192 (i32 PositiveHalfWord:$src2),
193 (i32 PositiveHalfWord:$src1))), i16),
194 (SwapInst IntRegs:$src1, IntRegs:$src2)>;
195}
196
// Arguments are (compare, Inst, SwapInst): ge/gt pass (max, min), le/lt pass
// (min, max), and the unsigned compares use the corresponding *u
// instructions. All of these patterns get AddedComplexity = 200.
197let AddedComplexity = 200 in {
198 defm: MinMax_pats<setge, A2_max, A2_min>;
199 defm: MinMax_pats<setgt, A2_max, A2_min>;
200 defm: MinMax_pats<setle, A2_min, A2_max>;
201 defm: MinMax_pats<setlt, A2_min, A2_max>;
202 defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
203 defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
204 defm: MinMax_pats<setule, A2_minu, A2_maxu>;
205 defm: MinMax_pats<setult, A2_minu, A2_maxu>;
206}
207
// 64-bit register-register compare producing i1. As with the 32-bit compares,
// setlt/setult are covered by the greater-than instructions through RevCmp.
208class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000209 : Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000210 (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
211
212def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
213def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
214def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
215def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
216def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
217
// 64-bit register-register add, sub, and, or, xor.
218def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
219def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
220
221def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
222def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
223def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
224
// Logic on i1 (predicate) values: not, and, or, xor, plus the and/or forms
// whose second operand is negated (C2_andn, C2_orn).
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000225def: Pat<(i1 (not I1:$Ps)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000226 (C2_not PredRegs:$Ps)>;
227
228def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
229def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
230def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
231def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
232def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
233
// Return-related target nodes. Neither has typed operands (SDTNone); retflag
// is chained, variadic and may take an incoming glue, eh_return is chained.
234def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
235 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
236def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
237
// Control flow: unconditional branch, return, conditional branch on an i1,
// EH return, and indirect branch through an i32 register. Both return forms
// pass R31 as an explicit i32 operand.
238def: Pat<(br bb:$dst),
239 (J2_jump brtarget:$dst)>;
240def: Pat<(retflag),
241 (PS_jmpret (i32 R31))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000242def: Pat<(brcond I1:$src1, bb:$offset),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000243 (J2_jumpt PredRegs:$src1, bb:$offset)>;
244
245def: Pat<(eh_return),
246 (EH_RETURN_JMPR (i32 R31))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000247def: Pat<(brind I32:$dst),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000248 (J2_jumpr IntRegs:$dst)>;
249
250// Patterns to select load-indexed (i.e. load from base+offset).
// Emits five patterns for the load fragment Load producing VT via MI:
//   - frame index alone (offset 0),
//   - frame index + immediate, as `add` and as `orisadd`,
//   - register + immediate (as `add` only),
//   - register alone (offset 0).
// ImmPred restricts which immediates may be folded as the offset.
251multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
252 InstHexagon MI> {
253 def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
254 def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
255 (VT (MI AddrFI:$fi, imm:$Off))>;
256 def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
257 (VT (MI AddrFI:$fi, imm:$Off))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000258 def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000259 (VT (MI IntRegs:$Rs, imm:$Off))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000260 def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000261}
262
// Instantiations of Loadx_pat, all with AddedComplexity = 20. Plain and
// atomic loads come first, then the extending loads: the any-extending and
// zero-extending forms use L2_loadrub_io/L2_loadruh_io, and the sign-extending
// i8/i16 forms use L2_loadrb_io/L2_loadrh_io.
263let AddedComplexity = 20 in {
264 defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
265 defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
266 defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
267 defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
268 defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
269 defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
270
271 defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
272 defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
273 defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
274 defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
275 defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
276 defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
277 defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
278 defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
279 // No sextloadi1.
280}
281
282// Sign-extending loads of i1 need to replicate the lowest bit throughout
283// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
284// do the trick.
// Only the register-alone address form is handled: the byte is loaded with
// L2_loadrub_io at offset 0 and then subtracted from 0 with A2_subri.
285let AddedComplexity = 20 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000286def: Pat<(i32 (sextloadi1 I32:$Rs)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000287 (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
288
// 32-bit multiply, and the signed/unsigned high-half multiplies
// (mulhs/mulhu).
289def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
290def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
291def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
292
// Multiply by immediate, and add-accumulate forms. The accumulate patterns
// match (add (op $src2, $src3), $src1) and pass the accumulator $src1 as the
// first instruction operand. Operand names such as $u8 are labels only; the
// accepted immediates are set by the *ImmPred used in each pattern.
293def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8),
294 (M2_mpysip IntRegs:$Rs, imm:$u8)>;
295def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)),
296 (M2_mpysin IntRegs:$Rs, imm:$u8)>;
297def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2),
298 (M2_mpysmi IntRegs:$src1, imm:$src2)>;
299def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
300 (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
301def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
302 (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
303def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
304 (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
305def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
306 (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
307
// Accumulating forms of shape (secOp $src1, (firstOp $src2, $src3)), selected
// to MI $src1, $src2, $src3. In pat1 the third operand is an immediate
// constrained by ImmPred; in pat2 it is a register and the result is typed
// i32. In the instantiations the template arguments are ordered
// <MI, firstOp (inner), secOp (outer)>.
308class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
309 PatLeaf ImmPred>
310 : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
311 (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
312
313class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
314 : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
315 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
316
317def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
318def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;
319
320def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
321def : T_MType_acc_pat2 <M2_nacci, add, sub>;
322
323def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
324def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
325def: T_MType_acc_pat2 <M4_or_and, and, or>;
326def: T_MType_acc_pat2 <M4_and_and, and, and>;
327def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
328def: T_MType_acc_pat2 <M4_or_or, or, or>;
329def: T_MType_acc_pat2 <M4_and_or, or, and>;
330def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
331
// Same shape as T_MType_acc_pat2, except that the inner operation's second
// input is negated in the matched DAG: (secOp $src1, (firstOp $src2,
// (not $src3))). The instruction receives $src3 without the `not`.
332class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
333 : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
334 (not IntRegs:$src3)))),
335 (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
336
337def: T_MType_acc_pat3 <M4_or_andn, and, or>;
338def: T_MType_acc_pat3 <M4_and_andn, and, and>;
339def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
340
// Shorthand fragments for extending a node to i64 with anyext, sext and zext.
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000341def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
342def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>;
343def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
344
Krzysztof Parzyszek2839b292016-11-05 21:44:50 +0000345// Return true for a 32-bit to 64-bit sign-extending load.
// Leaf on an i64 DoubleRegs value. The predicate rejects any node that is not
// a LoadSDNode, and otherwise requires the extension type to be ISD::SEXTLOAD
// and the scalar type of the memory VT to be MVT::i32.
346def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{
347 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
348 if (!LD)
349 return false;
350 return LD->getExtensionType() == ISD::SEXTLOAD &&
351 LD->getMemoryVT().getScalarType() == MVT::i32;
352}]>;
353
// i64 multiply of two extended i32 values. Any-extended inputs select
// M2_dpmpyuu_s0 and sign-extended inputs select M2_dpmpyss_s0. When both
// inputs are sign-extending 32-bit loads (Sext64Ld), the instruction is fed
// the low subregisters of the loaded 64-bit values.
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000354def: Pat<(i64 (mul (Aext64 I32:$src1), (Aext64 I32:$src2))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000355 (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
356
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000357def: Pat<(i64 (mul (Sext64 I32:$src1), (Sext64 I32:$src2))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000358 (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
359
Krzysztof Parzyszek2839b292016-11-05 21:44:50 +0000360def: Pat<(i64 (mul Sext64Ld:$src1, Sext64Ld:$src2)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000361 (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
362
363// Multiply and accumulate, use full result.
364// Rxx[+-]=mpy(Rs,Rt)
365
// $src1 is the i64 accumulator; $src2/$src3 are the i32 multiplicands.
// Sign-extended products select the *ss* instructions (acc for add, nac for
// sub). Both any-extended and zero-extended products select the *uu*
// instructions, so each of those has two patterns.
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000366def: Pat<(i64 (add I64:$src1,
367 (mul (Sext64 I32:$src2),
368 (Sext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000369 (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
370
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000371def: Pat<(i64 (sub I64:$src1,
372 (mul (Sext64 I32:$src2),
373 (Sext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000374 (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
375
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000376def: Pat<(i64 (add I64:$src1,
377 (mul (Aext64 I32:$src2),
378 (Aext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000379 (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
380
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000381def: Pat<(i64 (add I64:$src1,
382 (mul (Zext64 I32:$src2),
383 (Zext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000384 (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
385
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000386def: Pat<(i64 (sub I64:$src1,
387 (mul (Aext64 I32:$src2),
388 (Aext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000389 (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
390
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000391def: Pat<(i64 (sub I64:$src1,
392 (mul (Zext64 I32:$src2),
393 (Zext64 I32:$src3)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000394 (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
395
// Patterns for the post_* store fragments. The matched DAG lists the stored
// value first, then the i32 base and the immediate offset; the instruction
// takes base, offset, value. Offset is the immediate predicate for each
// access size.
396class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
397 InstHexagon MI>
398 : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
399 (MI I32:$src2, imm:$offset, Value:$src1)>;
400
401def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
402def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
403def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
404def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
405
406// Patterns for generating stores, where the address takes different forms:
407// - frameindex,
408// - frameindex + offset,
409// - base + offset,
410// - simple (base address without offset).
411// These would usually be used together (via Storex_pat defined below), but
412// in some cases one may want to apply different properties (such as
413// AddedComplexity) to the individual patterns.
// Store to a frame index alone: the instruction gets the frame index, a zero
// offset, and the value.
414class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
415 : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
// Store to frame index + immediate. Two patterns are emitted, one where the
// address is an `add` and one where it is an `orisadd`; both fold the
// immediate (constrained by ImmPred) into the instruction's offset operand.
416multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
417 InstHexagon MI> {
418 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
419 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
420 def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
421 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
422}
// Store to base register + immediate. As with the frame-index variant, the
// address is matched both as `add` and as `orisadd`. $Rt is the stored value
// and $Rs the base register.
423multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
424 InstHexagon MI> {
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000425 def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000426 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000427 def: Pat<(Store Value:$Rt, (orisadd I32:$Rs, ImmPred:$Off)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000428 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
429}
// Store to a base register with no offset: the instruction gets the base
// register, a zero offset, and the value.
430class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000431 : Pat<(Store Value:$Rt, I32:$Rs),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000432 (MI IntRegs:$Rs, 0, Value:$Rt)>;
433
434// Patterns for generating stores, where the address takes different forms,
435// and where the value being stored is transformed through the value modifier
436// ValueMod. The address forms are same as above.
// Frame-index store where the stored value is first passed through the
// output fragment ValueMod.
437class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
438 InstHexagon MI>
439 : Pat<(Store Value:$Rs, AddrFI:$fi),
440 (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
// Frame index + immediate store (address as `add` or `orisadd`) where the
// stored value is first passed through ValueMod.
441multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
442 PatFrag ValueMod, InstHexagon MI> {
443 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
444 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
445 def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
446 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
447}
// Base register + immediate store (address as `add` or `orisadd`) where the
// stored value is first passed through ValueMod.
448multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
449 PatFrag ValueMod, InstHexagon MI> {
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000450 def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000451 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000452 def: Pat<(Store Value:$Rt, (orisadd I32:$Rs, ImmPred:$Off)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000453 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
454}
// Base-register-only store (zero offset) where the stored value is first
// passed through ValueMod.
455class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
456 InstHexagon MI>
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000457 : Pat<(Store Value:$Rt, I32:$Rs),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000458 (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
459
// Bundles the frame-index, frame-index + offset, and base + offset store
// patterns. The base-only (simple) form is not included here; it is
// instantiated separately through Storex_simple_pat.
460multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
461 InstHexagon MI> {
462 def: Storex_fi_pat <Store, Value, MI>;
463 defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
464 defm: Storex_add_pat <Store, Value, ImmPred, MI>;
465}
466
// Same bundle as Storex_pat, for stores whose value goes through ValueMod.
// The base-only form is again left to Storexm_simple_pat.
467multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
468 PatFrag ValueMod, InstHexagon MI> {
469 def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
470 defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
471 defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
472}
473
474// Regular stores in the DAG have two operands: value and address.
475// Atomic stores also have two, but they are reversed: address, value.
476// To use atomic stores with the patterns, they need to have their operands
477// swapped. This relies on the knowledge that the F.Fragment uses names
478// "ptr" and "val".
// Re-declares F with the operand list ($val, $ptr) while keeping F's
// fragment, predicate code and operand transform unchanged.
479class SwapSt<PatFrag F>
480 : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
481 F.OperandTransform>;
482
// Store instantiations. The address forms with a frame index or an offset
// get AddedComplexity = 20; atomic stores are wrapped in SwapSt so that they
// fit the same (value, address) patterns as regular stores.
483let AddedComplexity = 20 in {
484 defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
485 defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
486 defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
487 defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
488
489 defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
490 defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
491 defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
492 defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
493}
494
495// Simple patterns should be tried with the least priority.
// They are therefore defined outside any AddedComplexity block.
496def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
497def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
498def: Storex_simple_pat<store, I32, S2_storeri_io>;
499def: Storex_simple_pat<store, I64, S2_storerd_io>;
500
501def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
502def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
503def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
504def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
505
// Truncating stores of an i64 value to 8/16/32 bits: the value is passed
// through LoReg and stored with the 32-bit-source store instructions.
// As with the plain stores, only the non-simple address forms get
// AddedComplexity = 20.
506let AddedComplexity = 20 in {
507 defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
508 defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
509 defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
510}
511
512def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
513def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
514def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
515
// Sign extension of an i32 register to i64.
Krzysztof Parzyszek84755102016-11-06 17:56:48 +0000516def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000517
// Two DAG shapes that select A2_abs: the select form
// (src < 0) ? (0 - src) : src, and the branch-free form
// ((src >> 31) + src) ^ (src >> 31) using arithmetic shifts. The second
// form is given AddedComplexity = 50.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000518def: Pat<(i32 (select (i1 (setlt I32:$src, 0)),
519 (i32 (sub 0, I32:$src)),
520 I32:$src)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000521 (A2_abs IntRegs:$src)>;
522
523let AddedComplexity = 50 in
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000524def: Pat<(i32 (xor (add (sra I32:$src, (i32 31)),
525 I32:$src),
526 (sra I32:$src, (i32 31)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000527 (A2_abs IntRegs:$src)>;
528
// 32-bit shifts by an immediate accepted by u5_0ImmPred.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000529def: Pat<(sra I32:$src, u5_0ImmPred:$u5),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000530 (S2_asr_i_r IntRegs:$src, imm:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000531def: Pat<(srl I32:$src, u5_0ImmPred:$u5),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000532 (S2_lsr_i_r IntRegs:$src, imm:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000533def: Pat<(shl I32:$src, u5_0ImmPred:$u5),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000534 (S2_asl_i_r IntRegs:$src, imm:$u5)>;
535
// Matches ((src1 >> src2) + 1) >> 1 with arithmetic shifts and selects the
// rounding shift instruction with the original shift amount.
536def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)),
537 (i32 1))),
538 (i32 1))),
539 (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;
540
// Bitwise not of a 64-bit register.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000541def : Pat<(not I64:$src1),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000542 (A2_notp DoubleRegs:$src1)>;
543
// In the patterns below, the 64-bit forms match the count wrapped in a trunc
// to i32, and the "ones" forms match the zero-count of the negated input.
544// Count leading zeros.
545def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
546def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
547
548// Count trailing zeros: 32-bit.
549def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
550
551// Count leading ones.
552def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
553def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
554
555// Count trailing ones: 32-bit.
556def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
557
// Single-bit clear/set/toggle: Rs combined with (1 << n) via and-not, or,
// xor. The first three take the bit index as a u5 immediate, the last three
// take it from a register.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000558def: Pat<(i32 (and I32:$Rs, (not (shl 1, u5_0ImmPred:$u5)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000559 (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000560def: Pat<(i32 (or I32:$Rs, (shl 1, u5_0ImmPred:$u5))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000561 (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000562def: Pat<(i32 (xor I32:$Rs, (shl 1, u5_0ImmPred:$u5))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000563 (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000564def: Pat<(i32 (and I32:$Rs, (not (shl 1, I32:$Rt)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000565 (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000566def: Pat<(i32 (or I32:$Rs, (shl 1, I32:$Rt))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000567 (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000568def: Pat<(i32 (xor I32:$Rs, (shl 1, I32:$Rt))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000569 (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
570
// Bit tests producing i1. ((1 << n) & Rs) != 0 selects tstbit with an
// immediate or register bit index. Truncating i32/i64 to i1 selects a test
// of bit 0; the i64 case tests the low subregister.
571let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000572 def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000573 (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000574 def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000575 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000576 def: Pat<(i1 (trunc I32:$Rs)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000577 (S2_tstbit_i IntRegs:$Rs, 0)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000578 def: Pat<(i1 (trunc I64:$Rs)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000579 (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
580}
581
// (Rs & mask) == 0, with the mask as a u6 immediate or a register.
582let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000583 def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000584 (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000585 def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000586 (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
587}
588
// (Rs & Rt) == Rt. $Rt appears twice in the source pattern, so both uses
// must be the same value for the pattern to match.
589let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000590def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000591 (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
592
// Matches a 32-bit value assembled from four byte loads at $b, $b+1, $b+2
// and $b+3: the byte at $b+3 lands in bits 0..7, $b+2 in bits 8..15, $b+1 in
// bits 16..23 and $b in bits 24..31 (see the nested shl/or structure). The
// whole tree is replaced by one word load from $b followed by A2_swiz.
// Note that the byte at $b+3 is matched with extloadi8 while the others use
// zextloadi8.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000593def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000594 (i32 8)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000595 (i32 (zextloadi8 (add I32:$b, 2)))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000596 (i32 16)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000597 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
598 (zextloadi8 I32:$b)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000599 (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
600
601// Patterns for loads of i1:
// An i1 load is done as a byte load (L2_loadrub_io) whose result is moved
// into a predicate register with C2_tfrrp. Covered address forms: frame
// index, register + immediate, and register alone.
602def: Pat<(i1 (load AddrFI:$fi)),
603 (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000604def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000605 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +0000606def: Pat<(i1 (load I32:$Rs)),
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000607 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
608
// Output fragment turning an i1 into an i32 by selecting between the
// immediates 1 and 0 with C2_muxii.
609def I1toI32: OutPatFrag<(ops node:$Rs),
610 (C2_muxii (i1 $Rs), 1, 0)>;
611
// Output fragment turning an i32 into an i1 with C2_tfrrp.
612def I32toI1: OutPatFrag<(ops node:$Rs),
613 (i1 (C2_tfrrp (i32 $Rs)))>;
614
// Stores of i1: the value is converted with I1toI32 and stored as a byte.
615defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
616def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
617
// 64-bit shifts by an immediate accepted by u6_0ImmPred.
def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
def: Pat<(srl I64:$src, u6_0ImmPred:$u6), (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
def: Pat<(shl I64:$src, u6_0ImmPred:$u6), (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
624
// Rt + (Rs << #u3)  ->  S2_addasl_rrri(Rt, Rs, #u3).
let AddedComplexity = 100 in {
  def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
           (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
}
628
// Chained target node HexagonISD::BARRIER (no operands, no results),
// selected directly to Y2_barrier.
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;

def: Pat<(HexagonBARRIER),
         (Y2_barrier)>;
631
// An "or" of a frame index and an immediate that passes the orisadd
// predicate is selected to the frame-index pseudo PS_fi.
def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
         (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
634
635
// Support for generating global addresses.
// Taken from X86InstrInfo.td.
//
// Type profile: one i32 result of pointer type, one i32 operand.
def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
                                             SDTCisVT<1, i32>,
                                             SDTCisPtrTy<0>]>;

def HexagonCONST32    : SDNode<"HexagonISD::CONST32",    SDTHexagonCONST32>;
def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;

// Map TLS addresses and block labels wrapped in CONST32 to A2_tfrsi.
def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr),
         (A2_tfrsi s16_0Ext:$addr)>;
def: Pat<(HexagonCONST32 bbl:$label),
         (A2_tfrsi s16_0Ext:$label)>;
647
// Constant materialization: 64-bit immediates via CONST64, and the two
// i1 constants via the PS_false / PS_true pseudos.
def: Pat<(i64 imm:$v),
         (CONST64 imm:$v)>;
def: Pat<(i1 0),
         (PS_false)>;
def: Pat<(i1 1),
         (PS_true)>;
651
// Pseudo instructions.

// Type profiles for the call-sequence markers: start takes one i32
// operand, end takes two.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_SPCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;

def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
                           [SDNPHasChain, SDNPOutGlue]>;
def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Call-like nodes: no results, a single i32 operand.
def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;

// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a
// chain, an optional input glue, and variadic arguments.
// Its single fixed operand has pointer type.
def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
669
670
// Call-sequence markers select to the stack-adjustment pseudos.
def: Pat<(callseq_start timm:$amt), (ADJCALLSTACKDOWN imm:$amt)>;
def: Pat<(callseq_end timm:$amt1, timm:$amt2),
         (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
675
// Tail calls: direct targets (global address or external symbol) use
// PS_tailcall_i; a target held in a register uses PS_tailcall_r.
def: Pat<(HexagonTCRet tglobaladdr:$dst),  (PS_tailcall_i tglobaladdr:$dst)>;
def: Pat<(HexagonTCRet texternalsym:$dst), (PS_tailcall_i texternalsym:$dst)>;
def: Pat<(HexagonTCRet I32:$dst),          (PS_tailcall_r I32:$dst)>;
683
// Masking with 0xFFFF or 0xFF is a zero-extension:
//   r0 = and(r1, 65535)  ->  r0 = zxth(r1)
//   r0 = and(r1, 255)    ->  r0 = zxtb(r1)
def: Pat<(and I32:$src1, 65535), (A2_zxth IntRegs:$src1)>;
def: Pat<(and I32:$src1, 255),   (A2_zxtb IntRegs:$src1)>;
691
// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced; if it ever is, it has to be
// mapped to a no-op.
def: Pat<(add I1:$src1, -1), (C2_not PredRegs:$src1)>;
697
// Fold a negated predicate into its user instead of emitting a pnot:
// a select swaps its two value operands, a conditional branch flips its
// sense.

// p0 = pnot(p0); r0 = mux(p0, #i, #j)  =>  r0 = mux(p0, #j, #i).
def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
         (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;

// p0 = pnot(p0); r0 = select(p0, #i, r1)  =>  r0 = C2_muxir(p0, r1, #i).
def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2, I32:$src3),
         (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;

// p0 = pnot(p0); r0 = mux(p0, r1, #i)  =>  r0 = C2_muxri(p0, #i, r1).
def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3),
         (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;

// p0 = pnot(p0); if (p0) jump  =>  if (!p0) jump.
def: Pat<(brcond (not I1:$src1), bb:$offset),
         (J2_jumpf PredRegs:$src1, bb:$offset)>;
716
// 64-bit sign_extend_inreg.  Only the low word of the source is used:
//   from i32:  Rdd = A2_sxtw(Rss.lo)
//   from i16:  Rdd = A2_sxtw(A2_sxth(Rss.lo))
//   from i8:   Rdd = A2_sxtw(A2_sxtb(Rss.lo))
def: Pat<(i64 (sext_inreg I64:$src1, i32)),
         (A2_sxtw (LoReg DoubleRegs:$src1))>;

def: Pat<(i64 (sext_inreg I64:$src1, i16)),
         (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;

def: Pat<(i64 (sext_inreg I64:$src1, i8)),
         (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
728
// We want to avoid emitting pnot's as much as possible, so a brcond on
// a setcc that has no direct compare instruction is mapped to the
// inverse compare combined with a jump of the opposite sense.

// if (Rs != Rt) jump  ->  if (!cmp.eq(Rs, Rt)) jump
def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), bb:$offset),
           (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), bb:$offset)>;

// if (Rs != #imm) jump  ->  if (!cmp.eq(Rs, #imm)) jump
def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), bb:$offset),
           (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;

// if (Pu != true) jump  ->  if (!Pu) jump
def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
         (J2_jumpf PredRegs:$src1, bb:$offset)>;

// if (Pu != false) jump  ->  if (Pu) jump
def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
         (J2_jumpt PredRegs:$src1, bb:$offset)>;

// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
         (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)),
                   bb:$offset)>;
750
// 64-bit select.  There is no 64-bit mux, so it is emulated with two
// 32-bit muxes (one per half) whose results are recombined.
def: Pat<(select I1:$src1, I64:$src2, I64:$src3),
         (A2_combinew
            (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
                                    (HiReg DoubleRegs:$src3)),
            (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
                                    (LoReg DoubleRegs:$src3)))>;

// 1-bit select, expressed with predicate logic.
// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2) | (!B1 & B3).
def: Pat<(select I1:$src1, I1:$src2, I1:$src3),
         (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
                (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
765
// Truncate a 64-bit register value to 32 bits: take the low subregister.
def: Pat<(i32 (trunc I64:$src)), (LoReg DoubleRegs:$src)>;

// Truncate a 64-bit register value to i1: transfer the low subregister
// into a predicate register.
def: Pat<(i1 (trunc I64:$src)), (C2_tfrrp (LoReg DoubleRegs:$src))>;
773
// Signed "less than or equal" is the negation of "greater than".

// Rs <= #imm  ->  !(Rs > #imm).
let AddedComplexity = 30 in {
  def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)),
           (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
}

// Rs <= Rt  ->  !(Rs > Rt).
def : Pat<(i1 (setle I32:$src1, I32:$src2)),
          (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>;

// Rss <= Rtt  ->  !(Rss > Rtt).
def: Pat<(i1 (setle I64:$src1, I64:$src2)),
         (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
786
// Map cmpne -> !cmpeq.
// Hexagon_TODO: We should improve on this.

// Rs != #imm  ->  !(Rs == #imm).
let AddedComplexity = 30 in {
  def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
           (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
}

// Predicates are computed in predicate registers, so setne on two i1
// values is turned back into an xor.
def: Pat<(i1 (setne I1:$src1, I1:$src2)),
         (C2_xor PredRegs:$src1, PredRegs:$src2)>;

// Rss != Rtt  ->  !(Rss == Rtt).
def: Pat<(i1 (setne I64:$src1, I64:$src2)),
         (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
802
// Signed "greater than or equal".

// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
// rs >= rt -> !(rt > rs).
def : Pat <(i1 (setge I32:$src1, I32:$src2)),
           (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;

// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
let AddedComplexity = 30 in {
  def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
           (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
}

// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
def: Pat<(i1 (setge I64:$src1, I64:$src2)),
         (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
817
// Signed "less than" against an immediate, in two steps:
//   cmplt(Rs, Imm)  -> !cmpge(Rs, Imm)
//   !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1)
let AddedComplexity = 30 in {
  def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
           (C2_not (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000824
// Unsigned compares of a register against an immediate.

// cmpgeu(Rs, #0) holds for every Rs; generate cmpeq(Rs, Rs).
def: Pat<(i1 (setuge I32:$src1, 0)),
         (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;

// cmpgeu(Rs, #imm) -> cmpgtu(Rs, #imm - 1).
// UDEC1 asserts that the immediate is non-zero.
def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)),
         (C2_cmpgtui IntRegs:$src1, (UDEC1 u32_0ImmPred:$src2))>;

// cmpgtu(Rs, #imm)
def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)),
         (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
836
// 64-bit unsigned compares, both built from cmpgtu on register pairs.

// Rss >= Rtt  ->  !(Rtt > Rss).
def: Pat<(i1 (setuge I64:$src1, I64:$src2)),
         (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;

// Rss <= Rtt  ->  !(Rss > Rtt).
def: Pat<(i1 (setule I64:$src1, I64:$src2)),
         (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
846
// Sign extends.
// i1 -> i32: -1 when the predicate is true, 0 otherwise.
def: Pat<(i32 (sext I1:$src1)), (C2_muxii PredRegs:$src1, -1, 0)>;
851
// i1 -> i64
// Both words of the result must follow the predicate: all ones when
// $src1 is true, all zeros when it is false.  A constant -1 in the high
// word would turn a false predicate into 0xFFFFFFFF00000000, so the mux
// is used for the high word as well as the low word.
def: Pat<(i64 (sext I1:$src1)),
         (A2_combinew (C2_muxii PredRegs:$src1, -1, 0),
                      (C2_muxii PredRegs:$src1, -1, 0))>;
855
// Zero extends.
// i1 -> i32: 1 when the predicate is true, 0 otherwise.
def: Pat<(i32 (zext I1:$src1)), (C2_muxii PredRegs:$src1, 1, 0)>;

// Any-extends.
// Map from Rd = Pu to Rd = mux(Pu, #1, #0).
def: Pat<(i32 (anyext I1:$src1)), (C2_muxii PredRegs:$src1, 1, 0)>;

// Map from Rdd = Pu to Rdd = sxtw(mux(Pu, #1, #0)).
def: Pat<(i64 (anyext I1:$src1)), (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
868
// Clear the sign bit of a 64-bit register: clear bit 31 of the high
// word and recombine it with the unchanged low word.
def ClearSign : OutPatFrag<(ops node:$Rss),
                           (A2_combinew (S2_clrbit_i (HiReg $Rss), 31),
                                        (LoReg $Rss))>;
872
// Output fragment computing the upper 64 bits of the unsigned 128-bit
// product of $Rss and $Rtt from 32x32->64 partial products.  Reading the
// expression from the inside out:
//   1. lo(Rss)*lo(Rtt), shifted right by 32;
//   2. hi(Rss)*lo(Rtt) accumulated onto (1);
//   3. the low word of lo(Rss)*hi(Rtt), zero-extended, added to (2);
//   4. (3) shifted right by 32, with hi(Rss)*hi(Rtt) accumulated onto it;
//   5. lo(Rss)*hi(Rtt) shifted right by 32, added to (4).
def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
  (A2_addp
    (M2_dpmpyuu_acc_s0
      (S2_lsr_i_p
        (A2_addp
          (M2_dpmpyuu_acc_s0
            (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
            (HiReg $Rss), (LoReg $Rtt)),
          (A2_combinew
            (A2_tfrsi 0),
            (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
        32),
      (HiReg $Rss), (HiReg $Rtt)),
    (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;

// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu I64:$Rss, I64:$Rtt),
           (MulHU $Rss, $Rtt)>;
891
// Multiply 64-bit signed and use upper result.
//
// Let A and B be signed 64-bit integers, and let A' and B' be A and B
// with the sign bit cleared.  With s(X) denoting the sign bit of X,
// A = -2^63*s(A) + A' (and likewise for B).  The signed product is then
//   AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
//      = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
//      = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
//      = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
//
// So the signed high half is the unsigned high half minus
// s(A)B' + s(B)A'.  Each s(X) factor is applied as a mask: the
// arithmetic shift right by 63 yields all ones or all zeros, which is
// then and'ed with the sign-cleared other operand.
def : Pat <(mulhs I64:$Rss, I64:$Rtt),
           (A2_subp
              (MulHU $Rss, $Rtt),
              (A2_addp
                 (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
                 (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
909
// Hexagon specific ISD nodes.

// ALLOCA: one i32 result and two operands, the first of which is i32.
// The node carries a chain.
def SDTHexagonALLOCA : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                            SDTCisVT<1, i32>]>;
def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
                           [SDNPHasChain]>;

def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
         (PS_alloca IntRegs:$Rs, imm:$A)>;
919
// Wrapper nodes for jump-table and constant-pool addresses; both are
// materialized with A2_tfrsi.
def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;

def: Pat<(HexagonJT tjumptable:$dst),
         (A2_tfrsi imm:$dst)>;
def: Pat<(HexagonCP tconstpool:$dst),
         (A2_tfrsi imm:$dst)>;
925
// add/sub/and/or whose second operand is an arithmetic shift right by an
// immediate select the combined S2_asr_i_{r,p}_{acc,nac,and,or}
// instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave every pattern except the
// first "add" in each group without the complexity bump.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms.  The immediate is named $u5 but is checked by
  // u6_0ImmPred.
  def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000937
// add/sub/and/or/xor whose second operand is a logical shift right by an
// immediate select the combined S2_lsr_i_{r,p}_{acc,nac,and,or,xacc}
// instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave the sub/and/or patterns
// without the complexity bump that add and xor receive.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms.  The immediate is named $u5 but is checked by
  // u6_0ImmPred.
  def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000953
// add/sub/and/or/xor whose second operand is a shift left by an immediate
// select the combined S2_asl_i_{r,p}_{acc,nac,and,or,xacc} instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave the sub/and/or patterns
// without the complexity bump that add and xor receive.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
  def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)),
           (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;

  // 64-bit forms.  The immediate is named $u5 but is checked by
  // u6_0ImmPred.
  def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
  def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)),
           (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000969
// Logical/arithmetic operations whose second operand is a shift left by
// a register amount select the combined S2_asl_r_{r,p}_* instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave every pattern except the
// first "add" in each group without the complexity bump.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount is still a 32-bit register).
  def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000981
// Logical/arithmetic operations whose second operand is an arithmetic
// shift right by a register amount select the combined
// S2_asr_r_{r,p}_* instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave every pattern except the
// first "add" in each group without the complexity bump.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)),
           (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount is still a 32-bit register).
  def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)),
           (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +0000993
// Logical/arithmetic operations whose second operand is a logical shift
// right by a register amount select the combined S2_lsr_r_{r,p}_*
// instructions.
//
// The braces matter: a brace-less "let ... in" applies only to the single
// definition that follows it, which would leave every pattern except the
// first "add" in each group without the complexity bump.
let AddedComplexity = 100 in {
  // 32-bit forms.
  def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)),
           (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

  // 64-bit forms (the shift amount is still a 32-bit register).
  def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
  def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)),
           (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001005
// Operations whose second operand is a shift left by a register amount,
// selected to the S2_lsl_r_{r,p}_* instructions.
//
// NOTE(review): every source DAG in this group is identical to the source
// DAG of the corresponding S2_asl_r_{r,p}_* pattern earlier in this file,
// so for any given input only one of the two can be chosen.  Confirm
// whether these patterns are meant to be reachable.
//
// NOTE(review): in each group only the "add" pattern carries
// AddedComplexity = 100; the remaining patterns use the default.  This
// mirrors the brace-less "let ... in" that preceded them and is kept
// as-is so the relative priority of the duplicates does not change.

// 32-bit forms.
let AddedComplexity = 100 in {
  def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)),
           (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
}
def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)),
         (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)),
         (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)),
         (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;

// 64-bit forms (the shift amount is still a 32-bit register).
let AddedComplexity = 100 in {
  def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)),
           (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
}
def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)),
         (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)),
         (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)),
         (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)),
         (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001017
// Plain shifts by a register amount.
//
// NOTE(review): in both groups "shl" appears twice with the same source
// DAG, once mapped to the asl instruction and once to the lsl
// instruction; only one of each pair can be chosen for a given input.

// 64-bit value, 32-bit shift amount.
def: Pat<(sra I64:$src1, I32:$src2),
         (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
def: Pat<(srl I64:$src1, I32:$src2),
         (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
def: Pat<(shl I64:$src1, I32:$src2),
         (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
def: Pat<(shl I64:$src1, I32:$src2),
         (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>;

// 32-bit value, 32-bit shift amount.
def: Pat<(sra I32:$src1, I32:$src2),
         (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>;
def: Pat<(srl I32:$src1, I32:$src2),
         (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>;
def: Pat<(shl I32:$src1, I32:$src2),
         (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>;
def: Pat<(shl I32:$src1, I32:$src2),
         (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001027
// Bit-field insert nodes.
//
// INSERT:   integer result; operands 1 and 2 have the result type,
//           operands 3 and 4 are i32.
// INSERTRP: integer result; operands 1 and 2 have the result type,
//           operand 3 is i64.
def SDTHexagonINSERT:
  SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>,
                       SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
def SDTHexagonINSERTRP:
  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>,
                       SDTCisVT<3, i64>]>;

def HexagonINSERT   : SDNode<"HexagonISD::INSERT",   SDTHexagonINSERT>;
def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;

// Immediate forms, 32-bit then 64-bit.
def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
         (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
         (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;

// Register-pair forms, 32-bit then 64-bit.
def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
         (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
         (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
1046
// NOTE: A pattern used to live here that selected
//   (A2_swiz (L2_loadri_io $b, 0))
// for the DAG
//   (INSERT(zext(b[2]), ext(b[3]), 24, 8) << 16) | (zext(b[1]) << 8)
//     | zext(b[0])
// where b[i] is the byte loaded from $b + i.
//
// It has been removed because it produced the wrong value: the expression
// places b[3] in the most significant byte and b[0] in the least
// significant one, which on a little-endian target is already the 32-bit
// word stored at $b, so byte-swapping the loaded word with A2_swiz
// reverses it.  In addition, nothing in the pattern established that $b
// is suitably aligned for a word load, while the matched byte loads carry
// no such requirement.  Without this pattern the DAG is selected through
// the ordinary or/shl/insert/byte-load patterns.
1055
// Unsigned bit-field extract nodes.
//
// EXTRACTU:   integer result of the same type as operand 1; operands 2
//             and 3 are i32.
// EXTRACTURP: integer result of the same type as operand 1; operand 2
//             is i64.
def SDTHexagonEXTRACTU:
  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
                       SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDTHexagonEXTRACTURP:
  SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
                       SDTCisVT<2, i64>]>;

def HexagonEXTRACTU   : SDNode<"HexagonISD::EXTRACTU",   SDTHexagonEXTRACTU>;
def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;

// Immediate forms, 32-bit then 64-bit.
def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
         (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>;
def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
         (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>;

// Register-pair forms, 32-bit then 64-bit.
def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
         (S2_extractu_rp I32:$src1, I64:$src2)>;
def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
         (S2_extractup_rp I64:$src1, I64:$src2)>;
1074
// Matches an i32 immediate in the range [-255, 0].
def n8_0ImmPred: PatLeaf<(i32 imm), [{
  int64_t V = N->getSExtValue();
  return -255 <= V && V <= 0;
}]>;

// Negates an immediate accepted by n8_0ImmPred, yielding a value in
// [0, 255] as an i32 target constant.
def NegN8Imm : SDNodeXForm<imm, [{
  int64_t V = N->getSExtValue();
  assert(-255 <= V && V <= 0);
  return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
}]>;

// Change the sign of the immediate for Rd=-mpyi(Rs,#u8).
//
// The source matches a multiply by a non-positive immediate directly, and
// the immediate handed to the instruction is its magnitude.  (Matching
// "(ineg imm)" instead would require an unfolded 0 - constant in the DAG,
// and passing the matched immediate through unchanged would put a
// non-positive value into the unsigned operand.)
def: Pat<(mul I32:$src1, n8_0ImmPred:$src2),
         (M2_mpysin IntRegs:$src1, (NegN8Imm n8_0ImmPred:$src2))>;
1083
// Instantiates T_MinMax_pats for 64-bit register pairs.  Inst and
// SwapInst are forwarded unchanged.
multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
  defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
}

// sext64(Rs) + Rtt  ->  A2_addsp(Rs, Rtt).
def: Pat<(add (Sext64 I32:$Rs), I64:$Rt),
         (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>;

// 64-bit min/max selection for each signed and unsigned comparison.
let AddedComplexity = 200 in {
  // Signed.
  defm: MinMax_pats_p<setge,  A2_maxp,  A2_minp>;
  defm: MinMax_pats_p<setgt,  A2_maxp,  A2_minp>;
  defm: MinMax_pats_p<setle,  A2_minp,  A2_maxp>;
  defm: MinMax_pats_p<setlt,  A2_minp,  A2_maxp>;
  // Unsigned.
  defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
  defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
  defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
  defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
}
1101
// Call nodes.  Both use the SDT_SPCall profile and carry a chain,
// optional input glue, output glue and variadic operands.
def callv3   : SDNode<"HexagonISD::CALL", SDT_SPCall,
                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;

def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;


// Map call instruction: a register target uses J2_callr, symbolic
// targets use J2_call.
def : Pat<(callv3 I32:$dst),            (J2_callr I32:$dst)>;
def : Pat<(callv3 tglobaladdr:$dst),    (J2_call tglobaladdr:$dst)>;
def : Pat<(callv3 texternalsym:$dst),   (J2_call texternalsym:$dst)>;
def : Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>;

// CALLnr nodes map to the PS_callr_nr / PS_call_nr pseudos.
def : Pat<(callv3nr I32:$dst),          (PS_callr_nr I32:$dst)>;
def : Pat<(callv3nr tglobaladdr:$dst),  (PS_call_nr tglobaladdr:$dst)>;
def : Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>;
1125
1126
// Pattern leaves wrapping the AddrGA and AddrGP operands as i32 values.
def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
1129
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001130
1131// Pats for instruction selection.
1132
// Wraps a comparison pattern fragment in a zero-extension to i32, so that
// the i1 result of the compare is matched as a 32-bit value.
// The operands must be called "lhs" and "rhs": the seteq/setne fragments use
// those names, and Op.Fragment is spliced in verbatim, so any other names
// would leave the fragment body disconnected from the operand list.
class CmpInReg<PatFrag Op>
  : PatFrag<(ops node:$lhs, node:$rhs),
            (i32 (zext (i1 Op.Fragment)))>;
1138
// Compares whose zero-extended result is matched as an i32 value.
def: T_cmp32_rr_pat<A4_rcmpeq,  CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;

// Compares producing an i1 result.
def: T_cmp32_rr_pat<C4_cmpneq,  setne,  i1>;
def: T_cmp32_rr_pat<C4_cmplte,  setle,  i1>;
def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;

// setge/setuge are matched through RevCmp and mapped onto the same
// less-than-or-equal instructions.
def: T_cmp32_rr_pat<C4_cmplte,  RevCmp<setge>,  i1>;
def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
1148
// Equality tests on the xor of two registers masked with 255 or 65535
// select the byte/halfword compare instructions. The "not equal" variants
// wrap the compare in C2_not.
let AddedComplexity = 100 in {
  // Mask 255.
  def: Pat<(i1 (seteq (and (xor I32:$Ra, I32:$Rb), 255), 0)),
           (A4_cmpbeq IntRegs:$Ra, IntRegs:$Rb)>;
  def: Pat<(i1 (setne (and (xor I32:$Ra, I32:$Rb), 255), 0)),
           (C2_not (A4_cmpbeq IntRegs:$Ra, IntRegs:$Rb))>;
  // Mask 65535.
  def: Pat<(i1 (seteq (and (xor I32:$Ra, I32:$Rb), 65535), 0)),
           (A4_cmpheq IntRegs:$Ra, IntRegs:$Rb)>;
  def: Pat<(i1 (setne (and (xor I32:$Ra, I32:$Rb), 65535), 0)),
           (C2_not (A4_cmpheq IntRegs:$Ra, IntRegs:$Rb))>;
}
1163
// Zero-extended (in)equality compare of a register against an immediate,
// with the result matched as an i32 value.
def: Pat<(i32 (zext (i1 (seteq I32:$Reg, s32_0ImmPred:$Imm)))),
         (A4_rcmpeqi IntRegs:$Reg, s32_0ImmPred:$Imm)>;
def: Pat<(i32 (zext (i1 (setne I32:$Reg, s32_0ImmPred:$Imm)))),
         (A4_rcmpneqi IntRegs:$Reg, s32_0ImmPred:$Imm)>;
1168
// Preserve the S2_tstbit_r generation: zext((Val & (1 << Bit)) != 0) is
// selected as a tstbit whose predicate result picks between 1 and 0.
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, I32:$Bit)),
                                         I32:$Val)),
                               0)))),
         (C2_muxii (S2_tstbit_r IntRegs:$Val, IntRegs:$Bit), 1, 0)>;
1173
// Combines with one immediate operand. Their complexity has to exceed that
// of the register-register combine so that they are preferred over it.
let AddedComplexity = 50 in {
  // Register first, immediate second.
  def: Pat<(HexagonCOMBINE IntRegs:$Reg, s32_0ImmPred:$Imm),
           (A4_combineri IntRegs:$Reg, s32_0ImmPred:$Imm)>;

  // Immediate first, register second.
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$Imm, IntRegs:$Reg),
           (A4_combineir s32_0ImmPred:$Imm, IntRegs:$Reg)>;
}
1183
// Combines of two immediates. Their complexity has to exceed that of any
// combine involving a register so that they are preferred over those.
let AddedComplexity = 75 in {
  def: Pat<(HexagonCOMBINE s8_0ImmPred:$I0, u32_0ImmPred:$I1),
           (A4_combineii imm:$I0, imm:$I1)>;
  def: Pat<(HexagonCOMBINE s32_0ImmPred:$I0, s8_0ImmPred:$I1),
           (A2_combineii imm:$I0, imm:$I1)>;
}
1192
1193
// Output fragments producing an i64 from an i32 value.
// ToZext64 pairs the value with the constant 0 through A4_combineir.
def ToZext64
  : OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
// ToSext64 applies A2_sxtw to the value.
def ToSext64
  : OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
1198
// Indexed loads whose loaded value is post-processed by ValueMod.
//   Load     - load fragment to match,
//   VT       - type of the final value,
//   ValueMod - output fragment applied to the result of MI,
//   ImmPred  - predicate constraining the immediate offset,
//   MI       - load instruction to emit.
// One pattern is generated for each supported form of the address:
//   - frameindex (with and without an offset),
//   - base + offset,
//   - base (without offset).
multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
                      PatLeaf ImmPred, InstHexagon MI> {
  // Frame index, no offset.
  def: Pat<(VT (Load AddrFI:$Fi)),
           (VT (ValueMod (MI AddrFI:$Fi, 0)))>;
  // Frame index + offset.
  def: Pat<(VT (Load (add AddrFI:$Fi, ImmPred:$Offset))),
           (VT (ValueMod (MI AddrFI:$Fi, imm:$Offset)))>;
  // Base register + offset.
  def: Pat<(VT (Load (add IntRegs:$Base, ImmPred:$Offset))),
           (VT (ValueMod (MI IntRegs:$Base, imm:$Offset)))>;
  // Base register only.
  def: Pat<(VT (Load I32:$Base)),
           (VT (ValueMod (MI IntRegs:$Base, 0)))>;
}
1214
// Extending loads producing an i64 value: the narrow load is performed by a
// 32-bit load instruction and the result is widened by ToZext64/ToSext64.
// Any-extending loads are treated the same way as zero-extending ones.
defm: Loadxm_pat<extloadi1,   i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi8,   i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<extloadi16,  i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
defm: Loadxm_pat<zextloadi1,  i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi8,  i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
defm: Loadxm_pat<zextloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
defm: Loadxm_pat<sextloadi8,  i64, ToSext64, s32_0ImmPred, L2_loadrb_io>;
defm: Loadxm_pat<sextloadi16, i64, ToSext64, s31_1ImmPred, L2_loadrh_io>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001223
// Any-extension of a 32-bit register to 64 bits is done the same way as a
// zero-extension: Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
def: Pat<(Aext64 I32:$Rs),
         (ToZext64 IntRegs:$Rs)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001226
// Loads from (index [<< #u2]) + CONST32(symbol), where the symbol is a
// global address, a constant-pool entry or a jump table. For every kind of
// symbol two patterns are generated: one with an explicit shift of the index
// register, and one without a shift, which passes 0 as the shift amount.
//   ldOp - load fragment to match,
//   MI   - instruction to emit,
//   VT   - type of the loaded value (i32 unless specified).
multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
  // Global address.
  def : Pat <(VT (ldOp (add (shl IntRegs:$Idx, u2_0ImmPred:$Sh),
                            (HexagonCONST32 tglobaladdr:$Sym)))),
             (MI IntRegs:$Idx, u2_0ImmPred:$Sh, tglobaladdr:$Sym)>;
  def : Pat <(VT (ldOp (add IntRegs:$Idx,
                            (HexagonCONST32 tglobaladdr:$Sym)))),
             (MI IntRegs:$Idx, 0, tglobaladdr:$Sym)>;

  // Constant pool.
  def : Pat <(VT (ldOp (add (shl IntRegs:$Idx, u2_0ImmPred:$Sh),
                            (HexagonCONST32 tconstpool:$Sym)))),
             (MI IntRegs:$Idx, u2_0ImmPred:$Sh, tconstpool:$Sym)>;
  def : Pat <(VT (ldOp (add IntRegs:$Idx,
                            (HexagonCONST32 tconstpool:$Sym)))),
             (MI IntRegs:$Idx, 0, tconstpool:$Sym)>;

  // Jump table.
  def : Pat <(VT (ldOp (add (shl IntRegs:$Idx, u2_0ImmPred:$Sh),
                            (HexagonCONST32 tjumptable:$Sym)))),
             (MI IntRegs:$Idx, u2_0ImmPred:$Sh, tjumptable:$Sym)>;
  def : Pat <(VT (ldOp (add IntRegs:$Idx,
                            (HexagonCONST32 tjumptable:$Sym)))),
             (MI IntRegs:$Idx, 0, tjumptable:$Sym)>;
}
1249
// Instantiate the "index + symbol" load patterns for all load widths.
// Any-extending loads use the same instruction as zero-extending loads.
let AddedComplexity = 60 in {
  // Byte loads.
  defm : T_LoadAbsReg_Pat <sextloadi8,  L4_loadrb_ur>;
  defm : T_LoadAbsReg_Pat <zextloadi8,  L4_loadrub_ur>;
  defm : T_LoadAbsReg_Pat <extloadi8,   L4_loadrub_ur>;

  // Halfword loads.
  defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
  defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
  defm : T_LoadAbsReg_Pat <extloadi16,  L4_loadruh_ur>;

  // Full loads: i32 (default value type) and i64.
  defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
  defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
}
1262
// Loads addressed as base register + (index register << #u2). The shift
// amount matched in the address is passed to the instruction as its
// immediate operand.
class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add I32:$Base,
                       (i32 (shl I32:$Idx, u2_0ImmPred:$Sh))))),
        (VT (MI IntRegs:$Base, IntRegs:$Idx, imm:$Sh))>;

let AddedComplexity = 40 in {
  // Byte loads.
  def: Loadxs_pat<extloadi8,   i32, L4_loadrub_rr>;
  def: Loadxs_pat<zextloadi8,  i32, L4_loadrub_rr>;
  def: Loadxs_pat<sextloadi8,  i32, L4_loadrb_rr>;
  // Halfword loads.
  def: Loadxs_pat<extloadi16,  i32, L4_loadruh_rr>;
  def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
  // Word and doubleword loads.
  def: Loadxs_pat<load,        i32, L4_loadri_rr>;
  def: Loadxs_pat<load,        i64, L4_loadrd_rr>;
}
1281
// Loads addressed as base register + index register, with no shift in the
// address: the same instructions are used, with 0 as the shift amount.
class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
  : Pat<(VT (Load (add I32:$Base, I32:$Idx))),
        (VT (MI IntRegs:$Base, IntRegs:$Idx, 0))>;

let AddedComplexity = 20 in {
  // Byte loads.
  def: Loadxs_simple_pat<extloadi8,   i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<zextloadi8,  i32, L4_loadrub_rr>;
  def: Loadxs_simple_pat<sextloadi8,  i32, L4_loadrb_rr>;
  // Halfword loads.
  def: Loadxs_simple_pat<extloadi16,  i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
  def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
  // Word and doubleword loads.
  def: Loadxs_simple_pat<load,        i32, L4_loadri_rr>;
  def: Loadxs_simple_pat<load,        i64, L4_loadrd_rr>;
}
1298
// zext i1->i64: turn the predicate into a 1/0 word with C2_muxii, then
// widen that word with ToZext64.
def: Pat<(i64 (zext I1:$Pred)),
         (ToZext64 (C2_muxii PredRegs:$Pred, 1, 0))>;

// zext i32->i64
def: Pat<(Zext64 I32:$Word),
         (ToZext64 IntRegs:$Word)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001306
// Stores to (index [<< #u2]) + absolute address.
//   MI   - store instruction to emit,
//   RC   - register class of the stored value,
//   VT   - type of the stored value,
//   stOp - store fragment to match.
let AddedComplexity = 40 in
multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
                               PatFrag stOp> {
  // Shifted index + plain immediate.
  def : Pat<(stOp (VT RC:$Val),
                  (add (shl I32:$Idx, u2_0ImmPred:$Sh),
                       u32_0ImmPred:$Abs)),
            (MI IntRegs:$Idx, u2_0ImmPred:$Sh, u32_0ImmPred:$Abs, RC:$Val)>;

  // Shifted index + global address.
  def : Pat<(stOp (VT RC:$Val),
                  (add (shl IntRegs:$Idx, u2_0ImmPred:$Sh),
                       (HexagonCONST32 tglobaladdr:$Abs))),
            (MI IntRegs:$Idx, u2_0ImmPred:$Sh, tglobaladdr:$Abs, RC:$Val)>;

  // Unshifted index + global address: 0 is passed as the shift amount.
  def : Pat<(stOp (VT RC:$Val),
                  (add IntRegs:$Idx, (HexagonCONST32 tglobaladdr:$Abs))),
            (MI IntRegs:$Idx, 0, tglobaladdr:$Abs, RC:$Val)>;
}

defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs,    i32, store>;
defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs,    i32, truncstorei8>;
defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs,    i32, truncstorei16>;
1329
// Stores addressed as base register + (index register << #u2).
//   Store - store fragment to match,
//   Value - leaf describing the stored value (type and register class),
//   MI    - instruction to emit.
class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Val,
               (add I32:$Base, (i32 (shl I32:$Idx, u2_0ImmPred:$Sh)))),
        (MI IntRegs:$Base, IntRegs:$Idx, imm:$Sh, Value:$Val)>;

let AddedComplexity = 40 in {
  def: Storexs_pat<truncstorei8,  I32, S4_storerb_rr>;
  def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Storexs_pat<store,         I32, S4_storeri_rr>;
  def: Storexs_pat<store,         I64, S4_storerd_rr>;
}
1341
// Matches an immediate that satisfies isShiftedInt<30,2> but does not
// satisfy isShiftedInt<29,3>.
def s30_2ProperPred : PatLeaf<(i32 imm), [{
  const int64_t Off = (int64_t)N->getSExtValue();
  if (isShiftedInt<29,3>(Off))
    return false;
  return isShiftedInt<30,2>(Off);
}]>;
// Transforms an immediate by clearing its three least significant bits,
// i.e. rounds it down to a multiple of 8.
def RoundTo8 : SDNodeXForm<imm, [{
  const int32_t Off = N->getSExtValue();
  const int32_t Rounded = Off & -8;
  return CurDAG->getTargetConstant(Rounded, SDLoc(N), MVT::i32);
}]>;
1350
// 64-bit store at base + offset, where the offset is accepted by
// s30_2ProperPred. 4 is added to the base register with A2_addi and the
// store uses the offset rounded down to a multiple of 8.
let AddedComplexity = 40 in
def: Pat<(store I64:$Val, (add I32:$Base, s30_2ProperPred:$Off)),
         (S2_storerd_io (A2_addi I32:$Base, 4),
                        (RoundTo8 $Off),
                        I64:$Val)>;
1354
// Stores addressed as base register + index register, with no shift in the
// address: the instruction is given 0 as the shift amount.
class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
  : Pat<(Store Value:$Val, (add I32:$Base, I32:$Idx)),
        (MI IntRegs:$Base, IntRegs:$Idx, 0, Value:$Val)>;

let AddedComplexity = 20 in {
  def: Store_rr_pat<truncstorei8,  I32, S4_storerb_rr>;
  def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
  def: Store_rr_pat<store,         I32, S4_storeri_rr>;
  def: Store_rr_pat<store,         I64, S4_storerd_rr>;
}
1365
1366
// Re-materializes an immediate as a sign-extended byte.
def IMM_BYTE : SDNodeXForm<imm, [{
  // A value such as -1 may be represented as 255; narrowing it to a signed
  // 8-bit variable recovers the intended signed value.
  const int8_t Byte = N->getSExtValue();
  return CurDAG->getTargetConstant(Byte, SDLoc(N), MVT::i32);
}]>;
1373
// Re-materializes an immediate as a sign-extended halfword.
def IMM_HALF : SDNodeXForm<imm, [{
  // A value such as -1 may be represented as 65535; narrowing it to a signed
  // 16-bit variable recovers the intended signed value.
  const int16_t Half = N->getSExtValue();
  return CurDAG->getTargetConstant(Half, SDLoc(N), MVT::i32);
}]>;
1380
// Re-materializes an immediate as a sign-extended word.
def IMM_WORD : SDNodeXForm<imm, [{
  // A value such as -1 could be represented as 4294967295. This does not
  // happen currently, but some optimization might convert -1 to a large
  // positive number. Narrowing the value to a signed 32-bit variable
  // recovers the intended signed value.
  const int32_t Word = N->getSExtValue();
  return CurDAG->getTargetConstant(Word, SDLoc(N), MVT::i32);
}]>;
1389
// Output fragments applying the IMM_BYTE/IMM_HALF/IMM_WORD transforms to
// their operand.
def ToImmByte : OutPatFrag<(ops node:$Imm), (IMM_BYTE $Imm)>;
def ToImmHalf : OutPatFrag<(ops node:$Imm), (IMM_HALF $Imm)>;
def ToImmWord : OutPatFrag<(ops node:$Imm), (IMM_WORD $Imm)>;
1393
// Emit store-immediate, but only when the stored value will not be constant-
// extended. The reason for that is that there is no pass that can optimize
// constant extenders in store-immediate instructions. In some cases we can
// end up with a number of such stores, all of which store the same extended
// value (e.g. after unrolling a loop that initializes floating point array).
1399
// Predicate to determine if the 16-bit immediate is expressible as a sign-
// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
// beyond 0..15, so we don't care what is in there.

def i16in8ImmPred: PatLeaf<(i32 imm), [{
  // Only the low 16 bits of the immediate take part in the check.
  const int64_t Half = (int16_t)N->getSExtValue();
  const int64_t AsByte = (int64_t)(int8_t)Half;
  return Half == AsByte;
}]>;
1408
// Predicate to determine if the 32-bit immediate is expressible as a sign-
// extended 8-bit immediate.
def i32in8ImmPred: PatLeaf<(i32 imm), [{
  const int64_t Word = (int32_t)N->getSExtValue();
  const int64_t AsByte = (int64_t)(int8_t)Word;
  return Word == AsByte;
}]>;
1415
1416
let AddedComplexity = 40 in {
  // Even though the offset is not extendable in the store-immediate, we
  // can still generate the fi# in the base address. If the final offset
  // is not valid for the instruction, we will replace it with a scratch
  // register.
  // (The frame-index variants below are currently disabled.)
//  def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
//  def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
//                       S4_storeirh_io>;
//  def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;

//  defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
//                            S4_storeirb_io>;
//  defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
//                            ToImmHalf, S4_storeirh_io>;
//  defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
//                            S4_storeiri_io>;

  // Store-immediate for byte, halfword and word stores. The halfword and
  // word forms only accept values that pass the "fits in 8 bits" predicates.
  defm: Storexm_add_pat<truncstorei8,  s32_0ImmPred,  u6_0ImmPred,
                        ToImmByte, S4_storeirb_io>;
  defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred,
                        ToImmHalf, S4_storeirh_io>;
  defm: Storexm_add_pat<store,         i32in8ImmPred, u6_2ImmPred,
                        ToImmWord, S4_storeiri_io>;
}
1441
// Store-immediate through Storexm_simple_pat, one instantiation per store
// width. All three accept any s32_0ImmPred value.
def: Storexm_simple_pat<truncstorei8,  s32_0ImmPred, ToImmByte,
                        S4_storeirb_io>;
def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf,
                        S4_storeirh_io>;
def: Storexm_simple_pat<store,         s32_0ImmPred, ToImmWord,
                        S4_storeiri_io>;
1445
// Two nested logical operations on predicate registers:
//   Op1(Ps, Op2(Pt, Pu))
class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
  : Pat<(i1 (Op1 I1:$Ps,
                 (Op2 I1:$Pt, I1:$Pu))),
        (MI I1:$Ps, I1:$Pt, I1:$Pu)>;

// The same, with the innermost second operand negated:
//   Op1(Ps, Op2(Pt, ~Pu))
// The instruction receives Pu itself, without the negation.
class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
  : Pat<(i1 (Op1 I1:$Ps,
                 (Op2 I1:$Pt, (not I1:$Pu)))),
        (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
1455
// and/or combinations of three predicates.
def: LogLog_pat<and, and, C4_and_and>;
def: LogLog_pat<and, or,  C4_and_or>;
def: LogLog_pat<or,  and, C4_or_and>;
def: LogLog_pat<or,  or,  C4_or_or>;

// and/or combinations in which the last predicate is negated.
def: LogLogNot_pat<and, and, C4_and_andn>;
def: LogLogNot_pat<and, or,  C4_and_orn>;
def: LogLogNot_pat<or,  and, C4_or_andn>;
def: LogLogNot_pat<or,  or,  C4_or_orn>;
1465
1466//===----------------------------------------------------------------------===//
1467// PIC: Support for PIC compilations. The patterns and SD nodes defined
1468// below are needed to support code generation for PIC
1469//===----------------------------------------------------------------------===//
1470
// Type profile for AT_GOT: one result and three operands; the result and
// the first two operands are constrained to i32.
def SDT_HexagonAtGot
  : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
                         SDTCisVT<1, i32>,
                         SDTCisVT<2, i32>]>;
// Type profile for AT_PCREL: one i32 result and one i32 operand.
def SDT_HexagonAtPcrel
  : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
                         SDTCisVT<1, i32>]>;

// AT_GOT address-of-GOT, address-of-global, offset-in-global
def HexagonAtGot   : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
// AT_PCREL address-of-global
def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
1480
// AT_GOT with a zero offset: a single load relative to the GOT register.
def: Pat<(HexagonAtGot I32:$Got, I32:$Sym, (i32 0)),
         (L2_loadri_io I32:$Got, imm:$Sym)>;
// AT_GOT with an offset: the same load, with the offset added to its result.
def: Pat<(HexagonAtGot I32:$Got, I32:$Sym, s30_2ImmPred:$Off),
         (A2_addi (L2_loadri_io I32:$Got, imm:$Sym), imm:$Off)>;
// AT_PCREL is selected as C4_addipc.
def: Pat<(HexagonAtPcrel I32:$Sym),
         (C4_addipc imm:$Sym)>;
1487
// 64-bit and/or in which the second operand is complemented.
def: Pat<(i64 (and I64:$Rss, (i64 (not I64:$Rtt)))),
         (A4_andnp DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
def: Pat<(i64 (or I64:$Rss, (i64 (not I64:$Rtt)))),
         (A4_ornp DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
1492
// Rd=add(Rs,add(Ru,#s6))
def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$Imm)),
         (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$Imm)>;

// The three patterns below match different arrangements of Rs + #s6 - Ru
// and all select S4_subaddi.

// Rd=add(Rs,sub(#s6,Ru))
def: Pat<(add I32:$Rs, (sub s32_0ImmPred:$Imm, I32:$Ru)),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$Imm, IntRegs:$Ru)>;

// Rd=sub(add(Rs,#s6),Ru)
def: Pat<(sub (add I32:$Rs, s32_0ImmPred:$Imm), I32:$Ru),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$Imm, IntRegs:$Ru)>;

// Rd=add(sub(Rs,Ru),#s6)
def: Pat<(add (sub I32:$Rs, I32:$Ru), (s32_0ImmPred:$Imm)),
         (S4_subaddi IntRegs:$Rs, s32_0ImmPred:$Imm, IntRegs:$Ru)>;
1510
// xor of a 64-bit value with the xor of two other 64-bit values.
def: Pat<(xor I64:$Rxx, (xor I64:$Rss, I64:$Rtt)),
         (M4_xor_xacc DoubleRegs:$Rxx, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;

// or of a register with (register & immediate).
// NOTE(review): the next two patterns match the same DAG shape and differ
// only in the instruction they select - confirm which is meant to win.
def: Pat<(or I32:$Ru, (and (i32 IntRegs:$Rx), s32_0ImmPred:$Imm)),
         (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$Imm)>;

def: Pat<(or I32:$Rx, (and I32:$Rs, s32_0ImmPred:$Imm)),
         (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$Imm)>;

// or of a register with (register | immediate).
def: Pat<(or I32:$Rx, (or I32:$Rs, s32_0ImmPred:$Imm)),
         (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$Imm)>;
1522
1523
1524
// Count trailing zeros: 64-bit.
def: Pat<(i32 (trunc (cttz I64:$Src))),
         (S2_ct0p I64:$Src)>;

// Count trailing ones: 64-bit.
def: Pat<(i32 (trunc (cttz (not I64:$Src)))),
         (S2_ct1p I64:$Src)>;

// Define leading/trailing patterns that require zero-extensions to 64 bits.
// Counting in the complemented value selects the "count ones" instruction.
def: Pat<(i64 (ctlz I64:$Src)),       (ToZext64 (S2_cl0p I64:$Src))>;
def: Pat<(i64 (cttz I64:$Src)),       (ToZext64 (S2_ct0p I64:$Src))>;
def: Pat<(i64 (ctlz (not I64:$Src))), (ToZext64 (S2_cl1p I64:$Src))>;
def: Pat<(i64 (cttz (not I64:$Src))), (ToZext64 (S2_ct1p I64:$Src))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001536
1537
// ((1 << bit) & Rs) == 0, with the bit number given either as an immediate
// or in a register.
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
  def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$Bit), I32:$Val), 0)),
           (S4_ntstbit_i I32:$Val, u5_0ImmPred:$Bit)>;
  def: Pat<(i1 (seteq (and (shl 1, I32:$Bit), I32:$Val), 0)),
           (S4_ntstbit_r I32:$Val, I32:$Bit)>;
}

// Add extra complexity to prefer these instructions over bitsset/bitsclr.
// The reason is that tstbit/ntstbit can be folded into a compound instruction:
//   if ([!]tstbit(...)) jump ...
// The mask must be a power of 2; its log2 becomes the bit number.
let AddedComplexity = 100 in {
  def: Pat<(i1 (setne (and I32:$Val, (i32 IsPow2_32:$Mask)), (i32 0))),
           (S2_tstbit_i I32:$Val, (Log2_32 imm:$Mask))>;

  def: Pat<(i1 (seteq (and I32:$Val, (i32 IsPow2_32:$Mask)), (i32 0))),
           (S4_ntstbit_i I32:$Val, (Log2_32 imm:$Mask))>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001555
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
// for cmpb (same for cmph). The patterns below do not contain any additional
// complexity that would make them preferable, and if they were actually used
// instead of cmpb/cmph, they would result in a compare against register that
// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).

// (Rs & #u6) != 0
def: Pat<(i1 (setne (and I32:$Val, u6_0ImmPred:$Mask), 0)),
         (C4_nbitsclri I32:$Val, u6_0ImmPred:$Mask)>;
// (Rs & Rt) != 0
def: Pat<(i1 (setne (and I32:$Val, I32:$Mask), 0)),
         (C4_nbitsclr I32:$Val, I32:$Mask)>;
// (Rs & Rt) != Rt
def: Pat<(i1 (setne (and I32:$Val, I32:$Mask), I32:$Mask)),
         (C4_nbitsset I32:$Val, I32:$Mask)>;
1568
1569
// (Rs * #U6) + #u6 and (Rs * Rt) + #u6: the addend immediate is the first
// operand of the instruction.
def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$Factor), u32_0ImmPred:$Addend),
         (M4_mpyri_addi imm:$Addend, IntRegs:$Rs, imm:$Factor)>;
def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$Addend),
         (M4_mpyrr_addi imm:$Addend, IntRegs:$Rs, IntRegs:$Rt)>;

// Ru + (Rs * immediate). The operand order of the two instructions differs.
def: Pat<(add I32:$Ru, (mul I32:$Rs, u6_2ImmPred:$Factor)),
         (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$Factor, IntRegs:$Rs)>;
def: Pat<(add I32:$Ru, (mul I32:$Rs, u32_0ImmPred:$Factor)),
         (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$Factor)>;

// Ru + (Ry * Rs).
def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$Ry), I32:$Rs)),
         (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
1582
// setgt on v8i8 vectors selects A4_vcmpbgt.
def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
1584
// Op(ShOp(Rx, #U5), #u8): an operation combining a register shifted by an
// immediate amount with an immediate. The shift result is the first operand
// of Op in the matched DAG; the immediate is the first operand of MI.
class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
  : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$Sh), u32_0ImmPred:$Imm),
        (MI u32_0ImmPred:$Imm, IntRegs:$Rx, u5_0ImmPred:$Sh)>;

let AddedComplexity = 200 in {
  def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
  def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
  def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
  def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
}

let AddedComplexity = 30 in {
  def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
  def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
}
1600
// Op(#u8, ShOp(Rx, #U5)): like T_Shift_CommOp_pat, but with the immediate
// as the first operand of Op in the matched DAG.
class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
  : Pat<(Op u32_0ImmPred:$Imm, (ShOp IntRegs:$Rx, u5_0ImmPred:$Sh)),
        (MI u32_0ImmPred:$Imm, IntRegs:$Rx, u5_0ImmPred:$Sh)>;

def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
1607
// add/sub of an addrga operand and a register shifted by an immediate.
let AddedComplexity = 200 in {
  def: Pat<(add addrga:$Addr, (shl I32:$Rx, u5_0ImmPred:$Sh)),
           (S4_addi_asl_ri addrga:$Addr, IntRegs:$Rx, u5_0ImmPred:$Sh)>;
  def: Pat<(add addrga:$Addr, (srl I32:$Rx, u5_0ImmPred:$Sh)),
           (S4_addi_lsr_ri addrga:$Addr, IntRegs:$Rx, u5_0ImmPred:$Sh)>;
  def: Pat<(sub addrga:$Addr, (shl I32:$Rx, u5_0ImmPred:$Sh)),
           (S4_subi_asl_ri addrga:$Addr, IntRegs:$Rx, u5_0ImmPred:$Sh)>;
  def: Pat<(sub addrga:$Addr, (srl I32:$Rx, u5_0ImmPred:$Sh)),
           (S4_subi_lsr_ri addrga:$Addr, IntRegs:$Rx, u5_0ImmPred:$Sh)>;
}
1618
// Left shift of an immediate by a register amount.
def: Pat<(shl s6_0ImmPred:$Imm, I32:$Amt),
         (S4_lsli imm:$Imm, IntRegs:$Amt)>;
1621
1622
1623//===----------------------------------------------------------------------===//
1624// MEMOP
1625//===----------------------------------------------------------------------===//
1626
// Matches an immediate whose value, after truncation to a signed 8-bit
// integer, lies in the range [-31, -1].
def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
  const int8_t Val = N->getSExtValue();
  return Val >= -31 && Val < 0;
}]>;

// Matches an immediate whose value, after truncation to a signed 16-bit
// integer, lies in the range [-31, -1].
def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
  const int16_t Val = N->getSExtValue();
  return Val >= -31 && Val < 0;
}]>;

// Matches an immediate whose sign-extended value lies in the range
// [-31, -1].
def m5_0ImmPred : PatLeaf<(i32 imm), [{
  const int64_t Val = N->getSExtValue();
  return Val >= -31 && Val < 0;
}]>;
1641
// Matches an immediate whose bitwise complement, truncated to 8 bits, is a
// power of 2.
def IsNPow2_8 : PatLeaf<(i32 imm), [{
  const uint8_t Inverted = ~N->getZExtValue();
  return isPowerOf2_32(Inverted);
}]>;

// Matches an immediate whose bitwise complement, truncated to 16 bits, is a
// power of 2.
def IsNPow2_16 : PatLeaf<(i32 imm), [{
  const uint16_t Inverted = ~N->getZExtValue();
  return isPowerOf2_32(Inverted);
}]>;
1651
// Replaces an immediate with Log2_32 of its low 8 bits.
def Log2_8 : SDNodeXForm<imm, [{
  const uint8_t Low = N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(Low), SDLoc(N), MVT::i32);
}]>;

// Replaces an immediate with Log2_32 of its low 16 bits.
def Log2_16 : SDNodeXForm<imm, [{
  const uint16_t Low = N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(Low), SDLoc(N), MVT::i32);
}]>;
1661
// Replaces an immediate with Log2_32 of its bitwise complement, truncated
// to 8 bits.
def LogN2_8 : SDNodeXForm<imm, [{
  const uint8_t Inverted = ~N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(Inverted), SDLoc(N), MVT::i32);
}]>;

// Replaces an immediate with Log2_32 of its bitwise complement, truncated
// to 16 bits.
def LogN2_16 : SDNodeXForm<imm, [{
  const uint16_t Inverted = ~N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(Inverted), SDLoc(N), MVT::i32);
}]>;

// Replaces an immediate with Log2_32 of its bitwise complement, truncated
// to 32 bits.
def LogN2_32 : SDNodeXForm<imm, [{
  const uint32_t Inverted = ~N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(Inverted), SDLoc(N), MVT::i32);
}]>;
1676
// Replaces an immediate with its negation, truncated to a signed 8-bit
// value.
def NegImm8 : SDNodeXForm<imm, [{
  const int8_t Negated = -N->getSExtValue();
  return CurDAG->getTargetConstant(Negated, SDLoc(N), MVT::i32);
}]>;

// Replaces an immediate with its negation, truncated to a signed 16-bit
// value.
def NegImm16 : SDNodeXForm<imm, [{
  const int16_t Negated = -N->getSExtValue();
  return CurDAG->getTargetConstant(Negated, SDLoc(N), MVT::i32);
}]>;

// Replaces an immediate with its negation, truncated to a signed 32-bit
// value.
def NegImm32 : SDNodeXForm<imm, [{
  const int32_t Negated = -N->getSExtValue();
  return CurDAG->getTargetConstant(Negated, SDLoc(N), MVT::i32);
}]>;
1691
// Identity transform: returns the immediate node itself.
def IdImm : SDNodeXForm<imm, [{
  return SDValue(N, 0);
}]>;
1693
// Memory operation with a register operand and no address offset:
//   Store(Oper(Load(addr), A), addr)
// The address must be the same in the load and in the store. The
// instruction is given 0 as the offset.
multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
                              InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load I32:$Base), I32:$Arg), I32:$Base),
           (MI I32:$Base, 0, I32:$Arg)>;
  // Addr: fi
  def: Pat<(Store (Oper (Load AddrFI:$Base), I32:$Arg), AddrFI:$Base),
           (MI AddrFI:$Base, 0, I32:$Arg)>;
}
1703
// Memory operation with a register operand and an address of the form
// base + offset:
//   Store(Oper(Load(base + off), A), base + off)
// The address may be expressed with "add" or with "orisadd", and must be the
// same in the load and in the store. ImmPred constrains the offset.
multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                           SDNode Oper, InstHexagon MI> {
  // Addr: i32
  def: Pat<(Store (Oper (Load (add I32:$Base, ImmPred:$Offset)), I32:$Arg),
                  (add I32:$Base, ImmPred:$Offset)),
           (MI I32:$Base, imm:$Offset, I32:$Arg)>;
  def: Pat<(Store (Oper (Load (orisadd I32:$Base, ImmPred:$Offset)), I32:$Arg),
                  (orisadd I32:$Base, ImmPred:$Offset)),
           (MI I32:$Base, imm:$Offset, I32:$Arg)>;
  // Addr: fi
  def: Pat<(Store (Oper (Load (add AddrFI:$Base, ImmPred:$Offset)), I32:$Arg),
                  (add AddrFI:$Base, ImmPred:$Offset)),
           (MI AddrFI:$Base, imm:$Offset, I32:$Arg)>;
  def: Pat<(Store (Oper (Load (orisadd AddrFI:$Base, ImmPred:$Offset)),
                        I32:$Arg),
                  (orisadd AddrFI:$Base, ImmPred:$Offset)),
           (MI AddrFI:$Base, imm:$Offset, I32:$Arg)>;
}
1721
// All register-operand memop patterns for one operation: the forms without
// an address offset and the forms with a base + offset address.
multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                       SDNode Oper, InstHexagon MI> {
  defm: Memopxr_simple_pat<Load, Store, Oper, MI>;
  defm: Memopxr_add_pat   <Load, Store, ImmPred, Oper, MI>;
}
1727
// Register-operand memops for add, sub, and, or. For byte and halfword
// accesses, the any-, sign- and zero-extending loads all select the same
// instruction; the trailing comment names the load extension being matched.
let AddedComplexity = 180 in {
  // add reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
                    L4_add_memopb_io>;  // anyext
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
                    L4_add_memopb_io>;  // sext
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
                    L4_add_memopb_io>;  // zext
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
                    L4_add_memoph_io>;  // anyext
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
                    L4_add_memoph_io>;  // sext
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
                    L4_add_memoph_io>;  // zext
  defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;

  // sub reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
                    L4_sub_memopb_io>;  // anyext
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
                    L4_sub_memopb_io>;  // sext
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
                    L4_sub_memopb_io>;  // zext
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
                    L4_sub_memoph_io>;  // anyext
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
                    L4_sub_memoph_io>;  // sext
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
                    L4_sub_memoph_io>;  // zext
  defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;

  // and reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
                    L4_and_memopb_io>;  // anyext
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
                    L4_and_memopb_io>;  // sext
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
                    L4_and_memopb_io>;  // zext
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
                    L4_and_memoph_io>;  // anyext
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
                    L4_and_memoph_io>;  // sext
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
                    L4_and_memoph_io>;  // zext
  defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;

  // or reg
  defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
                    L4_or_memopb_io>;   // anyext
  defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
                    L4_or_memopb_io>;   // sext
  defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
                    L4_or_memopb_io>;   // zext
  defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
                    L4_or_memoph_io>;   // anyext
  defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
                    L4_or_memoph_io>;   // sext
  defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
                    L4_or_memoph_io>;   // zext
  defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
}
1789
1790
// Immediate-operand memop on an address with no offset: matches
//   Store (Oper (Load addr), Arg), addr
// and emits MI with offset 0 and the immediate rewritten by ArgMod.
// One pattern is generated per address kind (I32 register, AddrFI).
multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
                              PatFrag Arg, SDNodeXForm ArgMod,
                              InstHexagon MI> {
  // Address held in an i32 register.
  def: Pat<(Store (Oper (Load I32:$Rb), Arg:$V), I32:$Rb),
           (MI I32:$Rb, 0, (ArgMod Arg:$V))>;
  // Address matched by AddrFI.
  def: Pat<(Store (Oper (Load AddrFI:$Rb), Arg:$V), AddrFI:$Rb),
           (MI AddrFI:$Rb, 0, (ArgMod Arg:$V))>;
}
1801
// Immediate-operand memop on a base+offset address: matches
//   Store (Oper (Load (base + off)), Arg), (base + off)
// where the address sum is expressed either with 'add' or with 'orisadd',
// and emits MI with that offset and the immediate rewritten by ArgMod.
// Patterns are generated for both address kinds (I32 register, AddrFI).
multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                           SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
                           InstHexagon MI> {
  // Base held in an i32 register.
  def: Pat<(Store (Oper (Load (add I32:$Rb, ImmPred:$Ofs)), Arg:$V),
                  (add I32:$Rb, ImmPred:$Ofs)),
           (MI I32:$Rb, imm:$Ofs, (ArgMod Arg:$V))>;
  def: Pat<(Store (Oper (Load (orisadd I32:$Rb, ImmPred:$Ofs)), Arg:$V),
                  (orisadd I32:$Rb, ImmPred:$Ofs)),
           (MI I32:$Rb, imm:$Ofs, (ArgMod Arg:$V))>;
  // Base matched by AddrFI.
  def: Pat<(Store (Oper (Load (add AddrFI:$Rb, ImmPred:$Ofs)), Arg:$V),
                  (add AddrFI:$Rb, ImmPred:$Ofs)),
           (MI AddrFI:$Rb, imm:$Ofs, (ArgMod Arg:$V))>;
  def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rb, ImmPred:$Ofs)), Arg:$V),
                  (orisadd AddrFI:$Rb, ImmPred:$Ofs)),
           (MI AddrFI:$Rb, imm:$Ofs, (ArgMod Arg:$V))>;
}
1820
// Convenience wrapper: generates both the zero-offset and the base+offset
// immediate-operand memop patterns for one (Load, Store, Oper, Arg) tuple.
multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                       SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
                       InstHexagon MI> {
  defm: Memopxi_simple_pat<Load, Store, Oper, Arg, ArgMod, MI>;
  defm: Memopxi_add_pat<Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
}
1827
1828
// Memops with an immediate operand. Each group instantiates Memopxi_pat for
// the any/sign/zero-extending byte and halfword loads and for the plain word
// load. Besides the direct form (add #u5 -> iadd, sub #u5 -> isub), the
// opposite operation with a negated immediate is also mapped (sub #-u5 ->
// iadd, add #-u5 -> isub) via the NegImm* transforms.
let AddedComplexity = 200 in {
  // add imm
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
        /*anyext*/  IdImm, L4_iadd_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
        /*sext*/    IdImm, L4_iadd_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
        /*zext*/    IdImm, L4_iadd_memopb_io>;
  // The sext/zext halfword forms must use sextloadi16/zextloadi16 (not a
  // repeated extloadi16), matching every other group in this block.
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
        /*anyext*/  IdImm, L4_iadd_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
        /*sext*/    IdImm, L4_iadd_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
        /*zext*/    IdImm, L4_iadd_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
                    L4_iadd_memopw_io>;
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
        /*anyext*/  NegImm8, L4_iadd_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
        /*sext*/    NegImm8, L4_iadd_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
        /*zext*/    NegImm8, L4_iadd_memopb_io>;
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
        /*anyext*/  NegImm16, L4_iadd_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
        /*sext*/    NegImm16, L4_iadd_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
        /*zext*/    NegImm16, L4_iadd_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
                    L4_iadd_memopw_io>;

  // sub imm
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
        /*anyext*/  IdImm, L4_isub_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
        /*sext*/    IdImm, L4_isub_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
        /*zext*/    IdImm, L4_isub_memopb_io>;
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
        /*anyext*/  IdImm, L4_isub_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
        /*sext*/    IdImm, L4_isub_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
        /*zext*/    IdImm, L4_isub_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
                    L4_isub_memopw_io>;
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
        /*anyext*/  NegImm8, L4_isub_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
        /*sext*/    NegImm8, L4_isub_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
        /*zext*/    NegImm8, L4_isub_memopb_io>;
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
        /*anyext*/  NegImm16, L4_isub_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
        /*sext*/    NegImm16, L4_isub_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
        /*zext*/    NegImm16, L4_isub_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
                    L4_isub_memopw_io>;

  // clrbit imm
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
        /*anyext*/  LogN2_8, L4_iand_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
        /*sext*/    LogN2_8, L4_iand_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
        /*zext*/    LogN2_8, L4_iand_memopb_io>;
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
        /*anyext*/  LogN2_16, L4_iand_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
        /*sext*/    LogN2_16, L4_iand_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
        /*zext*/    LogN2_16, L4_iand_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32,
                    LogN2_32, L4_iand_memopw_io>;

  // setbit imm
  // NOTE(review): the byte and halfword setbit patterns use the 32-bit
  // IsPow2_32 predicate, whereas clrbit above uses width-specific
  // IsNPow2_8/IsNPow2_16 -- confirm this asymmetry is intended.
  defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
        /*anyext*/  Log2_8, L4_ior_memopb_io>;
  defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
        /*sext*/    Log2_8, L4_ior_memopb_io>;
  defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
        /*zext*/    Log2_8, L4_ior_memopb_io>;
  defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
        /*anyext*/  Log2_16, L4_ior_memoph_io>;
  defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
        /*sext*/    Log2_16, L4_ior_memoph_io>;
  defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
        /*zext*/    Log2_16, L4_ior_memoph_io>;
  defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32,
                    Log2_32, L4_ior_memopw_io>;
}
1922
// Compare-with-immediate patterns built from T_CMP_pat.
def: T_CMP_pat<C4_cmpneqi,  setne,  s32_0ImmPred>;
def: T_CMP_pat<C4_cmpltei,  setle,  s32_0ImmPred>;
def: T_CMP_pat<C4_cmplteui, setule, u9_0ImmPred>;

// Rs < Imm  ->  C4_cmpltei(Rs, Imm-1). SDEC1 supplies the decremented
// immediate.
def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
         (C4_cmpltei IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;

// Rs != Imm  ->  C4_cmpneqi(Rs, Imm).
def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
         (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
1934
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001935// For the sequence
1936// zext( setult ( and(Rs, 255), u8))
1937// Use the isdigit transformation below
1938
Krzysztof Parzyszek846597d2016-11-06 18:05:14 +00001939
def u7_0PosImmPred : ImmLeaf<i32, [{
  // Accept only strictly positive immediates that fit in a 7-bit
  // unsigned field.
  return isUInt<7>(Imm) && Imm > 0;
}]>;
1945
1946
// The "isdigit" idiom: emit 'C2_muxii(A4_cmpbgtui(Rs, C-1), 0, 1)' for C
// code of the form
//   retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
// which reaches LLVM already rewritten as
//   retval = (c - 48) < 10 ? 1 : 0;
// The transformation relies on two properties:
// 1) The data type is unsigned, which removes the need for a separate zero
//    test once the expression has been biased by 48. This depends on the
//    representation and semantics of unsigned types.
// 2) The front end has already converted '<= 9' into '< 10'.
//
// The pattern matches zext(setult(Rs & 255, C)) for a positive 7-bit C;
// UDEC1 supplies C-1 for the byte compare.
let AddedComplexity = 139 in
def: Pat<(i32 (zext (i1 (setult (i32 (and I32:$src1, 255)),
                                u7_0PosImmPred:$src2)))),
         (C2_muxii (A4_cmpbgtui IntRegs:$src1, (UDEC1 imm:$src2)), 0, 1)>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00001964
// Load of type VT from an address matched by Addr: emit MI on that address.
class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
  : Pat<(VT (Load Addr:$addr)),
        (MI Addr:$addr)>;
1967
// Like Loada_pat, but the result of MI is additionally wrapped in ValueMod.
class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
                 InstHexagon MI>
  : Pat<(VT (Load Addr:$addr)),
        (ValueMod (MI Addr:$addr))>;
1971
// Store of Value to an address matched by Addr: emit MI with the address
// operand first and the value second.
class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
  : Pat<(Store Value:$val, Addr:$addr),
        (MI Addr:$addr, Value:$val)>;
1974
// Like Storea_pat, but the stored value is passed through ValueMod before
// being handed to MI.
class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
                  InstHexagon MI>
  : Pat<(Store Value:$val, Addr:$addr),
        (MI Addr:$addr, (ValueMod Value:$val))>;
1979
// Stores to 'addrga' addresses using the PS_storer*abs instructions.
let AddedComplexity = 30 in {
  // 32- and 64-bit register values.
  def: Storea_pat<truncstorei8,  I32, addrga, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrga, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrga, PS_storerdabs>;

  // Truncating stores of a 64-bit value: store its low subregister (LoReg).
  def: Stoream_pat<truncstorei8,  I64, addrga, LoReg, PS_storerbabs>;
  def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
  def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
}
1990
// Atomic stores to 'addrgp' addresses. SwapSt wraps each atomic_store_N
// fragment before it is handed to Storea_pat.
def: Storea_pat<SwapSt<atomic_store_8>,  I32, addrgp, S2_storerbgp>;
def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
1995
// Stores to 'addrgp' addresses using the S2_storer*gp instructions.
let AddedComplexity = 100 in {
  def: Storea_pat<truncstorei8,  I32, addrgp, S2_storerbgp>;
  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
  def: Storea_pat<store,         I32, addrgp, S2_storerigp>;
  def: Storea_pat<store,         I64, addrgp, S2_storerdgp>;

  // Store of the i1 constant -1 to a HexagonCONST32_GP global address:
  // materialize the value 1 with A2_tfrsi and store it with S2_storerbgp.
  // (The enclosing 'let' already supplies AddedComplexity = 100.)
  def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
           (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
}
2008
// Load of type VT (i32 unless overridden) from a HexagonCONST32 global
// address: emit MI directly on the global.
class LoadAbs_pats<PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
  : Pat<(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
        (VT (MI tglobaladdr:$absaddr))>;
2012
// Loads from HexagonCONST32 global addresses via the PS_loadr*abs
// instructions.
let AddedComplexity = 30 in {
  def: LoadAbs_pats<load,        PS_loadriabs>;
  def: LoadAbs_pats<zextloadi1,  PS_loadrubabs>;
  def: LoadAbs_pats<sextloadi8,  PS_loadrbabs>;
  def: LoadAbs_pats<extloadi8,   PS_loadrubabs>;
  def: LoadAbs_pats<zextloadi8,  PS_loadrubabs>;
  def: LoadAbs_pats<sextloadi16, PS_loadrhabs>;
  def: LoadAbs_pats<extloadi16,  PS_loadruhabs>;
  def: LoadAbs_pats<zextloadi16, PS_loadruhabs>;
  def: LoadAbs_pats<load,        PS_loadrdabs, i64>;

  // Zero-extending i1 load producing an i64: load the byte, then widen the
  // result with ToZext64.
  def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
           (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
}
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002028
// Atomic loads from 'addrgp' addresses.
def: Loada_pat<atomic_load_8,  i32, addrgp, L2_loadrubgp>;
def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;

// Predicate load from a global address:
//   Pd = load(globaladdress)  ->  Rd = memb(globaladdress), Pd = Rd
def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;

// Predicate store to a global address: convert the i1 value with I1toI32 and
// store it with a byte store.
def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
2040
// Load of type VT (i32 unless overridden) from a HexagonCONST32_GP global
// address:
//   load(globaladdress)  ->  mem[u][bhwd](#foo)
class LoadGP_pats<PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
  : Pat<(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
        (VT (MI tglobaladdr:$global))>;
2045
// Loads from HexagonCONST32_GP global addresses via the L2_loadr*gp
// instructions.
let AddedComplexity = 100 in {
  def: LoadGP_pats<extloadi8,   L2_loadrubgp>;
  def: LoadGP_pats<sextloadi8,  L2_loadrbgp>;
  def: LoadGP_pats<zextloadi8,  L2_loadrubgp>;
  def: LoadGP_pats<extloadi16,  L2_loadruhgp>;
  def: LoadGP_pats<sextloadi16, L2_loadrhgp>;
  def: LoadGP_pats<zextloadi16, L2_loadruhgp>;
  def: LoadGP_pats<load,        L2_loadrigp>;
  def: LoadGP_pats<load,        L2_loadrdgp, i64>;

  // When the Interprocedural Global Variable optimizer realizes that a
  // certain global variable takes only two constant values, it shrinks the
  // global to a boolean. Catch those loads with the two patterns below.
  def: LoadGP_pats<extloadi1,   L2_loadrubgp>;
  def: LoadGP_pats<zextloadi1,  L2_loadrubgp>;
}
2064
// Materialize a global or block address in a register with A2_tfrsi.
def: Pat<(HexagonCONST32    tglobaladdr:$addr),   (A2_tfrsi imm:$addr)>;
def: Pat<(HexagonCONST32_GP tblockaddress:$addr), (A2_tfrsi imm:$addr)>;
def: Pat<(HexagonCONST32_GP tglobaladdr:$addr),   (A2_tfrsi imm:$addr)>;
2069
// Stores to and loads from an address given as a 32-bit unsigned immediate
// (u32_0ImmPred), using the PS_*abs instructions.
let AddedComplexity = 30 in {
  def: Storea_pat<truncstorei8,  I32, u32_0ImmPred, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
  def: Storea_pat<store,         I32, u32_0ImmPred, PS_storeriabs>;

  def: Loada_pat<load,        i32, u32_0ImmPred, PS_loadriabs>;
  def: Loada_pat<sextloadi8,  i32, u32_0ImmPred, PS_loadrbabs>;
  def: Loada_pat<zextloadi8,  i32, u32_0ImmPred, PS_loadrubabs>;
  def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
  def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
}
2083
// Indexed word store of an immediate, with a global address as the stored
// value pattern:
//   memw(Rs+#u6:2)=#S8
let AddedComplexity = 100 in
defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
2088
let AddedComplexity = 100 in {
  // Load from a global address that has only one use in the current basic
  // block.
  def: Loada_pat<extloadi8,   i32, addrga, PS_loadrubabs>;
  def: Loada_pat<sextloadi8,  i32, addrga, PS_loadrbabs>;
  def: Loada_pat<zextloadi8,  i32, addrga, PS_loadrubabs>;

  def: Loada_pat<extloadi16,  i32, addrga, PS_loadruhabs>;
  def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
  def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;

  def: Loada_pat<load,        i32, addrga, PS_loadriabs>;
  def: Loada_pat<load,        i64, addrga, PS_loadrdabs>;

  // Store to a global address that has only one use in the current basic
  // block.
  def: Storea_pat<truncstorei8,  I32, addrga, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrga, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrga, PS_storerdabs>;

  def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
}
2112
// Extending i8/i16/i32 loads from 'addrga' addresses that produce an i64.
// The narrow load result is widened with ToZext64 (any- and zero-extending
// loads) or ToSext64 (sign-extending loads).
// A complexity of 120 is needed here to override the preceding handling of
// zextload.
let AddedComplexity = 120 in {
  def: Loadam_pat<extloadi8,   i64, addrga, ToZext64, PS_loadrubabs>;
  def: Loadam_pat<sextloadi8,  i64, addrga, ToSext64, PS_loadrbabs>;
  def: Loadam_pat<zextloadi8,  i64, addrga, ToZext64, PS_loadrubabs>;

  def: Loadam_pat<extloadi16,  i64, addrga, ToZext64, PS_loadruhabs>;
  def: Loadam_pat<sextloadi16, i64, addrga, ToSext64, PS_loadrhabs>;
  def: Loadam_pat<zextloadi16, i64, addrga, ToZext64, PS_loadruhabs>;

  def: Loadam_pat<extloadi32,  i64, addrga, ToZext64, PS_loadriabs>;
  def: Loadam_pat<sextloadi32, i64, addrga, ToSext64, PS_loadriabs>;
  def: Loadam_pat<zextloadi32, i64, addrga, ToZext64, PS_loadriabs>;
}
2129
// Loads from and stores to 'addrgp' addresses using the PS_*abs
// instructions.
let AddedComplexity = 100 in {
  def: Loada_pat<extloadi8,   i32, addrgp, PS_loadrubabs>;
  def: Loada_pat<sextloadi8,  i32, addrgp, PS_loadrbabs>;
  def: Loada_pat<zextloadi8,  i32, addrgp, PS_loadrubabs>;

  def: Loada_pat<extloadi16,  i32, addrgp, PS_loadruhabs>;
  def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
  def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;

  def: Loada_pat<load,        i32, addrgp, PS_loadriabs>;
  def: Loada_pat<load,        i64, addrgp, PS_loadrdabs>;

  def: Storea_pat<truncstorei8,  I32, addrgp, PS_storerbabs>;
  def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
  def: Storea_pat<store,         I32, addrgp, PS_storeriabs>;
  def: Storea_pat<store,         I64, addrgp, PS_storerdabs>;
}
2149
// Atomic loads from 'addrgp' addresses using the PS_loadr*abs instructions.
def: Loada_pat<atomic_load_8,  i32, addrgp, PS_loadrubabs>;
def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;

// Atomic stores to 'addrgp' addresses using the PS_storer*abs instructions.
def: Storea_pat<SwapSt<atomic_store_8>,  I32, addrgp, PS_storerbabs>;
def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
2159
// Packing four 16-bit fields into an i64:
//   (b & 0xFFFF) << 16 | (a & 0xFFFF) | (c & 0xFFFF) << 32 | d << 48
// is selected as Insert4(a, b, c, d).
def: Pat<(or (or (or (shl (i64 (zext (i32 (and I32:$b, (i32 65535))))),
                          (i32 16)),
                     (i64 (zext (i32 (and I32:$a, (i32 65535)))))),
                 (shl (i64 (anyext (i32 (and I32:$c, (i32 65535))))),
                      (i32 32))),
             (shl (Aext64 I32:$d), (i32 48))),
         (Insert4 IntRegs:$a, IntRegs:$b, IntRegs:$c, IntRegs:$d)>;
2167
// ISD::PREFETCH is custom-lowered into HexagonISD::DCFETCH because the
// ISD::PREFETCH SDNode carries the MayLoad and MayStore properties, neither
// of which is wanted here. The DCFETCH node has no results and two operands:
// a pointer and an integer.
def SDTHexagonDCFETCH
  : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def HexagonDCFETCH
  : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, [SDNPHasChain]>;
2174
// DCFETCH with the offset given as the second operand.
def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
         (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
// DCFETCH with the offset folded into the address and a zero second operand.
def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)),
                         (i32 0)),
         (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
2179
// Leaves matching any f32 / f64 floating-point immediate.
def f32ImmPred: PatLeaf<(f32 fpimm:$F)>;
def f64ImmPred: PatLeaf<(f64 fpimm:$F)>;
2182
def ftoi : SDNodeXForm<fpimm, [{
  // Re-express the floating-point immediate as an integer target constant
  // with the same bit pattern and the same bit width.
  APInt Bits = N->getValueAPF().bitcastToAPInt();
  MVT IntVT = MVT::getIntegerVT(Bits.getBitWidth());
  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), IntVT);
}]>;
2188
2189
// ((Rss >> #u6) + 1) >> 1 on a 64-bit value  ->  S2_asr_i_p_rnd(Rss, #u6).
def: Pat<(sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)),
              (i32 1)),
         (S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
2192
// Type profile with one i32 result and one i64 operand.
def SDTHexagonI32I64
  : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i64>]>;

def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;

// POPCOUNT of a 64-bit register pair  ->  S5_popcountp.
def: Pat<(HexagonPOPCOUNT I64:$Rss),
         (S5_popcountp I64:$Rss)>;
2199
// Floating-point loads. f32 values use the word-load instructions and f64
// values use the doubleword-load instructions, for each addressing form.

// Base + immediate offset.
let AddedComplexity = 20 in {
  defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
  defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
}

// T_LoadAbsReg_Pat forms (L4_loadr*_ur).
let AddedComplexity = 60 in {
  defm: T_LoadAbsReg_Pat<load, L4_loadri_ur, f32>;
  defm: T_LoadAbsReg_Pat<load, L4_loadrd_ur, f64>;
}

// Loadxs_pat forms (L4_loadr*_rr).
let AddedComplexity = 40 in {
  def: Loadxs_pat<load, f32, L4_loadri_rr>;
  def: Loadxs_pat<load, f64, L4_loadrd_rr>;
}

// Loadxs_simple_pat forms (L4_loadr*_rr).
let AddedComplexity = 20 in {
  def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
  def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
}

// Immediate and 'addrga' addresses (PS_loadr*abs).
let AddedComplexity = 80 in {
  def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
  def: Loada_pat<load, f32, addrga,       PS_loadriabs>;
  def: Loada_pat<load, f64, addrga,       PS_loadrdabs>;
}

// HexagonCONST32_GP global addresses (L2_loadr*gp).
let AddedComplexity = 100 in {
  def: LoadGP_pats<load, L2_loadrigp, f32>;
  def: LoadGP_pats<load, L2_loadrdgp, f64>;
}
2230
// Floating-point stores. F32 values use the word-store instructions and F64
// values use the doubleword-store instructions, for each addressing form.

// Base + immediate offset.
let AddedComplexity = 20 in {
  defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
  defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
}

// Simple patterns should be tried with the least priority.
def: Storex_simple_pat<store, F32, S2_storeri_io>;
def: Storex_simple_pat<store, F64, S2_storerd_io>;

// T_StoreAbsReg_Pats forms (S4_storer*_ur).
let AddedComplexity = 60 in {
  defm: T_StoreAbsReg_Pats<S4_storeri_ur, IntRegs,    f32, store>;
  defm: T_StoreAbsReg_Pats<S4_storerd_ur, DoubleRegs, f64, store>;
}

// Storexs_pat forms (S4_storer*_rr).
let AddedComplexity = 40 in {
  def: Storexs_pat<store, F32, S4_storeri_rr>;
  def: Storexs_pat<store, F64, S4_storerd_rr>;
}

// Store_rr_pat forms (S4_storer*_rr).
let AddedComplexity = 20 in {
  def: Store_rr_pat<store, F32, S4_storeri_rr>;
  def: Store_rr_pat<store, F64, S4_storerd_rr>;
}

// 'addrga' addresses (PS_storer*abs).
let AddedComplexity = 80 in {
  def: Storea_pat<store, F32, addrga, PS_storeriabs>;
  def: Storea_pat<store, F64, addrga, PS_storerdabs>;
}

// 'addrgp' addresses (S2_storer*gp).
let AddedComplexity = 100 in {
  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
}

// NOTE(review): the four definitions below repeat instantiations that
// already appear earlier in this group (the Storex_pat pair here without an
// AddedComplexity override) -- confirm whether they are still needed.
defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
def: Storex_simple_pat<store, F32, S2_storeri_io>;
def: Storex_simple_pat<store, F64, S2_storerd_io>;
2269
// Single-precision add, subtract and multiply.
def: Pat<(fadd F32:$Rs, F32:$Rt), (F2_sfadd F32:$Rs, F32:$Rt)>;
def: Pat<(fsub F32:$Rs, F32:$Rt), (F2_sfsub F32:$Rs, F32:$Rt)>;
def: Pat<(fmul F32:$Rs, F32:$Rt), (F2_sfmpy F32:$Rs, F32:$Rt)>;

// Single-precision fminnum/fmaxnum, guarded by the HasV5T predicate.
let Predicates = [HasV5T] in {
  def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)),
           (F2_sfmin F32:$Rs, F32:$Rt)>;
  def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)),
           (F2_sfmax F32:$Rs, F32:$Rt)>;
}
2283
// Select-on-compare idioms for f32 mapped onto F2_sfmin / F2_sfmax.
let AddedComplexity = 100, Predicates = [HasV5T] in {
  // select(Cmp(Rs, Rt), Rs, Rt): operands selected in compare order.
  class SfSel12<PatFrag Cmp, InstHexagon MI>
    : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
          (MI F32:$Rs, F32:$Rt)>;
  // select(Cmp(Rs, Rt), Rt, Rs): operands selected in swapped order.
  class SfSel21<PatFrag Cmp, InstHexagon MI>
    : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs),
          (MI F32:$Rs, F32:$Rt)>;

  def: SfSel12<setolt, F2_sfmin>;
  def: SfSel12<setole, F2_sfmin>;
  def: SfSel12<setogt, F2_sfmax>;
  def: SfSel12<setoge, F2_sfmax>;
  def: SfSel21<setolt, F2_sfmax>;
  def: SfSel21<setole, F2_sfmax>;
  def: SfSel21<setogt, F2_sfmin>;
  def: SfSel21<setoge, F2_sfmin>;
}
2301
// i1 result of comparing two F32 operands with OpNode: emit MI on the same
// operands in the same order.
class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<(i1 (OpNode F32:$src1, F32:$src2)), (MI F32:$src1, F32:$src2)>;
// i1 result of comparing two F64 operands with OpNode: emit MI on the same
// operands in the same order.
class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI>
  : Pat<(i1 (OpNode F64:$src1, F64:$src2)), (MI F64:$src1, F64:$src2)>;
2308
// Ordered ge/eq/gt and the unordered test, single precision.
def: T_fcmp32_pat<setoge, F2_sfcmpge>;
def: T_fcmp32_pat<setuo,  F2_sfcmpuo>;
def: T_fcmp32_pat<setoeq, F2_sfcmpeq>;
def: T_fcmp32_pat<setogt, F2_sfcmpgt>;

// Ordered ge/eq/gt and the unordered test, double precision.
def: T_fcmp64_pat<setoge, F2_dfcmpge>;
def: T_fcmp64_pat<setuo,  F2_dfcmpuo>;
def: T_fcmp64_pat<setoeq, F2_dfcmpeq>;
def: T_fcmp64_pat<setogt, F2_dfcmpgt>;
2318
// Generates the F32 and F64 patterns for one compare operator, mapping it to
// IntMI and DoubleMI respectively with the operands in the same order.
let Predicates = [HasV5T] in
multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
  // Single precision (F32).
  def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), (IntMI F32:$src1, F32:$src2)>;
  // Double precision (F64).
  def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), (DoubleMI F64:$src1, F64:$src2)>;
}
2328
// seteq / setgt / setge on floating-point operands.
defm: T_fcmp_pats<seteq, F2_sfcmpeq, F2_dfcmpeq>;
defm: T_fcmp_pats<setgt, F2_sfcmpgt, F2_dfcmpgt>;
defm: T_fcmp_pats<setge, F2_sfcmpge, F2_dfcmpge>;
2332
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
// Each unordered compare is emitted as (operands unordered) OR (the ordered
// compare), i.e. C2_or of the *cmpuo instruction and IntMI/DoubleMI.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
  // Single precision (F32).
  def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
                  (IntMI      F32:$src1, F32:$src2))>;

  // Double precision (F64).
  def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
                  (DoubleMI   F64:$src1, F64:$src2))>;
}
2348
// Unordered ge / gt / eq.
defm: unord_Pats<setuge, F2_sfcmpge, F2_dfcmpge>;
defm: unord_Pats<setugt, F2_sfcmpgt, F2_dfcmpgt>;
defm: unord_Pats<setueq, F2_sfcmpeq, F2_dfcmpeq>;
2352
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for an i1 compare result that is itself
// compared against a constant 0 or 1. With cmp = cmpOp(op1, op2):
//     seteq(cmp, 0) -> not(cmp)
//     seteq(cmp, 1) -> cmp
//     setne(cmp, 0) -> cmp
//     setne(cmp, 1) -> not(cmp)
// The compare instruction receives the operands in their original order.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
                         InstHexagon DoubleMI> {
  // Single precision (F32).
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (C2_not (IntMI F32:$src1, F32:$src2))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (IntMI F32:$src1, F32:$src2)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (IntMI F32:$src1, F32:$src2)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (C2_not (IntMI F32:$src1, F32:$src2))>;

  // Double precision (F64).
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (C2_not (DoubleMI F64:$src1, F64:$src2))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (DoubleMI F64:$src1, F64:$src2)>;
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (DoubleMI F64:$src1, F64:$src2)>;
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (C2_not (DoubleMI F64:$src1, F64:$src2))>;
}
2383
// Ordered eq / ge / gt compared against 0 or 1.
defm: eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
defm: eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
defm: eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
2387
//===----------------------------------------------------------------------===//
// Multiclass to define 'Def Pats' for the following dags:
//     seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
//     seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
//     setne(setolt(op1, op2), 0) -> setogt(op2, op1)
//     setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
// The "less" compare is implemented with the corresponding "greater"
// instruction by swapping the operands.
//===----------------------------------------------------------------------===//
let Predicates = [HasV5T] in
multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
                         InstHexagon DoubleMI> {
  // IntRegs
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (C2_not (IntMI F32:$src2, F32:$src1))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (IntMI F32:$src2, F32:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
           (IntMI F32:$src2, F32:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
           (C2_not (IntMI F32:$src2, F32:$src1))>;

  // DoubleRegs
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (DoubleMI F64:$src2, F64:$src1)>;
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
           (DoubleMI F64:$src2, F64:$src1)>;
  // setne(cmp, 1) is the negated compare. Matching the constant 0 here
  // would duplicate the pattern above with a contradictory result.
  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
}
2418
// Ordered le / lt compared against 0 or 1.
defm: eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
defm: eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
2421
2422
// Ordered test: seto is the inverse of setuo (see
// http://llvm.org/docs/LangRef.html#i_fcmp), so it is emitted as the negated
// *cmpuo instruction. A floating-point immediate operand is first moved
// into a register as its integer bit pattern (ftoi).
let Predicates = [HasV5T] in {
  // Single precision.
  def: Pat<(i1 (seto F32:$src1, F32:$src2)),
           (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
  def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
           (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
  // Double precision.
  def: Pat<(i1 (seto F64:$src1, F64:$src2)),
           (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
  def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
           (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
}
2434
// Ordered lt: a < b is emitted as the "greater than" compare with swapped
// operands. Floating-point immediates are materialized from their integer
// bit pattern (ftoi).
let Predicates = [HasV5T] in {
  def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
           (F2_sfcmpgt F32:$src2, F32:$src1)>;
  def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
  def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
           (F2_dfcmpgt F64:$src2, F64:$src1)>;
  def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
}

// Unordered lt: (operands unordered) OR (swapped "greater than" compare).
let Predicates = [HasV5T] in {
  def: Pat<(i1 (setult F32:$src1, F32:$src2)),
           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
                  (F2_sfcmpgt F32:$src2, F32:$src1))>;
  def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
                  (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
  def: Pat<(i1 (setult F64:$src1, F64:$src2)),
           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
                  (F2_dfcmpgt F64:$src2, F64:$src1))>;
  def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
                  (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
}
2462
2463// Ordered le.
// src1 <= src2 is selected as cmpge(src2, src1), i.e. the "greater or equal"
// instruction with the operands exchanged.
2464let Predicates = [HasV5T] in {
2465 // rs <= rt -> rt >= rs.
2466 def: Pat<(i1 (setole F32:$src1, F32:$src2)),
2467 (F2_sfcmpge F32:$src2, F32:$src1)>;
2468 def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
2469 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2470
2471 // Rss <= Rtt -> Rtt >= Rss.
2472 def: Pat<(i1 (setole F64:$src1, F64:$src2)),
2473 (F2_dfcmpge F64:$src2, F64:$src1)>;
2474 def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
2475 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
2476}
2477
2478// Unordered le.
// setule is the OR (C2_or) of the unordered test cmpuo(src1, src2) and the
// swapped-operand cmpge(src2, src1).
2479let Predicates = [HasV5T] in {
2480// rs <= rt -> rt >= rs.
2481 def: Pat<(i1 (setule F32:$src1, F32:$src2)),
2482 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2483 (F2_sfcmpge F32:$src2, F32:$src1))>;
2484 def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
2485 (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
2486 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
2487 def: Pat<(i1 (setule F64:$src1, F64:$src2)),
2488 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2489 (F2_dfcmpge F64:$src2, F64:$src1))>;
2490 def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
2491 (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
2492 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
2493}
2494
// Ordered ne.
// setone is true only when the operands are ordered (neither is a NaN) AND
// not equal.  Inverting cmpeq alone is not enough: cmpeq is false for NaN
// operands, so its inverse would report true for unordered inputs.  The
// unordered test is therefore OR-ed with the equality test before the
// inversion:  one(a, b) == !(uo(a, b) || eq(a, b)).
let Predicates = [HasV5T] in {
  def: Pat<(i1 (setone F32:$src1, F32:$src2)),
           (C2_not (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
                          (F2_sfcmpeq F32:$src1, F32:$src2)))>;
  def: Pat<(i1 (setone F64:$src1, F64:$src2)),
           (C2_not (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
                          (F2_dfcmpeq F64:$src1, F64:$src2)))>;
  // Immediate right-hand side: the constant is wrapped once per compare.
  def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
           (C2_not (C2_or (F2_sfcmpuo F32:$src1,
                                      (f32 (A2_tfrsi (ftoi $src2)))),
                          (F2_sfcmpeq F32:$src1,
                                      (f32 (A2_tfrsi (ftoi $src2))))))>;
  def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
           (C2_not (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
                          (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2)))))>;
}
2506
2507// Unordered ne.
// setune is the OR (C2_or) of the unordered test cmpuo(src1, src2) and the
// inverted equality test C2_not(cmpeq(src1, src2)).  In the immediate forms
// the constant is wrapped separately for each of the two compares.
2508let Predicates = [HasV5T] in {
2509 def: Pat<(i1 (setune F32:$src1, F32:$src2)),
2510 (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
2511 (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
2512 def: Pat<(i1 (setune F64:$src1, F64:$src2)),
2513 (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
2514 (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
2515 def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
2516 (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
2517 (C2_not (F2_sfcmpeq F32:$src1,
2518 (f32 (A2_tfrsi (ftoi $src2))))))>;
2519 def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
2520 (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
2521 (C2_not (F2_dfcmpeq F64:$src1,
2522 (CONST64 (ftoi $src2)))))>;
2523}
2524
2525// Besides set[o|u][comparisons], we also need set[comparisons].
// These are the condition codes without an explicit ordered/unordered
// qualifier.  They are selected with the same instructions as the ordered
// forms: lt/le swap the operands of cmpgt/cmpge, and ne inverts cmpeq.
2526let Predicates = [HasV5T] in {
2527 // lt.
2528 def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
2529 (F2_sfcmpgt F32:$src2, F32:$src1)>;
2530 def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
2531 (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2532 def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
2533 (F2_dfcmpgt F64:$src2, F64:$src1)>;
2534 def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
2535 (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
2536
2537 // le.
2538 // rs <= rt -> rt >= rs.
2539 def: Pat<(i1 (setle F32:$src1, F32:$src2)),
2540 (F2_sfcmpge F32:$src2, F32:$src1)>;
2541 def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
2542 (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
2543
2544 // Rss <= Rtt -> Rtt >= Rss.
2545 def: Pat<(i1 (setle F64:$src1, F64:$src2)),
2546 (F2_dfcmpge F64:$src2, F64:$src1)>;
2547 def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
2548 (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
2549
2550 // ne.
2551 def: Pat<(i1 (setne F32:$src1, F32:$src2)),
2552 (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
2553 def: Pat<(i1 (setne F64:$src1, F64:$src2)),
2554 (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
2555 def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
2556 (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
2557 def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
2558 (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
2559}
2560
2561
// Floating-point conversions.  Each pattern maps one generic conversion
// node directly onto a single F2_conv_* instruction.

// f32 <-> f64.
2562def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
2563def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
2564
// Signed integer (i32/i64) to f32/f64.
2565def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
2566def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
2567def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
2568def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
2569
// Unsigned integer (i32/i64) to f32/f64.
2570def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
2571def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
2572def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
2573def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
2574
// f32/f64 to signed integer; the "_chop" instruction variants are used
// (presumably round-toward-zero -- confirm against the ISA manual).
2575def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
2576def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
2577def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
2578def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
2579
// f32/f64 to unsigned integer, likewise with the "_chop" variants.
2580def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
2581def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
2582def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
2583def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
2584
2585// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
// A bitconvert between a float and the same-width integer emits no
// instruction: the result is the source operand itself, merely re-typed
// (f32 <-> i32 and f64 <-> i64).
2586let Predicates = [HasV5T] in {
2587 def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
2588 def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
2589 def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
2590 def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
2591}
2592
// Fused multiply-add on f32.  The generic node is (fma a, b, addend); the
// instruction takes the addend ($src1) first, followed by the two factors.
2593def : Pat <(fma F32:$src2, F32:$src3, F32:$src1),
2594 (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
2595
// A negated factor -- either the first or the second -- selects F2_sffms
// with the un-negated operands.
2596def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1),
2597 (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
2598
2599def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1),
2600 (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
2601
// select on f32 values, chosen by a predicate register.
// Register/immediate: C2_muxir takes (pred, reg, imm).
// NOTE(review): the immediate is passed as (ftoi $imm); confirm that
// f32ImmPred restricts it to what the mux immediate field can encode.
2602def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm),
2603 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>,
2604 Requires<[HasV5T]>;
2605
// Immediate/register: C2_muxri takes (pred, imm, reg).
2606def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt),
2607 (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>,
2608 Requires<[HasV5T]>;
2609
// Register/register.
2610def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
2611 (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
2612 Requires<[HasV5T]>;
2613
// select driven by an unordered-or-less-than compare.
// setult(a, b) is true when a < b or when either operand is a NaN, which is
// exactly the negation of the ordered compare a >= b.  The cmpge
// instructions yield false for unordered operands, so the condition is
// emitted as cmpge(a, b) and the two mux inputs are exchanged to undo the
// negation:
//   select(ult(a, b), x, y)  ==  mux(cmpge(a, b), y, x)
def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
         (C2_mux (F2_sfcmpge F32:$src1, F32:$src2), F32:$src4, F32:$src3)>,
      Requires<[HasV5T]>;

// Plain select on f64 values.
def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
         (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
      Requires<[HasV5T]>;

// Double-precision counterpart of the setult select above.
def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
         (C2_vmux (F2_dfcmpge F64:$src1, F64:$src2), F64:$src4, F64:$src3)>,
      Requires<[HasV5T]>;
2625
2626// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
2627// => r0 = mux(p0, r1, #i)
// The predicate inversion is folded away by exchanging the two mux inputs.
2628def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3),
2629 (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>,
2630 Requires<[HasV5T]>;
2631
2632// Map from p0 = pnot(p0); r0 = select(p0, r1, #i)
2633// => r0 = mux(p0, #i, r1)
2634def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3),
2635 (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>,
2636 Requires<[HasV5T]>;
2637
// f64 -> i32 via the 64-bit conversion: convert to a 64-bit integer with
// F2_conv_df2d_chop and keep only the low subregister of the result.
// NOTE(review): an (i32 (fp_to_sint F64)) pattern selecting
// F2_conv_df2w_chop also exists earlier in this file; confirm which of the
// two is intended to be selected.
2638def: Pat<(i32 (fp_to_sint F64:$src1)),
2639 (LoReg (F2_conv_df2d_chop F64:$src1))>,
2640 Requires<[HasV5T]>;
2641
// f32 absolute value and negation are done with integer bit operations on
// bit 31 of the register: fabs uses S2_clrbit_i and fneg uses
// S2_togglebit_i, both with the immediate 31.
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00002642def : Pat <(fabs F32:$src1),
2643 (S2_clrbit_i F32:$src1, 31)>,
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002644 Requires<[HasV5T]>;
2645
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00002646def : Pat <(fneg F32:$src1),
2647 (S2_togglebit_i F32:$src1, 31)>,
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002648 Requires<[HasV5T]>;
2649
2650
// Pattern fragments that split loads and stores by alignment.  Each one
// matches a plain load/store and then applies isAlignedMemNode (a helper
// defined outside this file) as the predicate; the "unaligned" variants
// accept exactly the nodes the "aligned" variants reject.
//
// cast<> is used instead of dyn_cast<>: the result is handed on without a
// null check, and a node matched by load/store is expected to always be a
// MemSDNode, so an unexpected node kind should assert rather than pass a
// null pointer to the helper.
def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return isAlignedMemNode(cast<MemSDNode>(N));
}]>;

def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
  return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
2666
2667
// Immediate predicates for the vector load/store offsets used below.
// s4_6ImmPred accepts an i32 immediate when isShiftedInt<4,6> holds for its
// sign-extended value; s4_7ImmPred does the same with isShiftedInt<4,7>.
Krzysztof Parzyszek846597d2016-11-06 18:05:14 +00002668def s4_6ImmPred: PatLeaf<(i32 imm), [{
2669 int64_t V = N->getSExtValue();
2670 return isShiftedInt<4,6>(V);
2671}]>;
2672
2673def s4_7ImmPred: PatLeaf<(i32 imm), [{
2674 int64_t V = N->getSExtValue();
2675 return isShiftedInt<4,7>(V);
2676}]>;
2677
2678
// Vector store patterns for single vector registers.
//   VTSgl - vector type stored from VectorRegs (patterns require UseHVXSgl).
//   VTDbl - vector type stored from VectorRegs128B (patterns require
//           UseHVXDbl).
// Aligned stores select V6_vS32b_ai*, unaligned stores V6_vS32Ub_ai*.
// Without an explicit offset the immediate operand is 0; an (add base, imm)
// address is folded into the instruction when the immediate satisfies
// s4_6ImmPred (VTSgl) or s4_7ImmPred (VTDbl).
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00002679multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
2680 // Aligned stores
2681 def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
2682 (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
2683 Requires<[UseHVXSgl]>;
2684 def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
2685 (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
2686 Requires<[UseHVXSgl]>;
2687
2688 // 128B Aligned stores
2689 def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
2690 (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
2691 Requires<[UseHVXDbl]>;
2692 def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
2693 (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
2694 Requires<[UseHVXDbl]>;
2695
2696 // Fold Add R+OFF into vector store.
2697 let AddedComplexity = 10 in {
2698 def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
2699 (add IntRegs:$src2, s4_6ImmPred:$offset)),
2700 (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
2701 (VTSgl VectorRegs:$src1))>,
2702 Requires<[UseHVXSgl]>;
2703 def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
2704 (add IntRegs:$src2, s4_6ImmPred:$offset)),
2705 (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
2706 (VTSgl VectorRegs:$src1))>,
2707 Requires<[UseHVXSgl]>;
2708
2709 // Fold Add R+OFF into vector store 128B.
2710 def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
2711 (add IntRegs:$src2, s4_7ImmPred:$offset)),
2712 (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
2713 (VTDbl VectorRegs128B:$src1))>,
2714 Requires<[UseHVXDbl]>;
2715 def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
2716 (add IntRegs:$src2, s4_7ImmPred:$offset)),
2717 (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
2718 (VTDbl VectorRegs128B:$src1))>,
2719 Requires<[UseHVXDbl]>;
2720 }
2721}
2722
// One instantiation per element width: (64-byte type, 128-byte type).
2723defm : vS32b_ai_pats <v64i8, v128i8>;
2724defm : vS32b_ai_pats <v32i16, v64i16>;
2725defm : vS32b_ai_pats <v16i32, v32i32>;
2726defm : vS32b_ai_pats <v8i64, v16i64>;
2727
2728
// Vector load patterns for single vector registers; the load-side mirror of
// the vS32b_ai_pats store patterns.
//   VTSgl - result type for the UseHVXSgl patterns.
//   VTDbl - result type for the UseHVXDbl patterns.
// Aligned loads select V6_vL32b_ai*, unaligned loads V6_vL32Ub_ai*.  A bare
// register address uses offset 0; an (add base, imm) address is folded when
// the immediate satisfies s4_6ImmPred (VTSgl) or s4_7ImmPred (VTDbl).
2729multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
2730 // Aligned loads
2731 def : Pat < (VTSgl (alignedload IntRegs:$addr)),
2732 (V6_vL32b_ai IntRegs:$addr, 0) >,
2733 Requires<[UseHVXSgl]>;
2734 def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
2735 (V6_vL32Ub_ai IntRegs:$addr, 0) >,
2736 Requires<[UseHVXSgl]>;
2737
2738 // 128B Load
2739 def : Pat < (VTDbl (alignedload IntRegs:$addr)),
2740 (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
2741 Requires<[UseHVXDbl]>;
2742 def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
2743 (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
2744 Requires<[UseHVXDbl]>;
2745
2746 // Fold Add R+OFF into vector load.
2747 let AddedComplexity = 10 in {
2748 def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
2749 (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
2750 Requires<[UseHVXDbl]>;
2751 def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
2752 (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
2753 Requires<[UseHVXDbl]>;
2754
2755 def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
2756 (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
2757 Requires<[UseHVXSgl]>;
2758 def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
2759 (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
2760 Requires<[UseHVXSgl]>;
2761 }
2762}
2763
// One instantiation per element width: (64-byte type, 128-byte type).
2764defm : vL32b_ai_pats <v64i8, v128i8>;
2765defm : vL32b_ai_pats <v32i16, v64i16>;
2766defm : vL32b_ai_pats <v16i32, v32i32>;
2767defm : vL32b_ai_pats <v8i64, v16i64>;
2768
// Store patterns for double vector registers (VecDblRegs / VecDblRegs128B).
// These select the PS_vstorerw* pseudo-named instructions: PS_vstorerw_ai*
// for aligned stores and PS_vstorerwu_ai* for unaligned ones, always with
// offset 0 -- no (add base, imm) folding is provided here.
2769multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
2770 def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
2771 (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
2772 Requires<[UseHVXSgl]>;
2773 def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
2774 (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
2775 Requires<[UseHVXSgl]>;
2776
2777 def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
2778 (PS_vstorerw_ai_128B IntRegs:$addr, 0,
2779 (VTDbl VecDblRegs128B:$src1))>,
2780 Requires<[UseHVXDbl]>;
2781 def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
2782 (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
2783 (VTDbl VecDblRegs128B:$src1))>,
2784 Requires<[UseHVXDbl]>;
2785}
2786
// Double-register types: twice the width of the single-register ones.
2787defm : STrivv_pats <v128i8, v256i8>;
2788defm : STrivv_pats <v64i16, v128i16>;
2789defm : STrivv_pats <v32i32, v64i32>;
2790defm : STrivv_pats <v16i64, v32i64>;
2791
// Load patterns for the double-vector value types; the load-side mirror of
// STrivv_pats.  Aligned loads select PS_vloadrw_ai*, unaligned loads
// PS_vloadrwu_ai*, always with offset 0.
2792multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
2793 def : Pat<(VTSgl (alignedload I32:$addr)),
2794 (PS_vloadrw_ai I32:$addr, 0)>,
2795 Requires<[UseHVXSgl]>;
2796 def : Pat<(VTSgl (unalignedload I32:$addr)),
2797 (PS_vloadrwu_ai I32:$addr, 0)>,
2798 Requires<[UseHVXSgl]>;
2799
2800 def : Pat<(VTDbl (alignedload I32:$addr)),
2801 (PS_vloadrw_ai_128B I32:$addr, 0)>,
2802 Requires<[UseHVXDbl]>;
2803 def : Pat<(VTDbl (unalignedload I32:$addr)),
2804 (PS_vloadrwu_ai_128B I32:$addr, 0)>,
2805 Requires<[UseHVXDbl]>;
2806}
2807
2808defm : LDrivv_pats <v128i8, v256i8>;
2809defm : LDrivv_pats <v64i16, v128i16>;
2810defm : LDrivv_pats <v32i32, v64i32>;
2811defm : LDrivv_pats <v16i64, v32i64>;
2812
// Scalar-predicate select between whole HVX vectors.  PS_vselect* handles
// single vector registers and PS_wselect* double vector registers; the first
// group applies with UseHVXSgl, the second (the _128B instructions and
// register classes) with UseHVXDbl.  Only the i32-element types are listed.
2813let Predicates = [HasV60T,UseHVXSgl] in {
2814 def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
2815 (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
2816 def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt),
2817 (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>;
2818}
2819let Predicates = [HasV60T,UseHVXDbl] in {
2820 def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
2821 (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
2822 def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt),
2823 (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>;
2824}
2825
2826
// VCOMBINE joins two vectors of the same type into one vector of twice the
// size: both operands share a type (SDTCisSameAs<1, 2>) and that type is a
// sub-vector of the result (SDTCisSubVecOfVec<1, 0>).
def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
                                             SDTCisSubVecOfVec<1, 0>]>;

def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;

// 64-byte mode: two v16i32 single vectors form a v32i32 pair.
def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
                                  (v16i32 VectorRegs:$Vt))),
         (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
      Requires<[UseHVXSgl]>;
// 128-byte mode: the operands are single 128-byte vectors, so they are
// named with VectorRegs128B, matching the other UseHVXDbl patterns in this
// file that take a v32i32 operand.
def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VectorRegs128B:$Vs),
                                  (v32i32 VectorRegs128B:$Vt))),
         (V6_vcombine_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
      Requires<[UseHVXDbl]>;
2840
// VPACK takes two vectors of the same type (SDTCisSameAs<1, 2>) plus an
// integer selector (SDTCisInt<3>) and produces one vector.
def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
                                          SDTCisInt<3>]>;

def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;

// 0 as the last argument denotes vpacke. 1 denotes vpacko
// 64-byte mode (UseHVXSgl): byte and halfword element variants.
def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
                              (v64i8 VectorRegs:$Vt), (i32 0))),
         (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
      Requires<[UseHVXSgl]>;
def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
                              (v64i8 VectorRegs:$Vt), (i32 1))),
         (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
      Requires<[UseHVXSgl]>;
def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
                               (v32i16 VectorRegs:$Vt), (i32 0))),
         (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
      Requires<[UseHVXSgl]>;
def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
                               (v32i16 VectorRegs:$Vt), (i32 1))),
         (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
      Requires<[UseHVXSgl]>;

// 128-byte mode (UseHVXDbl).  The operands are single 128-byte vectors, so
// they are named with VectorRegs128B, matching the vector store patterns in
// this file that take v128i8/v64i16 operands under UseHVXDbl.
def: Pat<(v128i8 (HexagonVPACK (v128i8 VectorRegs128B:$Vs),
                               (v128i8 VectorRegs128B:$Vt), (i32 0))),
         (V6_vpackeb_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
      Requires<[UseHVXDbl]>;
def: Pat<(v128i8 (HexagonVPACK (v128i8 VectorRegs128B:$Vs),
                               (v128i8 VectorRegs128B:$Vt), (i32 1))),
         (V6_vpackob_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
      Requires<[UseHVXDbl]>;
def: Pat<(v64i16 (HexagonVPACK (v64i16 VectorRegs128B:$Vs),
                               (v64i16 VectorRegs128B:$Vt), (i32 0))),
         (V6_vpackeh_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
      Requires<[UseHVXDbl]>;
def: Pat<(v64i16 (HexagonVPACK (v64i16 VectorRegs128B:$Vs),
                               (v64i16 VectorRegs128B:$Vt), (i32 1))),
         (V6_vpackoh_128B VectorRegs128B:$Vs, VectorRegs128B:$Vt)>,
      Requires<[UseHVXDbl]>;
2880
// Pattern leaves that pair a short-vector value type with the register class
// holding it: i1 vectors in PredRegs, 32-bit vectors in IntRegs and 64-bit
// vectors in DoubleRegs.
2881def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
2882def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
2883def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
2884def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
2885def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
2886def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
2887def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
2888def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
2889
2890
// bitconvert between two types held in the same register class emits no
// instruction: the result is the source register, re-typed.  Each multiclass
// defines both directions (a -> b and b -> a); bitconvert_32 works on
// IntRegs and bitconvert_64 on DoubleRegs.
2891multiclass bitconvert_32<ValueType a, ValueType b> {
2892 def : Pat <(b (bitconvert (a IntRegs:$src))),
2893 (b IntRegs:$src)>;
2894 def : Pat <(a (bitconvert (b IntRegs:$src))),
2895 (a IntRegs:$src)>;
2896}
2897
2898multiclass bitconvert_64<ValueType a, ValueType b> {
2899 def : Pat <(b (bitconvert (a DoubleRegs:$src))),
2900 (b DoubleRegs:$src)>;
2901 def : Pat <(a (bitconvert (b DoubleRegs:$src))),
2902 (a DoubleRegs:$src)>;
2903}
2904
2905// Bit convert vector types to integers.
2906defm : bitconvert_32<v4i8, i32>;
2907defm : bitconvert_32<v2i16, i32>;
2908defm : bitconvert_64<v8i8, i64>;
2909defm : bitconvert_64<v4i16, i64>;
2910defm : bitconvert_64<v2i32, i64>;
2911
// Generic shifts of 64-bit vectors by an immediate amount.
// Halfword elements (v4i16): the amount must satisfy u4_0ImmPred.
2912def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2913 (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>;
2914def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2915 (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>;
2916def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
2917 (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>;
2918
// Word elements (v2i32): the amount must satisfy u5_0ImmPred.
2919def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2920 (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>;
2921def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2922 (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>;
2923def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
2924 (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>;
2925
// Add and subtract of two-halfword vectors held in 32-bit registers map
// directly onto A2_svaddh / A2_svsubh.
2926def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
2927 (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
2928
2929def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
2930 (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
2931
// Target-specific splat nodes (byte and halfword) and the instructions that
// implement them.
2932def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
2933def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
2934
2935// Replicate the low 8-bits from 32-bits input register into each of the
2936// four bytes of 32-bits destination register.
2937def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
2938
2939// Replicate the low 16-bits from 32-bits input register into each of the
2940// four halfwords of 64-bits destination register.
2941def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
2942
2943
// Two-operand vector arithmetic/logic pattern.
//   MI   - instruction to select.
//   Op   - generic binary node to match.
//   Type - PatLeaf giving the value type and register class of both operands.
2944class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
2945 : Pat <(Op Type:$Rss, Type:$Rtt),
2946 (MI Type:$Rss, Type:$Rtt)>;
2947
// Element-wise add/sub on 64-bit vectors.
2948def: VArith_pat <A2_vaddub, add, V8I8>;
2949def: VArith_pat <A2_vaddh, add, V4I16>;
2950def: VArith_pat <A2_vaddw, add, V2I32>;
2951def: VArith_pat <A2_vsubub, sub, V8I8>;
2952def: VArith_pat <A2_vsubh, sub, V4I16>;
2953def: VArith_pat <A2_vsubw, sub, V2I32>;
2954
// Bitwise logic on 32-bit vectors reuses the scalar 32-bit instructions.
2955def: VArith_pat <A2_and, and, V2I16>;
2956def: VArith_pat <A2_xor, xor, V2I16>;
2957def: VArith_pat <A2_or, or, V2I16>;
2958
// Bitwise logic on 64-bit vectors reuses the 64-bit ("p") instructions,
// independent of the element width.
2959def: VArith_pat <A2_andp, and, V8I8>;
2960def: VArith_pat <A2_andp, and, V4I16>;
2961def: VArith_pat <A2_andp, and, V2I32>;
2962def: VArith_pat <A2_orp, or, V8I8>;
2963def: VArith_pat <A2_orp, or, V4I16>;
2964def: VArith_pat <A2_orp, or, V2I32>;
2965def: VArith_pat <A2_xorp, xor, V8I8>;
2966def: VArith_pat <A2_xorp, xor, V4I16>;
2967def: VArith_pat <A2_xorp, xor, V2I32>;
2968
// Vector shifts whose amount operand is itself a vector built by repeating
// one immediate.  The shift is selected as the immediate-form instruction
// using that single immediate.
// v2i32: the amount is a HexagonCOMBINE of the same u5 immediate twice.
2969def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2970 (i32 u5_0ImmPred:$c))))),
2971 (S2_asr_i_vw V2I32:$b, imm:$c)>;
2972def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2973 (i32 u5_0ImmPred:$c))))),
2974 (S2_lsr_i_vw V2I32:$b, imm:$c)>;
2975def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
2976 (i32 u5_0ImmPred:$c))))),
2977 (S2_asl_i_vw V2I32:$b, imm:$c)>;
2978
// v4i16: the amount is a HexagonVSPLATH of a u4 immediate.
2979def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2980 (S2_asr_i_vh V4I16:$b, imm:$c)>;
2981def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2982 (S2_lsr_i_vh V4I16:$b, imm:$c)>;
2983def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
2984 (S2_asl_i_vh V4I16:$b, imm:$c)>;
2985
2986
// Type profiles for the target-specific vector shift nodes: one result and
// two operands, where the result and the first operand are the same vector
// type (v2i32 or v4i16) and the second operand is an integer.
2987def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
2988 [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
2989def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
2990 [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
2991
// Shift nodes: SRA / SRL / SHL, each in a word (W) and a halfword (H) form.
2992def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
2993def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
2994def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
2995def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
2996def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
2997def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
2998
// Immediate shift amounts select the "_i_" instruction forms; word shifts
// require a u5 immediate and halfword shifts a u4 immediate.
2999def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
3000 (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
3001def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
3002 (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
3003def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
3004 (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
3005def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
3006 (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
3007def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
3008 (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
3009def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
3010 (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
3011
// Vector shift by a register amount.
//   MI    - instruction to select (the "_r_" forms below).
//   Op    - target-specific vector shift node.
//   Value - PatLeaf for the vector being shifted; the amount is an I32.
3012class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
3013 : Pat <(Op Value:$Rs, I32:$Rt),
3014 (MI Value:$Rs, I32:$Rt)>;
3015
3016def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
3017def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
3018def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
3019def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
3020def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
3021def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
3022
3023
// Type profiles for the target-specific vector compare nodes: two operands
// of the same vector type (v8i8, v4i16 or v2i32) and an i1 result.
3024def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
3025 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
3026def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
3027 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
3028def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
3029 [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
3030
// Compare nodes: EQ, GT and GTU for byte (B), halfword (H) and word (W)
// elements.
3031def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
3032def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
3033def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
3034def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
3035def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
3036def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
3037def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
3038def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
3039def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
3040
3041
// Select a target-specific vector compare node that produces an i1 result.
//   MI    - compare instruction.
//   Op    - HexagonVCMP* node.
//   Value - PatLeaf for both operands.
3042class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
3043 : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
3044 (MI Value:$Rs, Value:$Rt)>;
3045
3046def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
3047def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
3048def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
3049
3050def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
3051def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
3052def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
3053
3054def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
3055def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
3056def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
3057
3058
// Select a generic setcc on vectors that produces a vector-of-i1 result.
//   MI    - compare instruction.
//   Op    - generic compare fragment (seteq, setgt, setugt).
//   InVal - PatLeaf for both operands.
//   OutTy - predicate vector type of the result.
// Only word and halfword element types are instantiated here.
3059class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
3060 : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
3061 (MI InVal:$Rs, InVal:$Rt)>;
3062
3063def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
3064def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
3065def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
3066
3067def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
3068def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
3069def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
3070
// v2i32 multiply and multiply-accumulate are selected as the PS_vmulw and
// PS_vmulw_acc instructions (pseudo-named; their expansion is not part of
// this file).
3071def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
3072 (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>;
3073def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
3074 (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
3075
3076
3077// Adds two v4i8: Hexagon does not have an insn for this one, so we
3078// use the double add v8i8, and use only the low part of the result.
// Both operands are widened to 64 bits with ToZext64 (defined outside this
// excerpt) before the v8i8 instruction is applied.
3079def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003080 (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003081
3082// Subtract two v4i8: Hexagon does not have an insn for this one, so we
3083// use the double sub v8i8, and use only the low part of the result.
3084def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003085 (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003086
3087//
3088// No 32 bit vector mux.
// A select between 32-bit vectors is done with the 64-bit C2_vmux: both
// inputs are widened with ToZext64 and the low subregister of the result is
// kept.
3089//
3090def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003091 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003092def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003093 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003094
3095//
3096// 64-bit vector mux.
// vselect with a predicate vector maps directly onto C2_vmux for every
// 64-bit element layout.
3097//
3098def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
3099 (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
3100def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
3101 (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
3102def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
3103 (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
3104
3105//
3106// No 32 bit vector compare.
// Compares of 32-bit vectors use the 64-bit vector compare instructions on
// operands widened with ToZext64.
3107//
3108def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003109 (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003110def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003111 (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003112def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003113 (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003114
3115def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003116 (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003117def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003118 (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003119def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
Krzysztof Parzyszek4b4012a2016-11-05 21:02:54 +00003120 (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
Krzysztof Parzyszeka8d63dc2016-11-05 15:01:38 +00003121
3122
// Select a compare by emitting another compare instruction with its two
// operands exchanged.
//   InvMI - instruction to emit; it receives ($Rt, $Rs).
//   CmpOp - compare fragment being matched on ($Rs, $Rt).
//   Value - PatLeaf for both operands.
//   CmpTy - result type of the compare (i1 or a predicate vector type).
3123class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
3124 ValueType CmpTy>
3125 : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
3126 (InvMI Value:$Rt, Value:$Rs)>;
3127
3128// Map from a compare operation to the corresponding instruction with the
3129// order of operands reversed, e.g. x < y --> cmp.gt(y,x).
3130def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
3131def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
3132def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
3133def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
3134def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
3135def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
3136
3137def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
3138def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
3139def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
3140def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
3141def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
3142def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
3143
// Map from vcmpne(Rss) -> !vcmpweq(Rss).
// rs != rt -> !(rs == rt).
// The operands are vectors of two words and the result is a v2i1, so the
// word-granular equality compare A2_vcmpweq must be used (the same
// instruction that implements seteq on V2I32); a byte-granular compare would
// produce a per-byte result that does not correspond to v2i1 lanes.
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
3148
3149
3150// Truncate: from vector B copy all 'E'ven 'B'yte elements:
3151// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
3152def: Pat<(v4i8 (trunc V4I16:$Rs)),
3153 (S2_vtrunehb V4I16:$Rs)>;
3154
3155// Truncate: from vector B copy all 'O'dd 'B'yte elements:
3156// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
3157// S2_vtrunohb
3158
3159// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
3160// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
3161// S2_vtruneh
3162
// v2i32 -> v2i16: S2_packhl is applied to the high and low words of the
// source and the low subregister of its 64-bit result is kept.
3163def: Pat<(v2i16 (trunc V2I32:$Rs)),
3164 (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
3165
3166
// Target-specific vector sign-extension nodes.
3167def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
3168def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
3169
// NOTE(review): VSXTBW is selected as S2_vsxthw (the halfword-to-word
// instruction, going by its name) -- confirm this is the intended mapping.
3170def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
3171def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
3172
// Generic vector extensions from a 32-bit to a 64-bit vector.  anyext is
// implemented with the same instructions as zext.
3173def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
3174def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
3175def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
3176def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
3177def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
3178def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
3179
// In-register sign extension of each word of a v2i32: the high and low
// words are extended separately with the scalar instruction and re-joined
// with A2_combinew (high word first).
3180// Sign extends a v2i8 into a v2i32.
3181def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
3182 (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
3183
3184// Sign extends a v2i16 into a v2i32.
3185def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
3186 (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
3187
3188
3189// Multiplies two v2i16 and returns a v2i32. We are using here the
3190// saturating multiply, as hexagon does not provide a non saturating
3191// vector multiply, and saturation does not impact the result that is
3192// in double precision of the operands.
3193
3194// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
3195// with the C semantics for this one, this pattern uses the half word
3196// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
3197// then truncated to fit this back into a v2i16 and to simulate the
3198// wrap around semantics for unsigned in C.
3199def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
3200 (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
3201
// v2i16 multiply: vmpyh produces a v2i32 product, S2_vtrunewh combines it
// with an all-zero first operand (A2_combineii 0, 0), and the low word of
// that result is the v2i16 value.
3202def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
3203 (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
3204 (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
3205
3206// Multiplies two v4i16 vectors.
// The high words and the low words of the operands are multiplied
// separately with vmpyh; S2_vtrunewh merges the two products into the
// v4i16 result.
3207def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
3208 (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
3209 (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
3210
// Byte-vector multiply expansion that does not use M5_vmpybsu.
// Each operand is sign-extended with S2_vsxtbh; the high words and the low
// words of the extended values are multiplied with vmpyh, and S2_vtrunewh
// merges the two products.
3211def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
3212 (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
3213 (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
3214
3215// Multiplies two v4i8 vectors.
// With HasV5T the product comes from M5_vmpybsu and S2_vtrunehb narrows it
// back to v4i8.
3216def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
3217 (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
3218 Requires<[HasV5T]>;
3219
// Alternative expansion of the same operation through VMPYB_no_V5.
// NOTE(review): this pattern carries no Requires<> clause, so it is not
// restricted to subtargets lacking HasV5T. Confirm that the choice between
// the two patterns is resolved as intended.
3220def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
3221 (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
3222
3223// Multiplies two v8i8 vectors.
// With HasV5T, the high and low words are each multiplied with M5_vmpybsu
// and narrowed with S2_vtrunehb; A2_combinew joins the two halves.
3224def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
3225 (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
3226 (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
3227 Requires<[HasV5T]>;
3228
// Alternative expansion with VMPYB_no_V5 in place of M5_vmpybsu.
// NOTE(review): no Requires<> clause here either. Confirm that the choice
// between this pattern and the HasV5T one is resolved as intended.
3229def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
3230 (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
3231 (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
3232
// Type profile for a binary node: one result and two operands, all of the
// same type, with the result constrained to i64.
3233def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
3234 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
3235
// Shuffle DAG nodes (HexagonISD::SHUFFEB/SHUFFEH/SHUFFOB/SHUFFOH), all using
// the 64-bit binary type profile above.
3236def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
3237def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
3238def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
3239def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
3240
// Pattern template: selects the i64 binary node Op, applied to two
// DoubleRegs operands, to instruction MI with the operands in the same
// order.
3241class ShufflePat<InstHexagon MI, SDNode Op>
3242 : Pat<(i64 (Op DoubleRegs:$Rss, DoubleRegs:$Rtt)),
3243 (i64 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
3244
// Each def below instantiates ShufflePat, pairing one S2_shuff* instruction
// with the HexagonSHUFF* node it is selected for.
// NOTE(review): in the formulas below A is presumably the result and B, C
// the two source operands; confirm against the instruction manual.
3245// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
3246def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
3247
3248// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
3249def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
3250
3251// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
3252def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
3253
3254// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
3255def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
3256
3257
3258// Matches a truncstore whose in-memory type is v4i8 (used below for
// storing a v4i16 value).
3259def truncstorev4i8: PatFrag<(ops node:$data, node:$addr),
3260 (truncstore node:$data, node:$addr),
3261 [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
3262
3263// Matches a truncstore whose in-memory type is v2i16 (used below for
// storing a v2i32 value).
3264def truncstorev2i16: PatFrag<(ops node:$data, node:$addr),
3265 (truncstore node:$data, node:$addr),
3266 [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
3267
// v2i32 -> v2i16 truncating store: the value is narrowed with S2_packhl
// (low word of the result) and written by S2_storeri_io at offset 0.
3268def: Pat<(truncstorev2i16 V2I32:$Val, I32:$Ptr),
3269 (S2_storeri_io I32:$Ptr, 0, (LoReg (S2_packhl (HiReg $Val),
3270 (LoReg $Val))))>;
3271
// v4i16 -> v4i8 truncating store: the value is narrowed with S2_vtrunehb
// and written by S2_storeri_io at offset 0.
3272def: Pat<(truncstorev4i8 V4I16:$Val, I32:$Ptr),
3273 (S2_storeri_io I32:$Ptr, 0, (S2_vtrunehb V4I16:$Val))>;
3274
3275
3276// Extending loads whose in-memory type is v2i8: zero-extending variant.
3277def zextloadv2i8: PatFrag<(ops node:$addr), (zextload node:$addr),
3278 [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
3279
// Sign-extending variant of the v2i8 load fragment.
3280def sextloadv2i8: PatFrag<(ops node:$addr), (sextload node:$addr),
3281 [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
3282
// v2i8 -> v2i16 extending loads: the value loaded at offset 0 is widened
// with S2_vzxtbh (zext) or S2_vsxtbh (sext) and the low word of the v4i16
// result is kept.
3283def: Pat<(v2i16 (zextloadv2i8 I32:$Addr)),
3284 (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Addr, 0))))>;
3285
3286def: Pat<(v2i16 (sextloadv2i8 I32:$Addr)),
3287 (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Addr, 0))))>;
3288
// v2i8 -> v2i32 extending loads: same sequence as above, followed by a
// further widening with S2_vzxthw (zext) or S2_vsxthw (sext).
3289def: Pat<(v2i32 (zextloadv2i8 I32:$Addr)),
3290 (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Addr, 0)))))>;
3291
3292def: Pat<(v2i32 (sextloadv2i8 I32:$Addr)),
3293 (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Addr, 0)))))>;
3294