blob: 11bce45161b3eae380e44c0633e1703c473ce440 [file] [log] [blame]
Javed Absarf043dac2016-11-15 11:34:54 +00001//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue at most two instructions in each cycle.
17// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18// A number of forwarding paths enable results of computations to be input
19// to subsequent operations before they are written to registers.
20// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Top-level machine model for the Cortex-R52: in-order, dual-issue.
def CortexR52Model : SchedMachineModel {
  let IssueWidth        = 2; // 2 micro-ops dispatched per cycle
  let MicroOpBufferSize = 0; // R52 is an in-order processor
  let LoadLatency       = 1; // Optimistic, assuming no misses
  let MispredictPenalty = 8; // A branch direction mispredict, including PFU
  let CompleteModel     = 0; // Covers instructions applicable to cortex-r52.
}
29
30
31//===----------------------------------------------------------------------===//
32// Define each kind of processor resource and number available.
33
// Each pipeline is modeled as a ProcResource; the template argument is the
// number of units of that kind. BufferSize is 0 for all of them because the
// Cortex-R52 is an in-order processor.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>; // Int ALU
  def R52UnitMAC   : ProcResource<1>; // Int MAC
  def R52UnitDiv   : ProcResource<1>; // Int Division
  def R52UnitLd    : ProcResource<1>; // Load/Store
  def R52UnitB     : ProcResource<1>; // Branch
  def R52UnitFPALU : ProcResource<2>; // FP ALU
  def R52UnitFPMUL : ProcResource<2>; // FP MUL
  def R52UnitFPDIV : ProcResource<1>; // FP DIV
}
45
// Cortex-R52 specific SchedReads.
// Integer pipeline stages:
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;
// Floating-point pipeline stages (F0 maps to the ISS stage of the integer
// pipe):
def R52Read_F0  : SchedRead;
def R52Read_F1  : SchedRead;
def R52Read_F2  : SchedRead;
54
55
56//===----------------------------------------------------------------------===//
57// Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59let SchedModel = CortexR52Model in {
60
// ALU - the result is written in Late EX2, whether or not a shift was
// required, so all four ALU write flavours share one latency.
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}

// Compares - occupy an ALU and are modeled with zero latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
71
// Multiply - aliased to sub-target specific later

// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8]; // non-pipelined: the divider is held for 8 cycles
}
78
// Branches - LR written in Late EX2
// Note that table branches are issued on an ALU rather than the branch unit.
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// Misc - a no-op consumes no resource and no micro-op.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
Javed Absarf043dac2016-11-15 11:34:54 +000086
// Integer pipeline by-passes
def : ReadAdvance<ReadALU,   1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;
Javed Absarf043dac2016-11-15 11:34:54 +000092
// Floating-point. Map target-defined SchedReadWrites to subtarget

// Multiplies: the 64-bit form lists the FP multiplier twice.
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// Multiply-accumulate: latency is 11 as it is internally two insns
// (MUL then ADD), so both the FP multiplier and the FP ALU are listed.
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}

// FP divide takes a fixed number of cycles and is not pipelined, hence
// ResourceCycles equals Latency.
// NOTE(review): these use R52UnitDiv although an R52UnitFPDIV resource is
// also defined in this file - confirm this is intentional.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;
  let ResourceCycles = [7];
}
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let Latency = 17;
  let ResourceCycles = [17];
}

// Square roots carry the divide latencies but no ResourceCycles override.
// NOTE(review): confirm whether sqrt should also be modeled as non-pipelined.
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> {
  let Latency = 7;
}
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> {
  let Latency = 17;
}
121
// Vector stores: resource-less placeholders, overridden via InstRW for this
// processor.
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
127
// FP by-passes: multiply and multiply-accumulate operands are read in F1.
def : ReadAdvance<ReadFPMUL, 1>;
def : ReadAdvance<ReadFPMAC, 1>;
130
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.

// Forwarding information - based on when an operand is read.
// Integer pipe:
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
// Floating-point pipe:
def : ReadAdvance<R52Read_F0, 0>;
def : ReadAdvance<R52Read_F1, 1>;
def : ReadAdvance<R52Read_F2, 2>;
141
142
// Cortex-R52 specific SchedWrites for use with InstRW

// Multiply / multiply-accumulate. The "Hi" write carries the same latency
// but contributes no micro-op.
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4;
}
def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4;
  let NumMicroOps = 0;
}

// Integer divide; not pipelined, so the unit is held for the full latency.
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8];
}

// Loads and stores share the single load/store unit.
def R52WriteLd : SchedWriteRes<[R52UnitLd]> {
  let Latency = 4;
}
def R52WriteST : SchedWriteRes<[R52UnitLd]> {
  let Latency = 4;
}

// Address write-back and condition-code writes: no resource, zero latency.
def R52WriteAdr : SchedWriteRes<[]> {
  let Latency = 0;
}
def R52WriteCC : SchedWriteRes<[]> {
  let Latency = 0;
}

// ALU writes, distinguished by the stage named in the suffix.
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 2;
}
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 3;
}
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> {
  let Latency = 4;
}

// Same EX2/WRI latencies without reserving any resource.
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> {
  let Latency = 3;
}
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> {
  let Latency = 4;
}
161
// Alias generics to sub-target specific

// Multiplies
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;

// Multiply-accumulates
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;

// Memory accesses (preloads are treated as loads)
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
Javed Absarbb8dcc62017-02-02 21:08:12 +0000174
// FP SchedWrites. The _F<n> suffix names a pipeline stage; the "2" variants
// list the unit twice.
def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 4;
}
def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 4;
}
def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 5;
}
def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 5;
}
def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 6;
}
def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 6;
}
def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> {
  let Latency = 6;
}
def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
  let Latency = 6;
}

// Multiply-accumulate is internally two insns (MUL then ADD), hence the
// longer latency and the use of both the FP multiplier and the FP ALU.
def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
  let Latency = 11;
}
def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                       R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 11;
}

// FP loads and stores go through the load/store unit.
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
}
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
}
201
//===----------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrites to processor specific ones
//
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
Javed Absarf043dac2016-11-15 11:34:54 +0000209
//===----------------------------------------------------------------------===//
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
//
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extends
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "SXTB", "SXTH", "SXTB16",
                        "UXTB", "UXTH", "UXTB16",
                        "t2SXTB", "t2SXTH", "t2SXTB16",
                        "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate moves
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;

// Global-address pseudo moves; the _ldr form is scheduled as a load.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
             (instregex "MOV_ga_pcrel_ldr")>;

// Select
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SEL", "t2SEL")>;

// Bitfield clear / insert / extract
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
             (instregex "BFC", "BFI", "UBFX", "SBFX",
                        "(t|t2)BFC", "(t|t2)BFI",
                        "(t|t2)UBFX", "(t|t2)SBFX")>;
231
// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
             (instregex "QADD", "QSUB", "QDADD", "QDSUB",
                        "SSAT", "SSAT16", "USAT",
                        "QADD8", "QADD16", "QSUB8", "QSUB16",
                        "QASX", "QSAX",
                        "UQADD8", "UQADD16","UQSUB8","UQSUB16",
                        "UQASX","UQSAX",
                        "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
                        "t2SSAT", "t2SSAT16", "t2USAT",
                        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16",
                        "t2QASX", "t2QSAX",
                        "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16",
                        "t2UQASX","t2UQSAX","t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SADD8", "SADD16", "SSUB8", "SSUB16",
                        "SASX", "SSAX",
                        "UADD8", "UADD16", "USUB8", "USUB16",
                        "UASX", "USAX",
                        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16",
                        "t2SASX", "t2SSAX",
                        "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                        "t2UASX", "t2USAX")>;

// SH*/UH* parallel forms and SXTA*/UXTA* extend-and-add forms.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16",
                        "SHASX", "SHSAX",
                        "SXTAB", "SXTAB16", "SXTAH",
                        "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
                        "UHASX", "UHSAX",
                        "UXTAB", "UXTAB16", "UXTAH",
                        "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
                        "t2SHASX", "t2SHSAX",
                        "t2SXTAB", "t2SXTAB16", "t2SXTAH",
                        "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
                        "t2UHASX", "t2UHSAX",
                        "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;
Javed Absarf043dac2016-11-15 11:34:54 +0000260
// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
             (instregex "MUL", "SMMUL", "SMMULR",
                        "SMULBB", "SMULBT", "SMULTB", "SMULTT",
                        "SMULWB", "SMULWT", "SMUSD", "SMUSDX",
                        "t2MUL", "t2SMMUL", "t2SMMULR",
                        "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
                        "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "MLA", "MLS",
                        "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
                        "t2MLA", "t2MLS",
                        "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
                        "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
                        "SMLABB", "SMLABT", "SMLATB", "SMLATT",
                        "SMLSD", "SMLSDX",
                        "SMLAWB", "SMLAWT",
                        "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
                        "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
                        "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
                        "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
                        "SMLAL", "UMLAL", "SMLALBT",
                        "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX",
                        "SMLSLD", "SMLSLDX",
                        "UMAAL", "t2SMLAL", "t2UMLAL",
                        "t2SMLALBT", "t2SMLALTB", "t2SMLALTT",
                        "t2SMLALD", "t2SMLALDX",
                        "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide (Thumb-2 encodings)
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
             (instregex "t2SDIV", "t2UDIV")>;
Javed Absarf043dac2016-11-15 11:34:54 +0000288
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform

// Loads without base-register write-back.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
             (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$",
                        "t2LDR(i8|i12|s|pci)",
                        "t2LDR(H|B)(i8|i12|s|pci)",
                        "LDREX", "t2LDREX",
                        "tLDR[BH](r|i|spi|pci|pciASM)",
                        "tLDR(r|i|spi|pci|pciASM)",
                        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)",
                        "LDRS(H|B)$",
                        "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8",
                        "LDRD$", "LDA", "t2LDA")>;

// Pre/post-indexed and unprivileged (T) forms; R52WriteAdr models the
// additional address result.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
             (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)",
                        "LDRH(_PRE|_POST)",
                        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)",
                        "LDRHT(i|r)",
                        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                        "LDR(SH|SB)(_POST|_PRE)",
                        "t2LDR(SH|SB)(_POST|_PRE)",
                        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
                        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
305
// Register-shifted moves and MOVT
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
             (instregex "MOVT", "t2MOVT")>;

// Data processing, register + immediate
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ",
                        "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri",
                        "SBCri",
                        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ",
                        "t2EORri", "t2MVN", "t2ORRri", "t2RSBS?ri",
                        "t2SBCri")>;

// Data processing, register + register
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC",
                        "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
                        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC",
                        "t2EORrr", "t2SBCrr")>;

// Data processing, register + immediate-shifted register
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi", "EORrsi",
                        "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
                        "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs",
                        "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;

// Data processing, register + register-shifted register
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr",
                        "MVNS?sr", "ORRrsr", "RSBrsr", "RSCrsr",
                        "SBCrsr")>;

// Address generation and immediate-shifted moves
def : InstRW<[R52WriteALU_EX1],
             (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

// Compares, one entry per operand form
def : InstRW<[R52WriteCC, R52Read_EX1],
             (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
             (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
             (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal, RRX (and t2LDC, which shares this entry)
def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
             (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

// Status-register moves are scheduled with the load write.
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
345
// Integer Load, Multiple.
// For every latency 3..25 define a pair of writes:
//   R52WriteILDM<Lat>Cy   - reserves the load/store unit
//   R52WriteILDM<Lat>CyNo - same latency, no resource and no micro-op
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { let Latency = Lat; }
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Lat;
  }
}
// R52ILDMAddr<N>Pred holds when TII->getNumLDMAddresses(*MI) equals N,
// for N in 1..16.
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred :
      SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}

// Address writes for load-multiple, without and with write-back.
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> {
  let Latency = 0;
}
def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
// Variant write for integer load-multiple.
//
// The variant is selected on the R52ILDMAddr<N>Pred predicates (N = 2..16).
// Variant N lists N writes with latencies 4, 5, ..., N+3. With Variadic set,
// the writes are matched against the instruction's variadic operands.
//
// Fix: the 16-address variant was guarded by a second copy of
// R52ILDMAddr15Pred, so it could never be selected. It now uses
// R52ILDMAddr16Pred, matching R52WriteISTM.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy]>,
  // 16 addresses: was (unreachably) keyed on R52ILDMAddr15Pred.
  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  // NOTE(review): the last two entries (18Cy/19Cy) are the resource-using
  // flavour rather than CyNo; kept as-is - confirm this is intended.
  SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                              R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                              R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                              R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                              R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                              R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>
]> { let Variadic=1; }
451
// Integer Store, Multiple
// One store-and-increment step: two micro-ops on the load/store unit.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 2;
  let Latency = 4;
}

// R52WriteISTM<N> repeats that step N times, for N in 1..16.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}
// Variant write for integer store-multiple: selects the R52WriteISTM<N>
// sequence whose N matches the R52ILDMAddr<N>Pred predicate that holds.
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
479
// Load-multiple without base-register update.
def : InstRW<[R52WriteILDM, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)$",
                        "t2LDM(IA|DA|DB|IB)$",
                        "(t|sys)LDM(IA|DA|DB|IB)$")>;

// Load-multiple with base-register update.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)_UPD",
                        "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;

// Load-multiple-and-return forms and Thumb POP.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
Javed Absarf043dac2016-11-15 11:34:54 +0000487
// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$",
                        "STREX", "SRS", "t2SRS", "t2SRSDB",
                        "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
                        "t2STR(i12|i8|s)$",
                        "RFE", "t2RFE",
                        "t2STR[BH](i12|i8|s)$",
                        "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Single-element stores with address write-back.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
                        "STR(i|r)_preidx", "STRB(i|r)_preidx",
                        "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
                        "STR(BT|HT|T)",
                        "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
                        "t2STR_preidx", "t2STR[BH]_preidx",
                        "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Integer Store, Multiple - without and with base-register update.
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)$",
                        "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)_UPD",
                        "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
                        "tPUSH")>;

// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
             (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd],
             (instregex "LDRLIT_ga_pcrel_ldr")>;
517
518
519
//===----------------------------------------------------------------------===//
// VFP, Floating Point Support

// Absolute difference (d-register forms use one FP ALU write, q-register
// forms the doubled write).
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fq|hq)")>;

// Absolute value
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VABS(fq|hq)")>;

// Absolute compares
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add / subtract
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register load / store
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
             (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
             (instregex "VSTR")>;
537
538
//===----------------------------------------------------------------------===//
// Neon Support

// vector multiple load stores

// R52LMAddrPred<N> holds when the MachineInstr has exactly N operands,
// for N in 1..16.
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr :
      SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}

// R52WriteLM<Lat>Cy: a resource-less write with latency Lat (1..32).
foreach Lat = 1-32 in {
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { let Latency = Lat; }
}

// R52ReserveLd<Num>Cy: reserve LdSt resource for Num cycles, no dual-issue.
foreach Num = 1-32 in {
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let ResourceCycles = [Num];
    let NumMicroOps = Num;
    let Latency = 0;
  }
}
// Variant write for vector load-multiple.
//
// The variant is selected on the operand-count predicates R52LMAddrPred<N>,
// taken in pairs (1-2, 3-4, ..., 15-16), one pair per D-register count 1..8.
// A k-register variant lists k result writes with latencies 5, ..., k+4,
// followed by an R52ReserveLd<k+4>Cy write that holds the load/store unit.
//
// Fixes:
//  * The "8 D reg" pair was keyed on Pred14/Pred15. Pred14 is already used
//    by the 7-register pair, so the first 8-register entry was unreachable
//    and Pred16 was never handled. It is now Pred15/Pred16, matching
//    R52WriteSTM.
//  * The Pred5 entry reserved R52ReserveLd4Cy while its twin (Pred6)
//    reserved R52ReserveLd7Cy. Both now use R52ReserveLd7Cy, following the
//    pattern of every other pair.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,    [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,    [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,    [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16,   [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  // unknown number of reg.
  SchedVar<NoSchedPred,       [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,  R52WriteLM8Cy,
                              R52WriteLM9Cy,  R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd5Cy]>
]> { let Variadic=1;}
638
// variable stores. Cannot dual-issue
// R52WriteSTM<L> has latency L; each step up adds two micro-ops and one
// more cycle on the load/store unit.
def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> { let Latency = 5;  let NumMicroOps = 2;  let ResourceCycles = [1];  }
def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> { let Latency = 6;  let NumMicroOps = 4;  let ResourceCycles = [2];  }
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> { let Latency = 7;  let NumMicroOps = 6;  let ResourceCycles = [3];  }
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> { let Latency = 8;  let NumMicroOps = 8;  let ResourceCycles = [4];  }
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> { let Latency = 9;  let NumMicroOps = 10; let ResourceCycles = [5];  }
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> { let Latency = 10; let NumMicroOps = 12; let ResourceCycles = [6];  }
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> { let Latency = 11; let NumMicroOps = 14; let ResourceCycles = [7];  }
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> { let Latency = 12; let NumMicroOps = 16; let ResourceCycles = [8];  }
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> { let Latency = 13; let NumMicroOps = 18; let ResourceCycles = [9];  }
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> { let Latency = 14; let NumMicroOps = 20; let ResourceCycles = [10]; }
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> { let Latency = 15; let NumMicroOps = 22; let ResourceCycles = [11]; }
695
// Variant write for vector store-multiple. Operand-count predicates are
// taken in pairs, each pair mapping to the next R52WriteSTM<L> write.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>, SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>, SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>, SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
716
// Vector loads/stores. Per the original note they issue only in slot-0, and
// another instruction may dual-issue in slot-1 only with the last issue.
// WriteVLD1 sets only a latency; WriteVLD2..4 are all marked SingleIssue.
def : WriteRes<WriteVLD1, [R52UnitLd]> {
  let Latency = 5;
}

let SingleIssue = 1 in {
  def : WriteRes<WriteVLD2, [R52UnitLd]> {
    let Latency = 6;
    let NumMicroOps = 3;
    let ResourceCycles = [2];
  }
  def : WriteRes<WriteVLD3, [R52UnitLd]> {
    let Latency = 7;
    let NumMicroOps = 5;
    let ResourceCycles = [3];
  }
  def : WriteRes<WriteVLD4, [R52UnitLd]> {
    let Latency = 8;
    let NumMicroOps = 7;
    let ResourceCycles = [4];
  }
} // SingleIssue = 1
// Vector store write classes on R52UnitLd. R52WriteVSTnMem has latency
// 4 + n and occupies R52UnitLd for n cycles; micro-op counts are 1, 3, 5, 7, 9.
let Latency = 5, NumMicroOps = 1, ResourceCycles = [1] in
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 6, NumMicroOps = 3, ResourceCycles = [2] in
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 7, NumMicroOps = 5, ResourceCycles = [3] in
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 8, NumMicroOps = 7, ResourceCycles = [4] in
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 9, NumMicroOps = 9, ResourceCycles = [5] in
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]>;
763
764
// VABA / VABAL: one write plus three reads. The 64-bit forms use the single
// FPALU write; the 128-bit and long forms use the double (Write2) variant.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

// VABD / VABDL: one write plus two reads.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// The long form is named after its widened result type, exactly like VABAL
// above, so the suffixes are v8i16/v4i32/v2i64. The previous pattern
// (v16i8|v8i16|v4i32) never matched the v2i64 variants.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
772
// VABS (128-bit forms).
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
      (instregex "VABS(v16i8|v8i16|v4i32)")>;

// VADD / VSUB and their narrowing (HN) forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long / wide add and subtract.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

// Bitwise logic, register forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
      (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
      (instregex "(VAND|VBIC|VEOR)q")>;

// VBIC, immediate forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
      (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
      (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise select family.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
      (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
      (instregex "(VBIF|VBIT|VBSL)q")>;

// Compares, count-leading-zeros and population count.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

// Conversions.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// VDUP (reads R52Read_ISS) and VDUPLN (reads R52Read_F1).
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)q")>;

// VEXT; VSEL shares the 64-bit VEXT entry.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTq(8|16|32|64)")>;

// Fused multiply-accumulate / multiply-subtract.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add / subtract.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
812
// VFP load-multiple.
def : InstRW<[R52WriteVLDM],
      (instregex "VLDM[SD](IA|DB)$")>;

// Min/max, including the pairwise forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;

// OR / OR-NOT and element reversal.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VORR", "VORN", "VREV")>;

// VMRS uses the R52WriteNoRSRC_WRI write and declares no reads.
def : InstRW<[R52WriteNoRSRC_WRI],
      (instregex "VMRS")>;

// Negate.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VNEG")>;

// Pairwise adds.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADAL", "VPADDL")>;

// Saturating absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
      (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating add / subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating doubling multiply-accumulate and multiply.
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMUL","VQRDMUL")>;

// Saturating narrow / negate / shift.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;

// Rounding shifts and table lookups.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;

// Swap and permutes.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
832
//---
// VSTx. Vector Stores
//---
// 1-element structure store.
// Every entry reads R52Read_ISS and R52Read_F2; the writeback entries
// additionally list R52WriteAdr as a second write.

// Multiple-register forms, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64QPseudo$")>;

// Single-lane forms, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNq(8|16|32)Pseudo$")>;

// Multiple-register forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64QPseudoWB")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
858
// 2-element structure store.
// Every entry reads R52Read_ISS and R52Read_F2; the writeback entries
// additionally list R52WriteAdr as a second write.

// Multiple-register forms, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)Pseudo$")>;

// Single-lane forms, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)Pseudo$")>;

// Multiple-register forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)PseudoWB")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)Pseudo_UPD")>;
881
// 3-element structure store.
// Every entry reads R52Read_ISS and R52Read_F2; the writeback entries
// additionally list R52WriteAdr as a second write.

// Multiple-register forms, no writeback.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pattern only covers "d" pseudos, whereas the matching
// _UPD pattern further down accepts (d|q) -- confirm whether that is intended.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;

// Single-lane forms, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNq(16|32)Pseudo$")>;

// Multiple-register forms with writeback.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
904
// 4-element structure store.
// Every entry reads R52Read_ISS and R52Read_F2; the writeback entries
// additionally list R52WriteAdr as a second write.

// Multiple-register forms, no writeback.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pattern only covers "d" ...Pseudo names, whereas the
// matching _UPD pattern further down accepts (d|q) and (oddP|P)seudo --
// confirm whether that is intended.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4d(8|16|32)Pseudo$")>;

// Single-lane forms, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNq(16|32)Pseudo$")>;

// Multiple-register forms with writeback.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST4LNq(16|32)Pseudo_UPD")>;
927
928} // R52 SchedModel