blob: 0fdccd5d92ab3abfd93d4d182407a8f865b3536b [file] [log] [blame]
Javed Absarf043dac2016-11-15 11:34:54 +00001//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order, pipelined, superscalar microprocessor with
// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18// A number of forwarding paths enable results of computations to be input
19// to subsequent operations before they are written to registers.
20// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Top-level machine model for the Cortex-R52.
def CortexR52Model : SchedMachineModel {
  // Issue behaviour: R52 is an in-order processor, so nothing is buffered,
  // and two micro-ops are dispatched per cycle.
  let MicroOpBufferSize = 0;
  let IssueWidth = 2;

  // Latencies and penalties.
  let LoadLatency = 1;        // Optimistic, assuming no misses.
  let MispredictPenalty = 8;  // A branch direction mispredict, including PFU.

  // Covers instructions applicable to Cortex-R52.
  let CompleteModel = 0;
}
29
30
31//===----------------------------------------------------------------------===//
32// Define each kind of processor resource and number available.
33
// Each pipeline is modeled as a ProcResource. Because Cortex-R52 is an
// in-order processor, every resource uses BufferSize = 0.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>;  // Int ALU
  def R52UnitMAC   : ProcResource<1>;  // Int MAC
  def R52UnitDiv   : ProcResource<1>;  // Int Division
  def R52UnitLd    : ProcResource<1>;  // Load/Store
  def R52UnitB     : ProcResource<1>;  // Branch
  def R52UnitFPALU : ProcResource<2>;  // FP ALU
  def R52UnitFPMUL : ProcResource<2>;  // FP MUL
  def R52UnitFPDIV : ProcResource<1>;  // FP DIV
}
45
// Cortex-R52 specific SchedReads.

// Integer pipeline.
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;

// Floating-point pipeline. F0 maps to the ISS stage of the integer pipe.
def R52Read_F0  : SchedRead;
def R52Read_F1  : SchedRead;
def R52Read_F2  : SchedRead;
54
55
56//===----------------------------------------------------------------------===//
57// Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59let SchedModel = CortexR52Model in {
60
// ALU - the write occurs in late EX2, independent of whether a shift was
// required, so all four ALU write types share one latency.
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}
66
// Compares - occupy an ALU but are modeled with zero latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
71
Javed Absarbb8dcc62017-02-02 21:08:12 +000072// Multiply - aliased to sub-target specific later
73
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2).
// The divider is non-pipelined, so the unit is held for the whole latency.
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let ResourceCycles = [8];
  let Latency = 8;
}
78
// Branches - LR written in Late EX2.
// Note that table branches are mapped onto the ALU, not the branch unit.
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}
83
// Misc - a no-op uses no resource, no micro-op and has no latency.
def : WriteRes<WriteNoop, []> {
  let NumMicroOps = 0;
  let Latency = 0;
}
Javed Absarf043dac2016-11-15 11:34:54 +000086
// Integer pipeline by-passes.
def : ReadAdvance<ReadALU,   1>;  // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>;  // Shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;
Javed Absarf043dac2016-11-15 11:34:54 +000092
// Floating-point. Map target-defined SchedReadWrites to subtarget.

// FP multiply: the 64-bit form lists the FP MUL resource twice.
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// FP multiply-accumulate: internally two insns (MUL then ADD), hence the
// longer latency and the use of both the FP MUL and FP ALU resources.
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}
108
// FP divide takes a fixed number of cycles and is not pipelined, so the
// unit is reserved for the full latency.
// NOTE(review): these map onto R52UnitDiv (declared as "Int Division") rather
// than R52UnitFPDIV - confirm this is intended.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let ResourceCycles = [7];
  let Latency = 7;
}
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let ResourceCycles = [17];
  let Latency = 17;
}

// FP square root: same latencies as the corresponding divides.
// NOTE(review): unlike the divides, no ResourceCycles are set here - confirm
// whether square root should also hold the unit for its full latency.
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> {
  let Latency = 7;
}
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> {
  let Latency = 17;
}
121
// Vector store write types carry no resources here; they are overridden via
// InstRW for this processor.
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
127
// Floating-point by-passes: both operand kinds are read in F1.
def : ReadAdvance<ReadFPMUL, 1>;  // mul operand
def : ReadAdvance<ReadFPMAC, 1>;  // fp-mac operand
130
Javed Absarf043dac2016-11-15 11:34:54 +0000131//===----------------------------------------------------------------------===//
132// Subtarget-specific SchedReadWrites.
133
// Forwarding information - based on when an operand is read.

// Integer pipe.
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;

// Floating-point pipe.
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;
141
142
// Cortex-R52 specific SchedWrites for use with InstRW.

// Integer multiply/accumulate. The "Hi" write has the same latency but
// contributes no micro-op.
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4;
}
def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
  let NumMicroOps = 0;
  let Latency = 4;
}

// Integer divide: not pipelined.
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let ResourceCycles = [8];
  let Latency = 8;
}

// Loads and stores both go through the load/store unit.
let Latency = 4 in {
  def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
  def R52WriteST : SchedWriteRes<[R52UnitLd]>;
}

// Writes that use no resource and have no latency.
let Latency = 0 in {
  def R52WriteAdr : SchedWriteRes<[]>;
  def R52WriteCC  : SchedWriteRes<[]>;
}

// ALU writes, suffixed with the stage name used throughout this file.
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }

// Same latencies as the EX2/WRI ALU writes, but without using any resource.
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
161
// Alias generics to sub-target specific.

// Multiplies.
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;

// Multiply-accumulates.
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;

// Memory accesses.
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
Javed Absarbb8dcc62017-02-02 21:08:12 +0000174
// Floating-point SchedWrites. The "2" variants list each resource twice.

// FP ALU, grouped by latency (F3 = 4, F4 = 5, F5 = 6 cycles).
let Latency = 4 in {
  def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 5 in {
  def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 6 in {
  def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}

// FP multiply.
let Latency = 6 in {
  def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
  def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}

// FP multiply-accumulate: internally two insns (MUL then ADD).
let Latency = 11 in {
  def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
  def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                         R52UnitFPALU, R52UnitFPALU]>;
}

// FP loads and stores use the load/store unit.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}
201
Javed Absar00cce412017-01-23 20:20:39 +0000202//===----------------------------------------------------------------------===//
// Floating-point. Map target-defined SchedReadWrites to processor-specific
// ones. Only FP moves use the shorter F3 write; the rest use F5.
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
Javed Absarf043dac2016-11-15 11:34:54 +0000209
210//===----------------------------------------------------------------------===//
Javed Absar00cce412017-01-23 20:20:39 +0000211// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212//
// Register copies.
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extends.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
      (instregex
        "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
        "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate and global-address moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
      (instregex
        "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn",
        "t2MOVCCi", "t2MOVi", "t2MOV_ga_dyn")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
      (instregex "MOV_ga_pcrel$", "t2MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
      (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;

// Select.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SEL", "t2SEL")>;

// Bitfield clear/insert/extract.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
      (instregex
        "BFC", "BFI", "UBFX", "SBFX",
        "(t|t2)BFC", "(t|t2)BFI", "(t|t2)UBFX", "(t|t2)SBFX")>;
232
// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
      (instregex
        "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
        "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
        "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX",
        "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
        "t2SSAT", "t2SSAT16", "t2USAT",
        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
        "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX",
        "t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex
        "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
        "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX",
        "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;

// Flag setting.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex
        "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
        "SXTAB", "SXTAB16", "SXTAH",
        "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", "UHASX", "UHSAX",
        "UXTAB", "UXTAB16", "UXTAH",
        "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
        "t2SHASX", "t2SHSAX",
        "t2SXTAB", "t2SXTAB16", "t2SXTAH",
        "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
        "t2UHASX", "t2UHSAX",
        "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex
        "USAD8", "t2USAD8", "tUSAD8",
        "USADA8", "t2USADA8", "tUSADA8")>;
261
// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
      (instregex
        "MULS", "MUL", "SMMUL", "SMMULR",
        "SMULBB", "SMULBT", "SMULTB", "SMULTT",
        "SMULWB", "SMULWT", "SMUSD", "SMUSDXi",
        "t2MUL", "t2SMMUL", "t2SMMULR",
        "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
        "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
268
// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
//
// The patterns "SMLALBB" and "t2SMLALBB" were previously spelled "MLALBB" and
// "t2MLALBB" (leading 'S' missing), which name no opcode; they sit next to
// SMLALBT/SMLALTB/SMLALTT and are spelled out in full here.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
  (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
  "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
  "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
  "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
  "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
  "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
  "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
  "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
  "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
  "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
  "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
  "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
  "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
286
// Thumb2 integer divides use the non-pipelined divide write.
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
      (instregex "t2SDIV", "t2UDIV")>;
Javed Absarf043dac2016-11-15 11:34:54 +0000289
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform.

// Loads without base-register update.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
      (instregex
        "LDR(i12|rs)$", "LDRB(i12|rs)$",
        "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
        "LDREX", "t2LDREX",
        "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
        "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
        "LDA", "t2LDA")>;

// Pre/post-indexed and translated loads: these also get an address write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
      (instregex
        "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
        "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
306
// Shifted-register moves and move-top.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
      (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
      (instregex "MOVT", "t2MOVT")>;

// ALU operations with one register operand (register-immediate forms etc.).
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
      (instregex
        "AD(C|D)S?ri","ANDS?ri", "BICS?ri", "CLZ", "EORri", "MVNS?r",
        "ORRri", "RSBS?ri", "RSCri", "SBCri",
        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri",
        "t2MVN", "t2ORRri", "t2RSBS?ri", "t2SBCri")>;

// ALU operations with two register operands.
// NOTE(review): as a regex, "CRC*" means "CR" followed by any number of 'C's;
// plain "CRC" (as in the Thumb2 entry "t2CRC") may have been intended - confirm.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex
        "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC*", "EORrr", "ORRrr",
        "RSBrr", "RSCrr", "SBCrr",
        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr",
        "t2SBCrr")>;
318
// ALU operations whose second operand is shifted by an immediate: the first
// operand is read in EX1, the shifted one in ISS.
// The Thumb2 add/adc pattern previously read "t2AD(|D)S?rsi", dropping the
// 'C' alternative; it now matches the "AD(C|D)" form used by every other
// add/adc pattern in this file.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
  "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
  "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
322
// ALU operations whose second operand is shifted by a register: the first
// operand is read in EX1, the shifted operand and shift amount in ISS.
// "ORRrsr" was previously misspelled "ORRrsrr"; every sibling pattern in this
// list ends in "rsr".
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
  (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
  "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
326
// Address generation and immediate moves: no register read is modeled.
def : InstRW<[R52WriteALU_EX1],
      (instregex
        "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates, by immediate and by register.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
      (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

// Compares, one entry per operand form.
def : InstRW<[R52WriteCC, R52Read_EX1],
      (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
      (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
      (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal and rotate-with-extend (t2LDC shares this entry).
def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
      (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

// Test.
def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

// Status-register moves are modeled with the load write.
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
346
// Integer Load, Multiple.

// For every latency from 3 to 25 define two writes: "<N>Cy" uses the
// load/store unit, "<N>CyNo" has the same latency but uses no resource
// and no micro-op.
foreach Cycles = 3-25 in {
  def R52WriteILDM#Cycles#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = Cycles;
  }
  def R52WriteILDM#Cycles#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Cycles;
  }
}

// One predicate per possible LDM address count (1 to 16).
foreach Count = 1-16 in
  def R52ILDMAddr#Count#Pred :
    SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#Count>;

// Address writes for the forms without and with write-back.
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
def R52WriteILDMAddrWB   : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple. The variant is selected by the
// number of LDM addresses; a variant for N addresses lists N writes with
// latencies 4, 5, ..., N+3.
//
// Fixes relative to the previous revision:
//  * the 16-address variant was guarded by R52ILDMAddr15Pred a second time
//    (making it unreachable); it now uses R52ILDMAddr16Pred.
//  * the fallback variant ended with the resource-consuming R52WriteILDM18Cy
//    and R52WriteILDM19Cy although it is documented to use resources for two
//    registers only; those two entries now use the "CyNo" forms like the
//    other trailing entries.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy]>,
  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  // Only the first two writes use the load/store unit; the remaining
  // "CyNo" writes supply latencies without using any resource.
  SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                              R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                              R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                              R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                              R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                              R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                              R52WriteILDM18CyNo, R52WriteILDM19CyNo]>
]> { let Variadic=1; }
452
// Integer Store, Multiple

// Write for a single store step of a store-multiple.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 2;
  let Latency = 4;
}

// R52WriteISTM<N> repeats the single-step write N times.
foreach Count = 1-16 in
  def R52WriteISTM#Count : WriteSequence<[R52WriteIStIncAddr], Count>;

// Select the sequence by address count, reusing the LDM address predicates.
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
480
// Load-multiple without base update.
def : InstRW<[R52WriteILDM, R52Read_ISS],
      (instregex
        "LDM(IA|DA|DB|IB)$",
        "t2LDM(IA|DA|DB|IB)$",
        "(t|sys)LDM(IA|DA|DB|IB)$")>;

// Load-multiple with base update: adds an address write.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex
        "LDM(IA|DA|DB|IB)_UPD",
        "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;

// Return forms and pops.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
488
// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex
        "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
        "SRS", "t2SRS", "t2SRSDB",
        "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
        "t2STR(i12|i8|s)$", "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$",
        "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Single-element stores with base update: adds an address write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex
        "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
        "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
        "STR(H_|HT_)(PRE|POST)", "STR(BT|HT|T)",
        "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
        "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Store-multiple, without and with base update (pushes are in the latter).
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
      (instregex
        "STM(IB|IA|DB|DA)$",
        "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex
        "STM(IB|IA|DB|DA)_UPD",
        "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
        "PUSH", "tPUSH")>;
512
// LDRLIT pseudo instructions; they expand to LDR + PICADD.
def : InstRW<[R52WriteLd],
      (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;

// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR.
def : InstRW<[R52WriteLd],
      (instregex "LDRLIT_ga_pcrel_ldr")>;
518
519
520
521//===----------------------------------------------------------------------===//
522// VFP, Floating Point Support
// Absolute difference. In this section the "fq|hq" forms use the "2"
// (doubled-resource) write of the corresponding "fd|hd" form.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VABD(fq|hq)")>;

// Absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
      (instregex "VABS(fq|hq)")>;

// Absolute compares.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add and subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register loads and stores.
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
538
539
540//===----------------------------------------------------------------------===//
541// Neon Support
542
// Vector multiple loads/stores: helper records.

// One predicate per machine-instruction operand count (1 to 16).
foreach Count = 1-16 in
  def R52LMAddrPred#Count :
    SchedPredicate<"MI->getNumOperands() == "#Count>;

// Latency-only writes (no resource) for 1 to 32 cycles.
foreach Cycles = 1-32 in
  def R52WriteLM#Cycles#Cy : SchedWriteRes<[]> {
    let Latency = Cycles;
  }

// Reserve the LdSt resource for N cycles with N micro-ops; no dual-issue.
foreach Cycles = 1-32 in
  def R52ReserveLd#Cycles#Cy : SchedWriteRes<[R52UnitLd]> {
    let ResourceCycles = [Cycles];
    let NumMicroOps = Cycles;
    let Latency = 0;
  }
559}
// Variadic write for vector load-multiple. Variants are selected by operand
// count, two consecutive counts per number of D registers. A variant for K
// D registers lists latency writes 5..(K+4) followed by a reservation of the
// load/store unit for K+4 cycles.
//
// Fixes relative to the previous revision:
//  * the "3 D reg" variant for R52LMAddrPred5 reserved only 4 cycles while
//    its twin (R52LMAddrPred6) reserved 7; both now use R52ReserveLd7Cy.
//  * the "8 D reg" variants were guarded by R52LMAddrPred14/15, duplicating
//    the "7 D reg" predicate and leaving R52LMAddrPred16 unhandled; they now
//    use R52LMAddrPred15/16, matching R52WriteSTM.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,

  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd5Cy]>
]> { let Variadic=1;}
639
// Variable stores. Cannot dual-issue.
// From one record to the next the latency grows by 1, the micro-op count by
// 2 and the cycles reserved on the load/store unit by 1.
def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;  let NumMicroOps = 2;  let ResourceCycles = [1];
}
def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;  let NumMicroOps = 4;  let ResourceCycles = [2];
}
def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;  let NumMicroOps = 6;  let ResourceCycles = [3];
}
def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;  let NumMicroOps = 8;  let ResourceCycles = [4];
}
def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;  let NumMicroOps = 10; let ResourceCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10; let NumMicroOps = 12; let ResourceCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11; let NumMicroOps = 14; let ResourceCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12; let NumMicroOps = 16; let ResourceCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13; let NumMicroOps = 18; let ResourceCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14; let NumMicroOps = 20; let ResourceCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15; let NumMicroOps = 22; let ResourceCycles = [11];
}
696
// Vector store-multiple write, selected by operand count. Consecutive pairs
// of operand counts share one R52WriteSTM<N> record.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,

  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,

  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,

  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,

  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,

  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,

  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,

  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,

  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
717
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.

// VLD1 keeps the default micro-op count and resource cycles.
def : WriteRes<WriteVLD1, [R52UnitLd]> {
  let Latency = 5;
}

// VLD2..VLD4 are marked single-issue; each step adds one cycle of latency,
// two micro-ops and one cycle on the load/store unit.
let SingleIssue = 1 in {
  def : WriteRes<WriteVLD2, [R52UnitLd]> {
    let ResourceCycles = [2];
    let NumMicroOps = 3;
    let Latency = 6;
  }
  def : WriteRes<WriteVLD3, [R52UnitLd]> {
    let ResourceCycles = [3];
    let NumMicroOps = 5;
    let Latency = 7;
  }
  def : WriteRes<WriteVLD4, [R52UnitLd]> {
    let ResourceCycles = [4];
    let NumMicroOps = 7;
    let Latency = 8;
  }
}
// Vector-store write classes R52WriteVST1Mem .. R52WriteVST5Mem, all bound to
// R52UnitLd. Latency runs 5..9, micro-ops 1,3,5,7,9 and R52UnitLd occupancy
// 1..5 cycles.
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 1;
  let ResourceCycles = [1];
  let Latency = 5;
}
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2];
  let Latency = 6;
}
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 5;
  let ResourceCycles = [3];
  let Latency = 7;
}
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 7;
  let ResourceCycles = [4];
  let Latency = 8;
}
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 9;
  let ResourceCycles = [5];
  let Latency = 9;
}
764
765
// VABA / VABAL. All forms read three operands with R52Read_F1. The 64-bit
// vector forms use R52WriteFPALU_F5; the 128-bit and long (VABAL) forms share
// R52Write2FPALU_F5.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)",
                        "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
769
// VABD / VABDL. Two source operands, read with R52Read_F1. The 64-bit vector
// forms use R52WriteFPALU_F4; the 128-bit and long (VABDL) forms use
// R52Write2FPALU_F4.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// The long form widens its result, so its type suffixes are v8i16/v4i32/v2i64
// (the same set the VABAL mapping uses). The previous list
// (v16i8|v8i16|v4i32) left the v2i64 variants unmapped.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
773
// VABS: only the 128-bit integer forms are mapped here.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;

// Integer VADD/VSUB, split by vector width (64-bit, then 128-bit), followed
// by the narrowing-high-half variants.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long and wide add/subtract.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

// Bitwise VAND/VBIC/VEOR on d and q registers.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

// VBIC with immediate: a single register read.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise select family: three reads, the first with R52Read_F1 and the other
// two with R52Read_F2.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;
794
// Compares, count-leading-zeros and population count.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

// Conversions.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// VDUP reads its source with R52Read_ISS; the lane form (VDUPLN) reads it
// with R52Read_F1.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// VEXT (d and q forms); VSEL shares the d-form mapping.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;

// Fused multiply-accumulate / multiply-subtract, vector forms.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add/subtract, split by vector width.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
813
// VLDM (S/D, IA/DB) and VMRS.
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;

// Mappings onto R52WriteFPALU_F3 with two R52Read_F1 operands.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV",
                        "VSWP", "VTRN", "VUZP", "VZIP")>;

// Mappings onto the F4 ALU write classes with two R52Read_F1 operands.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN",
                        "VPADDi",
                        "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;

// Mappings onto the F5 ALU write classes: single-operand forms first, then
// two-operand forms.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG",
                        "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating doubling multiply-accumulate and multiply.
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL","VQRDMUL")>;
833
//---
// VSTx. Vector Stores
//---
// 1-element structure store.
// The store write class grows with the amount of data stored: d -> VST1Mem,
// q -> VST2Mem, T (triple) -> VST3Mem, Q (quad) -> VST4Mem. Single-lane
// stores use VST1Mem. All forms read with R52Read_ISS and R52Read_F2.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$",
                        "VST1LNd(8|16|32)$",
                        "VST1LNdAsm_(8|16|32)$",
                        "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$",
                        "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$",
                        "VST1d64QPseudo$")>;

// Write-back forms: same store classes, plus R52WriteAdr as a second write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb",
                        "VST1LNd(8|16|32)_UPD",
                        "VST1LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST1LNq(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb",
                        "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb",
                        "VST1d64QPseudoWB")>;
859
// 2-element structure store.
// d/b forms -> VST2Mem, q forms (and their pseudos) -> VST4Mem, single-lane
// forms -> VST1Mem. All forms read with R52Read_ISS and R52Read_F2.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$",
                        "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$",
                        "VST2LNdAsm_(8|16|32)$",
                        "VST2LNd(8|16|32)Pseudo$",
                        "VST2LNq(16|32)$",
                        "VST2LNqAsm_(16|32)$",
                        "VST2LNq(16|32)Pseudo$")>;

// Write-back forms: same store classes, plus R52WriteAdr as a second write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb",
                        "VST2q(8|16|32)PseudoWB")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD",
                        "VST2LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST2LNd(8|16|32)Pseudo_UPD",
                        "VST2LNq(16|32)_UPD",
                        "VST2LNqWB_(fixed|register)_Asm_(16|32)",
                        "VST2LNq(16|32)Pseudo_UPD")>;
882
// 3-element structure store.
// Whole-structure forms -> VST4Mem, single-lane forms -> VST2Mem. All forms
// read with R52Read_ISS and R52Read_F2.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Accept both d and q here, as the write-back pseudo pattern below does.
// With "VST3d" alone the "oddP" alternative could never match, because the
// odd pseudos are q-register forms, so those pseudos were left unmapped.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$",
                        "VST3LNdAsm_(8|16|32)$",
                        "VST3LNd(8|16|32)Pseudo$",
                        "VST3LNq(16|32)$",
                        "VST3LNqAsm_(16|32)$",
                        "VST3LNq(16|32)Pseudo$")>;

// Write-back forms: same store classes, plus R52WriteAdr as a second write.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$",
                        "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$",
                        "VST3LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST3LNd(8|16|32)Pseudo_UPD$",
                        "VST3LNq(16|32)_UPD$",
                        "VST3LNqWB_(fixed|register)_Asm_(16|32)$",
                        "VST3LNq(16|32)Pseudo_UPD$")>;
905
// 4-element structure store.
// Whole-structure forms -> VST5Mem, single-lane forms -> VST2Mem. All forms
// read with R52Read_ISS and R52Read_F2.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Match the same pseudo name shapes as the write-back pattern below
// ((d|q) register kinds, plain and odd pseudos). The previous
// "VST4d(8|16|32)Pseudo$" left the q-register odd pseudos unmapped.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$",
                        "VST4LNdAsm_(8|16|32)$",
                        "VST4LNd(8|16|32)Pseudo$",
                        "VST4LNq(16|32)$",
                        "VST4LNqAsm_(16|32)$",
                        "VST4LNq(16|32)Pseudo$")>;

// Write-back forms: same store classes, plus R52WriteAdr as a second write.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD",
                        "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD",
                        "VST4LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST4LNd(8|16|32)Pseudo_UPD",
                        "VST4LNq(16|32)_UPD",
                        "VST4LNqWB_(fixed|register)_Asm_(16|32)",
                        "VST4LNq(16|32)Pseudo_UPD")>;
928
929} // R52 SchedModel