blob: 59949344399cd3a780ce1123943dfac139e0ef0e [file] [log] [blame]
Javed Absarf043dac2016-11-15 11:34:54 +00001//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue at most two instructions in each cycle.
17// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18// A number of forwarding paths enable results of computations to be input
19// to subsequent operations before they are written to registers.
20// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Top-level machine model that every Cortex-R52 scheduling record refers to.
def CortexR52Model : SchedMachineModel {
  // R52 is an in-order processor.
  let MicroOpBufferSize = 0;
  // Two micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Optimistic value, assuming no misses.
  let LoadLatency = 1;
  // A branch direction mispredict, including PFU.
  let MispredictPenalty = 8;
  // Enable the PostRA scheduler pass.
  let PostRAScheduler = 1;
  // Covers instructions applicable to cortex-r52.
  let CompleteModel = 0;
}
30
31
32//===----------------------------------------------------------------------===//
33// Define each kind of processor resource and number available.
34
35// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36// Cortex-R52 is an in-order processor.
37
// One ProcResource per pipeline; the template argument is the unit count.
// All of them are unbuffered (BufferSize = 0).
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>; // Int ALU
  def R52UnitMAC   : ProcResource<1>; // Int MAC
  def R52UnitDiv   : ProcResource<1>; // Int Division
  def R52UnitLd    : ProcResource<1>; // Load/Store
  def R52UnitB     : ProcResource<1>; // Branch
  def R52UnitFPALU : ProcResource<2>; // FP ALU
  def R52UnitFPMUL : ProcResource<2>; // FP MUL
  def R52UnitFPDIV : ProcResource<1>; // FP DIV
}

// Cortex-R52 specific SchedReads, used as operand entries in InstRW lists.
// Integer pipe:
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;
// Floating-point pipe (F0 maps to the ISS stage of the integer pipe):
def R52Read_F0 : SchedRead;
def R52Read_F1 : SchedRead;
def R52Read_F2 : SchedRead;
55
56
57//===----------------------------------------------------------------------===//
58// Subtarget-specific SchedWrite types which map ProcResources and set latency.
59
60let SchedModel = CortexR52Model in {
61
// ALU - Write occurs in Late EX2 (independent of whether shift was required)
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}

// Compares
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
72
Javed Absarbb8dcc62017-02-02 21:08:12 +000073// Multiply - aliased to sub-target specific later
74
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8]; // not pipelined
}

// Branches - LR written in Late EX2
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// Misc
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}

// Integer pipeline by-passes
def : ReadAdvance<ReadALU,   1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;
Javed Absarf043dac2016-11-15 11:34:54 +000093
// Floating-point. Map target-defined SchedReadWrites to subtarget
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// Multiply-accumulate is internally two insns (MUL then ADD).
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}

// FP div takes a fixed number of cycles and is not pipelined.
// NOTE(review): FP div/sqrt are bound to R52UnitDiv (the integer divider);
// R52UnitFPDIV is defined but not referenced here - confirm this is intended.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;
  let ResourceCycles = [7];
}
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let Latency = 17;
  let ResourceCycles = [17];
}

// Square root: same latencies as divide, with no ResourceCycles override.
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> {
  let Latency = 7;
}
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> {
  let Latency = 17;
}

def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
125
Javed Absarf043dac2016-11-15 11:34:54 +0000126//===----------------------------------------------------------------------===//
127// Subtarget-specific SchedReadWrites.
128
// Forwarding information - based on when an operand is read
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;


// Cortex-R52 specific SchedWrites for use with InstRW
let Latency = 4 in {
  def R52WriteMAC   : SchedWriteRes<[R52UnitMAC]>;
  // High half of a 64-bit result: same latency, no extra micro-op.
  def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { let NumMicroOps = 0; }
  def R52WriteLd    : SchedWriteRes<[R52UnitLd]>;
  def R52WriteST    : SchedWriteRes<[R52UnitLd]>;
}
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8]; // not pipelined
}
// Zero-latency writes that consume no resource.
let Latency = 0 in {
  def R52WriteAdr : SchedWriteRes<[]>;
  def R52WriteCC  : SchedWriteRes<[]>;
}
// ALU writes with latencies 2, 3 and 4.
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 2;
}
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 3;
}
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> {
  let Latency = 4;
}

// Latency-only writes that consume no resource.
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> {
  let Latency = 3;
}
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> {
  let Latency = 4;
}

// Alias generics to sub-target specific
// Multiplies
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
// Multiply-accumulates
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
// Memory
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
Javed Absarbb8dcc62017-02-02 21:08:12 +0000169
// FP SchedWrites, grouped by latency. The "2" variants list their
// resource(s) twice.
let Latency = 4 in {
  def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 5 in {
  def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 6 in {
  def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
  def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
  def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}
// Multiply-accumulate is internally two insns (MUL then ADD).
let Latency = 11 in {
  def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
  def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                         R52UnitFPALU, R52UnitFPALU]>;
}

// FP loads and stores go through the load/store unit.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}

//===----------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrites to processor specific ones
//
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
Javed Absarf043dac2016-11-15 11:34:54 +0000204
205//===----------------------------------------------------------------------===//
Javed Absar00cce412017-01-23 20:20:39 +0000206// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
207//
// COPY uses the generic ALU write.
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extends.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "SXTB", "SXTH", "SXTB16",
                        "UXTB", "UXTH", "UXTB16",
                        "t2SXTB", "t2SXTH", "t2SXTB16",
                        "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate and global-address moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn",
                        "t2MOVCCi", "t2MOVi", "t2MOV_ga_dyn")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
             (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;

// Select.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SEL", "t2SEL")>;

// Bit-field clear / insert / extract.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
             (instregex "BFC", "BFI", "UBFX", "SBFX",
                        "(t|t2)BFC", "(t|t2)BFI",
                        "(t|t2)UBFX", "(t|t2)SBFX")>;
227
// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
             (instregex "QADD", "QSUB", "QDADD", "QDSUB",
                        "SSAT", "SSAT16", "USAT",
                        "QADD8", "QADD16", "QSUB8", "QSUB16",
                        "QASX", "QSAX",
                        "UQADD8", "UQADD16", "UQSUB8", "UQSUB16",
                        "UQASX", "UQSAX",
                        "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
                        "t2SSAT", "t2SSAT16", "t2USAT",
                        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16",
                        "t2QASX", "t2QSAX",
                        "t2UQADD8", "t2UQADD16", "t2UQSUB8", "t2UQSUB16",
                        "t2UQASX", "t2UQSAX",
                        "t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SADD8", "SADD16", "SSUB8", "SSUB16",
                        "SASX", "SSAX",
                        "UADD8", "UADD16", "USUB8", "USUB16",
                        "UASX", "USAX",
                        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16",
                        "t2SASX", "t2SSAX",
                        "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                        "t2UASX", "t2USAX")>;

// Flag setting.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16",
                        "SHASX", "SHSAX",
                        "SXTAB", "SXTAB16", "SXTAH",
                        "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
                        "UHASX", "UHSAX",
                        "UXTAB", "UXTAB16", "UXTAH",
                        "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
                        "t2SHASX", "t2SHSAX",
                        "t2SXTAB", "t2SXTAB16", "t2SXTAH",
                        "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
                        "t2UHASX", "t2UHSAX",
                        "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "USAD8", "t2USAD8", "tUSAD8",
                        "USADA8", "t2USADA8", "tUSADA8")>;
256
// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
      (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
      "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
      "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
// The patterns "SMLALBB" / "t2SMLALBB" were previously spelled "MLALBB" /
// "t2MLALBB"; instregex is a prefix match, so those spellings matched nothing.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
      "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
      "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
      "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
      "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
      "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
      "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
      "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
      "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
      "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
      "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
      "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
      "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide (non-pipelined; see R52WriteDIV).
def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
      (instregex "t2SDIV", "t2UDIV")>;
Javed Absarf043dac2016-11-15 11:34:54 +0000284
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
             (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$",
                        "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
                        "LDREX", "t2LDREX",
                        "tLDR[BH](r|i|spi|pci|pciASM)",
                        "tLDR(r|i|spi|pci|pciASM)",
                        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)",
                        "LDRS(H|B)$", "t2LDRpci_pic", "tLDRS(B|H)",
                        "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;

// Pre/post-indexed and unprivileged (T) forms; the extra R52WriteAdr write
// is the second def in the list.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
             (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)",
                        "LDRH(_PRE|_POST)",
                        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)",
                        "LDRHT(i|r)",
                        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                        "LDR(SH|SB)(_POST|_PRE)",
                        "t2LDR(SH|SB)(_POST|_PRE)",
                        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)",
                        "t2LDRS(B|H)T",
                        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
301
// Register-shifted moves and MOVT.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
             (instregex "MOVT", "t2MOVT")>;

// ALU, register + immediate.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ",
                        "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri",
                        "SBCri",
                        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri", "t2CLZ",
                        "t2EORri", "t2MVN", "t2ORRri", "t2RSBS?ri",
                        "t2SBCri")>;

// ALU, register + register.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC*",
                        "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
                        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC",
                        "t2EORrr", "t2SBCrr")>;
313
// ALU, register + immediate-shifted register. The shifted operand is read
// with R52Read_ISS.
// The Thumb2 add/adc pattern is "t2AD(C|D)S?rsi", matching every sibling
// pattern; it previously read "t2AD(|D)S?rsi", which had lost the 'C'.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
      "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
      "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi",
      "t2RSBrsi", "t2SBCrsi")>;
317
// ALU, register + register-shifted register. Both the shifted register and
// the shift amount are read with R52Read_ISS.
// "ORRrsr" was previously misspelled "ORRrsrr", which matched no opcode and
// left ORRrsr without this InstRW.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
      "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
321
// Immediate moves and ADR: latency-2 ALU write, no tracked reads.
def : InstRW<[R52WriteALU_EX1],
             (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i",
                        "t2MOVS?si")>;

// Shifts and rotates.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

// Compares: one R52Read_* per source operand; shifted operands use ISS.
def : InstRW<[R52WriteCC, R52Read_EX1],
             (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
             (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
             (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "CMPrsr", "CMNzrsr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
             (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS],
             (instregex "TST")>;

// Status-register moves use the load write (R52WriteLd).
def : InstRW<[R52WriteLd],
             (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1],
             (instregex "MSR", "MSRbanked")>;
341
Javed Absarf043dac2016-11-15 11:34:54 +0000342// Integer Load, Multiple.
// For every latency 3..25 define a pair of load-multiple writes:
//   R52WriteILDM<Lat>Cy   - uses the load/store unit,
//   R52WriteILDM<Lat>CyNo - same latency, no resource and no micro-op.
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { let Latency = Lat; }
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Lat;
  }
}

// R52ILDMAddr<N>Pred holds when the instruction has exactly N LDM addresses.
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred
      : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}

def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> {
  let Latency = 0;
}
def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple. The variant for N addresses
// supplies N writes with latencies 4 .. N+3, each occupying the load/store
// unit. Variants are tried in order and the first matching predicate wins.
// The 16-address variant is keyed on R52ILDMAddr16Pred; it previously reused
// R52ILDMAddr15Pred, so it could never be selected.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy,
                               R52WriteILDM18Cy]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy,
                               R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  // NOTE(review): the last two entries are the resource-consuming *Cy forms,
  // not *CyNo, so four writes reserve R52UnitLd here - confirm intent.
  SchedVar<NoSchedPred,       [R52WriteILDM4Cy,    R52WriteILDM5Cy,
                               R52WriteILDM6CyNo,  R52WriteILDM7CyNo,
                               R52WriteILDM8CyNo,  R52WriteILDM9CyNo,
                               R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                               R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                               R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                               R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                               R52WriteILDM18Cy,   R52WriteILDM19Cy]>
]> { let Variadic=1; }
447
// Integer Store, Multiple
// Per-address store step: two micro-ops on the load/store unit.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 2;
  let Latency = 4;
}

// R52WriteISTM<N> repeats the per-address step N times.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}

// Select the sequence that matches the number of addresses.
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
475
// Load-multiple without base update.
def : InstRW<[R52WriteILDM, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)$",
                        "t2LDM(IA|DA|DB|IB)$",
                        "(t|sys)LDM(IA|DA|DB|IB)$")>;
// Load-multiple with base update: R52WriteAdr is the extra write.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)_UPD",
                        "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Return-style load-multiple and POP.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDMIA_RET", "(t|t2)LDMIA_RET",
                        "POP", "tPOP")>;
483
// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$",
                        "STREX", "SRS", "t2SRS", "t2SRSDB",
                        "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
                        "t2STR(i12|i8|s)$",
                        "RFE", "t2RFE",
                        "t2STR[BH](i12|i8|s)$",
                        "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Indexed and unprivileged single stores: R52WriteAdr is the extra write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex
                "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
                "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
                "STR(H_|HT_)(PRE|POST)",
                "STR(BT|HT|T)",
                "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
                "t2STR_preidx", "t2STR[BH]_preidx",
                "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Integer store-multiple, without and with base update.
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)$",
                        "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)_UPD",
                        "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
                        "PUSH", "tPUSH")>;

// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
             (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd],
             (instregex "LDRLIT_ga_pcrel_ldr")>;
513
514
515
516//===----------------------------------------------------------------------===//
517// VFP, Floating Point Support
// Absolute difference (D-register, then Q-register forms).
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fq|hq)")>;

// Absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VABS(fq|hq)")>;

// Absolute compares.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add / subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(fq|hq)")>;

// FP load / store.
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
             (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
             (instregex "VSTR")>;
533
534
535//===----------------------------------------------------------------------===//
536// Neon Support
537
// vector multiple load stores
// R52LMAddrPred<N> holds when the MachineInstr has exactly N operands.
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr
      : SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}

// R52WriteLM<Lat>Cy: latency-only write, no resource.
foreach Lat = 1-32 in {
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { let Latency = Lat; }
}

// R52ReserveLd<Num>Cy: reserve LdSt resource for Num cycles, no dual-issue.
foreach Num = 1-32 in {
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let ResourceCycles = [Num];
    let NumMicroOps = Num;
    let Latency = 0;
  }
}
554}
// Variadic write for vector load-multiple. Two consecutive operand-count
// predicates map to each D-register count k (1..8). Variant k supplies
// latency writes 5 .. 4+k followed by a load/store-unit reservation of 4+k
// cycles. Variants are tried in order and the first matching predicate wins.
// Two entries were corrected to follow that pattern:
//  - the Pred5 ("3 D reg") entry reserved R52ReserveLd4Cy, not 7Cy;
//  - the "8 D reg" pair used Pred14/Pred15 (Pred14 duplicated, so the entry
//    was dead) instead of Pred15/Pred16.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,

  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy, R52WriteLM7Cy,
                             R52WriteLM8Cy, R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd5Cy]>
]> { let Variadic=1;}
634
// variable stores. Cannot dual-issue
// R52WriteSTM<L>: latency L on the load/store unit; each step adds two
// micro-ops and one resource cycle.
def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;  let NumMicroOps = 2;  let ResourceCycles = [1];
}
def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;  let NumMicroOps = 4;  let ResourceCycles = [2];
}
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;  let NumMicroOps = 6;  let ResourceCycles = [3];
}
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;  let NumMicroOps = 8;  let ResourceCycles = [4];
}
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;  let NumMicroOps = 10; let ResourceCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10; let NumMicroOps = 12; let ResourceCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11; let NumMicroOps = 14; let ResourceCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12; let NumMicroOps = 16; let ResourceCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13; let NumMicroOps = 18; let ResourceCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14; let NumMicroOps = 20; let ResourceCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15; let NumMicroOps = 22; let ResourceCycles = [11];
}
691
// Vector store-multiple: each pair of consecutive operand-count predicates
// selects the next R52WriteSTM<L> write, from STM5 up to STM12.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,

  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,

  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,

  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,

  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,

  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,

  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,

  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,

  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
712
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.

// Loads. R52WriteVLD1Mem keeps the default micro-op and resource-cycle count.
def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
}
def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2];
}
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3];
}
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4];
}

// Stores.
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5; let NumMicroOps = 1; let ResourceCycles = [1];
}
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2];
}
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3];
}
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4];
}
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9; let NumMicroOps = 9; let ResourceCycles = [5];
}
756
757
// VABA / VABAL: one result write, three source reads.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

// VABD / VABDL: one result write, two source reads.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// The long form is named after its result type, exactly like VABAL above, so
// the suffixes are v8i16/v4i32/v2i64. The previous list (v16i8|v8i16|v4i32)
// left the v2i64 variant unmodelled and named a v16i8 variant.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;

// VABS on the v16i8/v8i16/v4i32 forms.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;
767
// Integer VADD / VSUB, split by the vector types named in the opcode.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;

// Narrowing high-half add/sub, with and without rounding.
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long and wide add/sub.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
777
// Bitwise VAND / VBIC / VEOR, d and q forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

// VBIC with immediate.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise insert/select VBIF / VBIT / VBSL, d and q forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;

// A second, identical InstRW for "VBICi(v8i16|v4i32)" used to follow here.
// It re-mapped the same instructions to the same write/read list and has been
// dropped so each instruction is claimed by exactly one InstRW in this model.
788
// Everything whose opcode starts with one of the listed VC* mnemonics.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

// Opcodes starting with VCVT, VSITO, VUITO or VTO.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
793
// VDUP reads its source with R52Read_ISS; VDUPLN reads with R52Read_F1.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// VEXT d/q forms; VSEL shares the d-form entry.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;
801
// VFMA / VFMS, f and h element kinds, d and q forms.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// VHADD / VHSUB, unsigned and signed.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
807
// Miscellaneous vector/FP mappings. Stray revision-annotation text that had
// been fused onto the VORR/VMRS/VNEG lines is removed; the records themselves
// are unchanged.
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
827
//---
// VLDx. Vector Loads
//---
// 1-element structure load

// Without writeback: the write grows with the number of registers named by
// the opcode suffix (d, q, T, Q).
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
             (instregex "VLD1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
             (instregex "VLD1d64TPseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
             (instregex "VLD1d64QPseudo$")>;

// Lane and dup forms without writeback.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
             (instregex "VLD1(LN|DUP)d(8|16|32)$",
                        "VLD1LNdAsm_(8|16|32)",
                        "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;

// With writeback: same writes plus R52WriteAdr for the updated address.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1d64TPseudoWB")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1d64QPseudoWB")>;

// Lane and dup forms with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD1LNd(8|16|32)_UPD",
                        "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VLD1DUP(d|q)(8|16|32)wb",
                        "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
854
// 2-element structure load
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
             (instregex "VLD2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
             (instregex "VLD2q(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD2q(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
             (instregex "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD2q(8|16|32)PseudoWB")>;

// Lane and dup forms without writeback. The two "...Pseudo" patterns are
// anchored with '$': instregex matches on a prefix, so unanchored they also
// claimed the "...Pseudo_UPD" opcodes that the writeback entry below maps to a
// different list (one that includes R52WriteAdr).
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
             (instregex "VLD2LNd(8|16|32)$",
                        "VLD2LNdAsm_(8|16|32)$",
                        "VLD2LNq(16|32)$",
                        "VLD2LNqAsm_(16|32)$",
                        "VLD2DUPd(8|16|32)$",
                        "VLD2DUPd(8|16|32)x2$",
                        "VLD2LNd(8|16|32)Pseudo$",
                        "VLD2LNq(16|32)Pseudo$")>;

// Lane and dup forms with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD2LNd(8|16|32)_UPD",
                        "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VLD2LNq(16|32)_UPD",
                        "VLD2LNqWB_(fixed|register)_Asm_(16|32)",
                        "VLD2DUPd(8|16|32)wb",
                        "VLD2DUPd(8|16|32)x2wb",
                        "VLD2LNd(8|16|32)Pseudo_UPD",
                        "VLD2LNq(16|32)Pseudo_UPD")>;
882
// 3-element structure load

// Without writeback. The pseudo pattern is anchored with '$' so it no longer
// prefix-matches the "..._UPD" pseudos handled by the writeback entry.
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
             (instregex "VLD3(d|q)(8|16|32)$",
                        "VLD3(d|q)Asm_(8|16|32)$",
                        "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;

// With writeback.
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD3(d|q)(8|16|32)_UPD",
                        "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Lane and dup forms without writeback.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
             (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$",
                        "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$",
                        "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;

// Lane and dup forms with writeback. The WB_(fixed|register)_Asm pattern was
// previously listed in two identical InstRW records; it appears once now.
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD",
                        "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
899
// 4-element structure load

// Without writeback. The pseudo pattern is anchored with '$' so it no longer
// prefix-matches the "..._UPD" pseudos handled by the writeback entry.
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
             (instregex "VLD4(d|q)(8|16|32)$",
                        "VLD4(d|q)Asm_(8|16|32)$",
                        "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;

// With writeback.
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD4(d|q)(8|16|32)_UPD",
                        "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Lane and dup forms without writeback.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
             (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$",
                        "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$",
                        "VLD4LN(d|q)(8|16|32)Pseudo$",
                        "VLD4DUPd(8|16|32)Pseudo$")>;

// Lane and dup forms with writeback.
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
             (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD",
                        "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
916
//---
// VSTx. Vector Stores
//---
// 1-element structure store

// Without writeback: the write grows with the number of registers named by
// the opcode suffix (d, q, T, Q).
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;

// Lane forms without writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$",
                        "VST1LNdAsm_(8|16|32)$",
                        "VST1LNq(8|16|32)Pseudo$")>;

// With writeback: same writes plus R52WriteAdr for the updated address.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD",
                        "VST1LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST1LNq(8|16|32)Pseudo_UPD")>;
942
// 2-element structure store

// Without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$",
                        "VST2q(8|16|32)Pseudo$")>;

// Lane forms without writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$",
                        "VST2LNdAsm_(8|16|32)$",
                        "VST2LNd(8|16|32)Pseudo$",
                        "VST2LNq(16|32)$",
                        "VST2LNqAsm_(16|32)$",
                        "VST2LNq(16|32)Pseudo$")>;

// With writeback: same writes plus R52WriteAdr for the updated address.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb",
                        "VST2q(8|16|32)PseudoWB")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD",
                        "VST2LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST2LNd(8|16|32)Pseudo_UPD",
                        "VST2LNq(16|32)_UPD",
                        "VST2LNqWB_(fixed|register)_Asm_(16|32)",
                        "VST2LNq(16|32)Pseudo_UPD")>;
965
// 3-element structure store

// Without writeback. The pseudo pattern accepts both d and q, matching the
// writeback entry below; it was d-only before, so any q-register pseudo
// without writeback fell outside this model.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$",
                        "VST3(d|q)Asm_(8|16|32)$",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Lane forms without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$",
                        "VST3LNdAsm_(8|16|32)$",
                        "VST3LNd(8|16|32)Pseudo$",
                        "VST3LNq(16|32)$",
                        "VST3LNqAsm_(16|32)$",
                        "VST3LNq(16|32)Pseudo$")>;

// With writeback: same write plus R52WriteAdr for the updated address.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$",
                        "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$",
                        "VST3LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST3LNd(8|16|32)Pseudo_UPD$",
                        "VST3LNq(16|32)_UPD$",
                        "VST3LNqWB_(fixed|register)_Asm_(16|32)$",
                        "VST3LNq(16|32)Pseudo_UPD$")>;
988
// 4-element structure store

// Without writeback. The pseudo pattern accepts d and q and both the "Pseudo"
// and "oddPseudo" spellings, matching the writeback entry below; it was
// limited to "VST4d...Pseudo" before.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$",
                        "VST4(d|q)Asm_(8|16|32)$",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

// Lane forms without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$",
                        "VST4LNdAsm_(8|16|32)$",
                        "VST4LNd(8|16|32)Pseudo$",
                        "VST4LNq(16|32)$",
                        "VST4LNqAsm_(16|32)$",
                        "VST4LNq(16|32)Pseudo$")>;

// With writeback: same write plus R52WriteAdr for the updated address.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD",
                        "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD",
                        "VST4LNdWB_(fixed|register)_Asm_(8|16|32)",
                        "VST4LNd(8|16|32)Pseudo_UPD",
                        "VST4LNq(16|32)_UPD",
                        "VST4LNqWB_(fixed|register)_Asm_(16|32)",
                        "VST4LNq(16|32)Pseudo_UPD")>;
1011
1012} // R52 SchedModel