blob: 3e684ed971301f303aa921d302961254eafb7fd8 [file] [log] [blame]
Javed Absarf043dac2016-11-15 11:34:54 +00001//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18// A number of forwarding paths enable results of computations to be input
19// to subsequent operations before they are written to registers.
20// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Machine model record for the Cortex-R52; every WriteRes/InstRW in this
// file is attached to it through SchedModel.
def CortexR52Model : SchedMachineModel {
  // R52 is an in-order processor, so no micro-op buffering is modelled.
  let MicroOpBufferSize = 0;
  // Two micro-ops can be dispatched per cycle.
  let IssueWidth = 2;
  // Optimistic value that assumes no misses.
  let LoadLatency = 1;
  // Penalty for a branch direction mispredict, including PFU.
  let MispredictPenalty = 8;
  // Enable the PostRA scheduler pass.
  let PostRAScheduler = 1;
  // Not a complete model: covers instructions applicable to cortex-r52.
  let CompleteModel = 0;
}
30
31
32//===----------------------------------------------------------------------===//
33// Define each kind of processor resource and number available.
34
35// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36// Cortex-R52 is an in-order processor.
37
// Every pipeline is an unbuffered resource; the trailing number in each
// ProcResource<> is how many units of that kind are available.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>; // Int ALU
  def R52UnitMAC   : ProcResource<1>; // Int MAC
  def R52UnitDiv   : ProcResource<1>; // Int Division
  def R52UnitLd    : ProcResource<1>; // Load/Store
  def R52UnitB     : ProcResource<1>; // Branch
  def R52UnitFPALU : ProcResource<2>; // FP ALU
  def R52UnitFPMUL : ProcResource<2>; // FP MUL
  def R52UnitFPDIV : ProcResource<1>; // FP DIV
}
46
// Cortex-R52 specific SchedReads, one per pipeline stage in which an
// operand may be read. Their ReadAdvance values are assigned further down.

// Integer pipeline stages.
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;

// Floating-point pipeline stages; F0 maps to ISS stage of integer pipe.
def R52Read_F0  : SchedRead;
def R52Read_F1  : SchedRead;
def R52Read_F2  : SchedRead;
55
56
57//===----------------------------------------------------------------------===//
58// Subtarget-specific SchedWrite types which map ProcResources and set latency.
59
60let SchedModel = CortexR52Model in {
61
// ALU - Write occurs in Late EX2 (independent of whether shift was required)
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}

// Compares occupy an ALU and are modelled with zero latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}

// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
// The divider is not pipelined, so it is reserved for the full latency.
let Latency = 8, ResourceCycles = [8] in
def : WriteRes<WriteDiv, [R52UnitDiv]>;

// Loads
let Latency = 4 in {
  def : WriteRes<WriteLd,    [R52UnitLd]>;
  def : WriteRes<WritePreLd, [R52UnitLd]>;
}

// Branches - LR written in Late EX2
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// Misc - a no-op uses no resource and issues no micro-ops.
let Latency = 0, NumMicroOps = 0 in
def : WriteRes<WriteNoop, []>;

// Integer pipeline by-passes
def : ReadAdvance<ReadALU,   1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
93
94
// Floating-point. Map target-defined SchedReadWrites to subtarget
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// A multiply-accumulate is internally two insns (MUL then ADD).
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}

// FP div takes a fixed number of cycles and is not pipelined, so the unit
// is reserved for as many cycles as the latency.
let Latency = 7, ResourceCycles = [7] in
def : WriteRes<WriteFPDIV32, [R52UnitDiv]>;

let Latency = 17, ResourceCycles = [17] in
def : WriteRes<WriteFPDIV64, [R52UnitDiv]>;

// Square root uses the same unit and latencies as divide.
// NOTE(review): unlike the divides, no ResourceCycles are set here - confirm
// whether that difference is intentional.
let Latency = 7 in
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]>;
let Latency = 17 in
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]>;

def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
126
127
Javed Absarf043dac2016-11-15 11:34:54 +0000128//===----------------------------------------------------------------------===//
129// Subtarget-specific SchedReadWrites.
130
// Forwarding information - based on when an operand is read.
// The advance grows by one cycle per stage after issue.

// Integer pipeline.
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;

// Floating-point pipeline.
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;
138
139
// Cortex-R52 specific SchedWrites for use with InstRW.
// Each write names the resources it occupies and the latency of its result.

// Integer multiply/accumulate and divide.
let Latency = 4 in
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]>;
let Latency = 8, ResourceCycles = [8] in // not pipelined
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]>;

// Integer loads and stores share the load/store unit.
let Latency = 4 in {
  def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
  def R52WriteST : SchedWriteRes<[R52UnitLd]>;
}

// Address write-back and condition-code writes: no resource, zero latency.
let Latency = 0 in {
  def R52WriteAdr : SchedWriteRes<[]>;
  def R52WriteCC  : SchedWriteRes<[]>;
}

// ALU writes, named after the stage that produces the result.
let Latency = 2 in def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]>;
let Latency = 3 in def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]>;
let Latency = 4 in def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]>;

// Same latencies as EX2/WRI above but without occupying any resource.
let Latency = 3 in def R52WriteNoRSRC_EX2 : SchedWriteRes<[]>;
let Latency = 4 in def R52WriteNoRSRC_WRI : SchedWriteRes<[]>;

// FP ALU writes; the "2" variants list the FP ALU resource twice.
let Latency = 4 in {
  def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 5 in {
  def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}
let Latency = 6 in {
  def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}

// FP multiply.
let Latency = 6 in {
  def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
  def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}

// FP multiply-accumulate: internally two insns (MUL then ADD).
let Latency = 11 in {
  def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
  def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                         R52UnitFPALU, R52UnitFPALU]>;
}

// FP loads and stores go through the load/store unit.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}
182
Javed Absar00cce412017-01-23 20:20:39 +0000183//===----------------------------------------------------------------------===//
184// Floating-point. Map target defined SchedReadWrites to processor specific ones
185//
// Conversions and FP ALU operations (both widths) share the F5 ALU write;
// FP moves use the shorter F3 ALU write.
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
Javed Absarf043dac2016-11-15 11:34:54 +0000190
191//===----------------------------------------------------------------------===//
Javed Absar00cce412017-01-23 20:20:39 +0000192// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
193//
// Register copies are treated as plain ALU writes.
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extension.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
      (instregex "SXTB", "SXTH", "SXTB16",
                 "UXTB", "UXTH", "UXTB16",
                 "t2SXTB", "t2SXTH", "t2SXTB16",
                 "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate and global-address moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
      (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn",
                 "t2MOVCCi", "t2MOVi", "t2MOV_ga_dyn")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
      (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
      (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;

// Select.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SEL", "t2SEL")>;

// Bit-field clear/insert/extract.
def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
      (instregex "BFC", "BFI", "UBFX", "SBFX",
                 "(t|t2)BFC", "(t|t2)BFI",
                 "(t|t2)UBFX", "(t|t2)SBFX")>;

// Saturating arithmetic
def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
      (instregex "QADD", "QSUB", "QDADD", "QDSUB",
                 "SSAT", "SSAT16", "USAT",
                 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
                 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX",
                 "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
                 "t2SSAT", "t2SSAT16", "t2USAT",
                 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16",
                 "t2QASX", "t2QSAX",
                 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16",
                 "t2UQASX","t2UQSAX","t2ABS")>;

// Parallel arithmetic
def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
                 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
                 "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16",
                 "t2SASX", "t2SSAX",
                 "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                 "t2UASX", "t2USAX")>;

// Flag setting.
def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16",
                 "SHASX", "SHSAX",
                 "SXTAB", "SXTAB16", "SXTAH",
                 "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
                 "UHASX", "UHSAX",
                 "UXTAB", "UXTAB16", "UXTAH",
                 "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
                 "t2SHASX", "t2SHSAX",
                 "t2SXTAB", "t2SXTAB16", "t2SXTAH",
                 "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
                 "t2UHASX", "t2UHSAX",
                 "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex "USAD8", "t2USAD8", "tUSAD8",
                 "USADA8", "t2USADA8", "tUSADA8") >;
242
// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
      (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
      "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
      "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
// The SMLALBB / t2SMLALBB patterns were previously spelled "MLALBB" and
// "t2MLALBB", which do not line up with their SMLALBT/SMLALTB/SMLALTT
// neighbours; they are spelled out in full here.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
      "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
      "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
      "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
      "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
      "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
      "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
      "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
      "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
      "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
      "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
      "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
      "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide: uses the non-pipelined divide write.
def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
      (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
270
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform

// Loads without base-register write-back.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
      (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$",
                 "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
                 "LDREX", "t2LDREX",
                 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
                 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
                 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
                 "LDA", "t2LDA")>;

// Pre/post-indexed and translated loads also produce an address write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
      (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
                 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
                 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
                 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
                 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
287
// Register-shifted moves and move-top.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;

// Data processing, register + immediate.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
      "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
      "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
      "t2ORRri", "t2RSBS?ri", "t2SBCri")>;

// Data processing, register + register.
// "CRC" replaces the former "CRC*", which as a regex meant "CR followed by
// zero or more C" rather than the CRC mnemonic prefix.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
      "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
      "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;

// Data processing, register shifted by immediate.
// The Thumb2 add pattern now reads "t2AD(C|D)", matching the ri/rr forms;
// it was "t2AD(|D)", whose empty alternative left out the ADC variant.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
      "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
      "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;

// Data processing, register shifted by register.
// "ORRrsr" was previously misspelled "ORRrsrr".
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
      "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;

// Address generation and immediate moves with no modelled register read.
// NOTE(review): in "MOVST?i16*" the trailing "6*" means "zero or more 6";
// confirm which opcodes this pattern was meant to cover.
def : InstRW<[R52WriteALU_EX1],
    (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

// Compares only write condition codes.
def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal and related single-operand operations.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
      (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

// Status register moves are modelled with the load write.
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
327
328//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
329//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
330//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
331
332//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
333//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
334//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
335
336
// Integer Load, Multiple.
// For every latency from 3 to 25 define two writes: "<N>Cy" occupies the
// load/store unit, "<N>CyNo" uses no resource and issues no micro-ops.
foreach Cycles = 3-25 in {
  def R52WriteILDM#Cycles#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = Cycles;
  }
  def R52WriteILDM#Cycles#CyNo : SchedWriteRes<[]> {
    let Latency = Cycles;
    let NumMicroOps = 0;
  }
}

// One predicate per possible LDM address count (1 to 16).
foreach Count = 1-16 in {
  def R52ILDMAddr#Count#Pred :
    SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#Count>;
}

// Address writes for load-multiple without and with write-back.
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
def R52WriteILDMAddrWB   : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple. For an LDM with N addresses the
// N loaded registers get latencies 4, 5, ..., N+3, each occupying the
// load/store unit.
//
// The 16-address entry is keyed on R52ILDMAddr16Pred. It used to repeat
// R52ILDMAddr15Pred, which made it unreachable behind the 15-address entry.
//
// The fallback entry uses resources for two registers only, as its comment
// says, so every write after the first two is a resource-free "CyNo" write.
// Its last two were previously the resource-consuming 18Cy/19Cy.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy]>,
  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                              R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                              R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                              R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                              R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                              R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                              R52WriteILDM18CyNo, R52WriteILDM19CyNo]>
]> { let Variadic=1; }
442
// Integer Store, Multiple
// A single store step: occupies the load/store unit and counts as two
// micro-ops.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

// R52WriteISTM<N> repeats the single store step N times.
foreach Count = 1-16 in {
  def R52WriteISTM#Count : WriteSequence<[R52WriteIStIncAddr], Count>;
}

// Select the sequence whose length equals the number of LDM/STM addresses.
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
470
// Integer Load, Multiple: plain, with base update, and return/pop forms.
def : InstRW<[R52WriteILDM, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)$",
                 "t2LDM(IA|DA|DB|IB)$",
                 "(t|sys)LDM(IA|DA|DB|IB)$")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)_UPD",
                 "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;

// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$",
                 "STREX", "SRS", "t2SRS", "t2SRSDB",
                 "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
                 "t2STR(i12|i8|s)$", "RFE", "t2RFE",
                 "t2STR[BH](i12|i8|s)$",
                 "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Indexed and translated single stores also produce an address write.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
                 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
                 "STR(H_|HT_)(PRE|POST)", "STR(BT|HT|T)",
                 "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
                 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Integer Store, Multiple: plain, then with base update / push.
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)$",
                 "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)_UPD",
                 "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
                 "PUSH", "tPUSH")>;

// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
      (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd],
      (instregex "LDRLIT_ga_pcrel_ldr")>;
508
509
510
511//===----------------------------------------------------------------------===//
512// VFP, Floating Point Support
// In this section the fq/hq forms use the "2" FP ALU write and the other
// forms use the single write.

// Absolute difference.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VABD(fq|hq)")>;

// Absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
      (instregex "VABS(fq|hq)")>;

// Absolute compares.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add and subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register load and store.
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
      (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
      (instregex "VSTR")>;
528
529
530//===----------------------------------------------------------------------===//
531// Neon Support
532
// vector multiple load stores

// One predicate per machine-instruction operand count from 1 to 16.
foreach OpCount = 1-16 in {
  def R52LMAddrPred#OpCount :
    SchedPredicate<"MI->getNumOperands() == "#OpCount>;
}

// Resource-free writes carrying only a latency of 1 to 32 cycles.
foreach Cycles = 1-32 in {
  def R52WriteLM#Cycles#Cy : SchedWriteRes<[]> {
    let Latency = Cycles;
  }
}

// reserve LdSt resource, no dual-issue: the load/store unit is held for
// the given number of cycles, with the same number of micro-ops.
foreach Cycles = 1-32 in {
  def R52ReserveLd#Cycles#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = 0;
    let NumMicroOps = Cycles;
    let ResourceCycles = [Cycles];
  }
}
// Variadic write for vector load-multiple, selected on the instruction's
// operand count. Each pair of operand counts shares one entry shape: k
// latency writes (5, 6, ..., k+4 cycles) followed by a reservation of the
// load/store unit.
//
// Changes from the previous table:
//  * The "8 D reg" entries are keyed on R52LMAddrPred15 and
//    R52LMAddrPred16. They were keyed on 14 and 15, so the first was
//    unreachable behind the "7 D reg" entry for 14, and 16 operands fell
//    through to the unknown case.
//  * The first "3 D reg" entry reserves the unit with R52ReserveLd7Cy like
//    its sibling; it previously used R52ReserveLd4Cy.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,

  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy,
                             R52WriteLM7Cy, R52WriteLM8Cy,
                             R52WriteLM9Cy, R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd5Cy]>
]> { let Variadic=1;}
629
// variable stores. Cannot dual-issue
// Each step from R52WriteSTM<N> to R52WriteSTM<N+1> adds one cycle of
// latency, two micro-ops and one cycle on the load/store unit.
let Latency = 5,  NumMicroOps = 2,  ResourceCycles = [1] in
def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]>;

let Latency = 6,  NumMicroOps = 4,  ResourceCycles = [2] in
def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]>;

let Latency = 7,  NumMicroOps = 6,  ResourceCycles = [3] in
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]>;

let Latency = 8,  NumMicroOps = 8,  ResourceCycles = [4] in
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]>;

let Latency = 9,  NumMicroOps = 10, ResourceCycles = [5] in
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]>;

let Latency = 10, NumMicroOps = 12, ResourceCycles = [6] in
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]>;

let Latency = 11, NumMicroOps = 14, ResourceCycles = [7] in
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]>;

let Latency = 12, NumMicroOps = 16, ResourceCycles = [8] in
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]>;

let Latency = 13, NumMicroOps = 18, ResourceCycles = [9] in
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]>;

let Latency = 14, NumMicroOps = 20, ResourceCycles = [10] in
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]>;

let Latency = 15, NumMicroOps = 22, ResourceCycles = [11] in
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]>;
686
// Store-multiple write chosen by operand count; each consecutive pair of
// operand counts maps to the same R52WriteSTM<N> write.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,

  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,

  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,

  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,

  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,

  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,

  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,

  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,

  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
707
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.

// Vector loads. VLD1 sets only a latency; the rest also set micro-op and
// resource-cycle counts.
let Latency = 5 in
def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 6, NumMicroOps = 3, ResourceCycles = [2] in
def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 7, NumMicroOps = 5, ResourceCycles = [3] in
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 8, NumMicroOps = 7, ResourceCycles = [4] in
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]>;

// Vector stores.
let Latency = 5, NumMicroOps = 1, ResourceCycles = [1] in
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 6, NumMicroOps = 3, ResourceCycles = [2] in
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 7, NumMicroOps = 5, ResourceCycles = [3] in
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 8, NumMicroOps = 7, ResourceCycles = [4] in
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]>;

let Latency = 9, NumMicroOps = 9, ResourceCycles = [5] in
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]>;
751
752
// Absolute difference and accumulate: 64-bit forms use one FP ALU write,
// 128-bit and widening forms use the doubled (Write2) variant.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

// Absolute difference.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// VABDL is the widening form, so its result types are the same as VABAL's
// (v8i16/v4i32/v2i64). The previous list (v16i8|v8i16|v4i32) was copied from
// the 128-bit VABD pattern: v16i8 never matched and v2i64 was left uncovered.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;

// Absolute value, 128-bit integer forms only.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
      (instregex "VABS(v16i8|v8i16|v4i32)")>;
762
// Integer vector add/subtract: 64-bit forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex
      "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
// Integer vector add/subtract: 128-bit forms.
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex
      "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
// Narrowing add/subtract returning the high half (plain and rounding).
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2], (instregex
      "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long and wide add/subtract.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex
      "VADDL", "VADDW", "VSUBL", "VSUBW")>;
772
// Bitwise register-register operations, D (64-bit) and Q (128-bit) forms.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;

// Bit-clear with immediate. Each instruction is mapped exactly once; an
// identical second mapping of the 128-bit forms used to follow the
// VBIF/VBIT/VBSL entries and has been removed as redundant.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise insert/select, which take three source operands.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
783
// Compares, count-leading-zeros and population count.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex
      "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
// Conversions between integer, fixed-point and floating-point.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex
      "VCVT", "VSITO", "VUITO", "VTO")>;

// Duplicate: the VDUP forms read at ISS, the VDUPLN (lane) forms read at F1.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)q")>;

// Vector extract (and VSEL, which shares the 64-bit entry).
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTq(8|16|32|64)")>;

// Fused multiply-accumulate / multiply-subtract on the FP MAC write.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add/subtract.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
802
// Remaining NEON/VFP instructions, kept in their original order.
def : InstRW<[R52WriteVLDM],
      (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI],
      (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VNEG")>;
// Pairwise add, and pairwise add long / accumulate long.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADAL", "VPADDL")>;
// Saturating absolute value, 64-bit then 128-bit forms.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
      (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
// Saturating add/subtract, 64-bit then 128-bit forms.
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2], (instregex
      "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2], (instregex
      "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
// Saturating doubling multiplies and multiply-accumulates.
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMUL", "VQRDMUL")>;
// Saturating narrow/negate/shift.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex
      "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
// Rounding shifts and table lookups.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
// Swap, transpose, unzip and zip.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
822
823//---
824// VLDx. Vector Loads
825//---
// 1-element structure load.
// Without writeback: the write resource grows with the number of registers.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
      (instregex "VLD1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
      (instregex "VLD1d64TPseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
      (instregex "VLD1d64QPseudo$")>;

// Single-lane and all-lanes (DUP) loads without writeback.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1LNdAsm_(8|16|32)")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;

// Writeback forms additionally produce the updated address (R52WriteAdr).
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d64TPseudoWB")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d64QPseudoWB")>;

// Single-lane and all-lanes loads with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
849
// 2-element structure load
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;

// Single-lane and all-lanes (DUP) forms without writeback.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
// instregex matches from the start of the instruction name, so these two
// patterns need the trailing '$'. Without it they would also match the
// ..Pseudo_UPD writeback pseudos, which have their own entries (with
// R52WriteAdr) further down.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo$")>;

// Single-lane and all-lanes forms with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;

def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;

def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
877
// 3-element structure load
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
// Anchored with '$' so that this non-writeback entry does not also match the
// ..seudo_UPD pseudos handled by the next entry (instregex matches from the
// start of the instruction name).
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane and all-lanes (DUP) forms without writeback.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;

// Single-lane and all-lanes forms with writeback. The WB_(fixed|register)
// entry used to be listed twice; the redundant copy has been dropped.
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
894
// 4-element structure load
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
// Anchored with '$' so that this non-writeback entry does not also match the
// ..seudo_UPD pseudos, which have their own entry (with R52WriteAdr) below.
// instregex matches from the start of the instruction name.
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;


// Single-lane and all-lanes (DUP) forms, without and then with writeback.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;

911
912//---
913// VSTx. Vector Stores
914//---
// 1-element structure store.
// Without writeback: the write resource grows with the number of registers.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64QPseudo$")>;

// Single-lane stores without writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNq(8|16|32)Pseudo$")>;

// Writeback forms additionally produce the updated address (R52WriteAdr).
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d64QPseudoWB")>;

// Single-lane stores with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
937
// 2-element structure store.
// Whole-register forms without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)Pseudo$")>;

// Single-lane forms without writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)Pseudo$")>;

// Whole-register forms with writeback (adds R52WriteAdr).
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)PseudoWB")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNq(16|32)Pseudo_UPD")>;
960
// 3-element structure store
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Covers both the d and q pseudos, like the writeback entry below and the
// matching VLD3 entry. With 'd' only, the 'oddP' alternative could not match
// any instruction. NOTE(review): assumes the odd pseudos exist only for the
// q forms -- confirm against ARMInstrNEON.td.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane forms without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;

// Whole-register forms with writeback (adds R52WriteAdr).
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
983
// 4-element structure store
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Covers the d pseudos and the q odd pseudos, like the writeback entry below
// and the matching VLD4 entry. Previously only "VST4d..Pseudo" was listed.
// NOTE(review): assumes VST4q..oddPseudo exists and was meant to be covered
// here -- confirm against ARMInstrNEON.td.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane forms without writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;

// Whole-register forms with writeback (adds R52WriteAdr).
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;

1006
1007} // R52 SchedModel