blob: 1b40742a093b9441db8d330c3c7ff8540d57971d [file] [log] [blame]
Javed Absarf043dac2016-11-15 11:34:54 +00001//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18// A number of forwarding paths enable results of computations to be input
19// to subsequent operations before they are written to registers.
20// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
// Top-level machine model record for Cortex-R52; the WriteRes/InstRW records
// in this file are attached to it through `let SchedModel = CortexR52Model`.
22def CortexR52Model : SchedMachineModel {
23 let MicroOpBufferSize = 0; // R52 is an in-order processor
24 let IssueWidth = 2; // 2 micro-ops dispatched per cycle
25 let LoadLatency = 1; // Optimistic, assuming no misses
 // NOTE(review): WriteLd/WritePreLd in this file are given Latency = 4;
 // confirm that LoadLatency = 1 is the intended value here.
26 let MispredictPenalty = 8; // A branch direction mispredict, including PFU
27 let PostRAScheduler = 1; // Enable PostRA scheduler pass.
28 let CompleteModel = 0; // Not marked complete: covers instructions applicable to cortex-r52.
29}
30
31
32//===----------------------------------------------------------------------===//
33// Define each kind of processor resource and number available.
34
35// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36// Cortex-R52 is an in-order processor.
37
// One ProcResource per pipeline kind. The template argument is the number of
// units of that kind; every resource is declared with BufferSize = 0.
38def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU (2 units)
39def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
40def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
41def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
42def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
43def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU (2 units)
44def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL (2 units)
45def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
46
47// Cortex-R52 specific SchedReads
// Each read is named after the pipeline stage in which the operand is needed.
// Their ReadAdvance values are assigned further down in this file:
// ISS/F0 = 0, EX1/F1 = 1, EX2/F2 = 2.
48def R52Read_ISS : SchedRead;
49def R52Read_EX1 : SchedRead;
50def R52Read_EX2 : SchedRead;
51def R52Read_WRI : SchedRead; // NOTE(review): no ReadAdvance is given for this read in the visible part of the file
52def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
53def R52Read_F1 : SchedRead;
54def R52Read_F2 : SchedRead;
55
56
57//===----------------------------------------------------------------------===//
58// Subtarget-specific SchedWrite types which map ProcResources and set latency.
59
60let SchedModel = CortexR52Model in {
61
// Bind the generic ARM SchedWrite/SchedRead types to R52 resources and
// latencies. Each WriteRes names the write, the resources it uses, and
// its latency.
62// ALU - Write occurs in Late EX2 (independent of whether shift was required)
63def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
64def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
65def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
66def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
67
68// Compares - use an ALU and are given zero latency
69def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
70def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
71def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
72
73// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
74def : WriteRes<WriteDiv, [R52UnitDiv]> {
75 let Latency = 8; let ResourceCycles = [8]; // not pipelined
76}
77
78// Loads
79def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
80def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
81
82// Branches - LR written in Late EX2
// NOTE(review): all three branch writes are given Latency = 0, and WriteBrTbl
// is mapped to R52UnitALU rather than R52UnitB - confirm both are intended.
83def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
84def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
85def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
86
87// Misc
88def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
89def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
90
91def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
92def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
93
94
95//===----------------------------------------------------------------------===//
96// Subtarget-specific SchedReadWrites.
97
98// Forwarding information - based on when an operand is read
// The second argument is the advance in cycles: 0 for ISS/F0, 1 for EX1/F1,
// 2 for EX2/F2.
99def : ReadAdvance<R52Read_ISS, 0>;
100def : ReadAdvance<R52Read_EX1, 1>;
101def : ReadAdvance<R52Read_EX2, 2>;
102def : ReadAdvance<R52Read_F0, 0>;
103def : ReadAdvance<R52Read_F1, 1>;
104def : ReadAdvance<R52Read_F2, 2>;
105
106
107// Cortex-R52 specific SchedWrites for use with InstRW
// Naming used below: the suffix is the stage in which the result is written
// and fixes the latency (integer: EX1 = 2, EX2 = 3, WRI = 4; FP: F3 = 4,
// F4 = 5, F5 = 6). "NoRSRC" writes use no resource. "Write2..." variants list
// the same resource twice.
108def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
109def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
110 let Latency = 8; let ResourceCycles = [8]; // not pipelined
111}
112def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
113def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
114def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; } // address write-back result: no resource, zero latency
115def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; } // used by the compare/test InstRWs: no resource, zero latency
116def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
117def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
118def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
119
120def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
121def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
122
123def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
124def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
125 let Latency = 4;
126}
127def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
128def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
129 let Latency = 5;
130}
131def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
132def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
133 let Latency = 6;
134}
135def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
136def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
137 let Latency = 6;
138}
// FP multiply-accumulate uses both the FP MUL and the FP ALU resource.
139def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
140 let Latency = 11; // as it is internally two insns (MUL then ADD)
141}
142def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
143 R52UnitFPALU, R52UnitFPALU]> {
144 let Latency = 11;
145}
146
// FP loads/stores go through the integer load/store unit.
147def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
148def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
149
// FP divides hold R52UnitFPDIV for their whole latency (ResourceCycles == Latency).
150def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
151 let Latency = 7; // FP div takes fixed #cycles
152 let ResourceCycles = [7]; // is not pipelined
153 }
154def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
155 let Latency = 17;
156 let ResourceCycles = [17];
157}
158
159
160//===----------------------------------------------------------------------===//
161// Subtarget-specific - map operands to SchedReadWrites
162
// Each InstRW below pairs a list of SchedWrite/SchedRead records with the
// instructions named directly (instrs) or selected by regex (instregex).
// Patterns without a trailing "$" are not end-anchored in this file, so e.g.
// "MOV_ga_pcrel" also covers names that merely start with that string.
163def : InstRW<[WriteALU], (instrs COPY)>;
164
// Sign/zero extend
165def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
166 (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
167 "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
168
// Move immediate / global-address pseudos
169def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
170 (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
171 "t2MOVi", "t2MOV_ga_dyn")>;
172def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
173 (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
174def : InstRW<[R52WriteLd,R52Read_ISS],
175 (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
176
177def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
178
// Bit-field operations
179def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
180 (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
181 "(t|t2)UBFX", "(t|t2)SBFX")>;
182
183// Saturating arithmetic
184def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
185 (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
186 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
187 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
188 "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
189 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
190 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
191
192// Parallel arithmetic
193def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
194 (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
195 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
196 "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
197 "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
198
199// Flag setting.
// NOTE(review): the list below holds the halving add/sub and extend-and-add
// opcodes; confirm the "Flag setting" label above is the intended heading.
200def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
201 (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
202 "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
203 "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
204 "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
205 "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
206 "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
207
208// Sum of Absolute Difference
209def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
210 (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
211
212// Integer Multiply
213def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
214 (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
215 "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
216 "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
217 "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
218
219// Multiply Accumulate
220// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
221// The store pipeline is used partly for 64-bit operations.
// All of these use R52WriteMAC and need their three source operands in ISS.
// The halfword long-accumulate entries are spelled SMLALBB / t2SMLALBB to
// match their SMLALBT/SMLALTB/SMLALTT siblings (previously "MLALBB" and
// "t2MLALBB", which name no opcode).
222def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
223 (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
224 "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
225 "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
226 "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
227 "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
228 "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
229 "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
230 "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
231 "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
232 "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
233 "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
234 "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
235 "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
236
// Integer divide: uses R52WriteDIV; both sources are read in ISS.
237def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
238 (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
239
240// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
241// However, that's non-trivial to specify, so we keep it uniform
// Loads without base write-back.
242def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
243 (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
244 "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
245 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
246 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
247 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
248 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
// Pre/post-indexed and unprivileged loads: a second write (R52WriteAdr) is
// listed after the loaded value.
249def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
250 (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
251 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
252 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
253 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
254 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
255 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
256
257def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
258def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
259
// ALU operations with an immediate operand (single register read in EX1).
260def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
261 "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
262 "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
263 "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
264
// ALU register-register forms: result in EX2, both sources read in EX1.
// The CRC entry is the plain prefix "CRC", matching the Thumb2 "t2CRC" entry
// (it was "CRC*", which as a regex means "CR" followed by zero or more "C").
265def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
266 "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
267 "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
268
// ALU forms with an immediate-shifted register: the shifted operand is read
// in ISS, the other in EX1. The Thumb2 ADC/ADD pattern is "t2AD(C|D)", as in
// the ARM-mode pattern on the first line (it was "t2AD(|D)", dropping ADC).
// NOTE(review): Thumb2 shifted-register opcodes may be named with an "rs"
// suffix rather than "rsi" - verify that the t2 patterns match real opcodes.
269def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
270 "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
271 "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
272
// ALU forms with a register-shifted register: the shifted register and the
// shift amount are read in ISS, the remaining source in EX1.
// The ORR entry is "ORRrsr", following the same <op>rsr naming as the other
// entries (it was "ORRrsrr").
273def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
274 (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
275 "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
276
// Address generation and move/move-not forms with no register read listed.
// NOTE(review): "MOVST?i16*" uses "*" after "6"; confirm the intended pattern.
277def : InstRW<[R52WriteALU_EX1],
278 (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
279
// Shifts and rotates: result in EX1, operands read in ISS.
280def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
281def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
282 (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
283
// Compares: R52WriteCC (no resource, zero latency); shifted operands are
// read in ISS, the others in EX1.
284def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
285def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
286def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
287def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
288
289def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
290 (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
291
292def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
293
// Status-register moves are given the load write (R52UnitLd, latency 4).
294def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
295def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
296
297//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
298//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
299//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
300
301//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
302//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
303//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
304
305
306// Integer Load, Multiple.
// For every latency 3..25 define two writes:
//   R52WriteILDM<Lat>Cy   - uses R52UnitLd, latency Lat
//   R52WriteILDM<Lat>CyNo - no resource, zero micro-ops, latency Lat
307foreach Lat = 3-25 in {
308 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
309 let Latency = Lat;
310 }
311 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
312 let Latency = Lat;
313 let NumMicroOps = 0;
314 }
315}
// R52ILDMAddr<N>Pred holds when TII->getNumLDMAddresses(*MI) equals N (N = 1..16).
316foreach NAddr = 1-16 in {
317 def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
318}
// NOTE(review): the two writes below are not referenced in the visible part of
// this file; R52WriteILDMAddrWB sets no explicit Latency.
319def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
320def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
// Variadic write for integer load-multiple. The variant guarded by
// R52ILDMAddr<N>Pred lists N writes with latencies 4, 5, ..., N+3, one per
// loaded register. N = 2..16 are covered; any other count (including 1) falls
// through to the NoSchedPred entry at the end.
// The 16-write variant is guarded by R52ILDMAddr16Pred (it previously repeated
// R52ILDMAddr15Pred, which made that variant unreachable).
321def R52WriteILDM : SchedWriteVariant<[
322 SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
323
324 SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
325 R52WriteILDM6Cy]>,
326 SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
327 R52WriteILDM6Cy, R52WriteILDM7Cy]>,
328
329 SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
330 R52WriteILDM6Cy, R52WriteILDM7Cy,
331 R52WriteILDM8Cy]>,
332 SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
333 R52WriteILDM6Cy, R52WriteILDM7Cy,
334 R52WriteILDM8Cy, R52WriteILDM9Cy]>,
335
336 SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
337 R52WriteILDM6Cy, R52WriteILDM7Cy,
338 R52WriteILDM8Cy, R52WriteILDM9Cy,
339 R52WriteILDM10Cy]>,
340 SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
341 R52WriteILDM6Cy, R52WriteILDM7Cy,
342 R52WriteILDM8Cy, R52WriteILDM9Cy,
343 R52WriteILDM10Cy, R52WriteILDM11Cy]>,
344
345 SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
346 R52WriteILDM6Cy, R52WriteILDM7Cy,
347 R52WriteILDM8Cy, R52WriteILDM9Cy,
348 R52WriteILDM10Cy, R52WriteILDM11Cy,
349 R52WriteILDM12Cy]>,
350 SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
351 R52WriteILDM6Cy, R52WriteILDM7Cy,
352 R52WriteILDM8Cy, R52WriteILDM9Cy,
353 R52WriteILDM10Cy, R52WriteILDM11Cy,
354 R52WriteILDM12Cy, R52WriteILDM13Cy]>,
355
356 SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
357 R52WriteILDM6Cy, R52WriteILDM7Cy,
358 R52WriteILDM8Cy, R52WriteILDM9Cy,
359 R52WriteILDM10Cy, R52WriteILDM11Cy,
360 R52WriteILDM12Cy, R52WriteILDM13Cy,
361 R52WriteILDM14Cy]>,
362 SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
363 R52WriteILDM6Cy, R52WriteILDM7Cy,
364 R52WriteILDM8Cy, R52WriteILDM9Cy,
365 R52WriteILDM10Cy, R52WriteILDM11Cy,
366 R52WriteILDM12Cy, R52WriteILDM13Cy,
367 R52WriteILDM14Cy, R52WriteILDM15Cy]>,
368
369 SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
370 R52WriteILDM6Cy, R52WriteILDM7Cy,
371 R52WriteILDM8Cy, R52WriteILDM9Cy,
372 R52WriteILDM10Cy, R52WriteILDM11Cy,
373 R52WriteILDM12Cy, R52WriteILDM13Cy,
374 R52WriteILDM14Cy, R52WriteILDM15Cy,
375 R52WriteILDM16Cy]>,
376 SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
377 R52WriteILDM6Cy, R52WriteILDM7Cy,
378 R52WriteILDM8Cy, R52WriteILDM9Cy,
379 R52WriteILDM10Cy, R52WriteILDM11Cy,
380 R52WriteILDM12Cy, R52WriteILDM13Cy,
381 R52WriteILDM14Cy, R52WriteILDM15Cy,
382 R52WriteILDM16Cy, R52WriteILDM17Cy]>,
383
384 SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
385 R52WriteILDM6Cy, R52WriteILDM7Cy,
386 R52WriteILDM8Cy, R52WriteILDM9Cy,
387 R52WriteILDM10Cy, R52WriteILDM11Cy,
388 R52WriteILDM12Cy, R52WriteILDM13Cy,
389 R52WriteILDM14Cy, R52WriteILDM15Cy,
390 R52WriteILDM16Cy, R52WriteILDM17Cy,
391 R52WriteILDM18Cy]>,
392 SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
393 R52WriteILDM6Cy, R52WriteILDM7Cy,
394 R52WriteILDM8Cy, R52WriteILDM9Cy,
395 R52WriteILDM10Cy, R52WriteILDM11Cy,
396 R52WriteILDM12Cy, R52WriteILDM13Cy,
397 R52WriteILDM14Cy, R52WriteILDM15Cy,
398 R52WriteILDM16Cy, R52WriteILDM17Cy,
399 R52WriteILDM18Cy, R52WriteILDM19Cy]>,
400
401// Unknown number of registers, just use resources for two registers.
// The "CyNo" writes in the middle carry latency only (no resource, zero micro-ops).
402 SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
403 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
404 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
405 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
406 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
407 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
408 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
409 R52WriteILDM18Cy, R52WriteILDM19Cy]>
410]> { let Variadic=1; }
411
412// Integer Store, Multiple
// Cost of one stored register in a store-multiple: R52UnitLd, latency 4,
// two micro-ops.
413def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
414 let Latency = 4;
415 let NumMicroOps = 2;
416}
// R52WriteISTM<N> is R52WriteIStIncAddr repeated N times (N = 1..16).
417foreach NumAddr = 1-16 in {
418 def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
419}
// Pick the sequence by register count, reusing the R52ILDMAddr<N>Pred
// predicates. Counts 2..16 are listed; anything else uses the fallback.
420def R52WriteISTM : SchedWriteVariant<[
421 SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
422 SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
423 SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
424 SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
425 SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
426 SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
427 SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
428 SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
429 SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
430 SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
431 SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
432 SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
433 SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
434 SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
435 SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
436 // Unknown number of registers, just use resources for two registers.
437 SchedVar<NoSchedPred, [R52WriteISTM2]>
438]>;
439
// Load-multiple: the plain forms list only the variadic R52WriteILDM; the
// _UPD, return and POP forms list R52WriteAdr as a second write.
440def : InstRW<[R52WriteILDM, R52Read_ISS],
441 (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
442 "(t|sys)LDM(IA|DA|DB|IB)$")>;
443def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
444 (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
445def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
446 (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
447
448// Integer Store, Single Element
// Stores are mapped to R52WriteLd here; R52WriteST is declared with the same
// resource and latency. The second read listed is R52Read_EX2.
449def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
450 (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
451 "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
452 "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
453
// Indexed and unprivileged stores: R52WriteAdr is listed as a second write.
454def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
455 (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
456 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
457 "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
458 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
459
460// Integer Store, Dual
461def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
462 (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
463def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
464 (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
465
// Store-multiple: uses R52WriteISTM; the _UPD and PUSH forms add R52WriteAdr.
466def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
467 (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
468def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
469 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
470 "PUSH", "tPUSH")>;
471
472// LDRLIT pseudo instructions, they expand to LDR + PICADD
473def : InstRW<[R52WriteLd],
474 (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
475// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
476def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
477
478
479
480//===----------------------------------------------------------------------===//
481// VFP, Floating Point Support
// Scalar/vector FP operations. The d/D/S/H forms use the single-resource
// write; the q forms (fq|hq) use the "Write2" variant of the same latency.
482def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
483def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
484
485def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
486def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
487def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
488
489def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
490def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
491
492def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
493def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
494
// Divides: S and H forms use the single-precision write (latency 7), D uses
// the double-precision write (latency 17); operands are read in F0.
495def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
496def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
497
498def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
499 (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
500
// FP load/store: the first read listed is R52Read_ISS (integer pipe), the
// second is R52Read_F1.
501def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
502def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
503
504
505//===----------------------------------------------------------------------===//
506// Neon Support
507
508// vector multiple load stores
// R52LMAddrPred<N> holds when MI->getNumOperands() equals N (N = 1..16).
// NOTE(review): this counts all operands of the MachineInstr, not only the
// registers in the list - confirm the intended mapping to register counts.
509foreach NumAddr = 1-16 in {
510 def R52LMAddrPred#NumAddr :
511 SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
512}
// R52WriteLM<Lat>Cy: latency-only write (no resource) for Lat = 1..32.
513foreach Lat = 1-32 in {
514 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
515 let Latency = Lat;
516 }
517}
// R52ReserveLd<Num>Cy: zero-latency write that occupies R52UnitLd for Num
// cycles and counts as Num micro-ops.
518foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
519 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
520 let Latency = 0;
521 let NumMicroOps = Num;
522 let ResourceCycles = [Num];
523 }
524}
// Variadic write for VLDM. Predicates come in pairs (2k-1, 2k) for k D
// registers; each variant lists k latency-only writes (5, 6, ..., k+4) plus a
// load-unit reservation of k+4 cycles.
// Fixes relative to the previous table:
//  - the "8 D reg" pair is guarded by R52LMAddrPred15/16 (it used 14/15,
//    duplicating the 7-D-reg predicate and never using 16);
//  - the R52LMAddrPred5 variant reserves R52ReserveLd7Cy like its
//    R52LMAddrPred6 twin (it used R52ReserveLd4Cy).
525def R52WriteVLDM : SchedWriteVariant<[
526 // 1 D reg
527 SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
528 R52ReserveLd5Cy]>,
529 SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
530 R52ReserveLd5Cy]>,
531
532 // 2 D reg
533 SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
534 R52ReserveLd6Cy]>,
535 SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
536 R52ReserveLd6Cy]>,
537
538 // 3 D reg
539 SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
540 R52WriteLM7Cy,
541 R52ReserveLd7Cy]>,
542 SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
543 R52WriteLM7Cy,
544 R52ReserveLd7Cy]>,
545
546 // 4 D reg
547 SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
548 R52WriteLM7Cy, R52WriteLM8Cy,
549 R52ReserveLd8Cy]>,
550 SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
551 R52WriteLM7Cy, R52WriteLM8Cy,
552 R52ReserveLd8Cy]>,
553
554 // 5 D reg
555 SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
556 R52WriteLM7Cy, R52WriteLM8Cy,
557 R52WriteLM9Cy,
558 R52ReserveLd9Cy]>,
559 SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
560 R52WriteLM7Cy, R52WriteLM8Cy,
561 R52WriteLM9Cy,
562 R52ReserveLd9Cy]>,
563
564 // 6 D reg
565 SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
566 R52WriteLM7Cy, R52WriteLM8Cy,
567 R52WriteLM9Cy, R52WriteLM10Cy,
568 R52ReserveLd10Cy]>,
569 SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
570 R52WriteLM7Cy, R52WriteLM8Cy,
571 R52WriteLM9Cy, R52WriteLM10Cy,
572 R52ReserveLd10Cy]>,
573
574 // 7 D reg
575 SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
576 R52WriteLM7Cy, R52WriteLM8Cy,
577 R52WriteLM9Cy, R52WriteLM10Cy,
578 R52WriteLM11Cy,
579 R52ReserveLd11Cy]>,
580 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
581 R52WriteLM7Cy, R52WriteLM8Cy,
582 R52WriteLM9Cy, R52WriteLM10Cy,
583 R52WriteLM11Cy,
584 R52ReserveLd11Cy]>,
585
586 // 8 D reg
587 SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
588 R52WriteLM7Cy, R52WriteLM8Cy,
589 R52WriteLM9Cy, R52WriteLM10Cy,
590 R52WriteLM11Cy, R52WriteLM12Cy,
591 R52ReserveLd12Cy]>,
592 SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
593 R52WriteLM7Cy, R52WriteLM8Cy,
594 R52WriteLM9Cy, R52WriteLM10Cy,
595 R52WriteLM11Cy, R52WriteLM12Cy,
596 R52ReserveLd12Cy]>,
597 // unknown number of reg.
598 SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
599 R52WriteLM7Cy, R52WriteLM8Cy,
600 R52WriteLM9Cy, R52WriteLM10Cy,
601 R52WriteLM11Cy, R52WriteLM12Cy,
602 R52ReserveLd5Cy]>
603]> { let Variadic=1;}
604
605// variable stores. Cannot dual-issue
// R52WriteSTM<L>: store-multiple cost with latency L. For k = L - 4 the write
// holds R52UnitLd for k cycles and counts as 2*k micro-ops.
606def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
607 let Latency = 5;
608 let NumMicroOps = 2;
609 let ResourceCycles = [1];
610}
611def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
612 let Latency = 6;
613 let NumMicroOps = 4;
614 let ResourceCycles = [2];
615}
616def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
617 let Latency = 7;
618 let NumMicroOps = 6;
619 let ResourceCycles = [3];
620}
621def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
622 let Latency = 8;
623 let NumMicroOps = 8;
624 let ResourceCycles = [4];
625}
626def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
627 let Latency = 9;
628 let NumMicroOps = 10;
629 let ResourceCycles = [5];
630}
631def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
632 let Latency = 10;
633 let NumMicroOps = 12;
634 let ResourceCycles = [6];
635}
636def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
637 let Latency = 11;
638 let NumMicroOps = 14;
639 let ResourceCycles = [7];
640}
641def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
642 let Latency = 12;
643 let NumMicroOps = 16;
644 let ResourceCycles = [8];
645}
646def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
647 let Latency = 13;
648 let NumMicroOps = 18;
649 let ResourceCycles = [9];
650}
651def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
652 let Latency = 14;
653 let NumMicroOps = 20;
654 let ResourceCycles = [10];
655}
656def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
657 let Latency = 15;
658 let NumMicroOps = 22;
659 let ResourceCycles = [11];
660}
661
// Select the store-multiple cost from the operand-count predicates. Each
// consecutive pair of predicates (2k-1, 2k) maps to R52WriteSTM<k+4>.
// R52WriteSTM13..15 are not selected by this variant.
662def R52WriteSTM : SchedWriteVariant<[
663 SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
664 SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
665 SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
666 SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
667 SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
668 SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
669 SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
670 SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
671 SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
672 SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
673 SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
674 SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
675 SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
676 SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
677 SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
678 SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
679 // unknown number of registers, just use resources for two
680 SchedVar<NoSchedPred, [R52WriteSTM6]>
681]>;
682
683// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
684// another instruction in slot-1, but only in the last issue.
// R52WriteVLD<k>Mem / R52WriteVST<k>Mem: latency k + 4 on R52UnitLd. For
// k >= 2 the write holds the unit for k cycles and counts as 2*k - 1
// micro-ops. R52WriteVLD1Mem sets only the latency; R52WriteVST1Mem spells out
// one micro-op and one resource cycle.
685def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
686def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
687 let Latency = 6;
688 let NumMicroOps = 3;
689 let ResourceCycles = [2];
690}
691def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
692 let Latency = 7;
693 let NumMicroOps = 5;
694 let ResourceCycles = [3];
695}
696def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
697 let Latency = 8;
698 let NumMicroOps = 7;
699 let ResourceCycles = [4];
700}
701def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
702 let Latency = 5;
703 let NumMicroOps = 1;
704 let ResourceCycles = [1];
705}
706def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
707 let Latency = 6;
708 let NumMicroOps = 3;
709 let ResourceCycles = [2];
710}
711def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
712 let Latency = 7;
713 let NumMicroOps = 5;
714 let ResourceCycles = [3];
715}
716def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
717 let Latency = 8;
718 let NumMicroOps = 7;
719 let ResourceCycles = [4];
720}
721def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
722 let Latency = 9;
723 let NumMicroOps = 9;
724 let ResourceCycles = [5];
725}
726
727
// Integer absolute-difference (and accumulate). 64-bit vector forms use the
// single-resource write; 128-bit and widening (L) forms use the "Write2"
// variant of the same latency.
727def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
728def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
729def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
730
731def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
732def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// Widening VABDL uses the widened result types, as VABAL does above
// (it previously listed v16i8|v8i16|v4i32, which left out v2i64).
733def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
735
// NEON integer abs/add/sub and bitwise logic. Only the 128-bit VABS types are
// listed here.
735def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
736
// Integer add/sub read their operands in F2: 64-bit types use the single
// write, 128-bit types the "Write2" variant.
737def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
738 (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
739def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
740 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
// Narrowing high-half add/sub.
741def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
742 (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
743
// Long and wide add/sub.
744def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
745 (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
746
// Bitwise logic: the d suffix takes the single write, the q suffix "Write2".
747def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
748def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
750
// Bit-clear with immediate: 64-bit types use the single write, 128-bit types
// the "Write2" variant.
750def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
751def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
752
// Bitwise insert/select: the first read is in F1, the other two in F2.
753def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
754def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
755
// A second, identical InstRW for "VBICi(v8i16|v4i32)" used to follow here; it
// was removed because it duplicated the 128-bit VBICi mapping in this group.
758
// Compares, count-leading-zeros and population count.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
// Conversions, matched by mnemonic prefix.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// VDUP reads R52Read_ISS; VDUPLN reads R52Read_F1.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
      (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
      (instregex "VDUPLN(8|16|32)q")>;

// VEXT (d and q forms); VSEL shares the d-form entry.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VEXTq(8|16|32|64)")>;

// Vector fused multiply-add/subtract, f and h element forms.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add/subtract.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
777
// Remaining NEON/VFP entries. Most patterns are bare mnemonic prefixes.
def : InstRW<[R52WriteVLDM],
      (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VMOV", "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI],
      (instregex "VMRS")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VMUL", "VNMUL", "VMLA")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VPADAL", "VPADDL")>;

// Saturating operations.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
      (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
      (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2], (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2], (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
      (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;

// Rounding shifts and table lookup, then swap/transpose/zip/unzip.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
798
799//---
800// VLDx. Vector Loads
801//---
// 1-element structure load
// No writeback. Write class by name suffix: d -> VLD1Mem, q -> VLD2Mem,
// T -> VLD3Mem, Q -> VLD4Mem.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS],
      (instregex "VLD1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)T$",
                 "VLD1d64TPseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Q$",
                 "VLD1d64QPseudo$")>;

// Lane and dup forms, no writeback.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS],
      (instregex "VLD1(LN|DUP)d(8|16|32)$",
                 "VLD1LNdAsm_(8|16|32)",
                 "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;

// Writeback forms add R52WriteAdr as a second write.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Twb",
                 "VLD1d64TPseudoWB")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1d(8|16|32|64)Qwb",
                 "VLD1d64QPseudoWB")>;

// Lane and dup forms with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS],
      (instregex "VLD1LNd(8|16|32)_UPD",
                 "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)",
                 "VLD1DUP(d|q)(8|16|32)wb",
                 "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
825
// 2-element structure load
// Writeback forms ("wb", "PseudoWB", "_UPD", "WB_..._Asm") add R52WriteAdr
// as a second write.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;

// Lane and dup forms, no writeback.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
// The two Pseudo patterns are anchored with '$' so they no longer also
// prefix-match the "Pseudo_UPD" writeback pseudos, which have their own
// entries (with R52WriteAdr) below.
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo$")>;

// Lane and dup forms with writeback.
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;

def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;

def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
853
// 3-element structure load
// Writeback forms ("_UPD", "WB_..._Asm") add R52WriteAdr as a second write.
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
// Anchored with '$' so this entry no longer also prefix-matches the
// "seudo_UPD" writeback pseudos handled by the next definition.
def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Lane and dup forms, no writeback.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;

// Lane and dup forms with writeback. The WB_..._Asm entry used to appear
// twice, byte for byte; only one copy is kept.
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
870
// 4-element structure load
// Writeback forms ("_UPD", "WB_..._Asm") add R52WriteAdr as a second write.
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
// Anchored with '$' so this entry no longer also prefix-matches the
// "seudo_UPD" writeback pseudos, which have their own entry below.
def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;


// Lane and dup forms.
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
887
888//---
889// VSTx. Vector Stores
890//---
// 1-element structure store
// No writeback. Write class by name suffix: d -> VST1Mem, q -> VST2Mem,
// T -> VST3Mem, Q -> VST4Mem.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)T$",
                 "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Q$",
                 "VST1d64QPseudo$")>;

// Lane forms, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)$",
                 "VST1LNdAsm_(8|16|32)$",
                 "VST1LNq(8|16|32)Pseudo$")>;

// Writeback forms add R52WriteAdr as a second write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Twb",
                 "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1d(8|16|32|64)Qwb",
                 "VST1d64QPseudoWB")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST1LNd(8|16|32)_UPD",
                 "VST1LNdWB_(fixed|register)_Asm_(8|16|32)",
                 "VST1LNq(8|16|32)Pseudo_UPD")>;
913
// 2-element structure store
// No writeback: d/b forms use VST2Mem, q forms (real and pseudo) VST4Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)$",
                 "VST2q(8|16|32)Pseudo$")>;

// Lane forms, no writeback.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)$",
                 "VST2LNdAsm_(8|16|32)$",
                 "VST2LNd(8|16|32)Pseudo$",
                 "VST2LNq(16|32)$",
                 "VST2LNqAsm_(16|32)$",
                 "VST2LNq(16|32)Pseudo$")>;

// Writeback forms add R52WriteAdr as a second write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2q(8|16|32)wb",
                 "VST2q(8|16|32)PseudoWB")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
      (instregex "VST2LNd(8|16|32)_UPD",
                 "VST2LNdWB_(fixed|register)_Asm_(8|16|32)",
                 "VST2LNd(8|16|32)Pseudo_UPD",
                 "VST2LNq(16|32)_UPD",
                 "VST2LNqWB_(fixed|register)_Asm_(16|32)",
                 "VST2LNq(16|32)Pseudo_UPD")>;
936
// 3-element structure store
// Writeback forms ("_UPD", "WB_..._Asm") add R52WriteAdr as a second write.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Accept both d and q pseudos, matching the "_UPD" pattern further down.
// The previous d-only pattern left the q "oddPseudo" stores without an
// entry in this model.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Lane forms, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;

// Whole-structure forms with writeback.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
959
// 4-element structure store
// Writeback forms ("_UPD", "WB_..._Asm") add R52WriteAdr as a second write.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Accept d and q pseudos, including the "oddPseudo" spelling, matching the
// "_UPD" pattern further down. The previous "VST4d...Pseudo$" pattern left
// the q "oddPseudo" stores without an entry in this model.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

// Lane forms, no writeback.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;

// Whole-structure forms with writeback.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Lane forms with writeback.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
982
983} // R52 SchedModel