blob: 7334259bf574e015fb6a4ce67e9c2825dee8b9d4 [file] [log] [blame]
Bob Wilson22679332009-08-05 23:12:45 +00001//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE "neon-prealloc"
11#include "ARM.h"
12#include "ARMInstrInfo.h"
13#include "llvm/CodeGen/MachineInstr.h"
14#include "llvm/CodeGen/MachineInstrBuilder.h"
15#include "llvm/CodeGen/MachineFunctionPass.h"
16using namespace llvm;
17
18namespace {
Nick Lewycky492d06e2009-10-25 06:33:48 +000019 class NEONPreAllocPass : public MachineFunctionPass {
Bob Wilson22679332009-08-05 23:12:45 +000020 const TargetInstrInfo *TII;
21
22 public:
23 static char ID;
24 NEONPreAllocPass() : MachineFunctionPass(&ID) {}
25
26 virtual bool runOnMachineFunction(MachineFunction &MF);
27
28 virtual const char *getPassName() const {
29 return "NEON register pre-allocation pass";
30 }
31
32 private:
33 bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
34 };
35
36 char NEONPreAllocPass::ID = 0;
37}
38
Bob Wilsona8b43622009-10-07 17:24:55 +000039static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
40 unsigned &Offset, unsigned &Stride) {
41 // Default to unit stride with no offset.
42 Stride = 1;
43 Offset = 0;
44
Bob Wilson22679332009-08-05 23:12:45 +000045 switch (Opcode) {
46 default:
47 break;
48
Bob Wilson93e7f562010-03-23 05:25:43 +000049 case ARM::VLD1q8:
50 case ARM::VLD1q16:
51 case ARM::VLD1q32:
52 case ARM::VLD1q64:
Bob Wilson22679332009-08-05 23:12:45 +000053 case ARM::VLD2d8:
54 case ARM::VLD2d16:
55 case ARM::VLD2d32:
Bob Wilsond14b8b62009-09-01 04:26:28 +000056 case ARM::VLD2LNd8:
57 case ARM::VLD2LNd16:
58 case ARM::VLD2LNd32:
Bob Wilson22679332009-08-05 23:12:45 +000059 FirstOpnd = 0;
60 NumRegs = 2;
61 return true;
62
Bob Wilson47a1ff62009-10-08 22:27:33 +000063 case ARM::VLD2q8:
64 case ARM::VLD2q16:
65 case ARM::VLD2q32:
66 FirstOpnd = 0;
67 NumRegs = 4;
68 return true;
69
Bob Wilson14e26b32010-03-20 18:35:24 +000070 case ARM::VLD2LNq16:
71 case ARM::VLD2LNq32:
Bob Wilson5687d8a2009-10-08 18:56:10 +000072 FirstOpnd = 0;
73 NumRegs = 2;
74 Offset = 0;
75 Stride = 2;
76 return true;
77
Bob Wilson14e26b32010-03-20 18:35:24 +000078 case ARM::VLD2LNq16odd:
79 case ARM::VLD2LNq32odd:
Bob Wilson5687d8a2009-10-08 18:56:10 +000080 FirstOpnd = 0;
81 NumRegs = 2;
82 Offset = 1;
83 Stride = 2;
84 return true;
85
Bob Wilson22679332009-08-05 23:12:45 +000086 case ARM::VLD3d8:
87 case ARM::VLD3d16:
88 case ARM::VLD3d32:
Bob Wilson0ae84492010-03-22 18:13:18 +000089 case ARM::VLD1d64T:
Bob Wilsond14b8b62009-09-01 04:26:28 +000090 case ARM::VLD3LNd8:
91 case ARM::VLD3LNd16:
92 case ARM::VLD3LNd32:
Bob Wilson22679332009-08-05 23:12:45 +000093 FirstOpnd = 0;
94 NumRegs = 3;
95 return true;
96
Bob Wilson14e26b32010-03-20 18:35:24 +000097 case ARM::VLD3q8_UPD:
98 case ARM::VLD3q16_UPD:
99 case ARM::VLD3q32_UPD:
Bob Wilsona8b43622009-10-07 17:24:55 +0000100 FirstOpnd = 0;
101 NumRegs = 3;
102 Offset = 0;
103 Stride = 2;
104 return true;
105
Bob Wilson14e26b32010-03-20 18:35:24 +0000106 case ARM::VLD3q8odd_UPD:
107 case ARM::VLD3q16odd_UPD:
108 case ARM::VLD3q32odd_UPD:
Bob Wilsona8b43622009-10-07 17:24:55 +0000109 FirstOpnd = 0;
110 NumRegs = 3;
111 Offset = 1;
112 Stride = 2;
113 return true;
114
Bob Wilson14e26b32010-03-20 18:35:24 +0000115 case ARM::VLD3LNq16:
116 case ARM::VLD3LNq32:
Bob Wilson47a1ff62009-10-08 22:27:33 +0000117 FirstOpnd = 0;
118 NumRegs = 3;
119 Offset = 0;
120 Stride = 2;
121 return true;
122
Bob Wilson14e26b32010-03-20 18:35:24 +0000123 case ARM::VLD3LNq16odd:
124 case ARM::VLD3LNq32odd:
Bob Wilson47a1ff62009-10-08 22:27:33 +0000125 FirstOpnd = 0;
126 NumRegs = 3;
127 Offset = 1;
128 Stride = 2;
129 return true;
130
Bob Wilson22679332009-08-05 23:12:45 +0000131 case ARM::VLD4d8:
132 case ARM::VLD4d16:
133 case ARM::VLD4d32:
Bob Wilson0ae84492010-03-22 18:13:18 +0000134 case ARM::VLD1d64Q:
Bob Wilsond14b8b62009-09-01 04:26:28 +0000135 case ARM::VLD4LNd8:
136 case ARM::VLD4LNd16:
137 case ARM::VLD4LNd32:
Bob Wilson22679332009-08-05 23:12:45 +0000138 FirstOpnd = 0;
139 NumRegs = 4;
140 return true;
Bob Wilson6a209cd2009-08-06 18:47:44 +0000141
Bob Wilson14e26b32010-03-20 18:35:24 +0000142 case ARM::VLD4q8_UPD:
143 case ARM::VLD4q16_UPD:
144 case ARM::VLD4q32_UPD:
Bob Wilson004a2e12009-10-07 18:09:32 +0000145 FirstOpnd = 0;
146 NumRegs = 4;
147 Offset = 0;
148 Stride = 2;
149 return true;
150
Bob Wilson14e26b32010-03-20 18:35:24 +0000151 case ARM::VLD4q8odd_UPD:
152 case ARM::VLD4q16odd_UPD:
153 case ARM::VLD4q32odd_UPD:
Bob Wilson004a2e12009-10-07 18:09:32 +0000154 FirstOpnd = 0;
155 NumRegs = 4;
156 Offset = 1;
157 Stride = 2;
158 return true;
159
Bob Wilson14e26b32010-03-20 18:35:24 +0000160 case ARM::VLD4LNq16:
161 case ARM::VLD4LNq32:
Bob Wilson7a8c6df2009-10-08 22:53:57 +0000162 FirstOpnd = 0;
163 NumRegs = 4;
164 Offset = 0;
165 Stride = 2;
166 return true;
167
Bob Wilson14e26b32010-03-20 18:35:24 +0000168 case ARM::VLD4LNq16odd:
169 case ARM::VLD4LNq32odd:
Bob Wilson7a8c6df2009-10-08 22:53:57 +0000170 FirstOpnd = 0;
171 NumRegs = 4;
172 Offset = 1;
173 Stride = 2;
174 return true;
175
Bob Wilsond0a74632010-03-23 06:20:33 +0000176 case ARM::VST1q8:
177 case ARM::VST1q16:
178 case ARM::VST1q32:
179 case ARM::VST1q64:
Bob Wilson6a209cd2009-08-06 18:47:44 +0000180 case ARM::VST2d8:
181 case ARM::VST2d16:
182 case ARM::VST2d32:
Bob Wilsonc2d65852009-09-01 18:51:56 +0000183 case ARM::VST2LNd8:
184 case ARM::VST2LNd16:
185 case ARM::VST2LNd32:
Bob Wilson255e7482010-03-20 22:13:40 +0000186 FirstOpnd = 2;
Bob Wilson6a209cd2009-08-06 18:47:44 +0000187 NumRegs = 2;
188 return true;
189
Bob Wilson5fa67d352009-10-07 18:47:39 +0000190 case ARM::VST2q8:
191 case ARM::VST2q16:
192 case ARM::VST2q32:
Bob Wilson255e7482010-03-20 22:13:40 +0000193 FirstOpnd = 2;
Bob Wilson5fa67d352009-10-07 18:47:39 +0000194 NumRegs = 4;
195 return true;
196
Bob Wilson14e26b32010-03-20 18:35:24 +0000197 case ARM::VST2LNq16:
198 case ARM::VST2LNq32:
Bob Wilson255e7482010-03-20 22:13:40 +0000199 FirstOpnd = 2;
Bob Wilson18e94a72009-10-08 23:38:24 +0000200 NumRegs = 2;
201 Offset = 0;
202 Stride = 2;
203 return true;
204
Bob Wilson14e26b32010-03-20 18:35:24 +0000205 case ARM::VST2LNq16odd:
206 case ARM::VST2LNq32odd:
Bob Wilson255e7482010-03-20 22:13:40 +0000207 FirstOpnd = 2;
Bob Wilson18e94a72009-10-08 23:38:24 +0000208 NumRegs = 2;
209 Offset = 1;
210 Stride = 2;
211 return true;
212
Bob Wilson6a209cd2009-08-06 18:47:44 +0000213 case ARM::VST3d8:
214 case ARM::VST3d16:
215 case ARM::VST3d32:
Bob Wilson0ae84492010-03-22 18:13:18 +0000216 case ARM::VST1d64T:
Bob Wilsonc2d65852009-09-01 18:51:56 +0000217 case ARM::VST3LNd8:
218 case ARM::VST3LNd16:
219 case ARM::VST3LNd32:
Bob Wilson255e7482010-03-20 22:13:40 +0000220 FirstOpnd = 2;
Bob Wilson6a209cd2009-08-06 18:47:44 +0000221 NumRegs = 3;
222 return true;
223
Bob Wilson14e26b32010-03-20 18:35:24 +0000224 case ARM::VST3q8_UPD:
225 case ARM::VST3q16_UPD:
226 case ARM::VST3q32_UPD:
Bob Wilson255e7482010-03-20 22:13:40 +0000227 FirstOpnd = 4;
Bob Wilson2a85bd12009-10-07 20:30:08 +0000228 NumRegs = 3;
229 Offset = 0;
230 Stride = 2;
231 return true;
232
Bob Wilson14e26b32010-03-20 18:35:24 +0000233 case ARM::VST3q8odd_UPD:
234 case ARM::VST3q16odd_UPD:
235 case ARM::VST3q32odd_UPD:
Bob Wilson255e7482010-03-20 22:13:40 +0000236 FirstOpnd = 4;
Bob Wilson2a85bd12009-10-07 20:30:08 +0000237 NumRegs = 3;
238 Offset = 1;
239 Stride = 2;
240 return true;
241
Bob Wilson14e26b32010-03-20 18:35:24 +0000242 case ARM::VST3LNq16:
243 case ARM::VST3LNq32:
Bob Wilson255e7482010-03-20 22:13:40 +0000244 FirstOpnd = 2;
Bob Wilsondbffb212009-10-08 23:51:31 +0000245 NumRegs = 3;
246 Offset = 0;
247 Stride = 2;
248 return true;
249
Bob Wilson14e26b32010-03-20 18:35:24 +0000250 case ARM::VST3LNq16odd:
251 case ARM::VST3LNq32odd:
Bob Wilson255e7482010-03-20 22:13:40 +0000252 FirstOpnd = 2;
Bob Wilsondbffb212009-10-08 23:51:31 +0000253 NumRegs = 3;
254 Offset = 1;
255 Stride = 2;
256 return true;
257
Bob Wilson6a209cd2009-08-06 18:47:44 +0000258 case ARM::VST4d8:
259 case ARM::VST4d16:
260 case ARM::VST4d32:
Bob Wilson0ae84492010-03-22 18:13:18 +0000261 case ARM::VST1d64Q:
Bob Wilsonc2d65852009-09-01 18:51:56 +0000262 case ARM::VST4LNd8:
263 case ARM::VST4LNd16:
264 case ARM::VST4LNd32:
Bob Wilson255e7482010-03-20 22:13:40 +0000265 FirstOpnd = 2;
Bob Wilson6a209cd2009-08-06 18:47:44 +0000266 NumRegs = 4;
267 return true;
Bob Wilson5ef42ed2009-08-12 20:51:55 +0000268
Bob Wilson14e26b32010-03-20 18:35:24 +0000269 case ARM::VST4q8_UPD:
270 case ARM::VST4q16_UPD:
271 case ARM::VST4q32_UPD:
Bob Wilson255e7482010-03-20 22:13:40 +0000272 FirstOpnd = 4;
Bob Wilson931c76b2009-10-07 20:49:18 +0000273 NumRegs = 4;
274 Offset = 0;
275 Stride = 2;
276 return true;
277
Bob Wilson14e26b32010-03-20 18:35:24 +0000278 case ARM::VST4q8odd_UPD:
279 case ARM::VST4q16odd_UPD:
280 case ARM::VST4q32odd_UPD:
Bob Wilson255e7482010-03-20 22:13:40 +0000281 FirstOpnd = 4;
Bob Wilson931c76b2009-10-07 20:49:18 +0000282 NumRegs = 4;
283 Offset = 1;
284 Stride = 2;
285 return true;
286
Bob Wilson14e26b32010-03-20 18:35:24 +0000287 case ARM::VST4LNq16:
288 case ARM::VST4LNq32:
Bob Wilson255e7482010-03-20 22:13:40 +0000289 FirstOpnd = 2;
Bob Wilsonc7692e02009-10-09 00:01:36 +0000290 NumRegs = 4;
291 Offset = 0;
292 Stride = 2;
293 return true;
294
Bob Wilson14e26b32010-03-20 18:35:24 +0000295 case ARM::VST4LNq16odd:
296 case ARM::VST4LNq32odd:
Bob Wilson255e7482010-03-20 22:13:40 +0000297 FirstOpnd = 2;
Bob Wilsonc7692e02009-10-09 00:01:36 +0000298 NumRegs = 4;
299 Offset = 1;
300 Stride = 2;
301 return true;
302
Bob Wilson5ef42ed2009-08-12 20:51:55 +0000303 case ARM::VTBL2:
304 FirstOpnd = 1;
305 NumRegs = 2;
306 return true;
307
308 case ARM::VTBL3:
309 FirstOpnd = 1;
310 NumRegs = 3;
311 return true;
312
313 case ARM::VTBL4:
314 FirstOpnd = 1;
315 NumRegs = 4;
316 return true;
317
318 case ARM::VTBX2:
319 FirstOpnd = 2;
320 NumRegs = 2;
321 return true;
322
323 case ARM::VTBX3:
324 FirstOpnd = 2;
325 NumRegs = 3;
326 return true;
327
328 case ARM::VTBX4:
329 FirstOpnd = 2;
330 NumRegs = 4;
331 return true;
Bob Wilson22679332009-08-05 23:12:45 +0000332 }
333
334 return false;
335}
336
337bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
338 bool Modified = false;
339
340 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
341 for (; MBBI != E; ++MBBI) {
342 MachineInstr *MI = &*MBBI;
Bob Wilsona8b43622009-10-07 17:24:55 +0000343 unsigned FirstOpnd, NumRegs, Offset, Stride;
344 if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
Bob Wilson22679332009-08-05 23:12:45 +0000345 continue;
346
Chris Lattnerb44b4292009-12-03 00:50:42 +0000347 MachineBasicBlock::iterator NextI = llvm::next(MBBI);
Bob Wilson22679332009-08-05 23:12:45 +0000348 for (unsigned R = 0; R < NumRegs; ++R) {
349 MachineOperand &MO = MI->getOperand(FirstOpnd + R);
350 assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
351 unsigned VirtReg = MO.getReg();
352 assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
353 "expected a virtual register");
354
355 // For now, just assign a fixed set of adjacent registers.
356 // This leaves plenty of room for future improvements.
357 static const unsigned NEONDRegs[] = {
Bob Wilsona8b43622009-10-07 17:24:55 +0000358 ARM::D0, ARM::D1, ARM::D2, ARM::D3,
359 ARM::D4, ARM::D5, ARM::D6, ARM::D7
Bob Wilson22679332009-08-05 23:12:45 +0000360 };
Bob Wilsona8b43622009-10-07 17:24:55 +0000361 MO.setReg(NEONDRegs[Offset + R * Stride]);
Bob Wilson22679332009-08-05 23:12:45 +0000362
363 if (MO.isUse()) {
364 // Insert a copy from VirtReg.
Bob Wilson33609452009-10-06 22:01:15 +0000365 TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
366 ARM::DPRRegisterClass, ARM::DPRRegisterClass);
Bob Wilson22679332009-08-05 23:12:45 +0000367 if (MO.isKill()) {
368 MachineInstr *CopyMI = prior(MBBI);
369 CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
370 }
371 MO.setIsKill();
372 } else if (MO.isDef() && !MO.isDead()) {
373 // Add a copy to VirtReg.
Bob Wilson33609452009-10-06 22:01:15 +0000374 TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
375 ARM::DPRRegisterClass, ARM::DPRRegisterClass);
Bob Wilson22679332009-08-05 23:12:45 +0000376 }
377 }
378 }
379
380 return Modified;
381}
382
383bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
384 TII = MF.getTarget().getInstrInfo();
385
386 bool Modified = false;
387 for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
388 ++MFI) {
389 MachineBasicBlock &MBB = *MFI;
390 Modified |= PreAllocNEONRegisters(MBB);
391 }
392
393 return Modified;
394}
395
396/// createNEONPreAllocPass - returns an instance of the NEON register
397/// pre-allocation pass.
398FunctionPass *llvm::createNEONPreAllocPass() {
399 return new NEONPreAllocPass();
400}