//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };
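
  // Illustrative note (not from the original comments): the "preferred slot"
  // is where SPU scalar instructions expect a scalar to live within a 128-bit
  // register. Under this table, an i32 occupies bytes 0..3 of the 16-byte
  // chunk and an i16 occupies bytes 2..3, so:
  //
  //   getValueTypeMapEntry(MVT::i32)->prefslot_byte == 0
  //   getValueTypeMapEntry(MVT::i16)->prefslot_byte == 2
  //
  // The load/store lowering below rotates 16-byte chunks so that the
  // requested element lands in this slot.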

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }
}
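
// A brief orientation on SPU addressing forms as used throughout this file
// (a summary from the SPU ISA, not part of the original comments):
//
//   A-form: absolute 18-bit address,             e.g.  lqa  $3, sym
//   D-form: register base + signed 10-bit offset, e.g. lqd  $3, 16($1)
//   X-form: register base + register index,       e.g. lqx  $3, $4, $5
//
// SPUISD::AFormAddr/DFormAddr/XFormAddr wrap addresses so instruction
// selection can pick the matching load/store form; the "large memory" code
// model generally forces X-form, since A-form immediates cannot reach the
// whole address space.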

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root instruction; FSQRT expands to a library call
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(); may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16, i.e., offset % 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot within the 16-byte chunk
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated for an i1-extended load
  or store.
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
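
// Worked example (illustrative, not from the original comments): loading an
// i32 from basePtr+6. AlignedLoad sees the ISD::ADD, so alignOffs = 6 and
// prefSlotOffs = (6 & 0xf) - prefslot_byte(i32) = 6 - 0 = 6. The 16-byte
// chunk is fetched from basePtr + (alignOffs & ~0xf) = basePtr + 0, and the
// caller must then rotate the chunk left by 6 bytes so the word lands in the
// preferred slot (bytes 0..3).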
static SDOperand
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            unsigned &VT, bool &was16aligned)
{
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Modify alignment, since the ADD is likely from getElementPtr:
      switch (basePtr.getOpcode()) {
      case ISD::GlobalAddress:
      case ISD::TargetGlobalAddress: {
        GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
        const GlobalValue *GV = GN->getGlobal();
        alignment = GV->getAlignment();
        break;
      }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      if (isMemoryOperand(basePtr)) {
        SDOperand Zero = DAG.getConstant(0, PtrVT);
        unsigned Opc = (!ST->usingLargeMem()
                        ? SPUISD::AFormAddr
                        : SPUISD::XFormAddr);
        basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
      }
      basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                            basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant(alignOffs, PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
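
// Rotation sketch (illustrative, not from the original comments): an i16 load
// from chunk offset 6 has prefSlotOffs = 6 - 2 = 4, so the lowering below
// emits SPUISD::ROTBYTES_LEFT_CHAINED by 4 bytes:
//
//   chunk:   b0 b1 b2 b3 b4 b5 [b6 b7] b8 ... b15
//   rotated: b4 b5 [b6 b7] b8 ...         ... b3
//
// leaving the halfword in the i16 preferred slot (bytes 2..3), after which
// SPUISD::EXTRACT_ELT0_CHAINED treats the register as the scalar result.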
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDOperand result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.Val == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      if (was16aligned) {
        Ops[0] = the_chain;
        Ops[1] = result;
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[0] = the_chain;
        Ops[1] = result;
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT::ValueType vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
      } else
        vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
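
// Read-modify-write sketch (illustrative, not from the original comments) for
// storing an i32 into a 16-byte chunk at offset 4:
//
//   1. AlignedLoad fetches the containing 16-byte chunk.
//   2. SPUISD::INSERT_MASK builds a shuffle control mask selecting bytes 4..7.
//   3. SPUISD::SHUFB merges the scalar into the chunk per that mask.
//   4. The modified chunk is written back with a normal 16-byte store.
//
// This mirrors the cbd/chd/cwd + shufb + stqd insertion idiom from the SPU
// instruction set.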
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    SDOperand alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.Val == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDOperand basePtr = LN->getBasePtr();
    SDOperand the_chain = alignLoadVec.getValue(1);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDOperand insertEltPtr;
    SDOperand insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.Val->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::DFormAddr) {
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 basePtr.getOperand(0),
                                 insertEltOffs);
    } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
               (basePtr.getOpcode() == ISD::ADD
                && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),
                                 insertEltOffs);
    }

    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
#if 1
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
#else
      return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
#endif
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            ? JTI
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            ? GA
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
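
// Bit-pattern example (illustrative, not from the original comments): 1.0f
// has the IEEE-754 encoding 0x3f800000, so lowering it "as an unsigned
// 32-bit integer" means materializing 0x3f800000 in a register and treating
// the register as f32; -2.5f would likewise become 0xc0200000.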
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
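
// Worked example (illustrative, not from the original comments): Addr = 0x100
// passes both checks (word aligned, and sign-extending its low 18 bits
// reproduces the value), so the returned immediate is 0x100 >> 2 = 0x40.
// Addr = 0x40001 fails the alignment test, and Addr = 0x80000 fails the
// sign-extension round trip, so neither yields a usable LSA immediate.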

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
1374
1375/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1376/// and the value fits into a signed 10-bit constant, and if so, return the
1377/// constant
1378SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1379 MVT::ValueType ValueType) {
1380 if (ConstantSDNode *CN = getVecImm(N)) {
1381 int Value = (int) CN->getValue();
1382 if ((ValueType == MVT::i32 && isS10Constant(Value))
1383 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1384 return DAG.getConstant(Value, ValueType);
1385 }
1386
1387 return SDOperand();
1388}
1389
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    // Compare the bytes unsigned; an arithmetic shift here would sign-extend
    // and spuriously reject values such as 0xffff.
    if (ValueType == MVT::i16
        && Value <= 0xffff              /* truncated from uint64_t */
        && ((Value >> 8) & 0xff) == (Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where the value's lower 16 bits are zero (so it can be loaded into
/// the upper halfwords, ILHU-style), and if so, return the upper 16 bits of
/// the constant.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Don't truncate the 64-bit constant through a 32-bit cast:
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
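    // Elements are packed big-endian within each uint64_t: element 0 lands in
    // the most significant slot of VectorBits[0].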

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // Check that the top 16-bits are the same as the lower 16-bits,
          // ignoring undefs.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // Check that the top 8-bits are the same as the lower 8-bits,
              // ignoring undefs; if so, we have an 8-bit splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
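    // (SPU has no byte-sized immediate load, so the splat is materialized as
    // the equivalent v8i16 splat and bitcast back to v16i8.)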
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          uint64_t val = 0;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
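            // shufb control-byte special values: 0b10xxxxxx (0x80) produces
            // a 0x00 result byte, 0b110xxxxx (0xc0) produces 0xff, and
            // 0b111xxxxx (0xe0) produces 0x80.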
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                  // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
1843 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1844 case MVT::i8:
1845 case MVT::i16:
1846 case MVT::i32:
1847 case MVT::i64:
1848 case MVT::f32:
1849 case MVT::f64:
1850 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1851 }
1852 }
1853
1854 return SDOperand();
1855}
1856
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
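    // SPU has no full 32-bit multiply: MPYU forms the 16x16 product of the
    // low halfwords, and MPYH forms (hi16(a) * lo16(b)) << 16. The full
    // 32-bit product is therefore mpyu(a,b) + mpyh(a,b) + mpyh(b,a).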
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

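    // FSMBI expands each bit of its 16-bit immediate into a byte of 0x00 or
    // 0xff; 0xcccc yields (0xff,0xff,0x00,0x00) per 32-bit word, i.e., a
    // mask selecting the upper halfword of each word in the SELB below.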
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  //   (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2043 // TODO: set Chain = Op(0).getEntryNode()
2044
2045 return DAG.getNode(ISD::FADD, VT,
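  // The return value below is one Newton-Raphson-style refinement of the
  // quotient: with r = BRcpl (~ 1/B) and q0 = A*r, it computes
  // q0 + r*(A - B*q0).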
2046 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2047 DAG.getNode(ISD::FMUL, VT,
2048 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2049 DAG.getNode(ISD::FSUB, VT, A,
2050 DAG.getNode(ISD::FMUL, VT, B,
2051 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2052}
2053

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin = -1, prefslot_end = -1;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  assert(prefslot_begin != -1 && prefslot_end != -1 &&
         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

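  // Build a shufb mask that routes the requested element's bytes into the
  // type's preferred slot, zero-filling any bytes before the slot; e.g.,
  // extracting element 1 of a v4i32 routes source bytes 4..7 into result
  // bytes 0..3.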
  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
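  // (The C*D instructions that INSERT_MASK selects down to derive their
  // insertion control word from the low bits of an address operand; the sum
  // of PtrBase and the element index below serves as that address.)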
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
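    // Duplicate the byte into both halves of a 16-bit value so that bits
    // rotated out of one copy rotate into the other; a 16-bit rotate of the
    // doubled value then behaves like an 8-bit rotate of the original.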
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}

//! Lower byte immediate operations for v16i8 vectors:
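//!
//! If one operand of the AND/OR/XOR is a splat of a single byte value,
//! rebuild the splat out of target constants so that instruction selection
//! can fold it into the byte-immediate forms (ANDBI, ORBI, XORBI).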
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
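    // CNTB counts the ones in each byte; the two byte counts are folded
    // together with x + (x >> 8). The total can be as large as 16, so the
    // final mask must keep five bits.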
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16); // count can reach 16
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
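    // CNTB leaves four byte counts in the 32-bit result; fold them into the
    // low byte with x + (x >> 16) followed by sum + (sum >> 8), then mask
    // with 0xff (the total is at most 32).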
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//    else if (Op.getValueType() == MVT::f64)
//      return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");
    break;      // don't fall through to CTPOP in an NDEBUG build

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shift and rotate operations:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      ConstantSDNode *C = cast<ConstantSDNode>(N0);
      if (C->getValue() == 0)           // 0 << V -> 0.
        return N0;
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      ConstantSDNode *C = cast<ConstantSDNode>(N1);
      if (C->getValue() == 0)           // V << 0 -> V.
        return N0;
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's local store is 256K, so a legal offset is anything within an
  // 18-bit range:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}