blob: c7d7f978216ff00128f135d3fe42012c92a094d6 [file] [log] [blame]
Scott Michel266bc8f2007-12-04 22:23:35 +00001//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner4ee451d2007-12-29 20:36:04 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Scott Michel266bc8f2007-12-04 22:23:35 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SPUTargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SPURegisterNames.h"
15#include "SPUISelLowering.h"
16#include "SPUTargetMachine.h"
17#include "llvm/ADT/VectorExtras.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/CodeGen/CallingConvLower.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
Chris Lattner84bc5422007-12-31 04:13:23 +000023#include "llvm/CodeGen/MachineRegisterInfo.h"
Scott Michel266bc8f2007-12-04 22:23:35 +000024#include "llvm/CodeGen/SelectionDAG.h"
Scott Michel266bc8f2007-12-04 22:23:35 +000025#include "llvm/Constants.h"
26#include "llvm/Function.h"
27#include "llvm/Intrinsics.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/MathExtras.h"
30#include "llvm/Target/TargetOptions.h"
31
32#include <map>
33
34using namespace llvm;
35
36// Used in getTargetNodeName() below
37namespace {
38 std::map<unsigned, const char *> node_names;
39
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
44 };
45
46 const valtype_map_s valtype_map[] = {
47 { MVT::i1, 3 },
48 { MVT::i8, 3 },
49 { MVT::i16, 2 },
50 { MVT::i32, 0 },
51 { MVT::f32, 0 },
52 { MVT::i64, 0 },
53 { MVT::f64, 0 },
54 { MVT::i128, 0 }
55 };
56
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
58
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
61
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
65 break;
66 }
67 }
68
69#ifndef NDEBUG
70 if (retval == 0) {
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
73 << "\n";
74 abort();
75 }
76#endif
77
78 return retval;
79 }
80
81 //! Predicate that returns true if operand is a memory target
82 /*!
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
Scott Michel9de5d0d2008-01-11 02:53:15 +000085 address, external symbol, constant pool) or an A-form
Scott Michel266bc8f2007-12-04 22:23:35 +000086 address.
87 */
88 bool isMemoryOperand(const SDOperand &Op)
89 {
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
Scott Michel266bc8f2007-12-04 22:23:35 +000093 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
Scott Michel266bc8f2007-12-04 22:23:35 +000098 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
Scott Michel9de5d0d2008-01-11 02:53:15 +0000101 || Opc == SPUISD::AFormAddr);
Scott Michel266bc8f2007-12-04 22:23:35 +0000102 }
Scott Michel58c58182008-01-17 20:38:41 +0000103
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
106 {
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
110 }
Scott Michel266bc8f2007-12-04 22:23:35 +0000111}
112
113SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
115 SPUTM(TM)
116{
117 // Fold away setcc operations if possible.
118 setPow2DivIsCheap();
119
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
123
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel504c3692007-12-17 22:32:34 +0000127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
135
136 // SPU has no sign or zero extended loads for i1, i8, i16:
Scott Michel58c58182008-01-17 20:38:41 +0000137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
Scott Michel266bc8f2007-12-04 22:23:35 +0000138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
Chris Lattnerddf89562008-01-17 19:59:44 +0000140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
Scott Michel266bc8f2007-12-04 22:23:35 +0000145
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
Chris Lattnerddf89562008-01-17 19:59:44 +0000149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
154
Scott Michel266bc8f2007-12-04 22:23:35 +0000155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
158
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
163
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
166 ++sctype) {
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
169 }
170
Scott Michel58c58182008-01-17 20:38:41 +0000171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
Scott Michel266bc8f2007-12-04 22:23:35 +0000174
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
184
185 // PowerPC has no SREM/UREM instructions
186 setOperationAction(ISD::SREM, MVT::i32, Expand);
187 setOperationAction(ISD::UREM, MVT::i32, Expand);
188 setOperationAction(ISD::SREM, MVT::i64, Expand);
189 setOperationAction(ISD::UREM, MVT::i64, Expand);
190
191 // We don't support sin/cos/sqrt/fmod
192 setOperationAction(ISD::FSIN , MVT::f64, Expand);
193 setOperationAction(ISD::FCOS , MVT::f64, Expand);
194 setOperationAction(ISD::FREM , MVT::f64, Expand);
195 setOperationAction(ISD::FSIN , MVT::f32, Expand);
196 setOperationAction(ISD::FCOS , MVT::f32, Expand);
197 setOperationAction(ISD::FREM , MVT::f32, Expand);
198
199 // If we're enabling GP optimizations, use hardware square root
200 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
201 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
202
203 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
205
206 // SPU can do rotate right and left, so legalize it... but customize for i8
207 // because instructions don't exist.
208 setOperationAction(ISD::ROTR, MVT::i32, Legal);
209 setOperationAction(ISD::ROTR, MVT::i16, Legal);
210 setOperationAction(ISD::ROTR, MVT::i8, Custom);
211 setOperationAction(ISD::ROTL, MVT::i32, Legal);
212 setOperationAction(ISD::ROTL, MVT::i16, Legal);
213 setOperationAction(ISD::ROTL, MVT::i8, Custom);
214 // SPU has no native version of shift left/right for i8
215 setOperationAction(ISD::SHL, MVT::i8, Custom);
216 setOperationAction(ISD::SRL, MVT::i8, Custom);
217 setOperationAction(ISD::SRA, MVT::i8, Custom);
218
219 // Custom lower i32 multiplications
220 setOperationAction(ISD::MUL, MVT::i32, Custom);
221
222 // Need to custom handle (some) common i8 math ops
223 setOperationAction(ISD::SUB, MVT::i8, Custom);
224 setOperationAction(ISD::MUL, MVT::i8, Custom);
225
226 // SPU does not have BSWAP. It does have i32 support CTLZ.
227 // CTPOP has to be custom lowered.
228 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
229 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
230
231 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
232 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
233 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
234 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
235
236 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
237 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
238
239 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
240
241 // SPU does not have select or setcc
242 setOperationAction(ISD::SELECT, MVT::i1, Expand);
243 setOperationAction(ISD::SELECT, MVT::i8, Expand);
244 setOperationAction(ISD::SELECT, MVT::i16, Expand);
245 setOperationAction(ISD::SELECT, MVT::i32, Expand);
246 setOperationAction(ISD::SELECT, MVT::i64, Expand);
247 setOperationAction(ISD::SELECT, MVT::f32, Expand);
248 setOperationAction(ISD::SELECT, MVT::f64, Expand);
249
250 setOperationAction(ISD::SETCC, MVT::i1, Expand);
251 setOperationAction(ISD::SETCC, MVT::i8, Expand);
252 setOperationAction(ISD::SETCC, MVT::i16, Expand);
253 setOperationAction(ISD::SETCC, MVT::i32, Expand);
254 setOperationAction(ISD::SETCC, MVT::i64, Expand);
255 setOperationAction(ISD::SETCC, MVT::f32, Expand);
256 setOperationAction(ISD::SETCC, MVT::f64, Expand);
257
258 // SPU has a legal FP -> signed INT instruction
259 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
260 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
261 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
262 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
263
264 // FDIV on SPU requires custom lowering
265 setOperationAction(ISD::FDIV, MVT::f32, Custom);
266 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
267
268 // SPU has [U|S]INT_TO_FP
269 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
270 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
271 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
273 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
274 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
275 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
276 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
277
Scott Michel86c041f2007-12-20 00:44:13 +0000278 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
279 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
280 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
281 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
Scott Michel266bc8f2007-12-04 22:23:35 +0000282
283 // We cannot sextinreg(i1). Expand to shifts.
284 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
285
286 // Support label based line numbers.
287 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
288 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
289
290 // We want to legalize GlobalAddress and ConstantPool nodes into the
291 // appropriate instructions to materialize the address.
Scott Michel053c1da2008-01-29 02:16:57 +0000292 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
293 ++sctype) {
294 setOperationAction(ISD::GlobalAddress, sctype, Custom);
295 setOperationAction(ISD::ConstantPool, sctype, Custom);
296 setOperationAction(ISD::JumpTable, sctype, Custom);
297 }
Scott Michel266bc8f2007-12-04 22:23:35 +0000298
299 // RET must be custom lowered, to meet ABI requirements
300 setOperationAction(ISD::RET, MVT::Other, Custom);
301
302 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
303 setOperationAction(ISD::VASTART , MVT::Other, Custom);
304
305 // Use the default implementation.
306 setOperationAction(ISD::VAARG , MVT::Other, Expand);
307 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
308 setOperationAction(ISD::VAEND , MVT::Other, Expand);
309 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
310 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
311 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
312 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
313
314 // Cell SPU has instructions for converting between i64 and fp.
315 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
316 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
317
318 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
319 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
320
321 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
322 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
323
324 // First set operation action for all vector types to expand. Then we
325 // will selectively turn on ones that can be effectively codegen'd.
326 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
329 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
330 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
331 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
332
333 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
334 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
335 // add/sub are legal for all supported vector VT's.
336 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
337 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
338 // mul has to be custom lowered.
339 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
340
341 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
342 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
343 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
344 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
345 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
346 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
347
348 // These operations need to be expanded:
349 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
350 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
351 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
352 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
353 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
354
355 // Custom lower build_vector, constant pool spills, insert and
356 // extract vector elements:
357 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
358 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
360 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
361 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
362 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
363 }
364
365 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
366 setOperationAction(ISD::AND, MVT::v16i8, Custom);
367 setOperationAction(ISD::OR, MVT::v16i8, Custom);
368 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
369 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000370
Scott Michel266bc8f2007-12-04 22:23:35 +0000371 setSetCCResultType(MVT::i32);
372 setShiftAmountType(MVT::i32);
373 setSetCCResultContents(ZeroOrOneSetCCResult);
374
375 setStackPointerRegisterToSaveRestore(SPU::R1);
376
377 // We have target-specific dag combine patterns for the following nodes:
Scott Michel053c1da2008-01-29 02:16:57 +0000378 setTargetDAGCombine(ISD::ADD);
Scott Michel266bc8f2007-12-04 22:23:35 +0000379
380 computeRegisterProperties();
381}
382
383const char *
384SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
385{
386 if (node_names.empty()) {
387 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
388 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
389 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
390 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
Scott Michel9de5d0d2008-01-11 02:53:15 +0000391 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
Scott Michel053c1da2008-01-29 02:16:57 +0000392 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
Scott Michel266bc8f2007-12-04 22:23:35 +0000393 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
394 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
395 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
396 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
397 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
398 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
399 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
400 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
401 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
402 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
403 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
404 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
405 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
406 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
407 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
408 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
409 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
410 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
411 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
412 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
413 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
414 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
415 "SPUISD::ROTBYTES_RIGHT_Z";
416 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
417 "SPUISD::ROTBYTES_RIGHT_S";
418 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
419 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
420 "SPUISD::ROTBYTES_LEFT_CHAINED";
421 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
422 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
423 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
424 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
425 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
426 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
427 }
428
429 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
430
431 return ((i != node_names.end()) ? i->second : 0);
432}
433
434//===----------------------------------------------------------------------===//
435// Calling convention code:
436//===----------------------------------------------------------------------===//
437
438#include "SPUGenCallingConv.inc"
439
440//===----------------------------------------------------------------------===//
441// LowerOperation implementation
442//===----------------------------------------------------------------------===//
443
Scott Michel9de5d0d2008-01-11 02:53:15 +0000444/// Aligned load common code for CellSPU
445/*!
446 \param[in] Op The SelectionDAG load or store operand
447 \param[in] DAG The selection DAG
448 \param[in] ST CellSPU subtarget information structure
449 \param[in,out] alignment Caller initializes this to the load or store node's
450 value from getAlignment(), may be updated while generating the aligned load
451 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
452 offset (divisible by 16, modulo 16 == 0)
453 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
454 offset of the preferred slot (modulo 16 != 0)
455 \param[in,out] VT Caller initializes this value type to the the load or store
456 node's loaded or stored value type; may be updated if an i1-extended load or
457 store.
458 \param[out] was16aligned true if the base pointer had 16-byte alignment,
459 otherwise false. Can help to determine if the chunk needs to be rotated.
460
461 Both load and store lowering load a block of data aligned on a 16-byte
462 boundary. This is the common aligned load code shared between both.
463 */
464static SDOperand
465AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
466 LSBaseSDNode *LSN,
467 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
Chris Lattner3f732802008-01-12 22:54:07 +0000468 MVT::ValueType &VT, bool &was16aligned)
Scott Michel9de5d0d2008-01-11 02:53:15 +0000469{
470 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
471 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
472 SDOperand basePtr = LSN->getBasePtr();
473 SDOperand chain = LSN->getChain();
474
475 if (basePtr.getOpcode() == ISD::ADD) {
476 SDOperand Op1 = basePtr.Val->getOperand(1);
477
478 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
Scott Michel58c58182008-01-17 20:38:41 +0000479 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000480
481 alignOffs = (int) CN->getValue();
482 prefSlotOffs = (int) (alignOffs & 0xf);
483
484 // Adjust the rotation amount to ensure that the final result ends up in
485 // the preferred slot:
486 prefSlotOffs -= vtm->prefslot_byte;
487 basePtr = basePtr.getOperand(0);
488
Scott Michel58c58182008-01-17 20:38:41 +0000489 // Loading from memory, can we adjust alignment?
490 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
491 SDOperand APtr = basePtr.getOperand(0);
492 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
493 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
494 alignment = GSDN->getGlobal()->getAlignment();
495 }
Scott Michel9de5d0d2008-01-11 02:53:15 +0000496 }
497 } else {
498 alignOffs = 0;
499 prefSlotOffs = -vtm->prefslot_byte;
500 }
501 } else {
502 alignOffs = 0;
503 prefSlotOffs = -vtm->prefslot_byte;
504 }
505
506 if (alignment == 16) {
507 // Realign the base pointer as a D-Form address:
508 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
Scott Michel58c58182008-01-17 20:38:41 +0000509 basePtr = DAG.getNode(ISD::ADD, PtrVT,
510 basePtr,
511 DAG.getConstant((alignOffs & ~0xf), PtrVT));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000512 }
513
514 // Emit the vector load:
515 was16aligned = true;
516 return DAG.getLoad(MVT::v16i8, chain, basePtr,
517 LSN->getSrcValue(), LSN->getSrcValueOffset(),
518 LSN->isVolatile(), 16);
519 }
520
521 // Unaligned load or we're using the "large memory" model, which means that
522 // we have to be very pessimistic:
Scott Michel58c58182008-01-17 20:38:41 +0000523 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
Scott Michel053c1da2008-01-29 02:16:57 +0000524 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000525 }
526
527 // Add the offset
Scott Michel053c1da2008-01-29 02:16:57 +0000528 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
529 DAG.getConstant((alignOffs & ~0xf), PtrVT));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000530 was16aligned = false;
531 return DAG.getLoad(MVT::v16i8, chain, basePtr,
532 LSN->getSrcValue(), LSN->getSrcValueOffset(),
533 LSN->isVolatile(), 16);
534}
535
Scott Michel266bc8f2007-12-04 22:23:35 +0000536/// Custom lower loads for CellSPU
537/*!
538 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
539 within a 16-byte block, we have to rotate to extract the requested element.
540 */
541static SDOperand
542LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
543 LoadSDNode *LN = cast<LoadSDNode>(Op);
Scott Michel266bc8f2007-12-04 22:23:35 +0000544 SDOperand the_chain = LN->getChain();
Dan Gohmanb625f2f2008-01-30 00:15:11 +0000545 MVT::ValueType VT = LN->getMemoryVT();
Scott Michel266bc8f2007-12-04 22:23:35 +0000546 MVT::ValueType OpVT = Op.Val->getValueType(0);
Scott Michel266bc8f2007-12-04 22:23:35 +0000547 ISD::LoadExtType ExtType = LN->getExtensionType();
548 unsigned alignment = LN->getAlignment();
Scott Michel266bc8f2007-12-04 22:23:35 +0000549 SDOperand Ops[8];
550
Scott Michel266bc8f2007-12-04 22:23:35 +0000551 switch (LN->getAddressingMode()) {
552 case ISD::UNINDEXED: {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000553 int offset, rotamt;
554 bool was16aligned;
555 SDOperand result =
556 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
Scott Michel266bc8f2007-12-04 22:23:35 +0000557
Scott Michel9de5d0d2008-01-11 02:53:15 +0000558 if (result.Val == 0)
Scott Michel266bc8f2007-12-04 22:23:35 +0000559 return result;
Scott Michel9de5d0d2008-01-11 02:53:15 +0000560
561 the_chain = result.getValue(1);
562 // Rotate the chunk if necessary
563 if (rotamt < 0)
564 rotamt += 16;
Scott Michel497e8882008-01-11 21:01:19 +0000565 if (rotamt != 0 || !was16aligned) {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000566 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
567
Scott Michel58c58182008-01-17 20:38:41 +0000568 Ops[0] = the_chain;
569 Ops[1] = result;
Scott Michel9de5d0d2008-01-11 02:53:15 +0000570 if (was16aligned) {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000571 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
572 } else {
Scott Michel497e8882008-01-11 21:01:19 +0000573 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
Scott Michel9de5d0d2008-01-11 02:53:15 +0000574 LoadSDNode *LN1 = cast<LoadSDNode>(result);
Scott Michel497e8882008-01-11 21:01:19 +0000575 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
576 DAG.getConstant(rotamt, PtrVT));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000577 }
578
579 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
580 the_chain = result.getValue(1);
Scott Michel266bc8f2007-12-04 22:23:35 +0000581 }
Scott Michel9de5d0d2008-01-11 02:53:15 +0000582
583 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
584 SDVTList scalarvts;
585 MVT::ValueType vecVT = MVT::v16i8;
586
587 // Convert the loaded v16i8 vector to the appropriate vector type
588 // specified by the operand:
589 if (OpVT == VT) {
590 if (VT != MVT::i1)
591 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
592 } else
593 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
594
595 Ops[0] = the_chain;
596 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
597 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
598 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
599 the_chain = result.getValue(1);
600 } else {
601 // Handle the sign and zero-extending loads for i1 and i8:
602 unsigned NewOpC;
603
604 if (ExtType == ISD::SEXTLOAD) {
605 NewOpC = (OpVT == MVT::i1
606 ? SPUISD::EXTRACT_I1_SEXT
607 : SPUISD::EXTRACT_I8_SEXT);
608 } else {
609 assert(ExtType == ISD::ZEXTLOAD);
610 NewOpC = (OpVT == MVT::i1
611 ? SPUISD::EXTRACT_I1_ZEXT
612 : SPUISD::EXTRACT_I8_ZEXT);
613 }
614
615 result = DAG.getNode(NewOpC, OpVT, result);
616 }
617
618 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
Scott Michel58c58182008-01-17 20:38:41 +0000619 SDOperand retops[3] = {
620 result,
621 the_chain,
622 DAG.getConstant(alignment, MVT::i32)
623 };
Scott Michel9de5d0d2008-01-11 02:53:15 +0000624
Scott Michel58c58182008-01-17 20:38:41 +0000625 result = DAG.getNode(SPUISD::LDRESULT, retvts,
626 retops, sizeof(retops) / sizeof(retops[0]));
Scott Michel9de5d0d2008-01-11 02:53:15 +0000627 return result;
Scott Michel266bc8f2007-12-04 22:23:35 +0000628 }
629 case ISD::PRE_INC:
630 case ISD::PRE_DEC:
631 case ISD::POST_INC:
632 case ISD::POST_DEC:
633 case ISD::LAST_INDEXED_MODE:
634 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
635 "UNINDEXED\n";
636 cerr << (unsigned) LN->getAddressingMode() << "\n";
637 abort();
638 /*NOTREACHED*/
639 }
640
641 return SDOperand();
642}
643
644/// Custom lower stores for CellSPU
645/*!
646 All CellSPU stores are aligned to 16-byte boundaries, so for elements
647 within a 16-byte block, we have to generate a shuffle to insert the
648 requested element into its place, then store the resulting block.
649 */
650static SDOperand
651LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
652 StoreSDNode *SN = cast<StoreSDNode>(Op);
653 SDOperand Value = SN->getValue();
654 MVT::ValueType VT = Value.getValueType();
Dan Gohmanb625f2f2008-01-30 00:15:11 +0000655 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
Scott Michel266bc8f2007-12-04 22:23:35 +0000656 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
Scott Michel9de5d0d2008-01-11 02:53:15 +0000657 unsigned alignment = SN->getAlignment();
Scott Michel266bc8f2007-12-04 22:23:35 +0000658
659 switch (SN->getAddressingMode()) {
660 case ISD::UNINDEXED: {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000661 int chunk_offset, slot_offset;
662 bool was16aligned;
Scott Michel266bc8f2007-12-04 22:23:35 +0000663
664 // The vector type we really want to load from the 16-byte chunk, except
665 // in the case of MVT::i1, which has to be v16i8.
Scott Michel9de5d0d2008-01-11 02:53:15 +0000666 unsigned vecVT, stVecVT = MVT::v16i8;
667
Scott Michel266bc8f2007-12-04 22:23:35 +0000668 if (StVT != MVT::i1)
669 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
Scott Michel266bc8f2007-12-04 22:23:35 +0000670 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
671
Scott Michel9de5d0d2008-01-11 02:53:15 +0000672 SDOperand alignLoadVec =
673 AlignedLoad(Op, DAG, ST, SN, alignment,
674 chunk_offset, slot_offset, VT, was16aligned);
Scott Michel266bc8f2007-12-04 22:23:35 +0000675
Scott Michel9de5d0d2008-01-11 02:53:15 +0000676 if (alignLoadVec.Val == 0)
677 return alignLoadVec;
Scott Michel266bc8f2007-12-04 22:23:35 +0000678
Scott Michel9de5d0d2008-01-11 02:53:15 +0000679 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
680 SDOperand basePtr = LN->getBasePtr();
681 SDOperand the_chain = alignLoadVec.getValue(1);
Scott Michel266bc8f2007-12-04 22:23:35 +0000682 SDOperand theValue = SN->getValue();
683 SDOperand result;
684
685 if (StVT != VT
686 && (theValue.getOpcode() == ISD::AssertZext
687 || theValue.getOpcode() == ISD::AssertSext)) {
688 // Drill down and get the value for zero- and sign-extended
689 // quantities
690 theValue = theValue.getOperand(0);
691 }
692
Scott Michel9de5d0d2008-01-11 02:53:15 +0000693 chunk_offset &= 0xf;
Scott Michel266bc8f2007-12-04 22:23:35 +0000694
Scott Michel9de5d0d2008-01-11 02:53:15 +0000695 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
696 SDOperand insertEltPtr;
697 SDOperand insertEltOp;
698
699 // If the base pointer is already a D-form address, then just create
700 // a new D-form address with a slot offset and the orignal base pointer.
701 // Otherwise generate a D-form address with the slot offset relative
702 // to the stack pointer, which is always aligned.
Scott Michel497e8882008-01-11 21:01:19 +0000703 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
704 DEBUG(basePtr.Val->dump(&DAG));
705 DEBUG(cerr << "\n");
706
Scott Michel053c1da2008-01-29 02:16:57 +0000707 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
708 (basePtr.getOpcode() == ISD::ADD
709 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
Scott Michel497e8882008-01-11 21:01:19 +0000710 insertEltPtr = basePtr;
Scott Michel9de5d0d2008-01-11 02:53:15 +0000711 } else {
Scott Michel053c1da2008-01-29 02:16:57 +0000712#if 0
713 // $sp is always aligned, so use it when necessary to avoid loading
714 // an address
715 SDOperand ptrP =
716 basePtr.Val->hasOneUse() ? DAG.getRegister(SPU::R1, PtrVT) : basePtr;
717 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs);
718#else
719 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
720#endif
Scott Michel9de5d0d2008-01-11 02:53:15 +0000721 }
722
723 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
Scott Michel266bc8f2007-12-04 22:23:35 +0000724 result = DAG.getNode(SPUISD::SHUFB, vecVT,
725 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
Scott Michel9de5d0d2008-01-11 02:53:15 +0000726 alignLoadVec,
Scott Michel266bc8f2007-12-04 22:23:35 +0000727 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
728
Scott Michel9de5d0d2008-01-11 02:53:15 +0000729 result = DAG.getStore(the_chain, result, basePtr,
Scott Michel266bc8f2007-12-04 22:23:35 +0000730 LN->getSrcValue(), LN->getSrcValueOffset(),
731 LN->isVolatile(), LN->getAlignment());
732
733 return result;
734 /*UNREACHED*/
735 }
736 case ISD::PRE_INC:
737 case ISD::PRE_DEC:
738 case ISD::POST_INC:
739 case ISD::POST_DEC:
740 case ISD::LAST_INDEXED_MODE:
741 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
742 "UNINDEXED\n";
743 cerr << (unsigned) SN->getAddressingMode() << "\n";
744 abort();
745 /*NOTREACHED*/
746 }
747
748 return SDOperand();
749}
750
751/// Generate the address of a constant pool entry.
752static SDOperand
753LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
754 MVT::ValueType PtrVT = Op.getValueType();
755 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
756 Constant *C = CP->getConstVal();
757 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
Scott Michel266bc8f2007-12-04 22:23:35 +0000758 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000759 const TargetMachine &TM = DAG.getTarget();
Scott Michel266bc8f2007-12-04 22:23:35 +0000760
761 if (TM.getRelocationModel() == Reloc::Static) {
762 if (!ST->usingLargeMem()) {
763 // Just return the SDOperand with the constant pool address in it.
Scott Michel58c58182008-01-17 20:38:41 +0000764 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
Scott Michel266bc8f2007-12-04 22:23:35 +0000765 } else {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000766#if 1
Scott Michel266bc8f2007-12-04 22:23:35 +0000767 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
768 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
769
770 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000771#else
Scott Michel053c1da2008-01-29 02:16:57 +0000772 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000773#endif
Scott Michel266bc8f2007-12-04 22:23:35 +0000774 }
775 }
776
777 assert(0 &&
778 "LowerConstantPool: Relocation model other than static not supported.");
779 return SDOperand();
780}
781
782static SDOperand
783LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
784 MVT::ValueType PtrVT = Op.getValueType();
785 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
786 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
787 SDOperand Zero = DAG.getConstant(0, PtrVT);
788 const TargetMachine &TM = DAG.getTarget();
789
790 if (TM.getRelocationModel() == Reloc::Static) {
Scott Michel053c1da2008-01-29 02:16:57 +0000791 SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000792 return (!ST->usingLargeMem()
Scott Michel053c1da2008-01-29 02:16:57 +0000793 ? JmpAForm
794 : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero));
Scott Michel266bc8f2007-12-04 22:23:35 +0000795 }
796
797 assert(0 &&
798 "LowerJumpTable: Relocation model other than static not supported.");
799 return SDOperand();
800}
801
802static SDOperand
803LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
804 MVT::ValueType PtrVT = Op.getValueType();
805 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
806 GlobalValue *GV = GSDN->getGlobal();
807 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
Scott Michel266bc8f2007-12-04 22:23:35 +0000808 const TargetMachine &TM = DAG.getTarget();
Scott Michel9de5d0d2008-01-11 02:53:15 +0000809 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Michel266bc8f2007-12-04 22:23:35 +0000810
811 if (TM.getRelocationModel() == Reloc::Static) {
Scott Michel053c1da2008-01-29 02:16:57 +0000812 if (!ST->usingLargeMem()) {
813 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
814 } else {
815 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
816 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
817 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
818 }
Scott Michel266bc8f2007-12-04 22:23:35 +0000819 } else {
820 cerr << "LowerGlobalAddress: Relocation model other than static not "
821 << "supported.\n";
822 abort();
823 /*NOTREACHED*/
824 }
825
826 return SDOperand();
827}
828
829//! Custom lower i64 integer constants
830/*!
831 This code inserts all of the necessary juggling that needs to occur to load
832 a 64-bit constant into a register.
833 */
834static SDOperand
835LowerConstant(SDOperand Op, SelectionDAG &DAG) {
836 unsigned VT = Op.getValueType();
837 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
838
839 if (VT == MVT::i64) {
840 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
841 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
842 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
843
844 } else {
845 cerr << "LowerConstant: unhandled constant type "
846 << MVT::getValueTypeString(VT)
847 << "\n";
848 abort();
849 /*NOTREACHED*/
850 }
851
852 return SDOperand();
853}
854
855//! Custom lower single precision floating point constants
856/*!
857 "float" immediates can be lowered as if they were unsigned 32-bit integers.
858 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
859 target description.
860 */
861static SDOperand
862LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
863 unsigned VT = Op.getValueType();
864 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
865
866 assert((FP != 0) &&
867 "LowerConstantFP: Node is not ConstantFPSDNode");
868
Scott Michel266bc8f2007-12-04 22:23:35 +0000869 if (VT == MVT::f32) {
Scott Michel170783a2007-12-19 20:15:47 +0000870 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel266bc8f2007-12-04 22:23:35 +0000871 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel170783a2007-12-19 20:15:47 +0000872 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000873 } else if (VT == MVT::f64) {
Scott Michel170783a2007-12-19 20:15:47 +0000874 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel266bc8f2007-12-04 22:23:35 +0000875 return DAG.getNode(ISD::BIT_CONVERT, VT,
876 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
877 }
878
879 return SDOperand();
880}
881
Scott Michel58c58182008-01-17 20:38:41 +0000882//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
883static SDOperand
884LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
885{
886 SDOperand Cond = Op.getOperand(1);
887 MVT::ValueType CondVT = Cond.getValueType();
888 MVT::ValueType CondNVT;
889
890 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
891 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
892 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
893 Op.getOperand(0),
894 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
895 Op.getOperand(2));
896 } else
897 return SDOperand(); // Unchanged
898}
899
Scott Michel266bc8f2007-12-04 22:23:35 +0000900static SDOperand
901LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
902{
903 MachineFunction &MF = DAG.getMachineFunction();
904 MachineFrameInfo *MFI = MF.getFrameInfo();
Chris Lattner84bc5422007-12-31 04:13:23 +0000905 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +0000906 SmallVector<SDOperand, 8> ArgValues;
907 SDOperand Root = Op.getOperand(0);
908 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
909
910 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
911 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
912
913 unsigned ArgOffset = SPUFrameInfo::minStackSize();
914 unsigned ArgRegIdx = 0;
915 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
916
917 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
918
919 // Add DAG nodes to load the arguments or copy them out of registers.
920 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
921 SDOperand ArgVal;
922 bool needsLoad = false;
923 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
924 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
925
926 switch (ObjectVT) {
927 default: {
928 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
929 << MVT::getValueTypeString(ObjectVT)
930 << "\n";
931 abort();
932 }
933 case MVT::i8:
934 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000935 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
936 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000937 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
938 ++ArgRegIdx;
939 } else {
940 needsLoad = true;
941 }
942 break;
943 case MVT::i16:
944 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000945 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
946 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000947 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
948 ++ArgRegIdx;
949 } else {
950 needsLoad = true;
951 }
952 break;
953 case MVT::i32:
954 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000955 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
956 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000957 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
958 ++ArgRegIdx;
959 } else {
960 needsLoad = true;
961 }
962 break;
963 case MVT::i64:
964 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000965 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
966 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000967 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
968 ++ArgRegIdx;
969 } else {
970 needsLoad = true;
971 }
972 break;
973 case MVT::f32:
974 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000975 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
976 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000977 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
978 ++ArgRegIdx;
979 } else {
980 needsLoad = true;
981 }
982 break;
983 case MVT::f64:
984 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000985 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
986 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000987 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
988 ++ArgRegIdx;
989 } else {
990 needsLoad = true;
991 }
992 break;
993 case MVT::v2f64:
994 case MVT::v4f32:
995 case MVT::v4i32:
996 case MVT::v8i16:
997 case MVT::v16i8:
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +00001001 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1002 ++ArgRegIdx;
1003 } else {
1004 needsLoad = true;
1005 }
1006 break;
1007 }
1008
1009 // We need to load the argument to a virtual register if we determined above
1010 // that we ran out of physical registers of the appropriate type
1011 if (needsLoad) {
1012 // If the argument is actually used, emit a load from the right stack
1013 // slot.
1014 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1015 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1016 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1017 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1018 } else {
1019 // Don't emit a dead load.
1020 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1021 }
1022
1023 ArgOffset += StackSlotSize;
1024 }
1025
1026 ArgValues.push_back(ArgVal);
1027 }
1028
1029 // If the function takes variable number of arguments, make a frame index for
1030 // the start of the first vararg value... for expansion of llvm.va_start.
1031 if (isVarArg) {
1032 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1033 ArgOffset);
1034 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035 // If this function is vararg, store any remaining integer argument regs to
1036 // their spots on the stack so that they may be loaded by deferencing the
1037 // result of va_next.
1038 SmallVector<SDOperand, 8> MemOps;
1039 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
Chris Lattner84bc5422007-12-31 04:13:23 +00001040 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1041 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +00001042 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1043 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1044 MemOps.push_back(Store);
1045 // Increment the address by four for the next argument to store
1046 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1047 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1048 }
1049 if (!MemOps.empty())
1050 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1051 }
1052
1053 ArgValues.push_back(Root);
1054
1055 // Return the new list of results.
1056 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1057 Op.Val->value_end());
1058 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1059}
1060
1061/// isLSAAddress - Return the immediate to use if the specified
1062/// value is representable as a LSA address.
1063static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1064 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1065 if (!C) return 0;
1066
1067 int Addr = C->getValue();
1068 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1069 (Addr << 14 >> 14) != Addr)
1070 return 0; // Top 14 bits have to be sext of immediate.
1071
1072 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1073}
1074
//! Lower an ISD::CALL node for the SPU target
/*!
 Marshals the call's arguments into the SPU argument registers, spilling
 overflow arguments to stack slots; selects the callee addressing form
 (A-form, PC-relative, or indirect, depending on the callee kind and the
 memory model); emits the call node; and copies any return values out of
 the SPU return registers (R3/R4).

 \param Op The CALL node: operand 0 is the chain, operand 4 the callee,
           operands 5 onward are (argument, signness) pairs
 \param DAG The current DAG
 \param ST The subtarget, used to choose small- vs. large-memory call forms
 */
static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  // Operands 5.. come in (value, signness) pairs, hence the divide by 2.
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // Every supported type either takes the next argument register or,
    // once those are exhausted, a full stack slot.
    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      // NOTE(review): two i32 results come back in R4 then R3 — presumably
      // an expanded 64-bit value; confirm the intended word ordering.
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}
1296
1297static SDOperand
1298LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1299 SmallVector<CCValAssign, 16> RVLocs;
1300 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1301 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1302 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1303 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1304
1305 // If this is the first return lowered for this function, add the regs to the
1306 // liveout set for the function.
Chris Lattner84bc5422007-12-31 04:13:23 +00001307 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
Scott Michel266bc8f2007-12-04 22:23:35 +00001308 for (unsigned i = 0; i != RVLocs.size(); ++i)
Chris Lattner84bc5422007-12-31 04:13:23 +00001309 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
Scott Michel266bc8f2007-12-04 22:23:35 +00001310 }
1311
1312 SDOperand Chain = Op.getOperand(0);
1313 SDOperand Flag;
1314
1315 // Copy the result values into the output registers.
1316 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1317 CCValAssign &VA = RVLocs[i];
1318 assert(VA.isRegLoc() && "Can only return in registers!");
1319 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1320 Flag = Chain.getValue(1);
1321 }
1322
1323 if (Flag.Val)
1324 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1325 else
1326 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1327}
1328
1329
1330//===----------------------------------------------------------------------===//
1331// Vector related lowering:
1332//===----------------------------------------------------------------------===//
1333
1334static ConstantSDNode *
1335getVecImm(SDNode *N) {
1336 SDOperand OpVal(0, 0);
1337
1338 // Check to see if this buildvec has a single non-undef value in its elements.
1339 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1340 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1341 if (OpVal.Val == 0)
1342 OpVal = N->getOperand(i);
1343 else if (OpVal != N->getOperand(i))
1344 return 0;
1345 }
1346
1347 if (OpVal.Val != 0) {
1348 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1349 return CN;
1350 }
1351 }
1352
1353 return 0; // All UNDEF: use implicit def.; not Constant node
1354}
1355
1356/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1357/// and the value fits into an unsigned 18-bit constant, and if so, return the
1358/// constant
1359SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1360 MVT::ValueType ValueType) {
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 uint64_t Value = CN->getValue();
1363 if (Value <= 0x3ffff)
1364 return DAG.getConstant(Value, ValueType);
1365 }
1366
1367 return SDOperand();
1368}
1369
1370/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1371/// and the value fits into a signed 16-bit constant, and if so, return the
1372/// constant
1373SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1374 MVT::ValueType ValueType) {
1375 if (ConstantSDNode *CN = getVecImm(N)) {
1376 if (ValueType == MVT::i32) {
1377 int Value = (int) CN->getValue();
1378 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1379
1380 if (Value == SExtValue)
1381 return DAG.getConstant(Value, ValueType);
1382 } else if (ValueType == MVT::i16) {
1383 short Value = (short) CN->getValue();
1384 int SExtValue = ((int) Value << 16) >> 16;
1385
1386 if (Value == (short) SExtValue)
1387 return DAG.getConstant(Value, ValueType);
1388 } else if (ValueType == MVT::i64) {
1389 int64_t Value = CN->getValue();
1390 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1391
1392 if (Value == SExtValue)
1393 return DAG.getConstant(Value, ValueType);
1394 }
1395 }
1396
1397 return SDOperand();
1398}
1399
1400/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1401/// and the value fits into a signed 10-bit constant, and if so, return the
1402/// constant
1403SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1404 MVT::ValueType ValueType) {
1405 if (ConstantSDNode *CN = getVecImm(N)) {
1406 int Value = (int) CN->getValue();
1407 if ((ValueType == MVT::i32 && isS10Constant(Value))
1408 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1409 return DAG.getConstant(Value, ValueType);
1410 }
1411
1412 return SDOperand();
1413}
1414
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
///
/// NOTE(review): for the i16 case, `(short) Value >> 8` sign-extends, so a
/// splat whose repeated byte has the high bit set (e.g. 0x8080) compares a
/// negative left-hand side against a positive right-hand side and is
/// rejected — presumably a conservative fallback to another lowering;
/// confirm this is intended.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      // Both bytes agree: return just the low byte.
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}
1437
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into the upper halfword of an ILHU-style immediate
/// (low 16 bits all zero), and if so, return the constant shifted down to
/// its 16-bit payload.
///
/// NOTE(review): the i64 branch masks with the 32-bit literal 0xffff0000,
/// so it only accepts values whose upper 32 bits are zero as well as the
/// low 16 — presumably intentional since ILHU produces a 32-bit pattern;
/// confirm against the instruction definition.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      // Return only the high halfword payload.
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}
1453
1454/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1455SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1456 if (ConstantSDNode *CN = getVecImm(N)) {
1457 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1458 }
1459
1460 return SDOperand();
1461}
1462
1463/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1464SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1465 if (ConstantSDNode *CN = getVecImm(N)) {
1466 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1467 }
1468
1469 return SDOperand();
1470}
1471
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
// The 128-bit vector is packed into two uint64_t words: VectorBits[0]
// receives the elements from the first half of the operand list and
// VectorBits[1] those from the second half.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64-bit word?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Record the element's position as undef; leave its value bits zero.
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      // Mask the integer constant down to the element width.
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      // Use the raw IEEE bit image of the FP constant.
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1513
1514/// If this is a splat (repetition) of a value across the whole vector, return
1515/// the smallest size that splats it. For example, "0x01010101010101..." is a
1516/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1517/// SplatSize = 1 byte.
1518static bool isConstantSplat(const uint64_t Bits128[2],
1519 const uint64_t Undef128[2],
1520 int MinSplatBits,
1521 uint64_t &SplatBits, uint64_t &SplatUndef,
1522 int &SplatSize) {
1523 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1524 // the same as the lower 64-bits, ignoring undefs.
1525 uint64_t Bits64 = Bits128[0] | Bits128[1];
1526 uint64_t Undef64 = Undef128[0] & Undef128[1];
1527 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1528 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1529 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1530 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1531
1532 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1533 if (MinSplatBits < 64) {
1534
1535 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1536 // undefs.
1537 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1538 if (MinSplatBits < 32) {
1539
1540 // If the top 16-bits are different than the lower 16-bits, ignoring
1541 // undefs, we have an i32 splat.
1542 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1543 if (MinSplatBits < 16) {
1544 // If the top 8-bits are different than the lower 8-bits, ignoring
1545 // undefs, we have an i16 splat.
1546 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1547 // Otherwise, we have an 8-bit splat.
1548 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1549 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1550 SplatSize = 1;
1551 return true;
1552 }
1553 } else {
1554 SplatBits = Bits16;
1555 SplatUndef = Undef16;
1556 SplatSize = 2;
1557 return true;
1558 }
1559 }
1560 } else {
1561 SplatBits = Bits32;
1562 SplatUndef = Undef32;
1563 SplatSize = 4;
1564 return true;
1565 }
1566 }
1567 } else {
1568 SplatBits = Bits128[0];
1569 SplatUndef = Undef128[0];
1570 SplatSize = 8;
1571 return true;
1572 }
1573 }
1574
1575 return false; // Can't be a splat if two pieces don't match.
1576}
1577
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.  Only constant splat vectors are
// lowered here; anything else falls back to default expansion.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  // NOTE(review): 'default' deliberately(?) falls through into the v4f32
  // case, so any unlisted vector type trips the SplatSize assert below —
  // confirm this is intended rather than a missing 'assert(0)'.
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      // SplatSize == 1: replicate the byte into both halves.
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;                 // (unused)
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      // Build the 16-byte shufb control mask, one byte per (word i, byte j).
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;                  // (unused)
          bool process_upper, process_lower;
          uint64_t val = 0;             // NOTE(review): shadows the outer
                                        // 'val' (the splat value) — the
                                        // shadowing is harmless here but
                                        // easy to misread.

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            // Special-pattern words are synthesized by shufb's generate
            // modes: 0x80 -> 0x00, 0xc0 -> 0xff, 0xe0 -> 0x80 in byte 0.
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            // Ordinary byte: select from the appropriate input operand.
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
1737
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // An undef second operand shuffles within V1 only.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;     // # of mask entries that select out of V2
  unsigned V2Elt = 0;          // encoded position of the single V2 element
  unsigned V2EltIdx0 = 0;      // first shuffle index that refers to V2
  unsigned CurrElt = 0;        // expected next index in the monotonic scan
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Scan the mask; stop early once it's clearly not the single-insert shape.
  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      // NOTE(review): SrcElt >= V2EltIdx0 here, so (V2EltIdx0 - SrcElt)
      // wraps around as unsigned before the shift; confirm this is the
      // intended encoding (vs. (SrcElt - V2EltIdx0) << 2).
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      // Expand each element index into its BytesPerElement byte indices.
      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1834
1835static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1836 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1837
1838 if (Op0.Val->getOpcode() == ISD::Constant) {
1839 // For a constant, build the appropriate constant vector, which will
1840 // eventually simplify to a vector register load.
1841
1842 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1843 SmallVector<SDOperand, 16> ConstVecValues;
1844 MVT::ValueType VT;
1845 size_t n_copies;
1846
1847 // Create a constant vector:
1848 switch (Op.getValueType()) {
1849 default: assert(0 && "Unexpected constant value type in "
1850 "LowerSCALAR_TO_VECTOR");
1851 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1852 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1853 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1854 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1855 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1856 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1857 }
1858
1859 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1860 for (size_t j = 0; j < n_copies; ++j)
1861 ConstVecValues.push_back(CValue);
1862
1863 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1864 &ConstVecValues[0], ConstVecValues.size());
1865 } else {
1866 // Otherwise, copy the value from one register to another:
1867 switch (Op0.getValueType()) {
1868 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1869 case MVT::i8:
1870 case MVT::i16:
1871 case MVT::i32:
1872 case MVT::i64:
1873 case MVT::f32:
1874 case MVT::f64:
1875 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1876 }
1877 }
1878
1879 return SDOperand();
1880}
1881
//! Lower vector multiplication for the types the SPU has no single mul for.
/*!
  v4i32: assembled from MPYH/MPYU 16x16 partial products.
  v8i16: lower and upper halfword products combined with SELB.
  v16i8: fully decomposed into 8-bit partial products (see case comment).
  Anything else aborts.
 */
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    // (rA*rB)_32 = MPYU(lo halves) + (MPYH(rA,rB) + MPYH(rB,rA))
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): Op is the MUL node itself, so getOperand(0) is rA, not
    // a token chain; it is nevertheless used as the chain for the
    // CopyToReg nodes below — confirm this is intentional.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc: byte mask selecting the upper halfword of each word.
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // SELB(lo products, hi products shifted into place, mask)
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): same chain caveat as the v8i16 case — getOperand(0)
    // is rA, not a chain.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // Products of the low bytes of each halfword pair:
    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Sign-extended high bytes of each halfword:
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // Products of the high bytes, shifted back into the high byte slot:
    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222: selects byte 2 of each word (see SELB uses below).
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low halfword of each word of the low products:
    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Sign-extended upper halfwords of each word:
    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    // Shift the combined high products into the upper halfword:
    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Recombine low and high partial products into the final v16i8 result.
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
2034
//! Lower FDIV via reciprocal estimate and one refinement step.
/*!
  Computes A/B as C + R*(A - B*C), where R = FPInterp(B, FPRecipEst(B)) is
  the refined reciprocal estimate of B and C = A*R.
  NOTE(review): despite the f32 name, the else branch allocates vector
  registers, so this is presumably also used for v4f32 — confirm with the
  callers.
 */
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  // Pick the register class matching scalar vs. vector operation:
  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                       DAG.getNode(SPUISD::FPInterp, VT, B,
                                   DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
      DAG.getCopyToReg(BRcpl, VRegC,
                       DAG.getNode(ISD::FMUL, VT, A,
                                   DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  // Result = C + R*(A - B*C), reading R and C back from the temporaries:
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2078
Scott Michel266bc8f2007-12-04 22:23:35 +00002079static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2080 unsigned VT = Op.getValueType();
2081 SDOperand N = Op.getOperand(0);
2082 SDOperand Elt = Op.getOperand(1);
2083 SDOperand ShufMask[16];
2084 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2085
2086 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2087
2088 int EltNo = (int) C->getValue();
2089
2090 // sanity checks:
2091 if (VT == MVT::i8 && EltNo >= 16)
2092 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2093 else if (VT == MVT::i16 && EltNo >= 8)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2095 else if (VT == MVT::i32 && EltNo >= 4)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2097 else if (VT == MVT::i64 && EltNo >= 2)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2099
2100 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2101 // i32 and i64: Element 0 is the preferred slot
2102 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2103 }
2104
2105 // Need to generate shuffle mask and extract:
Scott Michel0e5665b2007-12-19 21:17:42 +00002106 int prefslot_begin = -1, prefslot_end = -1;
Scott Michel266bc8f2007-12-04 22:23:35 +00002107 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2108
2109 switch (VT) {
2110 case MVT::i8: {
2111 prefslot_begin = prefslot_end = 3;
2112 break;
2113 }
2114 case MVT::i16: {
2115 prefslot_begin = 2; prefslot_end = 3;
2116 break;
2117 }
2118 case MVT::i32: {
2119 prefslot_begin = 0; prefslot_end = 3;
2120 break;
2121 }
2122 case MVT::i64: {
2123 prefslot_begin = 0; prefslot_end = 7;
2124 break;
2125 }
2126 }
2127
Scott Michel0e5665b2007-12-19 21:17:42 +00002128 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2129 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2130
Scott Michel266bc8f2007-12-04 22:23:35 +00002131 for (int i = 0; i < 16; ++i) {
2132 // zero fill uppper part of preferred slot, don't care about the
2133 // other slots:
2134 unsigned int mask_val;
2135
2136 if (i <= prefslot_end) {
2137 mask_val =
2138 ((i < prefslot_begin)
2139 ? 0x80
2140 : elt_byte + (i - prefslot_begin));
2141
Scott Michel0e5665b2007-12-19 21:17:42 +00002142 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel266bc8f2007-12-04 22:23:35 +00002143 } else
2144 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2145 }
2146
2147 SDOperand ShufMaskVec =
2148 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2149 &ShufMask[0],
2150 sizeof(ShufMask) / sizeof(ShufMask[0]));
2151
2152 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2153 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2154 N, N, ShufMaskVec));
2155
2156}
2157
2158static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2159 SDOperand VecOp = Op.getOperand(0);
2160 SDOperand ValOp = Op.getOperand(1);
2161 SDOperand IdxOp = Op.getOperand(2);
2162 MVT::ValueType VT = Op.getValueType();
2163
2164 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2165 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2166
2167 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2168 // Use $2 because it's always 16-byte aligned and it's available:
2169 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2170
2171 SDOperand result =
2172 DAG.getNode(SPUISD::SHUFB, VT,
2173 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2174 VecOp,
2175 DAG.getNode(SPUISD::INSERT_MASK, VT,
2176 DAG.getNode(ISD::ADD, PtrVT,
2177 PtrBase,
2178 DAG.getConstant(CN->getValue(),
2179 PtrVT))));
2180
2181 return result;
2182}
2183
2184static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2185 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2186
2187 assert(Op.getValueType() == MVT::i8);
2188 switch (Opc) {
2189 default:
2190 assert(0 && "Unhandled i8 math operator");
2191 /*NOTREACHED*/
2192 break;
2193 case ISD::SUB: {
2194 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2195 // the result:
2196 SDOperand N1 = Op.getOperand(1);
2197 N0 = (N0.getOpcode() != ISD::Constant
2198 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2199 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2200 N1 = (N1.getOpcode() != ISD::Constant
2201 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2202 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2203 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2204 DAG.getNode(Opc, MVT::i16, N0, N1));
2205 }
2206 case ISD::ROTR:
2207 case ISD::ROTL: {
2208 SDOperand N1 = Op.getOperand(1);
2209 unsigned N1Opc;
2210 N0 = (N0.getOpcode() != ISD::Constant
2211 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2212 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2213 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2214 N1 = (N1.getOpcode() != ISD::Constant
2215 ? DAG.getNode(N1Opc, MVT::i16, N1)
2216 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2217 SDOperand ExpandArg =
2218 DAG.getNode(ISD::OR, MVT::i16, N0,
2219 DAG.getNode(ISD::SHL, MVT::i16,
2220 N0, DAG.getConstant(8, MVT::i16)));
2221 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2222 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2223 }
2224 case ISD::SRL:
2225 case ISD::SHL: {
2226 SDOperand N1 = Op.getOperand(1);
2227 unsigned N1Opc;
2228 N0 = (N0.getOpcode() != ISD::Constant
2229 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2230 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2231 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2232 N1 = (N1.getOpcode() != ISD::Constant
2233 ? DAG.getNode(N1Opc, MVT::i16, N1)
2234 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2235 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2236 DAG.getNode(Opc, MVT::i16, N0, N1));
2237 }
2238 case ISD::SRA: {
2239 SDOperand N1 = Op.getOperand(1);
2240 unsigned N1Opc;
2241 N0 = (N0.getOpcode() != ISD::Constant
2242 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2243 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2244 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2245 N1 = (N1.getOpcode() != ISD::Constant
2246 ? DAG.getNode(N1Opc, MVT::i16, N1)
2247 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2248 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2249 DAG.getNode(Opc, MVT::i16, N0, N1));
2250 }
2251 case ISD::MUL: {
2252 SDOperand N1 = Op.getOperand(1);
2253 unsigned N1Opc;
2254 N0 = (N0.getOpcode() != ISD::Constant
2255 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2256 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2257 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2258 N1 = (N1.getOpcode() != ISD::Constant
2259 ? DAG.getNode(N1Opc, MVT::i16, N1)
2260 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2261 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2262 DAG.getNode(Opc, MVT::i16, N0, N1));
2263 break;
2264 }
2265 }
2266
2267 return SDOperand();
2268}
2269
2270//! Lower byte immediate operations for v16i8 vectors:
2271static SDOperand
2272LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2273 SDOperand ConstVec;
2274 SDOperand Arg;
2275 MVT::ValueType VT = Op.getValueType();
2276
2277 ConstVec = Op.getOperand(0);
2278 Arg = Op.getOperand(1);
2279 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2280 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2281 ConstVec = ConstVec.getOperand(0);
2282 } else {
2283 ConstVec = Op.getOperand(1);
2284 Arg = Op.getOperand(0);
2285 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2286 ConstVec = ConstVec.getOperand(0);
2287 }
2288 }
2289 }
2290
2291 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2292 uint64_t VectorBits[2];
2293 uint64_t UndefBits[2];
2294 uint64_t SplatBits, SplatUndef;
2295 int SplatSize;
2296
2297 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2298 && isConstantSplat(VectorBits, UndefBits,
2299 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2300 SplatBits, SplatUndef, SplatSize)) {
2301 SDOperand tcVec[16];
2302 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2303 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2304
2305 // Turn the BUILD_VECTOR into a set of target constants:
2306 for (size_t i = 0; i < tcVecSize; ++i)
2307 tcVec[i] = tc;
2308
2309 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2310 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2311 }
2312 }
2313
2314 return SDOperand();
2315}
2316
2317//! Lower i32 multiplication
2318static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2319 unsigned Opc) {
2320 switch (VT) {
2321 default:
2322 cerr << "CellSPU: Unknown LowerMUL value type, got "
2323 << MVT::getValueTypeString(Op.getValueType())
2324 << "\n";
2325 abort();
2326 /*NOTREACHED*/
2327
2328 case MVT::i32: {
2329 SDOperand rA = Op.getOperand(0);
2330 SDOperand rB = Op.getOperand(1);
2331
2332 return DAG.getNode(ISD::ADD, MVT::i32,
2333 DAG.getNode(ISD::ADD, MVT::i32,
2334 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2335 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2336 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2337 }
2338 }
2339
2340 return SDOperand();
2341}
2342
2343//! Custom lowering for CTPOP (count population)
2344/*!
2345 Custom lowering code that counts the number ones in the input
2346 operand. SPU has such an instruction, but it counts the number of
2347 ones per byte, which then have to be accumulated.
2348*/
2349static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2350 unsigned VT = Op.getValueType();
2351 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2352
2353 switch (VT) {
2354 case MVT::i8: {
2355 SDOperand N = Op.getOperand(0);
2356 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2357
2358 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2359 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2360
2361 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2362 }
2363
2364 case MVT::i16: {
2365 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner84bc5422007-12-31 04:13:23 +00002366 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +00002367
Chris Lattner84bc5422007-12-31 04:13:23 +00002368 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +00002369
2370 SDOperand N = Op.getOperand(0);
2371 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2372 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2373 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2374
2375 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2376 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2377
2378 // CNTB_result becomes the chain to which all of the virtual registers
2379 // CNTB_reg, SUM1_reg become associated:
2380 SDOperand CNTB_result =
2381 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2382
2383 SDOperand CNTB_rescopy =
2384 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2385
2386 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2387
2388 return DAG.getNode(ISD::AND, MVT::i16,
2389 DAG.getNode(ISD::ADD, MVT::i16,
2390 DAG.getNode(ISD::SRL, MVT::i16,
2391 Tmp1, Shift1),
2392 Tmp1),
2393 Mask0);
2394 }
2395
2396 case MVT::i32: {
2397 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner84bc5422007-12-31 04:13:23 +00002398 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +00002399
Chris Lattner84bc5422007-12-31 04:13:23 +00002400 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2401 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +00002402
2403 SDOperand N = Op.getOperand(0);
2404 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2405 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2406 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2407 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2408
2409 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2410 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2411
2412 // CNTB_result becomes the chain to which all of the virtual registers
2413 // CNTB_reg, SUM1_reg become associated:
2414 SDOperand CNTB_result =
2415 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2416
2417 SDOperand CNTB_rescopy =
2418 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2419
2420 SDOperand Comp1 =
2421 DAG.getNode(ISD::SRL, MVT::i32,
2422 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2423
2424 SDOperand Sum1 =
2425 DAG.getNode(ISD::ADD, MVT::i32,
2426 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2427
2428 SDOperand Sum1_rescopy =
2429 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2430
2431 SDOperand Comp2 =
2432 DAG.getNode(ISD::SRL, MVT::i32,
2433 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2434 Shift2);
2435 SDOperand Sum2 =
2436 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2437 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2438
2439 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2440 }
2441
2442 case MVT::i64:
2443 break;
2444 }
2445
2446 return SDOperand();
2447}
2448
2449/// LowerOperation - Provide custom lowering hooks for some operations.
2450///
2451SDOperand
2452SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2453{
2454 switch (Op.getOpcode()) {
2455 default: {
2456 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2457 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2458 cerr << "*Op.Val:\n";
2459 Op.Val->dump();
2460 abort();
2461 }
2462 case ISD::LOAD:
2463 case ISD::SEXTLOAD:
2464 case ISD::ZEXTLOAD:
2465 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::STORE:
2467 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::ConstantPool:
2469 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::GlobalAddress:
2471 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2472 case ISD::JumpTable:
2473 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2474 case ISD::Constant:
2475 return LowerConstant(Op, DAG);
2476 case ISD::ConstantFP:
2477 return LowerConstantFP(Op, DAG);
Scott Michel58c58182008-01-17 20:38:41 +00002478 case ISD::BRCOND:
2479 return LowerBRCOND(Op, DAG);
Scott Michel266bc8f2007-12-04 22:23:35 +00002480 case ISD::FORMAL_ARGUMENTS:
Scott Michel58c58182008-01-17 20:38:41 +00002481 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
Scott Michel266bc8f2007-12-04 22:23:35 +00002482 case ISD::CALL:
Scott Michel9de5d0d2008-01-11 02:53:15 +00002483 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
Scott Michel266bc8f2007-12-04 22:23:35 +00002484 case ISD::RET:
2485 return LowerRET(Op, DAG, getTargetMachine());
2486
2487 // i8 math ops:
2488 case ISD::SUB:
2489 case ISD::ROTR:
2490 case ISD::ROTL:
2491 case ISD::SRL:
2492 case ISD::SHL:
2493 case ISD::SRA:
2494 return LowerI8Math(Op, DAG, Op.getOpcode());
2495
2496 // Vector-related lowering.
2497 case ISD::BUILD_VECTOR:
2498 return LowerBUILD_VECTOR(Op, DAG);
2499 case ISD::SCALAR_TO_VECTOR:
2500 return LowerSCALAR_TO_VECTOR(Op, DAG);
2501 case ISD::VECTOR_SHUFFLE:
2502 return LowerVECTOR_SHUFFLE(Op, DAG);
2503 case ISD::EXTRACT_VECTOR_ELT:
2504 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2505 case ISD::INSERT_VECTOR_ELT:
2506 return LowerINSERT_VECTOR_ELT(Op, DAG);
2507
2508 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2509 case ISD::AND:
2510 case ISD::OR:
2511 case ISD::XOR:
2512 return LowerByteImmed(Op, DAG);
2513
2514 // Vector and i8 multiply:
2515 case ISD::MUL:
2516 if (MVT::isVector(Op.getValueType()))
2517 return LowerVectorMUL(Op, DAG);
2518 else if (Op.getValueType() == MVT::i8)
2519 return LowerI8Math(Op, DAG, Op.getOpcode());
2520 else
2521 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2522
2523 case ISD::FDIV:
2524 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2525 return LowerFDIVf32(Op, DAG);
2526// else if (Op.getValueType() == MVT::f64)
2527// return LowerFDIVf64(Op, DAG);
2528 else
2529 assert(0 && "Calling FDIV on unsupported MVT");
2530
2531 case ISD::CTPOP:
2532 return LowerCTPOP(Op, DAG);
2533 }
2534
2535 return SDOperand();
2536}
2537
2538//===----------------------------------------------------------------------===//
2539// Other Lowering Code
2540//===----------------------------------------------------------------------===//
2541
MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  // No SPU instructions currently require custom MachineInstr insertion,
  // so this hook simply returns the basic block unchanged.
  return BB;
}
2548
2549//===----------------------------------------------------------------------===//
2550// Target Optimization Hooks
2551//===----------------------------------------------------------------------===//
2552
//! Target-specific DAG combining for Cell SPU.
/*!
  Performs two peephole folds on SPU address arithmetic:
   - (SPUindirect (SPUaform <addr>, 0), 0) -> (SPUaform <addr>, 0)
     when not generating large-memory code.
   - (add <const>, (SPUindirect <arg>, <const>)) and its mirrored form
     -> (SPUindirect <arg>, <const + const>).
  Returns the replacement node, or the empty SDOperand to leave N alone.
*/
SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith: ");
        DEBUG(N0.Val->dump(&DAG));
        DEBUG(cerr << "\n");

        return N0;
      }
    }
    // NOTE(review): no break here — control falls through into the
    // ISD::ADD case below. Since SPUindirect addressing is additive this
    // looks intentional (it lets nested IndirectAddr constants fold), but
    // confirm against the original commit before relying on it.
  }
  case ISD::ADD: {
    SDOperand Op0 = N->getOperand(0);
    SDOperand Op1 = N->getOperand(1);

    // Fold a constant into an IndirectAddr appearing on either side of
    // the add; both branches are symmetric.
    if ((Op1.getOpcode() == ISD::Constant
         || Op1.getOpcode() == ISD::TargetConstant)
        && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDOperand Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDOperand combinedConst =
          DAG.getConstant(CN0->getValue() + CN1->getValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
        DEBUG(cerr << "With:    (SPUindirect <arg>, "
                   << CN0->getValue() + CN1->getValue() << ")\n");
        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
                           Op0.getOperand(0), combinedConst);
      }
    } else if ((Op0.getOpcode() == ISD::Constant
                || Op0.getOpcode() == ISD::TargetConstant)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDOperand Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDOperand combinedConst =
          DAG.getConstant(CN0->getValue() + CN1->getValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
        DEBUG(cerr << "With:    (SPUindirect <arg>, "
                   << CN0->getValue() + CN1->getValue() << ")\n");

        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);
      }
    }
  }
  }
  // Otherwise, return unchanged.
  return SDOperand();
}
2635
2636//===----------------------------------------------------------------------===//
2637// Inline Assembly Support
2638//===----------------------------------------------------------------------===//
2639
2640/// getConstraintType - Given a constraint letter, return the type of
2641/// constraint it is for this target.
2642SPUTargetLowering::ConstraintType
2643SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2644 if (ConstraintLetter.size() == 1) {
2645 switch (ConstraintLetter[0]) {
2646 default: break;
2647 case 'b':
2648 case 'r':
2649 case 'f':
2650 case 'v':
2651 case 'y':
2652 return C_RegisterClass;
2653 }
2654 }
2655 return TargetLowering::getConstraintType(ConstraintLetter);
2656}
2657
2658std::pair<unsigned, const TargetRegisterClass*>
2659SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2660 MVT::ValueType VT) const
2661{
2662 if (Constraint.size() == 1) {
2663 // GCC RS6000 Constraint Letters
2664 switch (Constraint[0]) {
2665 case 'b': // R1-R31
2666 case 'r': // R0-R31
2667 if (VT == MVT::i64)
2668 return std::make_pair(0U, SPU::R64CRegisterClass);
2669 return std::make_pair(0U, SPU::R32CRegisterClass);
2670 case 'f':
2671 if (VT == MVT::f32)
2672 return std::make_pair(0U, SPU::R32FPRegisterClass);
2673 else if (VT == MVT::f64)
2674 return std::make_pair(0U, SPU::R64FPRegisterClass);
2675 break;
2676 case 'v':
2677 return std::make_pair(0U, SPU::GPRCRegisterClass);
2678 }
2679 }
2680
2681 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2682}
2683
2684void
2685SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2686 uint64_t Mask,
2687 uint64_t &KnownZero,
2688 uint64_t &KnownOne,
2689 const SelectionDAG &DAG,
2690 unsigned Depth ) const {
2691 KnownZero = 0;
2692 KnownOne = 0;
2693}
2694
// LowerAsmOperandForConstraint - Lower an inline-asm operand for the given
// constraint letter. SPU has no target-specific operand lowering, so this
// simply delegates to the TargetLowering base class.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
2704
2705/// isLegalAddressImmediate - Return true if the integer value can be used
2706/// as the offset of the target addressing mode.
2707bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2708 // SPU's addresses are 256K:
2709 return (V > -(1 << 18) && V < (1 << 18) - 1);
2710}
2711
// A global value is never directly usable as an address immediate on SPU.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}