//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
13
14#include "SPURegisterNames.h"
15#include "SPUISelLowering.h"
16#include "SPUTargetMachine.h"
17#include "llvm/ADT/VectorExtras.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/CodeGen/CallingConvLower.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
Chris Lattner1b989192007-12-31 04:13:23 +000023#include "llvm/CodeGen/MachineRegisterInfo.h"
Scott Michel8efdca42007-12-04 22:23:35 +000024#include "llvm/CodeGen/SelectionDAG.h"
Scott Michel8efdca42007-12-04 22:23:35 +000025#include "llvm/Constants.h"
26#include "llvm/Function.h"
27#include "llvm/Intrinsics.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/MathExtras.h"
30#include "llvm/Target/TargetOptions.h"
31
32#include <map>
33
34using namespace llvm;
35
36// Used in getTargetNodeName() below
37namespace {
38 std::map<unsigned, const char *> node_names;
39
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
44 };
45
46 const valtype_map_s valtype_map[] = {
47 { MVT::i1, 3 },
48 { MVT::i8, 3 },
49 { MVT::i16, 2 },
50 { MVT::i32, 0 },
51 { MVT::f32, 0 },
52 { MVT::i64, 0 },
53 { MVT::f64, 0 },
54 { MVT::i128, 0 }
55 };
56
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
58
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
61
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
65 break;
66 }
67 }
68
69#ifndef NDEBUG
70 if (retval == 0) {
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
73 << "\n";
74 abort();
75 }
76#endif
77
78 return retval;
79 }
80
81 //! Predicate that returns true if operand is a memory target
82 /*!
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
Scott Micheldbac4cf2008-01-11 02:53:15 +000085 address, external symbol, constant pool) or an A-form
Scott Michel8efdca42007-12-04 22:23:35 +000086 address.
87 */
88 bool isMemoryOperand(const SDOperand &Op)
89 {
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
Scott Michel8efdca42007-12-04 22:23:35 +000093 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
Scott Michel8efdca42007-12-04 22:23:35 +000098 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
Scott Micheldbac4cf2008-01-11 02:53:15 +0000101 || Opc == SPUISD::AFormAddr);
Scott Michel8efdca42007-12-04 22:23:35 +0000102 }
Scott Michel394e26d2008-01-17 20:38:41 +0000103
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
106 {
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
110 }
Scott Michel8efdca42007-12-04 22:23:35 +0000111}
112
//! Construct the Cell SPU lowering object.
/*!
  Registers the SPU register classes, then records for each (opcode, type)
  pair how the operation must be handled: Legal, Custom-lowered, Promoted,
  or Expanded.  NOTE: a later setOperationAction() call for the same
  (opcode, type) pair overrides an earlier one, so statement order below
  is significant (see the FP_TO_UINT i32 override near the end).
 */
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  // (covers every simple scalar type from i1 up to, but excluding, f128)
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  // NOTE: FP_TO_UINT i32 is overridden to Promote further below.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): these repeat the Custom settings made above; harmless.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // (this overrides the earlier Legal setting for FP_TO_UINT i32)
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}
384
385const char *
386SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
387{
388 if (node_names.empty()) {
389 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
390 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
391 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
392 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
Scott Micheldbac4cf2008-01-11 02:53:15 +0000393 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
Scott Michel8efdca42007-12-04 22:23:35 +0000394 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
395 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
396 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
397 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
398 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
399 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
400 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
401 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
402 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
403 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
404 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
405 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
406 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
407 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
408 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
409 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
410 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
411 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
412 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
413 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
414 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
415 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
416 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
417 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
418 "SPUISD::ROTBYTES_RIGHT_Z";
419 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
420 "SPUISD::ROTBYTES_RIGHT_S";
421 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
422 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
423 "SPUISD::ROTBYTES_LEFT_CHAINED";
424 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
425 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
426 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
427 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
428 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
429 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
430 }
431
432 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
433
434 return ((i != node_names.end()) ? i->second : 0);
435}
436
437//===----------------------------------------------------------------------===//
438// Calling convention code:
439//===----------------------------------------------------------------------===//
440
441#include "SPUGenCallingConv.inc"
442
443//===----------------------------------------------------------------------===//
444// LowerOperation implementation
445//===----------------------------------------------------------------------===//
446
/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in] LSN The load/store node whose base pointer and chain are used
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16, modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store.
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
static SDOperand
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT::ValueType &VT, bool &was16aligned)
{
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // NOTE(review): getValueTypeMapEntry() can return null in NDEBUG builds
  // for an unmapped VT; vtm is dereferenced unconditionally below -- confirm
  // all VTs reaching here are in valtype_map.
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  // If the address is (base + constant), split it into the aligned 16-byte
  // chunk offset and the within-chunk (preferred-slot) offset.
  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDOperand APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      // Non-constant offset: no known chunk offset; rotate from slot 0.
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else {
    // No offset at all; rotate from slot 0.
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}
537
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 Only ISD::UNINDEXED addressing is supported; any indexed mode aborts.
 The lowered result is wrapped in an SPUISD::LDRESULT node carrying the
 value, the chain and the original alignment.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();          // type loaded from memory
  MVT::ValueType OpVT = Op.Val->getValueType(0);  // type the node produces
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDOperand Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    // Load the whole 16-byte chunk containing the value:
    SDOperand result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.Val == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        // Rotation amount is a known constant:
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Rotation amount derived from the (unaligned) base pointer:
        MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT::ValueType vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
      } else
        vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

      // Extract the preferred-slot element from the rotated chunk:
      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    // Package value + chain + alignment into an LDRESULT node:
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[3] = {
      result,
      the_chain,
      DAG.getConstant(alignment, MVT::i32)
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
645
646/// Custom lower stores for CellSPU
647/*!
648 All CellSPU stores are aligned to 16-byte boundaries, so for elements
649 within a 16-byte block, we have to generate a shuffle to insert the
650 requested element into its place, then store the resulting block.
651 */
652static SDOperand
653LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
654 StoreSDNode *SN = cast<StoreSDNode>(Op);
655 SDOperand Value = SN->getValue();
656 MVT::ValueType VT = Value.getValueType();
657 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
658 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
Scott Micheldbac4cf2008-01-11 02:53:15 +0000659 unsigned alignment = SN->getAlignment();
Scott Michel8efdca42007-12-04 22:23:35 +0000660
661 switch (SN->getAddressingMode()) {
662 case ISD::UNINDEXED: {
Scott Micheldbac4cf2008-01-11 02:53:15 +0000663 int chunk_offset, slot_offset;
664 bool was16aligned;
Scott Michel8efdca42007-12-04 22:23:35 +0000665
666 // The vector type we really want to load from the 16-byte chunk, except
667 // in the case of MVT::i1, which has to be v16i8.
Scott Micheldbac4cf2008-01-11 02:53:15 +0000668 unsigned vecVT, stVecVT = MVT::v16i8;
669
Scott Michel8efdca42007-12-04 22:23:35 +0000670 if (StVT != MVT::i1)
671 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
Scott Michel8efdca42007-12-04 22:23:35 +0000672 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
673
Scott Micheldbac4cf2008-01-11 02:53:15 +0000674 SDOperand alignLoadVec =
675 AlignedLoad(Op, DAG, ST, SN, alignment,
676 chunk_offset, slot_offset, VT, was16aligned);
Scott Michel8efdca42007-12-04 22:23:35 +0000677
Scott Micheldbac4cf2008-01-11 02:53:15 +0000678 if (alignLoadVec.Val == 0)
679 return alignLoadVec;
Scott Michel8efdca42007-12-04 22:23:35 +0000680
Scott Micheldbac4cf2008-01-11 02:53:15 +0000681 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
682 SDOperand basePtr = LN->getBasePtr();
683 SDOperand the_chain = alignLoadVec.getValue(1);
Scott Michel8efdca42007-12-04 22:23:35 +0000684 SDOperand theValue = SN->getValue();
685 SDOperand result;
686
687 if (StVT != VT
688 && (theValue.getOpcode() == ISD::AssertZext
689 || theValue.getOpcode() == ISD::AssertSext)) {
690 // Drill down and get the value for zero- and sign-extended
691 // quantities
692 theValue = theValue.getOperand(0);
693 }
694
Scott Micheldbac4cf2008-01-11 02:53:15 +0000695 chunk_offset &= 0xf;
Scott Michel8efdca42007-12-04 22:23:35 +0000696
Scott Micheldbac4cf2008-01-11 02:53:15 +0000697 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
698 SDOperand insertEltPtr;
699 SDOperand insertEltOp;
700
701 // If the base pointer is already a D-form address, then just create
702 // a new D-form address with a slot offset and the orignal base pointer.
703 // Otherwise generate a D-form address with the slot offset relative
704 // to the stack pointer, which is always aligned.
Scott Michelabc58242008-01-11 21:01:19 +0000705 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
706 DEBUG(basePtr.Val->dump(&DAG));
707 DEBUG(cerr << "\n");
708
Scott Micheldbac4cf2008-01-11 02:53:15 +0000709 if (basePtr.getOpcode() == SPUISD::DFormAddr) {
Scott Michel394e26d2008-01-17 20:38:41 +0000710 // Hmmmm... do we ever actually hit this code?
Scott Micheldbac4cf2008-01-11 02:53:15 +0000711 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
712 basePtr.getOperand(0),
713 insertEltOffs);
Scott Michelabc58242008-01-11 21:01:19 +0000714 } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
715 (basePtr.getOpcode() == ISD::ADD
716 && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
717 insertEltPtr = basePtr;
Scott Micheldbac4cf2008-01-11 02:53:15 +0000718 } else {
Scott Michel394e26d2008-01-17 20:38:41 +0000719 // $sp is always aligned, so use it instead of potentially loading an
720 // address into a new register:
Scott Micheldbac4cf2008-01-11 02:53:15 +0000721 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
722 DAG.getRegister(SPU::R1, PtrVT),
723 insertEltOffs);
724 }
725
726 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
Scott Michel8efdca42007-12-04 22:23:35 +0000727 result = DAG.getNode(SPUISD::SHUFB, vecVT,
728 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
Scott Micheldbac4cf2008-01-11 02:53:15 +0000729 alignLoadVec,
Scott Michel8efdca42007-12-04 22:23:35 +0000730 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
731
Scott Micheldbac4cf2008-01-11 02:53:15 +0000732 result = DAG.getStore(the_chain, result, basePtr,
Scott Michel8efdca42007-12-04 22:23:35 +0000733 LN->getSrcValue(), LN->getSrcValueOffset(),
734 LN->isVolatile(), LN->getAlignment());
735
736 return result;
737 /*UNREACHED*/
738 }
739 case ISD::PRE_INC:
740 case ISD::PRE_DEC:
741 case ISD::POST_INC:
742 case ISD::POST_DEC:
743 case ISD::LAST_INDEXED_MODE:
744 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
745 "UNINDEXED\n";
746 cerr << (unsigned) SN->getAddressingMode() << "\n";
747 abort();
748 /*NOTREACHED*/
749 }
750
751 return SDOperand();
752}
753
754/// Generate the address of a constant pool entry.
755static SDOperand
756LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
757 MVT::ValueType PtrVT = Op.getValueType();
758 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
759 Constant *C = CP->getConstVal();
760 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
Scott Michel8efdca42007-12-04 22:23:35 +0000761 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Micheldbac4cf2008-01-11 02:53:15 +0000762 const TargetMachine &TM = DAG.getTarget();
Scott Michel8efdca42007-12-04 22:23:35 +0000763
764 if (TM.getRelocationModel() == Reloc::Static) {
765 if (!ST->usingLargeMem()) {
766 // Just return the SDOperand with the constant pool address in it.
Scott Michel394e26d2008-01-17 20:38:41 +0000767 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
Scott Michel8efdca42007-12-04 22:23:35 +0000768 } else {
Scott Micheldbac4cf2008-01-11 02:53:15 +0000769#if 1
Scott Michel8efdca42007-12-04 22:23:35 +0000770 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
771 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
772
773 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
Scott Micheldbac4cf2008-01-11 02:53:15 +0000774#else
775 return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
776#endif
Scott Michel8efdca42007-12-04 22:23:35 +0000777 }
778 }
779
780 assert(0 &&
781 "LowerConstantPool: Relocation model other than static not supported.");
782 return SDOperand();
783}
784
785static SDOperand
786LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
787 MVT::ValueType PtrVT = Op.getValueType();
788 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
789 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
790 SDOperand Zero = DAG.getConstant(0, PtrVT);
791 const TargetMachine &TM = DAG.getTarget();
792
793 if (TM.getRelocationModel() == Reloc::Static) {
Scott Micheldbac4cf2008-01-11 02:53:15 +0000794 return (!ST->usingLargeMem()
Scott Michel394e26d2008-01-17 20:38:41 +0000795 ? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero)
Scott Micheldbac4cf2008-01-11 02:53:15 +0000796 : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
Scott Michel8efdca42007-12-04 22:23:35 +0000797 }
798
799 assert(0 &&
800 "LowerJumpTable: Relocation model other than static not supported.");
801 return SDOperand();
802}
803
804static SDOperand
805LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
806 MVT::ValueType PtrVT = Op.getValueType();
807 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
808 GlobalValue *GV = GSDN->getGlobal();
809 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
Scott Michel8efdca42007-12-04 22:23:35 +0000810 const TargetMachine &TM = DAG.getTarget();
Scott Micheldbac4cf2008-01-11 02:53:15 +0000811 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Michel8efdca42007-12-04 22:23:35 +0000812
813 if (TM.getRelocationModel() == Reloc::Static) {
Scott Micheldbac4cf2008-01-11 02:53:15 +0000814 return (!ST->usingLargeMem()
Scott Michel394e26d2008-01-17 20:38:41 +0000815 ? DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero)
Scott Micheldbac4cf2008-01-11 02:53:15 +0000816 : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
Scott Michel8efdca42007-12-04 22:23:35 +0000817 } else {
818 cerr << "LowerGlobalAddress: Relocation model other than static not "
819 << "supported.\n";
820 abort();
821 /*NOTREACHED*/
822 }
823
824 return SDOperand();
825}
826
827//! Custom lower i64 integer constants
828/*!
829 This code inserts all of the necessary juggling that needs to occur to load
830 a 64-bit constant into a register.
831 */
832static SDOperand
833LowerConstant(SDOperand Op, SelectionDAG &DAG) {
834 unsigned VT = Op.getValueType();
835 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
836
837 if (VT == MVT::i64) {
838 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
839 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
840 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
841
842 } else {
843 cerr << "LowerConstant: unhandled constant type "
844 << MVT::getValueTypeString(VT)
845 << "\n";
846 abort();
847 /*NOTREACHED*/
848 }
849
850 return SDOperand();
851}
852
853//! Custom lower single precision floating point constants
854/*!
855 "float" immediates can be lowered as if they were unsigned 32-bit integers.
856 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
857 target description.
858 */
859static SDOperand
860LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
861 unsigned VT = Op.getValueType();
862 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
863
864 assert((FP != 0) &&
865 "LowerConstantFP: Node is not ConstantFPSDNode");
866
Scott Michel8efdca42007-12-04 22:23:35 +0000867 if (VT == MVT::f32) {
Scott Michel11e88bb2007-12-19 20:15:47 +0000868 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel8efdca42007-12-04 22:23:35 +0000869 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel11e88bb2007-12-19 20:15:47 +0000870 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel8efdca42007-12-04 22:23:35 +0000871 } else if (VT == MVT::f64) {
Scott Michel11e88bb2007-12-19 20:15:47 +0000872 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel8efdca42007-12-04 22:23:35 +0000873 return DAG.getNode(ISD::BIT_CONVERT, VT,
874 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
875 }
876
877 return SDOperand();
878}
879
Scott Michel394e26d2008-01-17 20:38:41 +0000880//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
881static SDOperand
882LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
883{
884 SDOperand Cond = Op.getOperand(1);
885 MVT::ValueType CondVT = Cond.getValueType();
886 MVT::ValueType CondNVT;
887
888 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
889 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
890 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
891 Op.getOperand(0),
892 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
893 Op.getOperand(2));
894 } else
895 return SDOperand(); // Unchanged
896}
897
//! Lower ISD::FORMAL_ARGUMENTS: bind incoming arguments to registers/stack.
/*!
 The first NumArgRegs non-vararg arguments arrive in the SPU argument
 registers (copied into fresh virtual registers of the matching register
 class); remaining arguments are loaded from fixed stack slots.  For vararg
 functions, the leftover argument registers are spilled to the stack so
 va_arg can walk them, and VarArgsFrameIndex is set to the first vararg slot.
 Returns a MERGE_VALUES node of all argument values plus the updated chain.
 */
static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  // Operand 2 of FORMAL_ARGUMENTS is the vararg flag.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Stack arguments start just past the minimal ABI frame ([LR]/[SP] area).
  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  // The last result value of the node is the chain, hence getNumValues()-1.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    // Select the register class by value type; each case copies the incoming
    // physical argument register into a new virtual register.
    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by deferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    // Tie all the spill stores together so they are not dead-code eliminated.
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
  }

  // The chain is always the last result value.
  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
1058
1059/// isLSAAddress - Return the immediate to use if the specified
1060/// value is representable as a LSA address.
1061static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1062 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1063 if (!C) return 0;
1064
1065 int Addr = C->getValue();
1066 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1067 (Addr << 14 >> 14) != Addr)
1068 return 0; // Top 14 bits have to be sext of immediate.
1069
1070 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1071}
1072
//! Lower ISD::CALL for the Cell SPU.
/*!
 Splits outgoing arguments between the SPU argument registers and stack
 slots, selects the call addressing form (PC-relative BRSL, absolute A-form,
 or indirect X-form for large-memory mode), emits the call node glued to the
 argument copies, and copies any return value(s) out of R3 (and R4 for an
 i32/i32 pair).  Returns either the chain (void calls) or a MERGE_VALUES of
 the results plus the chain.
 */
static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  // Operands 5.. come in (value, flag) pairs; hence the divide by 2.
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // All types use the same register file, so the cases only differ in
    // which value types are legal to pass.
    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Tie the argument stores together.
    // NOTE(review): the TokenFactor replaces Chain with only the store
    // chains, which were rooted on the pre-CALLSEQ_START chain — confirm
    // the CALLSEQ_START node is still reachable as intended.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      // A pair of i32 results is returned in R4:R3.
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}
1294
1295static SDOperand
1296LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1297 SmallVector<CCValAssign, 16> RVLocs;
1298 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1299 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1300 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1301 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1302
1303 // If this is the first return lowered for this function, add the regs to the
1304 // liveout set for the function.
Chris Lattner1b989192007-12-31 04:13:23 +00001305 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
Scott Michel8efdca42007-12-04 22:23:35 +00001306 for (unsigned i = 0; i != RVLocs.size(); ++i)
Chris Lattner1b989192007-12-31 04:13:23 +00001307 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
Scott Michel8efdca42007-12-04 22:23:35 +00001308 }
1309
1310 SDOperand Chain = Op.getOperand(0);
1311 SDOperand Flag;
1312
1313 // Copy the result values into the output registers.
1314 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1315 CCValAssign &VA = RVLocs[i];
1316 assert(VA.isRegLoc() && "Can only return in registers!");
1317 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1318 Flag = Chain.getValue(1);
1319 }
1320
1321 if (Flag.Val)
1322 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1323 else
1324 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1325}
1326
1327
1328//===----------------------------------------------------------------------===//
1329// Vector related lowering:
1330//===----------------------------------------------------------------------===//
1331
1332static ConstantSDNode *
1333getVecImm(SDNode *N) {
1334 SDOperand OpVal(0, 0);
1335
1336 // Check to see if this buildvec has a single non-undef value in its elements.
1337 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1338 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1339 if (OpVal.Val == 0)
1340 OpVal = N->getOperand(i);
1341 else if (OpVal != N->getOperand(i))
1342 return 0;
1343 }
1344
1345 if (OpVal.Val != 0) {
1346 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1347 return CN;
1348 }
1349 }
1350
1351 return 0; // All UNDEF: use implicit def.; not Constant node
1352}
1353
1354/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1355/// and the value fits into an unsigned 18-bit constant, and if so, return the
1356/// constant
1357SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1358 MVT::ValueType ValueType) {
1359 if (ConstantSDNode *CN = getVecImm(N)) {
1360 uint64_t Value = CN->getValue();
1361 if (Value <= 0x3ffff)
1362 return DAG.getConstant(Value, ValueType);
1363 }
1364
1365 return SDOperand();
1366}
1367
1368/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1369/// and the value fits into a signed 16-bit constant, and if so, return the
1370/// constant
1371SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1372 MVT::ValueType ValueType) {
1373 if (ConstantSDNode *CN = getVecImm(N)) {
1374 if (ValueType == MVT::i32) {
1375 int Value = (int) CN->getValue();
1376 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1377
1378 if (Value == SExtValue)
1379 return DAG.getConstant(Value, ValueType);
1380 } else if (ValueType == MVT::i16) {
1381 short Value = (short) CN->getValue();
1382 int SExtValue = ((int) Value << 16) >> 16;
1383
1384 if (Value == (short) SExtValue)
1385 return DAG.getConstant(Value, ValueType);
1386 } else if (ValueType == MVT::i64) {
1387 int64_t Value = CN->getValue();
1388 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1389
1390 if (Value == SExtValue)
1391 return DAG.getConstant(Value, ValueType);
1392 }
1393 }
1394
1395 return SDOperand();
1396}
1397
1398/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1399/// and the value fits into a signed 10-bit constant, and if so, return the
1400/// constant
1401SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1402 MVT::ValueType ValueType) {
1403 if (ConstantSDNode *CN = getVecImm(N)) {
1404 int Value = (int) CN->getValue();
1405 if ((ValueType == MVT::i32 && isS10Constant(Value))
1406 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1407 return DAG.getConstant(Value, ValueType);
1408 }
1409
1410 return SDOperand();
1411}
1412
1413/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1414/// and the value fits into a signed 8-bit constant, and if so, return the
1415/// constant.
1416///
1417/// @note: The incoming vector is v16i8 because that's the only way we can load
1418/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1419/// same value.
1420SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1421 MVT::ValueType ValueType) {
1422 if (ConstantSDNode *CN = getVecImm(N)) {
1423 int Value = (int) CN->getValue();
1424 if (ValueType == MVT::i16
1425 && Value <= 0xffff /* truncated from uint64_t */
1426 && ((short) Value >> 8) == ((short) Value & 0xff))
1427 return DAG.getConstant(Value & 0xff, ValueType);
1428 else if (ValueType == MVT::i8
1429 && (Value & 0xff) == Value)
1430 return DAG.getConstant(Value, ValueType);
1431 }
1432
1433 return SDOperand();
1434}
1435
1436/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1437/// and the value fits into a signed 16-bit constant, and if so, return the
1438/// constant
1439SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1440 MVT::ValueType ValueType) {
1441 if (ConstantSDNode *CN = getVecImm(N)) {
1442 uint64_t Value = CN->getValue();
1443 if ((ValueType == MVT::i32
1444 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1445 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1446 return DAG.getConstant(Value >> 16, ValueType);
1447 }
1448
1449 return SDOperand();
1450}
1451
1452/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1453SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1454 if (ConstantSDNode *CN = getVecImm(N)) {
1455 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1456 }
1457
1458 return SDOperand();
1459}
1460
1461/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1462SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1463 if (ConstantSDNode *CN = getVecImm(N)) {
1464 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1465 }
1466
1467 return SDOperand();
1468}
1469
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2; // In the upper 64-bit word of the 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
    // Note: later operands within a part land in lower slots, i.e. element 0
    // occupies the most-significant position of its word.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Undef element: record its bit positions in UndefBits; VectorBits
      // stays zero for this slot.
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      // Integer constant: mask to the element width.
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      // FP constant: contribute its IEEE bit pattern.
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1511
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
///
/// The outputs (SplatBits, SplatUndef, SplatSize) are written only on a
/// successful match. MinSplatBits bounds the smallest splat width that is
/// acceptable to the caller.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Fold each level in half up front, OR-ing the value bits (undef lanes are
  // zero, so OR merges defined bits) and AND-ing the undef masks (a bit is
  // undef at a level only if it is undef in both halves).
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit halves must agree wherever the opposite half is defined;
  // if they don't, the two pieces differ and this can't be any splat.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;

  if (MinSplatBits >= 64) {
    // 64-bit (v2i64/v2f64) splat.
    SplatBits  = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize  = 8;
    return true;
  }

  // The upper 32 bits of the folded 64-bit value must match the lower 32,
  // ignoring undefs, for any narrower splat to exist.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  if (MinSplatBits >= 32) {
    // 32-bit splat.
    SplatBits  = Bits32;
    SplatUndef = Undef32;
    SplatSize  = 4;
    return true;
  }

  // Same check one level down: both 16-bit halves of Bits32 must agree.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  if (MinSplatBits >= 16) {
    // 16-bit splat.
    SplatBits  = Bits16;
    SplatUndef = Undef16;
    SplatSize  = 2;
    return true;
  }

  // Finally, the two bytes of Bits16 must agree for an 8-bit splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize  = 1;
  return true;
}
1575
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  // NOTE(review): 'default' intentionally(?) falls through into the v4f32
  // case; unexpected vector types are only caught by the SplatSize assert
  // below -- confirm this is the intended behavior.
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits: replicate the byte
    // into both halves of an i16 and build a v8i16, then bitcast back.
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      // SplatSize == 1: replicate the byte into both halves of the i16.
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    // v2i64 splats are synthesized from splatted 32-bit halves plus a SHUFB
    // mask that interleaves them (or encodes all-0/all-1/sign-bit halves
    // directly in the mask).
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;   // (unused)
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      // Build the 16-byte SHUFB control word, 4 bytes per 32-bit result
      // word; even words (i & 1) == 0 take the upper half, odd words the
      // lower half.
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;   // (unused)
          bool process_upper, process_lower;
          // NOTE: this 'val' intentionally shadows the outer 64-bit 'val';
          // here it holds a single shuffle-control byte.
          uint64_t val = 0;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            // Special halves are encoded directly in the control byte:
            // 0x80 -> 0x00, 0xc0 -> 0xff, 0xe0 -> 0x80 (sign byte only).
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            // Ordinary byte select: even words from the first operand,
            // odd words from the second (offset 16).
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
1735
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // An undef second operand behaves like the first.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;   // how many mask entries select from V2
  unsigned V2Elt = 0;        // byte offset of the single V2 element
  unsigned V2EltIdx0 = 0;    // mask index at which V2's element 0 starts
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Scan the mask; bail out of the loop as soon as more than one element
  // comes from V2 or monotonicity is broken.
  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      // NOTE(review): for SrcElt > V2EltIdx0 this unsigned subtraction wraps
      // to a huge value, and '<< 2' hard-codes a 4-byte element scale even
      // for i8/i16 elements. Presumably this was meant to be
      // (SrcElt - V2EltIdx0) * BytesPerElement -- confirm against the
      // INSERT_MASK/C*D operand encoding.
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // General case: expand the mask to byte granularity and emit a full
    // SHUFB with a constant v16i8 control vector.
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1832
1833static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1834 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1835
1836 if (Op0.Val->getOpcode() == ISD::Constant) {
1837 // For a constant, build the appropriate constant vector, which will
1838 // eventually simplify to a vector register load.
1839
1840 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1841 SmallVector<SDOperand, 16> ConstVecValues;
1842 MVT::ValueType VT;
1843 size_t n_copies;
1844
1845 // Create a constant vector:
1846 switch (Op.getValueType()) {
1847 default: assert(0 && "Unexpected constant value type in "
1848 "LowerSCALAR_TO_VECTOR");
1849 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1850 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1851 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1852 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1853 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1854 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1855 }
1856
1857 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1858 for (size_t j = 0; j < n_copies; ++j)
1859 ConstVecValues.push_back(CValue);
1860
1861 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1862 &ConstVecValues[0], ConstVecValues.size());
1863 } else {
1864 // Otherwise, copy the value from one register to another:
1865 switch (Op0.getValueType()) {
1866 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1867 case MVT::i8:
1868 case MVT::i16:
1869 case MVT::i32:
1870 case MVT::i64:
1871 case MVT::f32:
1872 case MVT::f64:
1873 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1874 }
1875 }
1876
1877 return SDOperand();
1878}
1879
/// Lower vector multiplies, which the SPU has no single instruction for.
/// Each element type is decomposed into the 16-bit multiplies the hardware
/// does provide (mpy/mpyh/mpyhh/mpyu).
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    // 32x32 multiply from 16-bit parts:
    //   lo(a)*lo(b) + (hi(a)*lo(b) << 16) + (hi(b)*lo(a) << 16)
    // MPYH produces the shifted cross products, MPYU the unsigned low
    // product.
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): MUL has no chain operand; 'Chain' aliases operand 0
    // (same as rA) and is only used as an ordering token for the
    // CopyToReg nodes -- confirm this is intentional.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc: byte mask selecting the upper 16 bits of each 32-bit
    // word; pinned in a vreg so it can issue early.
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    // Product of the upper halfwords.
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // Merge: low-half products (MPY) in the even halfwords, shifted
    // high-half products in the odd ones, selected via the FSMBI mask.
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): as in the v8i16 case, 'Chain' aliases operand 0.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // Products of the low (even-addressed) bytes of each halfword pair.
    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic-shift each halfword right by 8 to sign-extend the high
    // bytes into halfword position.
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // High-byte products, shifted back up into the high byte lanes.
    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222: byte mask used by the SELB merges below.
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low 16 bits of each 32-bit word of the low products.
    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Sign-extend the upper halfwords of each 32-bit word into place.
    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    // Move the merged upper products into the high halfword lanes.
    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Recombine high and low byte products into the v16i8 result.
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
2032
/// Lower FDIV as reciprocal-estimate multiplication:
///   A/B ~= A*Brcpl + Brcpl*(A - B*(A*Brcpl))
/// i.e. one Newton-Raphson-style refinement of the hardware reciprocal
/// estimate (frest refined by fi). Despite the f32 name, the else branch
/// also allocates vector registers -- presumably for v4f32 operands;
/// TODO confirm which types reach this function.
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  // VRegBR holds the refined reciprocal of B; VRegC holds A * (1/B).
  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                       DAG.getNode(SPUISD::FPInterp, VT, B,
                                   DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
      DAG.getCopyToReg(BRcpl, VRegC,
                       DAG.getNode(ISD::FMUL, VT, A,
                                   DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  // Result = A*BRcpl + BRcpl * (A - B * (A*BRcpl)); the parenthesized term
  // is the residual error of the estimate.
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2076
/// Lower EXTRACT_VECTOR_ELT for constant indices: rotate the requested
/// element into the preferred slot with a SHUFB, then read it out with
/// EXTRACT_ELT0. The preferred slot per type matches valtype_map at the top
/// of this file.
static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin = -1, prefslot_end = -1; // byte range of preferred slot
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8; // first byte of element

  switch (VT) {
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  // Types without a case above (e.g. FP) fall through with -1 and are
  // caught here.
  assert(prefslot_begin != -1 && prefslot_end != -1 &&
         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      // 0x80 in a shufb control byte selects a zero byte.
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      // Replicate the preferred-slot pattern through the rest of the mask.
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));

}
2155
2156static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2157 SDOperand VecOp = Op.getOperand(0);
2158 SDOperand ValOp = Op.getOperand(1);
2159 SDOperand IdxOp = Op.getOperand(2);
2160 MVT::ValueType VT = Op.getValueType();
2161
2162 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2163 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2164
2165 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2166 // Use $2 because it's always 16-byte aligned and it's available:
2167 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2168
2169 SDOperand result =
2170 DAG.getNode(SPUISD::SHUFB, VT,
2171 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2172 VecOp,
2173 DAG.getNode(SPUISD::INSERT_MASK, VT,
2174 DAG.getNode(ISD::ADD, PtrVT,
2175 PtrBase,
2176 DAG.getConstant(CN->getValue(),
2177 PtrVT))));
2178
2179 return result;
2180}
2181
/// Lower i8 arithmetic, which the SPU lacks native support for, by
/// promoting both operands to i16, performing the i16 operation, and
/// truncating back to i8. Sign/zero extension is chosen per operator
/// (sign for SUB/SRA/MUL, zero for logical shifts and rotates).
static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    // Widen or narrow the shift amount to i16 as needed.
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    // Replicate the byte into both halves of the i16 so a 16-bit rotate of
    // the pattern equals an 8-bit rotate of the original byte.
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    // Logical shifts: zero-extend the value so shifted-in bits are zero.
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    // Arithmetic shift: sign-extend so the sign bit is replicated.
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    // The low 8 bits of the product are the same regardless of signedness;
    // sign extension is used here.
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    break;
  }
  }

  return SDOperand();
}
2267
//! Lower byte immediate operations for v16i8 vectors:
/*!
  Recognizes a binary v16i8 operation where one operand is a splatted
  constant BUILD_VECTOR (possibly behind a BIT_CONVERT) and rewrites the
  constant side as a vector of target constants, so the instruction's
  immediate form can be selected. Returns the empty SDOperand if no
  splatted-constant operand is found.
*/
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  // Start assuming the constant is operand 0; if it isn't a BUILD_VECTOR
  // (even after peeling a BIT_CONVERT), swap and try operand 1.
  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    // Only rewrite if the operand is a genuine constant splat.
    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      // Re-emit the same opcode with the constant operand second.
      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}
2314
2315//! Lower i32 multiplication
2316static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2317 unsigned Opc) {
2318 switch (VT) {
2319 default:
2320 cerr << "CellSPU: Unknown LowerMUL value type, got "
2321 << MVT::getValueTypeString(Op.getValueType())
2322 << "\n";
2323 abort();
2324 /*NOTREACHED*/
2325
2326 case MVT::i32: {
2327 SDOperand rA = Op.getOperand(0);
2328 SDOperand rB = Op.getOperand(1);
2329
2330 return DAG.getNode(ISD::ADD, MVT::i32,
2331 DAG.getNode(ISD::ADD, MVT::i32,
2332 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2333 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2334 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2335 }
2336 }
2337
2338 return SDOperand();
2339}
2340
2341//! Custom lowering for CTPOP (count population)
2342/*!
2343 Custom lowering code that counts the number ones in the input
2344 operand. SPU has such an instruction, but it counts the number of
2345 ones per byte, which then have to be accumulated.
2346*/
2347static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2348 unsigned VT = Op.getValueType();
2349 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2350
2351 switch (VT) {
2352 case MVT::i8: {
2353 SDOperand N = Op.getOperand(0);
2354 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2355
2356 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2357 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2358
2359 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2360 }
2361
2362 case MVT::i16: {
2363 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner1b989192007-12-31 04:13:23 +00002364 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel8efdca42007-12-04 22:23:35 +00002365
Chris Lattner1b989192007-12-31 04:13:23 +00002366 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
Scott Michel8efdca42007-12-04 22:23:35 +00002367
2368 SDOperand N = Op.getOperand(0);
2369 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2370 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2371 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2372
2373 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2374 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2375
2376 // CNTB_result becomes the chain to which all of the virtual registers
2377 // CNTB_reg, SUM1_reg become associated:
2378 SDOperand CNTB_result =
2379 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2380
2381 SDOperand CNTB_rescopy =
2382 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2383
2384 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2385
2386 return DAG.getNode(ISD::AND, MVT::i16,
2387 DAG.getNode(ISD::ADD, MVT::i16,
2388 DAG.getNode(ISD::SRL, MVT::i16,
2389 Tmp1, Shift1),
2390 Tmp1),
2391 Mask0);
2392 }
2393
2394 case MVT::i32: {
2395 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner1b989192007-12-31 04:13:23 +00002396 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel8efdca42007-12-04 22:23:35 +00002397
Chris Lattner1b989192007-12-31 04:13:23 +00002398 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2399 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
Scott Michel8efdca42007-12-04 22:23:35 +00002400
2401 SDOperand N = Op.getOperand(0);
2402 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2403 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2404 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2405 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2406
2407 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2408 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2409
2410 // CNTB_result becomes the chain to which all of the virtual registers
2411 // CNTB_reg, SUM1_reg become associated:
2412 SDOperand CNTB_result =
2413 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2414
2415 SDOperand CNTB_rescopy =
2416 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2417
2418 SDOperand Comp1 =
2419 DAG.getNode(ISD::SRL, MVT::i32,
2420 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2421
2422 SDOperand Sum1 =
2423 DAG.getNode(ISD::ADD, MVT::i32,
2424 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2425
2426 SDOperand Sum1_rescopy =
2427 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2428
2429 SDOperand Comp2 =
2430 DAG.getNode(ISD::SRL, MVT::i32,
2431 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2432 Shift2);
2433 SDOperand Sum2 =
2434 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2435 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2436
2437 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2438 }
2439
2440 case MVT::i64:
2441 break;
2442 }
2443
2444 return SDOperand();
2445}
2446
2447/// LowerOperation - Provide custom lowering hooks for some operations.
2448///
2449SDOperand
2450SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2451{
2452 switch (Op.getOpcode()) {
2453 default: {
2454 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2455 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2456 cerr << "*Op.Val:\n";
2457 Op.Val->dump();
2458 abort();
2459 }
2460 case ISD::LOAD:
2461 case ISD::SEXTLOAD:
2462 case ISD::ZEXTLOAD:
2463 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2464 case ISD::STORE:
2465 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::ConstantPool:
2467 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::GlobalAddress:
2469 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::JumpTable:
2471 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2472 case ISD::Constant:
2473 return LowerConstant(Op, DAG);
2474 case ISD::ConstantFP:
2475 return LowerConstantFP(Op, DAG);
Scott Michel394e26d2008-01-17 20:38:41 +00002476 case ISD::BRCOND:
2477 return LowerBRCOND(Op, DAG);
Scott Michel8efdca42007-12-04 22:23:35 +00002478 case ISD::FORMAL_ARGUMENTS:
Scott Michel394e26d2008-01-17 20:38:41 +00002479 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
Scott Michel8efdca42007-12-04 22:23:35 +00002480 case ISD::CALL:
Scott Micheldbac4cf2008-01-11 02:53:15 +00002481 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
Scott Michel8efdca42007-12-04 22:23:35 +00002482 case ISD::RET:
2483 return LowerRET(Op, DAG, getTargetMachine());
2484
2485 // i8 math ops:
2486 case ISD::SUB:
2487 case ISD::ROTR:
2488 case ISD::ROTL:
2489 case ISD::SRL:
2490 case ISD::SHL:
2491 case ISD::SRA:
2492 return LowerI8Math(Op, DAG, Op.getOpcode());
2493
2494 // Vector-related lowering.
2495 case ISD::BUILD_VECTOR:
2496 return LowerBUILD_VECTOR(Op, DAG);
2497 case ISD::SCALAR_TO_VECTOR:
2498 return LowerSCALAR_TO_VECTOR(Op, DAG);
2499 case ISD::VECTOR_SHUFFLE:
2500 return LowerVECTOR_SHUFFLE(Op, DAG);
2501 case ISD::EXTRACT_VECTOR_ELT:
2502 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2503 case ISD::INSERT_VECTOR_ELT:
2504 return LowerINSERT_VECTOR_ELT(Op, DAG);
2505
2506 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2507 case ISD::AND:
2508 case ISD::OR:
2509 case ISD::XOR:
2510 return LowerByteImmed(Op, DAG);
2511
2512 // Vector and i8 multiply:
2513 case ISD::MUL:
2514 if (MVT::isVector(Op.getValueType()))
2515 return LowerVectorMUL(Op, DAG);
2516 else if (Op.getValueType() == MVT::i8)
2517 return LowerI8Math(Op, DAG, Op.getOpcode());
2518 else
2519 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2520
2521 case ISD::FDIV:
2522 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2523 return LowerFDIVf32(Op, DAG);
2524// else if (Op.getValueType() == MVT::f64)
2525// return LowerFDIVf64(Op, DAG);
2526 else
2527 assert(0 && "Calling FDIV on unsupported MVT");
2528
2529 case ISD::CTPOP:
2530 return LowerCTPOP(Op, DAG);
2531 }
2532
2533 return SDOperand();
2534}
2535
2536//===----------------------------------------------------------------------===//
2537// Other Lowering Code
2538//===----------------------------------------------------------------------===//
2539
// InsertAtEndOfBasicBlock - Hook for instructions marked
// usesCustomInserter. No SPU instructions currently require custom
// insertion, so the basic block is returned unmodified.
MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}
2546
2547//===----------------------------------------------------------------------===//
2548// Target Optimization Hooks
2549//===----------------------------------------------------------------------===//
2550
// PerformDAGCombine - Target-specific DAG combine hook. No SPU-specific
// combines are implemented yet; the skeleton below is disabled (#if 0) and
// every node is returned unchanged (an empty SDOperand means "no change").
SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  SDOperand N0 = N->getOperand(0); // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;
  // Do something creative here for ISD nodes that can be coalesced in unique
  // ways.
  }
#endif

  // Otherwise, return unchanged.
  return SDOperand();
}
2569
2570//===----------------------------------------------------------------------===//
2571// Inline Assembly Support
2572//===----------------------------------------------------------------------===//
2573
2574/// getConstraintType - Given a constraint letter, return the type of
2575/// constraint it is for this target.
2576SPUTargetLowering::ConstraintType
2577SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2578 if (ConstraintLetter.size() == 1) {
2579 switch (ConstraintLetter[0]) {
2580 default: break;
2581 case 'b':
2582 case 'r':
2583 case 'f':
2584 case 'v':
2585 case 'y':
2586 return C_RegisterClass;
2587 }
2588 }
2589 return TargetLowering::getConstraintType(ConstraintLetter);
2590}
2591
2592std::pair<unsigned, const TargetRegisterClass*>
2593SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2594 MVT::ValueType VT) const
2595{
2596 if (Constraint.size() == 1) {
2597 // GCC RS6000 Constraint Letters
2598 switch (Constraint[0]) {
2599 case 'b': // R1-R31
2600 case 'r': // R0-R31
2601 if (VT == MVT::i64)
2602 return std::make_pair(0U, SPU::R64CRegisterClass);
2603 return std::make_pair(0U, SPU::R32CRegisterClass);
2604 case 'f':
2605 if (VT == MVT::f32)
2606 return std::make_pair(0U, SPU::R32FPRegisterClass);
2607 else if (VT == MVT::f64)
2608 return std::make_pair(0U, SPU::R64FPRegisterClass);
2609 break;
2610 case 'v':
2611 return std::make_pair(0U, SPU::GPRCRegisterClass);
2612 }
2613 }
2614
2615 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2616}
2617
2618void
2619SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2620 uint64_t Mask,
2621 uint64_t &KnownZero,
2622 uint64_t &KnownOne,
2623 const SelectionDAG &DAG,
2624 unsigned Depth ) const {
2625 KnownZero = 0;
2626 KnownOne = 0;
2627}
2628
// LowerAsmOperandForConstraint - Lower an inline-asm operand referenced by
// a constraint letter into target-specific operands. SPU has no
// target-specific operand constraints yet, so this simply delegates.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
2638
2639/// isLegalAddressImmediate - Return true if the integer value can be used
2640/// as the offset of the target addressing mode.
2641bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2642 // SPU's addresses are 256K:
2643 return (V > -(1 << 18) && V < (1 << 18) - 1);
2644}
2645
// A GlobalValue is never legal as a direct addressing-mode immediate on SPU.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}