//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
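
  // For example: an i16 scalar lives in bytes 2-3 of its 16-byte register
  // (prefslot_byte == 2), so an i16 at an address whose low nibble is 6 must
  // be rotated left by (6 - 2) == 4 bytes to land in its preferred slot.
  // AlignedLoad() below performs this arithmetic.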

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Division by a power of two is cheap on SPU.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8,   MVT::i1, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root; expand it
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8,  Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does have i32 CTLZ support.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1,  Expand);
  setOperationAction(ISD::SELECT, MVT::i8,  Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Expand);
  setOperationAction(ISD::SETCC, MVT::i8,  Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION,  MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
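  // For instance, the legalizer will expand an i64 BUILD_PAIR(lo, hi) roughly
  // as (or (shl (anyext hi), 32), (zext lo)), built from the i32 halves.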

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR,     (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(); may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16; modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this to the load or store node's loaded
  or stored value type; may be updated for an i1-extended load or store.
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
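//
// A sketch of the offset arithmetic performed below, assuming the base
// pointer has the form (add base, C) with constant C:
//
//   alignOffs    = C;                              // byte offset from base
//   prefSlotOffs = (C & 0xf) - prefslot_byte(VT);  // rotate amount
//
// so prefSlotOffs is the left-rotate needed to bring the addressed element
// into its value type's preferred slot within the 16-byte chunk.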
static SDOperand
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT::ValueType &VT, bool &was16aligned)
{
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Modify alignment, since the ADD is likely from getElementPtr:
      switch (basePtr.getOpcode()) {
      case ISD::GlobalAddress:
      case ISD::TargetGlobalAddress: {
        GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
        const GlobalValue *GV = GN->getGlobal();
        alignment = GV->getAlignment();
        break;
      }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      if (isMemoryOperand(basePtr)) {
        SDOperand Zero = DAG.getConstant(0, PtrVT);
        unsigned Opc = (!ST->usingLargeMem()
                        ? SPUISD::AFormAddr
                        : SPUISD::XFormAddr);
        basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
      }
      basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT, basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant(alignOffs, PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for
  elements within a 16-byte block, we have to rotate to extract the requested
  element.
 */
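//
// A worked example: an i32 load from address 0x1004 becomes a v16i8 load of
// the quadword at 0x1000, a ROTBYTES_LEFT of (4 - 0) == 4 bytes to bring
// bytes 4-7 into the i32 preferred slot (bytes 0-3), and finally an
// EXTRACT_ELT0_CHAINED to recover the scalar.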
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDOperand result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.Val == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      if (was16aligned) {
        Ops[0] = the_chain;
        Ops[1] = result;
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[0] = the_chain;
        Ops[1] = result;
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT::ValueType vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
      } else
        vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
  All CellSPU stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to generate a shuffle to insert the
  requested element into its place, then store the resulting block.
 */
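//
// In outline, this emits a read-modify-write sequence along these lines (a
// sketch of the nodes built below, not additional lowering):
//
//   chunk  = AlignedLoad(basePtr)             ; containing quadword
//   mask   = INSERT_MASK(slot offset)         ; insertion control word
//   chunk' = SHUFB(scalar_to_vector(value), chunk, mask)
//   STORE chunk', basePtr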
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    SDOperand alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.Val == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDOperand basePtr = LN->getBasePtr();
    SDOperand the_chain = alignLoadVec.getValue(1);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDOperand insertEltPtr;
    SDOperand insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise, generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.Val->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::DFormAddr) {
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 basePtr.getOperand(0),
                                 insertEltOffs);
    } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
               (basePtr.getOpcode() == ISD::ADD
                && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),
                                 insertEltOffs);
    }

    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
#if 1
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
#else
      return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
#endif
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            ? JTI
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            ? GA
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
  This code inserts all of the necessary juggling that needs to occur to load
  a 64-bit constant into a register.
 */
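//
// For example, an i64 constant C is materialized by building the splat
// vector BUILD_VECTOR(v2i64 C, C) and then extracting element 0 with
// SPUISD::EXTRACT_ELT0, which reuses the existing vector constant lowering.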
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
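//
// For example, the f32 immediate 1.0f (bit pattern 0x3f800000) becomes an
// SPUISD::SFPConstant node wrapping the target FP constant, rather than a
// constant-pool load.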
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
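
// For example, the word-aligned address 0x8000 passes both checks (low two
// bits clear, value fits in a signed 18-bit immediate) and yields the munged
// immediate 0x2000; an unaligned address such as 0x8002 returns null.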

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
1342SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 MVT::ValueType ValueType) {
1344 if (ConstantSDNode *CN = getVecImm(N)) {
1345 uint64_t Value = CN->getValue();
1346 if (Value <= 0x3ffff)
1347 return DAG.getConstant(Value, ValueType);
1348 }
1349
1350 return SDOperand();
1351}
1352
1353/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1354/// and the value fits into a signed 16-bit constant, and if so, return the
1355/// constant
1356SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1357 MVT::ValueType ValueType) {
1358 if (ConstantSDNode *CN = getVecImm(N)) {
1359 if (ValueType == MVT::i32) {
1360 int Value = (int) CN->getValue();
1361 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1362
1363 if (Value == SExtValue)
1364 return DAG.getConstant(Value, ValueType);
1365 } else if (ValueType == MVT::i16) {
1366 short Value = (short) CN->getValue();
1367 int SExtValue = ((int) Value << 16) >> 16;
1368
1369 if (Value == (short) SExtValue)
1370 return DAG.getConstant(Value, ValueType);
1371 } else if (ValueType == MVT::i64) {
1372 int64_t Value = CN->getValue();
1373 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1374
1375 if (Value == SExtValue)
1376 return DAG.getConstant(Value, ValueType);
1377 }
1378 }
1379
1380 return SDOperand();
1381}
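// For illustration: a v4i32 splat of 0xffff8000 sign-extends from the 16-bit
// value 0x8000, so the i32 branch above accepts it, while a splat of
// 0x00018000 is rejected (its low 16 bits sign-extend to 0xffff8000, which
// differs from the original value).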
1382
1383/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384/// and the value fits into a signed 10-bit constant, and if so, return the
1385/// constant
1386SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1387 MVT::ValueType ValueType) {
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int Value = (int) CN->getValue();
1390 if ((ValueType == MVT::i32 && isS10Constant(Value))
1391 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1392 return DAG.getConstant(Value, ValueType);
1393 }
1394
1395 return SDOperand();
1396}
1397
1398/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1399/// and the value fits into a signed 8-bit constant, and if so, return the
1400/// constant.
1401///
1402/// @note: The incoming vector is v16i8 because that's the only way we can load
1403/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1404/// same value.
1405SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1406 MVT::ValueType ValueType) {
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 int Value = (int) CN->getValue();
1409 if (ValueType == MVT::i16
1410 && Value <= 0xffff /* truncated from uint64_t */
1411 && ((short) Value >> 8) == ((short) Value & 0xff))
1412 return DAG.getConstant(Value & 0xff, ValueType);
1413 else if (ValueType == MVT::i8
1414 && (Value & 0xff) == Value)
1415 return DAG.getConstant(Value, ValueType);
1416 }
1417
1418 return SDOperand();
1419}
1420
1421/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
1422/// value whose only nonzero bits lie in the upper 16 (an ILHU immediate), and
1423/// if so, return the constant shifted right by 16 bits
1424SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1425 MVT::ValueType ValueType) {
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 uint64_t Value = CN->getValue();
1428 if ((ValueType == MVT::i32
1429 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1430 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1431 return DAG.getConstant(Value >> 16, ValueType);
1432 }
1433
1434 return SDOperand();
1435}
1436
1437/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1438SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1439 if (ConstantSDNode *CN = getVecImm(N)) {
1440 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1441 }
1442
1443 return SDOperand();
1444}
1445
1446/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1447SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1448 if (ConstantSDNode *CN = getVecImm(N)) {
1449 return DAG.getConstant(CN->getValue(), MVT::i64);
1450 }
1451
1452 return SDOperand();
1453}
1454
1455// If this is a vector of constants or undefs, get the bits. A bit in
1456// UndefBits is set if the corresponding element of the vector is an
1457// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1458// zero. Return true if this is not an array of constants, false if it is.
1459//
1460static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1461 uint64_t UndefBits[2]) {
1462 // Start with zero'd results.
1463 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1464
1465 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1466 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1467 SDOperand OpVal = BV->getOperand(i);
1468
1469 unsigned PartNo = i >= e/2; // In the upper 64 bits?
1470 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1471
1472 uint64_t EltBits = 0;
1473 if (OpVal.getOpcode() == ISD::UNDEF) {
1474 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1475 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1476 continue;
1477 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1478 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1479 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1480 const APFloat &apf = CN->getValueAPF();
1481 EltBits = (CN->getValueType(0) == MVT::f32
1482 ? FloatToBits(apf.convertToFloat())
1483 : DoubleToBits(apf.convertToDouble()));
1484 } else {
1485 // Nonconstant element.
1486 return true;
1487 }
1488
1489 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1490 }
1491
1492 //printf("%llx %llx %llx %llx\n",
1493 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1494 return false;
1495}
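// For illustration: a v4i32 BUILD_VECTOR <1, 2, 3, 4> has EltBitSize == 32,
// so the big-endian packing above produces
//   VectorBits[0] = (1ULL << 32) | 2,  VectorBits[1] = (3ULL << 32) | 4,
// with UndefBits[0] == UndefBits[1] == 0.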
1496
1497/// If this is a splat (repetition) of a value across the whole vector, return
1498/// the smallest size that splats it. For example, "0x01010101010101..." is a
1499/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1500/// SplatSize = 1 byte.
1501static bool isConstantSplat(const uint64_t Bits128[2],
1502 const uint64_t Undef128[2],
1503 int MinSplatBits,
1504 uint64_t &SplatBits, uint64_t &SplatUndef,
1505 int &SplatSize) {
1506 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1507 // the same as the lower 64-bits, ignoring undefs.
1508 uint64_t Bits64 = Bits128[0] | Bits128[1];
1509 uint64_t Undef64 = Undef128[0] & Undef128[1];
1510 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1511 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1512 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1513 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1514
1515 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1516 if (MinSplatBits < 64) {
1517
1518 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1519 // undefs.
1520 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1521 if (MinSplatBits < 32) {
1522
1523 // If the top 16 bits match the lower 16 bits (ignoring undefs), we can
1524 // try a still smaller splat; otherwise this is an i32 splat.
1525 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1526 if (MinSplatBits < 16) {
1527 // If the top 8 bits also match the lower 8 bits (ignoring undefs),
1528 // this reduces all the way down to an 8-bit splat.
1529 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1530 // We have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1533 SplatSize = 1;
1534 return true;
1535 }
1536 } else {
1537 SplatBits = Bits16;
1538 SplatUndef = Undef16;
1539 SplatSize = 2;
1540 return true;
1541 }
1542 }
1543 } else {
1544 SplatBits = Bits32;
1545 SplatUndef = Undef32;
1546 SplatSize = 4;
1547 return true;
1548 }
1549 }
1550 } else {
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1553 SplatSize = 8;
1554 return true;
1555 }
1556 }
1557
1558 return false; // Can't be a splat if two pieces don't match.
1559}
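// For illustration: Bits128 = { 0x0101010101010101, 0x0101010101010101 }
// with no undefs folds to Bits64 = 0x0101010101010101, Bits32 = 0x01010101,
// Bits16 = 0x0101; every comparison above succeeds, so with MinSplatBits <= 8
// the routine reports SplatBits = 0x01 and SplatSize = 1.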
1560
1561// If this is a case we can't handle, return null and let the default
1562// expansion code take care of it. If we CAN select this case, and if it
1563// selects to a single instruction, return Op. Otherwise, if we can codegen
1564// this case more efficiently than a constant pool load, lower it to the
1565// sequence of ops that should be used.
1566static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1567 MVT::ValueType VT = Op.getValueType();
1568 // If this is a vector of constants or undefs, get the bits. A bit in
1569 // UndefBits is set if the corresponding element of the vector is an
1570 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1571 // zero.
1572 uint64_t VectorBits[2];
1573 uint64_t UndefBits[2];
1574 uint64_t SplatBits, SplatUndef;
1575 int SplatSize;
1576 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1577 || !isConstantSplat(VectorBits, UndefBits,
1578 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1579 SplatBits, SplatUndef, SplatSize))
1580 return SDOperand(); // Not a constant vector, not a splat.
1581
1582 switch (VT) {
1583 default: // Unexpected vector types fall through to the v4f32 assert below
1584 case MVT::v4f32: {
1585 uint32_t Value32 = SplatBits;
1586 assert(SplatSize == 4
1587 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1590 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1591 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1592 break;
1593 }
1594 case MVT::v2f64: {
1595 uint64_t f64val = SplatBits;
1596 assert(SplatSize == 8
1597 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1602 break;
1603 }
1604 case MVT::v16i8: {
1605 // 8-bit constants have to be expanded to 16-bits
1606 unsigned short Value16 = SplatBits | (SplatBits << 8);
1607 SDOperand Ops[8];
1608 for (int i = 0; i < 8; ++i)
1609 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1610 return DAG.getNode(ISD::BIT_CONVERT, VT,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1612 }
1613 case MVT::v8i16: {
1614 unsigned short Value16;
1615 if (SplatSize == 2)
1616 Value16 = (unsigned short) (SplatBits & 0xffff);
1617 else
1618 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1619 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1620 SDOperand Ops[8];
1621 for (int i = 0; i < 8; ++i) Ops[i] = T;
1622 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1623 }
1624 case MVT::v4i32: {
1625 unsigned int Value = SplatBits;
1626 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1627 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1628 }
1629 case MVT::v2i64: {
1630 uint64_t val = SplatBits;
1631 uint32_t upper = uint32_t(val >> 32);
1632 uint32_t lower = uint32_t(val);
1633
1634 if (val != 0) {
1635 SDOperand LO32;
1636 SDOperand HI32;
1637 SmallVector<SDOperand, 16> ShufBytes;
1638 SDOperand Result;
1639 bool upper_special, lower_special;
1640
1641 // NOTE: This code creates common-case shuffle masks that can be easily
1642 // detected as common expressions. It is not attempting to create highly
1643 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1644
1645 // Detect if the upper or lower half is a special shuffle mask pattern:
1646 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1647 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1648
1649 // Create lower vector if not a special pattern
1650 if (!lower_special) {
1651 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1652 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654 LO32C, LO32C, LO32C, LO32C));
1655 }
1656
1657 // Create upper vector if not a special pattern
1658 if (!upper_special) {
1659 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1660 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 HI32C, HI32C, HI32C, HI32C));
1663 }
1664
1665 // If either upper or lower are special, then the two input operands are
1666 // the same (basically, one of them is a "don't care")
1667 if (lower_special)
1668 LO32 = HI32;
1669 if (upper_special)
1670 HI32 = LO32;
1671 if (lower_special && upper_special) {
1672 // Both halves are special patterns, so the shuffle mask alone synthesizes
1673 // the result; feed SHUFB an arbitrary all-zero vector:
1674 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1675 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1676 Zero, Zero);
1677 }
1678
1679 for (int i = 0; i < 4; ++i) {
1680 for (int j = 0; j < 4; ++j) {
1681 SDOperand V;
1682 bool process_upper, process_lower;
1683 uint64_t val = 0;
1684
1685 process_upper = (upper_special && (i & 1) == 0);
1686 process_lower = (lower_special && (i & 1) == 1);
1687
1688 if (process_upper || process_lower) {
1689 if ((process_upper && upper == 0)
1690 || (process_lower && lower == 0))
1691 val = 0x80;
1692 else if ((process_upper && upper == 0xffffffff)
1693 || (process_lower && lower == 0xffffffff))
1694 val = 0xc0;
1695 else if ((process_upper && upper == 0x80000000)
1696 || (process_lower && lower == 0x80000000))
1697 val = (j == 0 ? 0xe0 : 0x80);
1698 } else
1699 val = i * 4 + j + ((i & 1) * 16);
1700
1701 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1702 }
1703 }
1704
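// For illustration: for the splat value 0x1234567800000000 (upper half not
// special, lower half zero), HI32 is the 0x12345678 splat, LO32 aliases
// HI32, and the loop above emits the byte mask
//   < 0, 1, 2, 3, 0x80, 0x80, 0x80, 0x80,
//     8, 9, 10, 11, 0x80, 0x80, 0x80, 0x80 >
// where 0x00-0x1f selects an input byte and SHUFB materializes 0x00 for a
// 0x80 control byte (0xc0 yields 0xff, 0xe0 yields 0x80).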
1705 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1706 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1707 &ShufBytes[0], ShufBytes.size()));
1708 } else {
1709 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1710 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1711 return DAG.getNode(ISD::BIT_CONVERT, VT,
1712 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713 Zero, Zero, Zero, Zero));
1714 }
1715 }
1716 }
1717
1718 return SDOperand();
1719}
1720
1721/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1722/// which the Cell can operate. The code inspects V3, the permutation vector,
1723/// to ascertain whether it is monotonically increasing with a single
1724/// "exception" element, e.g., (0, 1, _, 3). If this is the case, then generate
1725/// an INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
1726/// pool. In either case, the net result eventually invokes SHUFB to
1727/// permute/shuffle the bytes from V1 and V2.
1728/// \note
1729/// INSERT_MASK is eventually selected as one of the C*D instructions, which
1730/// generate the control word for byte/halfword/word insertion. This takes care
1731/// of a single element move from V2 into V1.
1732/// \note
1733/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1734static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1735 SDOperand V1 = Op.getOperand(0);
1736 SDOperand V2 = Op.getOperand(1);
1737 SDOperand PermMask = Op.getOperand(2);
1738
1739 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1740
1741 // If we have a single element being moved from V1 to V2, this can be handled
1742 // using the C*[DX] compute mask instructions, but the vector elements have
1743 // to be monotonically increasing with one exception element.
1744 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1745 unsigned EltsFromV2 = 0;
1746 unsigned V2Elt = 0;
1747 unsigned V2EltIdx0 = 0;
1748 unsigned CurrElt = 0;
1749 bool monotonic = true;
1750 if (EltVT == MVT::i8)
1751 V2EltIdx0 = 16;
1752 else if (EltVT == MVT::i16)
1753 V2EltIdx0 = 8;
1754 else if (EltVT == MVT::i32)
1755 V2EltIdx0 = 4;
1756 else
1757 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1758
1759 for (unsigned i = 0, e = PermMask.getNumOperands();
1760 EltsFromV2 <= 1 && monotonic && i != e;
1761 ++i) {
1762 unsigned SrcElt;
1763 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1764 SrcElt = 0;
1765 else
1766 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1767
1768 if (SrcElt >= V2EltIdx0) {
1769 ++EltsFromV2;
1770 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1771 } else if (CurrElt != SrcElt) {
1772 monotonic = false;
1773 }
1774
1775 ++CurrElt;
1776 }
1777
1778 if (EltsFromV2 == 1 && monotonic) {
1779 // Compute mask and shuffle
1780 MachineFunction &MF = DAG.getMachineFunction();
1781 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1782 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1783 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1784 // Initialize temporary register to 0
1785 SDOperand InitTempReg =
1786 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1787 // Copy register's contents as index in INSERT_MASK:
1788 SDOperand ShufMaskOp =
1789 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1790 DAG.getTargetConstant(V2Elt, MVT::i32),
1791 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1792 // Use shuffle mask in SHUFB synthetic instruction:
1793 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1794 } else {
1795 // Convert the VECTOR_SHUFFLE mask's input element units to the actual bytes.
1796 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1797
1798 SmallVector<SDOperand, 16> ResultMask;
1799 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1800 unsigned SrcElt;
1801 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1802 SrcElt = 0;
1803 else
1804 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1805
1806 for (unsigned j = 0; j != BytesPerElement; ++j) {
1807 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1808 MVT::i8));
1809 }
1810 }
1811
1812 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1813 &ResultMask[0], ResultMask.size());
1814 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1815 }
1816}
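// For illustration: a v4i32 shuffle with permutation mask <0, 1, 6, 3> has
// V2EltIdx0 == 4, so index 6 selects an element of V2 while the remaining
// indices count up monotonically (0, 1, _, 3); the loop above sees
// EltsFromV2 == 1 and takes the INSERT_MASK/C*D path. A mask such as
// <0, 2, 1, 3> breaks monotonicity and falls into the general SHUFB path.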
1817
1818static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1819 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1820
1821 if (Op0.Val->getOpcode() == ISD::Constant) {
1822 // For a constant, build the appropriate constant vector, which will
1823 // eventually simplify to a vector register load.
1824
1825 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1826 SmallVector<SDOperand, 16> ConstVecValues;
1827 MVT::ValueType VT;
1828 size_t n_copies;
1829
1830 // Create a constant vector:
1831 switch (Op.getValueType()) {
1832 default: assert(0 && "Unexpected constant value type in "
1833 "LowerSCALAR_TO_VECTOR");
1834 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1835 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1836 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1837 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1838 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1839 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1840 }
1841
1842 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1843 for (size_t j = 0; j < n_copies; ++j)
1844 ConstVecValues.push_back(CValue);
1845
1846 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1847 &ConstVecValues[0], ConstVecValues.size());
1848 } else {
1849 // Otherwise, copy the value from one register to another:
1850 switch (Op0.getValueType()) {
1851 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1852 case MVT::i8:
1853 case MVT::i16:
1854 case MVT::i32:
1855 case MVT::i64:
1856 case MVT::f32:
1857 case MVT::f64:
1858 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1859 }
1860 }
1861
1862 return SDOperand();
1863}
1864
1865static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1866 switch (Op.getValueType()) {
1867 case MVT::v4i32: {
1868 SDOperand rA = Op.getOperand(0);
1869 SDOperand rB = Op.getOperand(1);
1870 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1871 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1872 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1873 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1874
1875 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1876 break;
1877 }
1878
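// For illustration, a scalar model of the v4i32 decomposition above
// (assuming MPYH computes (hi16(a) * lo16(b)) << 16 and MPYU computes
// lo16(a) * lo16(b), both modulo 2^32), applied lane-wise to each of the
// four 32-bit elements:
//
//   uint32_t mul32_model(uint32_t a, uint32_t b) {
//     uint32_t hi1 = ((a >> 16) * (b & 0xffff)) << 16;  // MPYH(a, b)
//     uint32_t hi2 = ((b >> 16) * (a & 0xffff)) << 16;  // MPYH(b, a)
//     uint32_t lo  = (a & 0xffff) * (b & 0xffff);       // MPYU(a, b)
//     return hi1 + (hi2 + lo);                          // == a * b (mod 2^32)
//   }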
1879 // Multiply two v8i16 vectors (pipeline friendly version):
1880 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1881 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1882 // c) Use SELB to select upper and lower halves from the intermediate results
1883 //
1884 // NOTE: We really want to move the FSMBI to earlier to actually get the
1885 // dual-issue. This code does manage to do this, even if it's a little on
1886 // the wacky side
1887 case MVT::v8i16: {
1888 MachineFunction &MF = DAG.getMachineFunction();
1889 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1890 SDOperand Chain = Op.getOperand(0);
1891 SDOperand rA = Op.getOperand(0);
1892 SDOperand rB = Op.getOperand(1);
1893 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1894 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1895
1896 SDOperand FSMBOp =
1897 DAG.getCopyToReg(Chain, FSMBIreg,
1898 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1899 DAG.getConstant(0xcccc, MVT::i32)));
1900
1901 SDOperand HHProd =
1902 DAG.getCopyToReg(FSMBOp, HiProdReg,
1903 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1904
1905 SDOperand HHProd_v4i32 =
1906 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1907 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1908
1909 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1910 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1911 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1912 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1913 HHProd_v4i32,
1914 DAG.getConstant(16, MVT::i16))),
1915 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1916 }
1917
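// For illustration of the v8i16 case above (assuming FSMBI expands each
// immediate bit, MSB first, into one 0x00/0xff mask byte): 0xcccc is
// 1100110011001100, so the mask marks bytes 0-1, 4-5, 8-9 and 12-13, the
// high halfword of every 32-bit word. SELB therefore takes those lanes
// from the shifted high-half products and the remaining lanes from the
// MPY low-half products.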
1918 // This M00sE is N@stI! (apologies to Monty Python)
1919 //
1920 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1921 // is to break it all apart, sign extend, and reassemble the various
1922 // intermediate products.
1923 case MVT::v16i8: {
1924 MachineFunction &MF = DAG.getMachineFunction();
1925 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1926 SDOperand Chain = Op.getOperand(0);
1927 SDOperand rA = Op.getOperand(0);
1928 SDOperand rB = Op.getOperand(1);
1929 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1930 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1931
1932 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1933 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1934 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1935
1936 SDOperand LLProd =
1937 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1938 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1939 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1940
1941 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1942
1943 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1944
1945 SDOperand LHProd =
1946 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1947 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1948
1949 SDOperand FSMBdef_2222 =
1950 DAG.getCopyToReg(Chain, FSMBreg_2222,
1951 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1952 DAG.getConstant(0x2222, MVT::i32)));
1953
1954 SDOperand FSMBuse_2222 =
1955 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1956
1957 SDOperand LoProd_1 =
1958 DAG.getCopyToReg(Chain, LoProd_reg,
1959 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1960 FSMBuse_2222));
1961
1962 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1963
1964 SDOperand LoProd =
1965 DAG.getNode(ISD::AND, MVT::v4i32,
1966 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1967 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1968 LoProdMask, LoProdMask,
1969 LoProdMask, LoProdMask));
1970
1971 SDOperand rAH =
1972 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1974
1975 SDOperand rBH =
1976 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1978
1979 SDOperand HLProd =
1980 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1982 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1983
1984 SDOperand HHProd_1 =
1985 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1986 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1987 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1990
1991 SDOperand HHProd =
1992 DAG.getCopyToReg(Chain, HiProd_reg,
1993 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1994 HLProd,
1995 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1996 FSMBuse_2222));
1997
1998 SDOperand HiProd =
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2000 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2001
2002 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2003 DAG.getNode(ISD::OR, MVT::v4i32,
2004 LoProd, HiProd));
2005 }
2006
2007 default:
2008 cerr << "CellSPU: Unknown vector multiplication, got "
2009 << MVT::getValueTypeString(Op.getValueType())
2010 << "\n";
2011 abort();
2012 /*NOTREACHED*/
2013 }
2014
2015 return SDOperand();
2016}
2017
2018static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2019 MachineFunction &MF = DAG.getMachineFunction();
2020 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2021
2022 SDOperand A = Op.getOperand(0);
2023 SDOperand B = Op.getOperand(1);
2024 unsigned VT = Op.getValueType();
2025
2026 unsigned VRegBR, VRegC;
2027
2028 if (VT == MVT::f32) {
2029 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2030 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2031 } else {
2032 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2033 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2034 }
2035 // TODO: make sure we're feeding FPInterp the right arguments
2036 // Right now: fi B, frest(B)
2037
2038 // Computes BRcpl =
2039 // (Floating Interpolate (FP Reciprocal Estimate B))
2040 SDOperand BRcpl =
2041 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2042 DAG.getNode(SPUISD::FPInterp, VT, B,
2043 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2044
2045 // Computes A * BRcpl and stores in a temporary register
2046 SDOperand AxBRcpl =
2047 DAG.getCopyToReg(BRcpl, VRegC,
2048 DAG.getNode(ISD::FMUL, VT, A,
2049 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2050 // What's the Chain variable do? It's magic!
2051 // TODO: set Chain = Op(0).getEntryNode()
2052
2053 return DAG.getNode(ISD::FADD, VT,
2054 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2055 DAG.getNode(ISD::FMUL, VT,
2056 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2057 DAG.getNode(ISD::FSUB, VT, A,
2058 DAG.getNode(ISD::FMUL, VT, B,
2059 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2060}
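// For illustration, the above is one Newton-Raphson refinement of the
// hardware reciprocal estimate. If R = fi(B, frest(B)) = (1 + e) / B for
// some small relative error e, then:
//   C = A * R            ~ (A / B) * (1 + e)
//   C + R * (A - B * C)  = (A / B) * (1 - e^2)
// so the residual error shrinks quadratically in a single correction step.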
2061
2062static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2063 unsigned VT = Op.getValueType();
2064 SDOperand N = Op.getOperand(0);
2065 SDOperand Elt = Op.getOperand(1);
2066 SDOperand ShufMask[16];
2067 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2068
2069 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2070
2071 int EltNo = (int) C->getValue();
2072
2073 // sanity checks:
2074 if (VT == MVT::i8 && EltNo >= 16)
2075 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2076 else if (VT == MVT::i16 && EltNo >= 8)
2077 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2078 else if (VT == MVT::i32 && EltNo >= 4)
2079 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2080 else if (VT == MVT::i64 && EltNo >= 2)
2081 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2082
2083 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2084 // i32 and i64: Element 0 is the preferred slot
2085 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2086 }
2087
2088 // Need to generate shuffle mask and extract:
2089 int prefslot_begin = -1, prefslot_end = -1;
2090 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2091
2092 switch (VT) {
2093 case MVT::i8: {
2094 prefslot_begin = prefslot_end = 3;
2095 break;
2096 }
2097 case MVT::i16: {
2098 prefslot_begin = 2; prefslot_end = 3;
2099 break;
2100 }
2101 case MVT::i32: {
2102 prefslot_begin = 0; prefslot_end = 3;
2103 break;
2104 }
2105 case MVT::i64: {
2106 prefslot_begin = 0; prefslot_end = 7;
2107 break;
2108 }
2109 }
2110
2111 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2112 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2113
2114 for (int i = 0; i < 16; ++i) {
2115 // Zero-fill the upper part of the preferred slot; don't care about the
2116 // other slots:
2117 unsigned int mask_val;
2118
2119 if (i <= prefslot_end) {
2120 mask_val =
2121 ((i < prefslot_begin)
2122 ? 0x80
2123 : elt_byte + (i - prefslot_begin));
2124
2125 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2126 } else
2127 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2128 }
2129
2130 SDOperand ShufMaskVec =
2131 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2132 &ShufMask[0],
2133 sizeof(ShufMask) / sizeof(ShufMask[0]));
2134
2135 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2136 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2137 N, N, ShufMaskVec));
2138
2139}
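// For illustration: extracting i32 element 2 gives elt_byte == 8 with
// preferred slot bytes 0..3, so the loop above builds the mask
//   < 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11 >
// which rotates bytes 8-11 of N into the preferred slot, where
// EXTRACT_ELT0 can read the scalar result.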
2140
2141static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2142 SDOperand VecOp = Op.getOperand(0);
2143 SDOperand ValOp = Op.getOperand(1);
2144 SDOperand IdxOp = Op.getOperand(2);
2145 MVT::ValueType VT = Op.getValueType();
2146
2147 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2148 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2149
2150 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2151 // Use $2 because it's always 16-byte aligned and it's available:
2152 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2153
2154 SDOperand result =
2155 DAG.getNode(SPUISD::SHUFB, VT,
2156 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2157 VecOp,
2158 DAG.getNode(SPUISD::INSERT_MASK, VT,
2159 DAG.getNode(ISD::ADD, PtrVT,
2160 PtrBase,
2161 DAG.getConstant(CN->getValue(),
2162 PtrVT))));
2163
2164 return result;
2165}
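// Note (an assumption about the C*D semantics): INSERT_MASK is handed an
// address rather than a value here because the C*D instructions derive
// their insertion control word from the low bits of an effective address;
// the 16-byte-aligned R2 serves only as a known base for that computation.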
2166
2167static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2168 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2169
2170 assert(Op.getValueType() == MVT::i8);
2171 switch (Opc) {
2172 default:
2173 assert(0 && "Unhandled i8 math operator");
2174 /*NOTREACHED*/
2175 break;
2176 case ISD::SUB: {
2177 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2178 // the result:
2179 SDOperand N1 = Op.getOperand(1);
2180 N0 = (N0.getOpcode() != ISD::Constant
2181 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2182 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2183 N1 = (N1.getOpcode() != ISD::Constant
2184 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2185 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2186 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2187 DAG.getNode(Opc, MVT::i16, N0, N1));
2188 }
2189 case ISD::ROTR:
2190 case ISD::ROTL: {
2191 SDOperand N1 = Op.getOperand(1);
2192 unsigned N1Opc;
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2197 N1 = (N1.getOpcode() != ISD::Constant
2198 ? DAG.getNode(N1Opc, MVT::i16, N1)
2199 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2200 SDOperand ExpandArg =
2201 DAG.getNode(ISD::OR, MVT::i16, N0,
2202 DAG.getNode(ISD::SHL, MVT::i16,
2203 N0, DAG.getConstant(8, MVT::i16)));
2204 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2205 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2206 }
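// For illustration of the rotate case above: the OR/SHL pair replicates the
// 8-bit value into both bytes of an i16 (0xb1 -> 0xb1b1), so a 16-bit
// rotate yields the correct 8-bit rotation in the low byte before the
// TRUNCATE: rotl(0xb1b1, 4) = 0x1b1b, which truncates to rotl8(0xb1, 4) =
// 0x1b.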
2207 case ISD::SRL:
2208 case ISD::SHL: {
2209 SDOperand N1 = Op.getOperand(1);
2210 unsigned N1Opc;
2211 N0 = (N0.getOpcode() != ISD::Constant
2212 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2213 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2214 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2215 N1 = (N1.getOpcode() != ISD::Constant
2216 ? DAG.getNode(N1Opc, MVT::i16, N1)
2217 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2218 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2219 DAG.getNode(Opc, MVT::i16, N0, N1));
2220 }
2221 case ISD::SRA: {
2222 SDOperand N1 = Op.getOperand(1);
2223 unsigned N1Opc;
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232 DAG.getNode(Opc, MVT::i16, N0, N1));
2233 }
2234 case ISD::MUL: {
2235 SDOperand N1 = Op.getOperand(1);
2236 unsigned N1Opc;
2237 N0 = (N0.getOpcode() != ISD::Constant
2238 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2241 N1 = (N1.getOpcode() != ISD::Constant
2242 ? DAG.getNode(N1Opc, MVT::i16, N1)
2243 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245 DAG.getNode(Opc, MVT::i16, N0, N1));
2246 break;
2247 }
2248 }
2249
2250 return SDOperand();
2251}
2252
2253//! Lower byte immediate operations for v16i8 vectors:
2254static SDOperand
2255LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2256 SDOperand ConstVec;
2257 SDOperand Arg;
2258 MVT::ValueType VT = Op.getValueType();
2259
2260 ConstVec = Op.getOperand(0);
2261 Arg = Op.getOperand(1);
2262 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2263 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2264 ConstVec = ConstVec.getOperand(0);
2265 } else {
2266 ConstVec = Op.getOperand(1);
2267 Arg = Op.getOperand(0);
2268 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2269 ConstVec = ConstVec.getOperand(0);
2270 }
2271 }
2272 }
2273
2274 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2275 uint64_t VectorBits[2];
2276 uint64_t UndefBits[2];
2277 uint64_t SplatBits, SplatUndef;
2278 int SplatSize;
2279
2280 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2281 && isConstantSplat(VectorBits, UndefBits,
2282 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2283 SplatBits, SplatUndef, SplatSize)) {
2284 SDOperand tcVec[16];
2285 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2286 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2287
2288 // Turn the BUILD_VECTOR into a set of target constants:
2289 for (size_t i = 0; i < tcVecSize; ++i)
2290 tcVec[i] = tc;
2291
2292 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2293 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2294 }
2295 }
2296
2297 return SDOperand();
2298}
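// For illustration: given (and v16i8 X, <0x0f, 0x0f, ..., 0x0f>), the code
// above detects the 8-bit splat and rewrites the constant operand as a
// BUILD_VECTOR of 16 identical target constants, which instruction
// selection can then fold into the byte-immediate forms (e.g. ANDBI, and
// likewise ORBI/XORBI for or/xor).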
2299
2300//! Lower i32 multiplication
2301static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2302 unsigned Opc) {
2303 switch (VT) {
2304 default:
2305 cerr << "CellSPU: Unknown LowerMUL value type, got "
2306 << MVT::getValueTypeString(Op.getValueType())
2307 << "\n";
2308 abort();
2309 /*NOTREACHED*/
2310
2311 case MVT::i32: {
2312 SDOperand rA = Op.getOperand(0);
2313 SDOperand rB = Op.getOperand(1);
2314
2315 return DAG.getNode(ISD::ADD, MVT::i32,
2316 DAG.getNode(ISD::ADD, MVT::i32,
2317 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2318 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2319 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2320 }
2321 }
2322
2323 return SDOperand();
2324}
2325
2326//! Custom lowering for CTPOP (count population)
2327/*!
2328 Custom lowering code that counts the number of ones in the input
2329 operand. SPU has such an instruction, but it counts the number of
2330 ones per byte, which then have to be accumulated.
2331*/
2332static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2333 unsigned VT = Op.getValueType();
2334 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2335
2336 switch (VT) {
2337 case MVT::i8: {
2338 SDOperand N = Op.getOperand(0);
2339 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2340
2341 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2342 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2343
2344 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2345 }
2346
2347 case MVT::i16: {
2348 MachineFunction &MF = DAG.getMachineFunction();
2349 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2350
2351 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2352
2353 SDOperand N = Op.getOperand(0);
2354 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2355 SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16); // i16 popcount can reach 16, needing 5 bits
2356 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2357
2358 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2359 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2360
2361 // CNTB_result becomes the chain to which all of the virtual registers
2362 // CNTB_reg, SUM1_reg become associated:
2363 SDOperand CNTB_result =
2364 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2365
2366 SDOperand CNTB_rescopy =
2367 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2368
2369 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2370
2371 return DAG.getNode(ISD::AND, MVT::i16,
2372 DAG.getNode(ISD::ADD, MVT::i16,
2373 DAG.getNode(ISD::SRL, MVT::i16,
2374 Tmp1, Shift1),
2375 Tmp1),
2376 Mask0);
2377 }
2378
2379 case MVT::i32: {
2380 MachineFunction &MF = DAG.getMachineFunction();
2381 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2382
2383 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2384 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2385
2386 SDOperand N = Op.getOperand(0);
2387 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2388 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2389 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2390 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2391
2392 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2393 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2394
2395 // CNTB_result becomes the chain to which all of the virtual registers
2396 // CNTB_reg, SUM1_reg become associated:
2397 SDOperand CNTB_result =
2398 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2399
2400 SDOperand CNTB_rescopy =
2401 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2402
2403 SDOperand Comp1 =
2404 DAG.getNode(ISD::SRL, MVT::i32,
2405 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2406
2407 SDOperand Sum1 =
2408 DAG.getNode(ISD::ADD, MVT::i32,
2409 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2410
2411 SDOperand Sum1_rescopy =
2412 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2413
2414 SDOperand Comp2 =
2415 DAG.getNode(ISD::SRL, MVT::i32,
2416 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2417 Shift2);
2418 SDOperand Sum2 =
2419 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2420 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2421
2422 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2423 }
2424
2425 case MVT::i64:
2426 break;
2427 }
2428
2429 return SDOperand();
2430}
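// For illustration, a scalar model of the i32 lowering above, assuming CNTB
// deposits one per-byte population count in each byte lane (popcount8 is a
// hypothetical per-byte popcount helper):
//
//   uint32_t ctpop32_model(uint32_t v) {
//     uint32_t b = 0;                      // per-byte counts, as CNTB would
//     for (int i = 0; i < 4; ++i)
//       b |= popcount8((v >> (8 * i)) & 0xff) << (8 * i);
//     uint32_t s = b + (b >> 16);          // fold the two halfwords
//     s = s + (s >> 8);                    // fold the remaining two bytes
//     return s & 0xff;                     // counts <= 32 fit in one byte
//   }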
2431
2432/// LowerOperation - Provide custom lowering hooks for some operations.
2433///
2434SDOperand
2435SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2436{
2437 switch (Op.getOpcode()) {
2438 default: {
2439 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2440 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2441 cerr << "*Op.Val:\n";
2442 Op.Val->dump();
2443 abort();
2444 }
2445 case ISD::LOAD:
2446 case ISD::SEXTLOAD:
2447 case ISD::ZEXTLOAD:
2448 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2449 case ISD::STORE:
2450 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2451 case ISD::ConstantPool:
2452 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2453 case ISD::GlobalAddress:
2454 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2455 case ISD::JumpTable:
2456 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2457 case ISD::Constant:
2458 return LowerConstant(Op, DAG);
2459 case ISD::ConstantFP:
2460 return LowerConstantFP(Op, DAG);
2461 case ISD::FORMAL_ARGUMENTS:
2462 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2463 case ISD::CALL:
2464 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2465 case ISD::RET:
2466 return LowerRET(Op, DAG, getTargetMachine());
2467
2468 // i8 math ops:
2469 case ISD::SUB:
2470 case ISD::ROTR:
2471 case ISD::ROTL:
2472 case ISD::SRL:
2473 case ISD::SHL:
2474 case ISD::SRA:
2475 return LowerI8Math(Op, DAG, Op.getOpcode());
2476
2477 // Vector-related lowering.
2478 case ISD::BUILD_VECTOR:
2479 return LowerBUILD_VECTOR(Op, DAG);
2480 case ISD::SCALAR_TO_VECTOR:
2481 return LowerSCALAR_TO_VECTOR(Op, DAG);
2482 case ISD::VECTOR_SHUFFLE:
2483 return LowerVECTOR_SHUFFLE(Op, DAG);
2484 case ISD::EXTRACT_VECTOR_ELT:
2485 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2486 case ISD::INSERT_VECTOR_ELT:
2487 return LowerINSERT_VECTOR_ELT(Op, DAG);
2488
2489 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2490 case ISD::AND:
2491 case ISD::OR:
2492 case ISD::XOR:
2493 return LowerByteImmed(Op, DAG);
2494
2495 // Vector and i8 multiply:
2496 case ISD::MUL:
2497 if (MVT::isVector(Op.getValueType()))
2498 return LowerVectorMUL(Op, DAG);
2499 else if (Op.getValueType() == MVT::i8)
2500 return LowerI8Math(Op, DAG, Op.getOpcode());
2501 else
2502 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2503
2504 case ISD::FDIV:
2505 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2506 return LowerFDIVf32(Op, DAG);
2507// else if (Op.getValueType() == MVT::f64)
2508// return LowerFDIVf64(Op, DAG);
2509 else
2510 assert(0 && "Calling FDIV on unsupported MVT");
2511
2512 case ISD::CTPOP:
2513 return LowerCTPOP(Op, DAG);
2514 }
2515
2516 return SDOperand();
2517}
2518
2519//===----------------------------------------------------------------------===//
2520// Other Lowering Code
2521//===----------------------------------------------------------------------===//
2522
2523MachineBasicBlock *
2524SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2525 MachineBasicBlock *BB)
2526{
2527 return BB;
2528}
2529
2530//===----------------------------------------------------------------------===//
2531// Target Optimization Hooks
2532//===----------------------------------------------------------------------===//
2533
2534SDOperand
2535SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2536{
2537#if 0
2538 TargetMachine &TM = getTargetMachine();
2539 SelectionDAG &DAG = DCI.DAG;
2540#endif
2541 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2542
2543 switch (N->getOpcode()) {
2544 default: break;
2545
2546 // Look for obvious optimizations for shifts and rotates:
2547 // a) Replace 0 << V with 0
2548 // b) Replace V << 0 with V
2549 //
2550 // N.B.: llvm will generate an undef node if the shift amount is greater
2551 // than 15 (e.g., V << 16), which will naturally trigger an assert.
2552 case SPU::SHLIr32:
2553 case SPU::SHLHIr16:
2554 case SPU::SHLQBIIvec:
2555 case SPU::ROTHIr16:
2556 case SPU::ROTHIr16_i32:
2557 case SPU::ROTIr32:
2558 case SPU::ROTIr32_i16:
2559 case SPU::ROTQBYIvec:
2560 case SPU::ROTQBYBIvec:
2561 case SPU::ROTQBIIvec:
2562 case SPU::ROTHMIr16:
2563 case SPU::ROTMIr32:
2564 case SPU::ROTQMBYIvec: {
2565 if (N0.getOpcode() == ISD::Constant) {
2566 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2567 if (C->getValue() == 0) // 0 << V -> 0.
2568 return N0;
2569 }
2570 }
2571 SDOperand N1 = N->getOperand(1);
2572 if (N1.getOpcode() == ISD::Constant) {
2573 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2574 if (C->getValue() == 0) // V << 0 -> V
2575 return N0;
2576 }
2577 }
2578 break;
2579 }
2580 }
2581
2582 return SDOperand();
2583}
2584
2585//===----------------------------------------------------------------------===//
2586// Inline Assembly Support
2587//===----------------------------------------------------------------------===//
2588
2589/// getConstraintType - Given a constraint letter, return the type of
2590/// constraint it is for this target.
2591SPUTargetLowering::ConstraintType
2592SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2593 if (ConstraintLetter.size() == 1) {
2594 switch (ConstraintLetter[0]) {
2595 default: break;
2596 case 'b':
2597 case 'r':
2598 case 'f':
2599 case 'v':
2600 case 'y':
2601 return C_RegisterClass;
2602 }
2603 }
2604 return TargetLowering::getConstraintType(ConstraintLetter);
2605}
2606
2607std::pair<unsigned, const TargetRegisterClass*>
2608SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2609 MVT::ValueType VT) const
2610{
2611 if (Constraint.size() == 1) {
2612 // GCC register constraint letters (modeled on the RS6000 backend)
2613 switch (Constraint[0]) {
2614 case 'b': // R1-R31
2615 case 'r': // R0-R31
2616 if (VT == MVT::i64)
2617 return std::make_pair(0U, SPU::R64CRegisterClass);
2618 return std::make_pair(0U, SPU::R32CRegisterClass);
2619 case 'f':
2620 if (VT == MVT::f32)
2621 return std::make_pair(0U, SPU::R32FPRegisterClass);
2622 else if (VT == MVT::f64)
2623 return std::make_pair(0U, SPU::R64FPRegisterClass);
2624 break;
2625 case 'v':
2626 return std::make_pair(0U, SPU::GPRCRegisterClass);
2627 }
2628 }
2629
2630 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2631}
2632
2633void
2634SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2635 uint64_t Mask,
2636 uint64_t &KnownZero,
2637 uint64_t &KnownOne,
2638 const SelectionDAG &DAG,
2639 unsigned Depth ) const {
2640 KnownZero = 0;
2641 KnownOne = 0;
2642}
2643
2644// LowerAsmOperandForConstraint
2645void
2646SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2647 char ConstraintLetter,
2648 std::vector<SDOperand> &Ops,
2649 SelectionDAG &DAG) {
2650 // Default, for the time being, to the base class handler
2651 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2652}
2653
2654/// isLegalAddressImmediate - Return true if the integer value can be used
2655/// as the offset of the target addressing mode.
2656bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2657 // SPU's local store is 256K, so offsets must fit in an 18-bit signed range:
2658 return (V > -(1 << 18) && V < (1 << 18) - 1);
2659}
2660
2661bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2662 return false;
2663}