//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
13
14#include "SPURegisterNames.h"
15#include "SPUISelLowering.h"
16#include "SPUTargetMachine.h"
17#include "llvm/ADT/VectorExtras.h"
18#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19#include "llvm/CodeGen/CallingConvLower.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
Chris Lattner84bc5422007-12-31 04:13:23 +000023#include "llvm/CodeGen/MachineRegisterInfo.h"
Scott Michel266bc8f2007-12-04 22:23:35 +000024#include "llvm/CodeGen/SelectionDAG.h"
Scott Michel266bc8f2007-12-04 22:23:35 +000025#include "llvm/Constants.h"
26#include "llvm/Function.h"
27#include "llvm/Intrinsics.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/MathExtras.h"
30#include "llvm/Target/TargetOptions.h"
31
32#include <map>
33
34using namespace llvm;
35
36// Used in getTargetNodeName() below
37namespace {
38 std::map<unsigned, const char *> node_names;
39
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
44 };
45
46 const valtype_map_s valtype_map[] = {
47 { MVT::i1, 3 },
48 { MVT::i8, 3 },
49 { MVT::i16, 2 },
50 { MVT::i32, 0 },
51 { MVT::f32, 0 },
52 { MVT::i64, 0 },
53 { MVT::f64, 0 },
54 { MVT::i128, 0 }
55 };
56
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
58
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
61
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
65 break;
66 }
67 }
68
69#ifndef NDEBUG
70 if (retval == 0) {
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
73 << "\n";
74 abort();
75 }
76#endif
77
78 return retval;
79 }
80
81 //! Predicate that returns true if operand is a memory target
82 /*!
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
Scott Michel9de5d0d2008-01-11 02:53:15 +000085 address, external symbol, constant pool) or an A-form
Scott Michel266bc8f2007-12-04 22:23:35 +000086 address.
87 */
88 bool isMemoryOperand(const SDOperand &Op)
89 {
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
Scott Michel9de5d0d2008-01-11 02:53:15 +000093 /* || Opc == ISD::FrameIndex */
Scott Michel266bc8f2007-12-04 22:23:35 +000094 || Opc == ISD::JumpTable
95 || Opc == ISD::ConstantPool
96 || Opc == ISD::ExternalSymbol
97 || Opc == ISD::TargetGlobalAddress
98 || Opc == ISD::TargetGlobalTLSAddress
Scott Michel9de5d0d2008-01-11 02:53:15 +000099 /* || Opc == ISD::TargetFrameIndex */
Scott Michel266bc8f2007-12-04 22:23:35 +0000100 || Opc == ISD::TargetJumpTable
101 || Opc == ISD::TargetConstantPool
102 || Opc == ISD::TargetExternalSymbol
Scott Michel9de5d0d2008-01-11 02:53:15 +0000103 || Opc == SPUISD::AFormAddr);
Scott Michel266bc8f2007-12-04 22:23:35 +0000104 }
105}
106
107SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
108 : TargetLowering(TM),
109 SPUTM(TM)
110{
111 // Fold away setcc operations if possible.
112 setPow2DivIsCheap();
113
114 // Use _setjmp/_longjmp instead of setjmp/longjmp.
115 setUseUnderscoreSetJmp(true);
116 setUseUnderscoreLongJmp(true);
117
118 // Set up the SPU's register classes:
119 // NOTE: i8 register class is not registered because we cannot determine when
120 // we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel504c3692007-12-17 22:32:34 +0000121 // NOTE: Ignore the previous note. For now. :-)
122 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
123 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
124 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
125 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
126 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
127 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000128 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
129
130 // SPU has no sign or zero extended loads for i1, i8, i16:
131 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
132 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
133 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
134 setStoreXAction(MVT::i1, Custom);
135
136 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
137 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
139 setStoreXAction(MVT::i8, Custom);
140
141 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
142 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
144
145 // SPU constant load actions are custom lowered:
146 setOperationAction(ISD::Constant, MVT::i64, Custom);
147 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
149
150 // SPU's loads and stores have to be custom lowered:
151 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
152 ++sctype) {
153 setOperationAction(ISD::LOAD, sctype, Custom);
154 setOperationAction(ISD::STORE, sctype, Custom);
155 }
156
157 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
158 // into BR_CCs. BR_CC instructions are custom selected in
159 // SPUDAGToDAGISel.
160 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
161
162 // Expand the jumptable branches
163 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
164 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
165 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
166
167 // SPU has no intrinsics for these particular operations:
168 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
169 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
170 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
171
172 // PowerPC has no SREM/UREM instructions
173 setOperationAction(ISD::SREM, MVT::i32, Expand);
174 setOperationAction(ISD::UREM, MVT::i32, Expand);
175 setOperationAction(ISD::SREM, MVT::i64, Expand);
176 setOperationAction(ISD::UREM, MVT::i64, Expand);
177
178 // We don't support sin/cos/sqrt/fmod
179 setOperationAction(ISD::FSIN , MVT::f64, Expand);
180 setOperationAction(ISD::FCOS , MVT::f64, Expand);
181 setOperationAction(ISD::FREM , MVT::f64, Expand);
182 setOperationAction(ISD::FSIN , MVT::f32, Expand);
183 setOperationAction(ISD::FCOS , MVT::f32, Expand);
184 setOperationAction(ISD::FREM , MVT::f32, Expand);
185
186 // If we're enabling GP optimizations, use hardware square root
187 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
188 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
189
190 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
191 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
192
193 // SPU can do rotate right and left, so legalize it... but customize for i8
194 // because instructions don't exist.
195 setOperationAction(ISD::ROTR, MVT::i32, Legal);
196 setOperationAction(ISD::ROTR, MVT::i16, Legal);
197 setOperationAction(ISD::ROTR, MVT::i8, Custom);
198 setOperationAction(ISD::ROTL, MVT::i32, Legal);
199 setOperationAction(ISD::ROTL, MVT::i16, Legal);
200 setOperationAction(ISD::ROTL, MVT::i8, Custom);
201 // SPU has no native version of shift left/right for i8
202 setOperationAction(ISD::SHL, MVT::i8, Custom);
203 setOperationAction(ISD::SRL, MVT::i8, Custom);
204 setOperationAction(ISD::SRA, MVT::i8, Custom);
205
206 // Custom lower i32 multiplications
207 setOperationAction(ISD::MUL, MVT::i32, Custom);
208
209 // Need to custom handle (some) common i8 math ops
210 setOperationAction(ISD::SUB, MVT::i8, Custom);
211 setOperationAction(ISD::MUL, MVT::i8, Custom);
212
213 // SPU does not have BSWAP. It does have i32 support CTLZ.
214 // CTPOP has to be custom lowered.
215 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
216 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
217
218 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
219 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
222
223 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
224 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
225
226 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
227
228 // SPU does not have select or setcc
229 setOperationAction(ISD::SELECT, MVT::i1, Expand);
230 setOperationAction(ISD::SELECT, MVT::i8, Expand);
231 setOperationAction(ISD::SELECT, MVT::i16, Expand);
232 setOperationAction(ISD::SELECT, MVT::i32, Expand);
233 setOperationAction(ISD::SELECT, MVT::i64, Expand);
234 setOperationAction(ISD::SELECT, MVT::f32, Expand);
235 setOperationAction(ISD::SELECT, MVT::f64, Expand);
236
237 setOperationAction(ISD::SETCC, MVT::i1, Expand);
238 setOperationAction(ISD::SETCC, MVT::i8, Expand);
239 setOperationAction(ISD::SETCC, MVT::i16, Expand);
240 setOperationAction(ISD::SETCC, MVT::i32, Expand);
241 setOperationAction(ISD::SETCC, MVT::i64, Expand);
242 setOperationAction(ISD::SETCC, MVT::f32, Expand);
243 setOperationAction(ISD::SETCC, MVT::f64, Expand);
244
245 // SPU has a legal FP -> signed INT instruction
246 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
247 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
248 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
250
251 // FDIV on SPU requires custom lowering
252 setOperationAction(ISD::FDIV, MVT::f32, Custom);
253 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
254
255 // SPU has [U|S]INT_TO_FP
256 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
257 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
259 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
262 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
263 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
264
Scott Michel86c041f2007-12-20 00:44:13 +0000265 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
268 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
Scott Michel266bc8f2007-12-04 22:23:35 +0000269
270 // We cannot sextinreg(i1). Expand to shifts.
271 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
272
273 // Support label based line numbers.
274 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
275 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
276
277 // We want to legalize GlobalAddress and ConstantPool nodes into the
278 // appropriate instructions to materialize the address.
279 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
280 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
282 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
283 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
284 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
286 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
287
288 // RET must be custom lowered, to meet ABI requirements
289 setOperationAction(ISD::RET, MVT::Other, Custom);
290
291 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
292 setOperationAction(ISD::VASTART , MVT::Other, Custom);
293
294 // Use the default implementation.
295 setOperationAction(ISD::VAARG , MVT::Other, Expand);
296 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
297 setOperationAction(ISD::VAEND , MVT::Other, Expand);
298 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
299 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
302
303 // Cell SPU has instructions for converting between i64 and fp.
304 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
305 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
306
307 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
308 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
309
310 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
311 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
312
313 // First set operation action for all vector types to expand. Then we
314 // will selectively turn on ones that can be effectively codegen'd.
315 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
316 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
321
322 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
323 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
324 // add/sub are legal for all supported vector VT's.
325 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
326 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
327 // mul has to be custom lowered.
328 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
329
330 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
331 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
336
337 // These operations need to be expanded:
338 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
339 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
343
344 // Custom lower build_vector, constant pool spills, insert and
345 // extract vector elements:
346 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
347 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
352 }
353
354 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
355 setOperationAction(ISD::AND, MVT::v16i8, Custom);
356 setOperationAction(ISD::OR, MVT::v16i8, Custom);
357 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000359
Scott Michel266bc8f2007-12-04 22:23:35 +0000360 setSetCCResultType(MVT::i32);
361 setShiftAmountType(MVT::i32);
362 setSetCCResultContents(ZeroOrOneSetCCResult);
363
364 setStackPointerRegisterToSaveRestore(SPU::R1);
365
366 // We have target-specific dag combine patterns for the following nodes:
367 // e.g., setTargetDAGCombine(ISD::SUB);
368
369 computeRegisterProperties();
370}
371
372const char *
373SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
374{
375 if (node_names.empty()) {
376 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
377 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
378 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
379 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
Scott Michel9de5d0d2008-01-11 02:53:15 +0000380 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
Scott Michel266bc8f2007-12-04 22:23:35 +0000381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
417 }
418
419 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420
421 return ((i != node_names.end()) ? i->second : 0);
422}
423
424//===----------------------------------------------------------------------===//
425// Calling convention code:
426//===----------------------------------------------------------------------===//
427
428#include "SPUGenCallingConv.inc"
429
430//===----------------------------------------------------------------------===//
431// LowerOperation implementation
432//===----------------------------------------------------------------------===//
433
Scott Michel9de5d0d2008-01-11 02:53:15 +0000434/// Aligned load common code for CellSPU
435/*!
436 \param[in] Op The SelectionDAG load or store operand
437 \param[in] DAG The selection DAG
438 \param[in] ST CellSPU subtarget information structure
439 \param[in,out] alignment Caller initializes this to the load or store node's
440 value from getAlignment(), may be updated while generating the aligned load
441 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
442 offset (divisible by 16, modulo 16 == 0)
443 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
444 offset of the preferred slot (modulo 16 != 0)
445 \param[in,out] VT Caller initializes this value type to the the load or store
446 node's loaded or stored value type; may be updated if an i1-extended load or
447 store.
448 \param[out] was16aligned true if the base pointer had 16-byte alignment,
449 otherwise false. Can help to determine if the chunk needs to be rotated.
450
451 Both load and store lowering load a block of data aligned on a 16-byte
452 boundary. This is the common aligned load code shared between both.
453 */
454static SDOperand
455AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
456 LSBaseSDNode *LSN,
457 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
458 unsigned &VT, bool &was16aligned)
459{
460 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
461 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
462 SDOperand basePtr = LSN->getBasePtr();
463 SDOperand chain = LSN->getChain();
464
465 if (basePtr.getOpcode() == ISD::ADD) {
466 SDOperand Op1 = basePtr.Val->getOperand(1);
467
468 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
469 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
470
471 alignOffs = (int) CN->getValue();
472 prefSlotOffs = (int) (alignOffs & 0xf);
473
474 // Adjust the rotation amount to ensure that the final result ends up in
475 // the preferred slot:
476 prefSlotOffs -= vtm->prefslot_byte;
477 basePtr = basePtr.getOperand(0);
478
479 // Modify alignment, since the ADD is likely from getElementPtr:
480 switch (basePtr.getOpcode()) {
481 case ISD::GlobalAddress:
482 case ISD::TargetGlobalAddress: {
483 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
484 const GlobalValue *GV = GN->getGlobal();
485 alignment = GV->getAlignment();
486 break;
487 }
488 }
489 } else {
490 alignOffs = 0;
491 prefSlotOffs = -vtm->prefslot_byte;
492 }
493 } else {
494 alignOffs = 0;
495 prefSlotOffs = -vtm->prefslot_byte;
496 }
497
498 if (alignment == 16) {
499 // Realign the base pointer as a D-Form address:
500 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
501 if (isMemoryOperand(basePtr)) {
502 SDOperand Zero = DAG.getConstant(0, PtrVT);
503 unsigned Opc = (!ST->usingLargeMem()
504 ? SPUISD::AFormAddr
505 : SPUISD::XFormAddr);
506 basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
507 }
508 basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
509 basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
510 }
511
512 // Emit the vector load:
513 was16aligned = true;
514 return DAG.getLoad(MVT::v16i8, chain, basePtr,
515 LSN->getSrcValue(), LSN->getSrcValueOffset(),
516 LSN->isVolatile(), 16);
517 }
518
519 // Unaligned load or we're using the "large memory" model, which means that
520 // we have to be very pessimistic:
521 if (isMemoryOperand(basePtr)) {
522 basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
523 }
524
525 // Add the offset
526 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
527 was16aligned = false;
528 return DAG.getLoad(MVT::v16i8, chain, basePtr,
529 LSN->getSrcValue(), LSN->getSrcValueOffset(),
530 LSN->isVolatile(), 16);
531}
532
Scott Michel266bc8f2007-12-04 22:23:35 +0000533/// Custom lower loads for CellSPU
534/*!
535 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
536 within a 16-byte block, we have to rotate to extract the requested element.
537 */
538static SDOperand
539LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
540 LoadSDNode *LN = cast<LoadSDNode>(Op);
Scott Michel266bc8f2007-12-04 22:23:35 +0000541 SDOperand the_chain = LN->getChain();
542 MVT::ValueType VT = LN->getLoadedVT();
543 MVT::ValueType OpVT = Op.Val->getValueType(0);
Scott Michel266bc8f2007-12-04 22:23:35 +0000544 ISD::LoadExtType ExtType = LN->getExtensionType();
545 unsigned alignment = LN->getAlignment();
Scott Michel266bc8f2007-12-04 22:23:35 +0000546 SDOperand Ops[8];
547
548 // For an extending load of an i1 variable, just call it i8 (or whatever we
549 // were passed) and make it zero-extended:
550 if (VT == MVT::i1) {
551 VT = OpVT;
552 ExtType = ISD::ZEXTLOAD;
553 }
554
555 switch (LN->getAddressingMode()) {
556 case ISD::UNINDEXED: {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000557 int offset, rotamt;
558 bool was16aligned;
559 SDOperand result =
560 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
Scott Michel266bc8f2007-12-04 22:23:35 +0000561
Scott Michel9de5d0d2008-01-11 02:53:15 +0000562 if (result.Val == 0)
Scott Michel266bc8f2007-12-04 22:23:35 +0000563 return result;
Scott Michel9de5d0d2008-01-11 02:53:15 +0000564
565 the_chain = result.getValue(1);
566 // Rotate the chunk if necessary
567 if (rotamt < 0)
568 rotamt += 16;
569 if (rotamt != 0) {
570 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
571
572 if (was16aligned) {
573 Ops[0] = the_chain;
574 Ops[1] = result;
575 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
576 } else {
577 LoadSDNode *LN1 = cast<LoadSDNode>(result);
578 Ops[0] = the_chain;
579 Ops[1] = result;
580 Ops[2] = LN1->getBasePtr();
581 }
582
583 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
584 the_chain = result.getValue(1);
Scott Michel266bc8f2007-12-04 22:23:35 +0000585 }
Scott Michel9de5d0d2008-01-11 02:53:15 +0000586
587 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
588 SDVTList scalarvts;
589 MVT::ValueType vecVT = MVT::v16i8;
590
591 // Convert the loaded v16i8 vector to the appropriate vector type
592 // specified by the operand:
593 if (OpVT == VT) {
594 if (VT != MVT::i1)
595 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
596 } else
597 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
598
599 Ops[0] = the_chain;
600 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
601 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
602 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
603 the_chain = result.getValue(1);
604 } else {
605 // Handle the sign and zero-extending loads for i1 and i8:
606 unsigned NewOpC;
607
608 if (ExtType == ISD::SEXTLOAD) {
609 NewOpC = (OpVT == MVT::i1
610 ? SPUISD::EXTRACT_I1_SEXT
611 : SPUISD::EXTRACT_I8_SEXT);
612 } else {
613 assert(ExtType == ISD::ZEXTLOAD);
614 NewOpC = (OpVT == MVT::i1
615 ? SPUISD::EXTRACT_I1_ZEXT
616 : SPUISD::EXTRACT_I8_ZEXT);
617 }
618
619 result = DAG.getNode(NewOpC, OpVT, result);
620 }
621
622 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
623 SDOperand retops[2] = { result, the_chain };
624
625 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
626 return result;
Scott Michel266bc8f2007-12-04 22:23:35 +0000627 }
628 case ISD::PRE_INC:
629 case ISD::PRE_DEC:
630 case ISD::POST_INC:
631 case ISD::POST_DEC:
632 case ISD::LAST_INDEXED_MODE:
633 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
634 "UNINDEXED\n";
635 cerr << (unsigned) LN->getAddressingMode() << "\n";
636 abort();
637 /*NOTREACHED*/
638 }
639
640 return SDOperand();
641}
642
643/// Custom lower stores for CellSPU
644/*!
645 All CellSPU stores are aligned to 16-byte boundaries, so for elements
646 within a 16-byte block, we have to generate a shuffle to insert the
647 requested element into its place, then store the resulting block.
648 */
649static SDOperand
650LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
651 StoreSDNode *SN = cast<StoreSDNode>(Op);
652 SDOperand Value = SN->getValue();
653 MVT::ValueType VT = Value.getValueType();
654 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
655 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
Scott Michel9de5d0d2008-01-11 02:53:15 +0000656 unsigned alignment = SN->getAlignment();
Scott Michel266bc8f2007-12-04 22:23:35 +0000657
658 switch (SN->getAddressingMode()) {
659 case ISD::UNINDEXED: {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000660 int chunk_offset, slot_offset;
661 bool was16aligned;
Scott Michel266bc8f2007-12-04 22:23:35 +0000662
663 // The vector type we really want to load from the 16-byte chunk, except
664 // in the case of MVT::i1, which has to be v16i8.
Scott Michel9de5d0d2008-01-11 02:53:15 +0000665 unsigned vecVT, stVecVT = MVT::v16i8;
666
Scott Michel266bc8f2007-12-04 22:23:35 +0000667 if (StVT != MVT::i1)
668 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
Scott Michel266bc8f2007-12-04 22:23:35 +0000669 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
670
Scott Michel9de5d0d2008-01-11 02:53:15 +0000671 SDOperand alignLoadVec =
672 AlignedLoad(Op, DAG, ST, SN, alignment,
673 chunk_offset, slot_offset, VT, was16aligned);
Scott Michel266bc8f2007-12-04 22:23:35 +0000674
Scott Michel9de5d0d2008-01-11 02:53:15 +0000675 if (alignLoadVec.Val == 0)
676 return alignLoadVec;
Scott Michel266bc8f2007-12-04 22:23:35 +0000677
Scott Michel9de5d0d2008-01-11 02:53:15 +0000678 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
679 SDOperand basePtr = LN->getBasePtr();
680 SDOperand the_chain = alignLoadVec.getValue(1);
Scott Michel266bc8f2007-12-04 22:23:35 +0000681 SDOperand theValue = SN->getValue();
682 SDOperand result;
683
684 if (StVT != VT
685 && (theValue.getOpcode() == ISD::AssertZext
686 || theValue.getOpcode() == ISD::AssertSext)) {
687 // Drill down and get the value for zero- and sign-extended
688 // quantities
689 theValue = theValue.getOperand(0);
690 }
691
Scott Michel9de5d0d2008-01-11 02:53:15 +0000692 chunk_offset &= 0xf;
693 chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8);
Scott Michel266bc8f2007-12-04 22:23:35 +0000694
Scott Michel9de5d0d2008-01-11 02:53:15 +0000695 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
696 SDOperand insertEltPtr;
697 SDOperand insertEltOp;
698
699 // If the base pointer is already a D-form address, then just create
700 // a new D-form address with a slot offset and the orignal base pointer.
701 // Otherwise generate a D-form address with the slot offset relative
702 // to the stack pointer, which is always aligned.
703 if (basePtr.getOpcode() == SPUISD::DFormAddr) {
704 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
705 basePtr.getOperand(0),
706 insertEltOffs);
707 } else {
708 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
709 DAG.getRegister(SPU::R1, PtrVT),
710 insertEltOffs);
711 }
712
713 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
Scott Michel266bc8f2007-12-04 22:23:35 +0000714 result = DAG.getNode(SPUISD::SHUFB, vecVT,
715 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
Scott Michel9de5d0d2008-01-11 02:53:15 +0000716 alignLoadVec,
Scott Michel266bc8f2007-12-04 22:23:35 +0000717 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
718
Scott Michel9de5d0d2008-01-11 02:53:15 +0000719 result = DAG.getStore(the_chain, result, basePtr,
Scott Michel266bc8f2007-12-04 22:23:35 +0000720 LN->getSrcValue(), LN->getSrcValueOffset(),
721 LN->isVolatile(), LN->getAlignment());
722
723 return result;
724 /*UNREACHED*/
725 }
726 case ISD::PRE_INC:
727 case ISD::PRE_DEC:
728 case ISD::POST_INC:
729 case ISD::POST_DEC:
730 case ISD::LAST_INDEXED_MODE:
731 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
732 "UNINDEXED\n";
733 cerr << (unsigned) SN->getAddressingMode() << "\n";
734 abort();
735 /*NOTREACHED*/
736 }
737
738 return SDOperand();
739}
740
741/// Generate the address of a constant pool entry.
742static SDOperand
743LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
744 MVT::ValueType PtrVT = Op.getValueType();
745 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
746 Constant *C = CP->getConstVal();
747 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
Scott Michel266bc8f2007-12-04 22:23:35 +0000748 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000749 const TargetMachine &TM = DAG.getTarget();
Scott Michel266bc8f2007-12-04 22:23:35 +0000750
751 if (TM.getRelocationModel() == Reloc::Static) {
752 if (!ST->usingLargeMem()) {
753 // Just return the SDOperand with the constant pool address in it.
754 return CPI;
755 } else {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000756#if 1
Scott Michel266bc8f2007-12-04 22:23:35 +0000757 // Generate hi/lo address pair
758 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
759 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
760
761 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
Scott Michel9de5d0d2008-01-11 02:53:15 +0000762#else
763 return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
764#endif
Scott Michel266bc8f2007-12-04 22:23:35 +0000765 }
766 }
767
768 assert(0 &&
769 "LowerConstantPool: Relocation model other than static not supported.");
770 return SDOperand();
771}
772
773static SDOperand
774LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
775 MVT::ValueType PtrVT = Op.getValueType();
776 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
777 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
778 SDOperand Zero = DAG.getConstant(0, PtrVT);
779 const TargetMachine &TM = DAG.getTarget();
780
781 if (TM.getRelocationModel() == Reloc::Static) {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000782 return (!ST->usingLargeMem()
783 ? JTI
784 : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
Scott Michel266bc8f2007-12-04 22:23:35 +0000785 }
786
787 assert(0 &&
788 "LowerJumpTable: Relocation model other than static not supported.");
789 return SDOperand();
790}
791
792static SDOperand
793LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
794 MVT::ValueType PtrVT = Op.getValueType();
795 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
796 GlobalValue *GV = GSDN->getGlobal();
797 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
Scott Michel266bc8f2007-12-04 22:23:35 +0000798 const TargetMachine &TM = DAG.getTarget();
Scott Michel9de5d0d2008-01-11 02:53:15 +0000799 SDOperand Zero = DAG.getConstant(0, PtrVT);
Scott Michel266bc8f2007-12-04 22:23:35 +0000800
801 if (TM.getRelocationModel() == Reloc::Static) {
Scott Michel9de5d0d2008-01-11 02:53:15 +0000802 return (!ST->usingLargeMem()
803 ? GA
804 : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
Scott Michel266bc8f2007-12-04 22:23:35 +0000805 } else {
806 cerr << "LowerGlobalAddress: Relocation model other than static not "
807 << "supported.\n";
808 abort();
809 /*NOTREACHED*/
810 }
811
812 return SDOperand();
813}
814
815//! Custom lower i64 integer constants
816/*!
817 This code inserts all of the necessary juggling that needs to occur to load
818 a 64-bit constant into a register.
819 */
820static SDOperand
821LowerConstant(SDOperand Op, SelectionDAG &DAG) {
822 unsigned VT = Op.getValueType();
823 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
824
825 if (VT == MVT::i64) {
826 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
827 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
828 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
829
830 } else {
831 cerr << "LowerConstant: unhandled constant type "
832 << MVT::getValueTypeString(VT)
833 << "\n";
834 abort();
835 /*NOTREACHED*/
836 }
837
838 return SDOperand();
839}
840
841//! Custom lower single precision floating point constants
842/*!
843 "float" immediates can be lowered as if they were unsigned 32-bit integers.
844 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
845 target description.
846 */
847static SDOperand
848LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
849 unsigned VT = Op.getValueType();
850 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
851
852 assert((FP != 0) &&
853 "LowerConstantFP: Node is not ConstantFPSDNode");
854
Scott Michel266bc8f2007-12-04 22:23:35 +0000855 if (VT == MVT::f32) {
Scott Michel170783a2007-12-19 20:15:47 +0000856 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel266bc8f2007-12-04 22:23:35 +0000857 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel170783a2007-12-19 20:15:47 +0000858 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000859 } else if (VT == MVT::f64) {
Scott Michel170783a2007-12-19 20:15:47 +0000860 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel266bc8f2007-12-04 22:23:35 +0000861 return DAG.getNode(ISD::BIT_CONVERT, VT,
862 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
863 }
864
865 return SDOperand();
866}
867
868static SDOperand
869LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
870{
871 MachineFunction &MF = DAG.getMachineFunction();
872 MachineFrameInfo *MFI = MF.getFrameInfo();
Chris Lattner84bc5422007-12-31 04:13:23 +0000873 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +0000874 SmallVector<SDOperand, 8> ArgValues;
875 SDOperand Root = Op.getOperand(0);
876 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
877
878 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
879 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
880
881 unsigned ArgOffset = SPUFrameInfo::minStackSize();
882 unsigned ArgRegIdx = 0;
883 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
884
885 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
886
887 // Add DAG nodes to load the arguments or copy them out of registers.
888 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
889 SDOperand ArgVal;
890 bool needsLoad = false;
891 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
892 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
893
894 switch (ObjectVT) {
895 default: {
896 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
897 << MVT::getValueTypeString(ObjectVT)
898 << "\n";
899 abort();
900 }
901 case MVT::i8:
902 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000903 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
904 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000905 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
906 ++ArgRegIdx;
907 } else {
908 needsLoad = true;
909 }
910 break;
911 case MVT::i16:
912 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000913 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
914 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000915 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
916 ++ArgRegIdx;
917 } else {
918 needsLoad = true;
919 }
920 break;
921 case MVT::i32:
922 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000923 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
924 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000925 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
926 ++ArgRegIdx;
927 } else {
928 needsLoad = true;
929 }
930 break;
931 case MVT::i64:
932 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000933 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
934 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000935 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
936 ++ArgRegIdx;
937 } else {
938 needsLoad = true;
939 }
940 break;
941 case MVT::f32:
942 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000943 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
944 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000945 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
946 ++ArgRegIdx;
947 } else {
948 needsLoad = true;
949 }
950 break;
951 case MVT::f64:
952 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000953 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
954 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000955 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
956 ++ArgRegIdx;
957 } else {
958 needsLoad = true;
959 }
960 break;
961 case MVT::v2f64:
962 case MVT::v4f32:
963 case MVT::v4i32:
964 case MVT::v8i16:
965 case MVT::v16i8:
966 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Chris Lattner84bc5422007-12-31 04:13:23 +0000967 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
968 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +0000969 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
970 ++ArgRegIdx;
971 } else {
972 needsLoad = true;
973 }
974 break;
975 }
976
977 // We need to load the argument to a virtual register if we determined above
978 // that we ran out of physical registers of the appropriate type
979 if (needsLoad) {
980 // If the argument is actually used, emit a load from the right stack
981 // slot.
982 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
983 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
984 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
985 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
986 } else {
987 // Don't emit a dead load.
988 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
989 }
990
991 ArgOffset += StackSlotSize;
992 }
993
994 ArgValues.push_back(ArgVal);
995 }
996
997 // If the function takes variable number of arguments, make a frame index for
998 // the start of the first vararg value... for expansion of llvm.va_start.
999 if (isVarArg) {
1000 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1001 ArgOffset);
1002 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1003 // If this function is vararg, store any remaining integer argument regs to
1004 // their spots on the stack so that they may be loaded by deferencing the
1005 // result of va_next.
1006 SmallVector<SDOperand, 8> MemOps;
1007 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
Chris Lattner84bc5422007-12-31 04:13:23 +00001008 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1009 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
Scott Michel266bc8f2007-12-04 22:23:35 +00001010 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1011 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1012 MemOps.push_back(Store);
1013 // Increment the address by four for the next argument to store
1014 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1015 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1016 }
1017 if (!MemOps.empty())
1018 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1019 }
1020
1021 ArgValues.push_back(Root);
1022
1023 // Return the new list of results.
1024 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1025 Op.Val->value_end());
1026 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1027}
1028
1029/// isLSAAddress - Return the immediate to use if the specified
1030/// value is representable as a LSA address.
1031static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1032 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1033 if (!C) return 0;
1034
1035 int Addr = C->getValue();
1036 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1037 (Addr << 14 >> 14) != Addr)
1038 return 0; // Top 14 bits have to be sext of immediate.
1039
1040 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1041}
1042
1043static
1044SDOperand
Scott Michel9de5d0d2008-01-11 02:53:15 +00001045LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
Scott Michel266bc8f2007-12-04 22:23:35 +00001046 SDOperand Chain = Op.getOperand(0);
1047#if 0
1048 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1049 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1050#endif
1051 SDOperand Callee = Op.getOperand(4);
1052 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1053 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1054 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1055 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1056
1057 // Handy pointer type
1058 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1059
1060 // Accumulate how many bytes are to be pushed on the stack, including the
1061 // linkage area, and parameter passing area. According to the SPU ABI,
1062 // we minimally need space for [LR] and [SP]
1063 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1064
1065 // Set up a copy of the stack pointer for use loading and storing any
1066 // arguments that may not fit in the registers available for argument
1067 // passing.
1068 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1069
1070 // Figure out which arguments are going to go in registers, and which in
1071 // memory.
1072 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1073 unsigned ArgRegIdx = 0;
1074
1075 // Keep track of registers passing arguments
1076 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1077 // And the arguments passed on the stack
1078 SmallVector<SDOperand, 8> MemOpChains;
1079
1080 for (unsigned i = 0; i != NumOps; ++i) {
1081 SDOperand Arg = Op.getOperand(5+2*i);
1082
1083 // PtrOff will be used to store the current argument to the stack if a
1084 // register cannot be found for it.
1085 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1086 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1087
1088 switch (Arg.getValueType()) {
1089 default: assert(0 && "Unexpected ValueType for argument!");
1090 case MVT::i32:
1091 case MVT::i64:
1092 case MVT::i128:
1093 if (ArgRegIdx != NumArgRegs) {
1094 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1095 } else {
1096 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1097 ArgOffset += StackSlotSize;
1098 }
1099 break;
1100 case MVT::f32:
1101 case MVT::f64:
1102 if (ArgRegIdx != NumArgRegs) {
1103 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1104 } else {
1105 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1106 ArgOffset += StackSlotSize;
1107 }
1108 break;
1109 case MVT::v4f32:
1110 case MVT::v4i32:
1111 case MVT::v8i16:
1112 case MVT::v16i8:
1113 if (ArgRegIdx != NumArgRegs) {
1114 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1115 } else {
1116 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1117 ArgOffset += StackSlotSize;
1118 }
1119 break;
1120 }
1121 }
1122
1123 // Update number of stack bytes actually used, insert a call sequence start
1124 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1125 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1126
1127 if (!MemOpChains.empty()) {
1128 // Adjust the stack pointer for the stack arguments.
1129 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1130 &MemOpChains[0], MemOpChains.size());
1131 }
1132
1133 // Build a sequence of copy-to-reg nodes chained together with token chain
1134 // and flag operands which copy the outgoing args into the appropriate regs.
1135 SDOperand InFlag;
1136 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1137 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1138 InFlag);
1139 InFlag = Chain.getValue(1);
1140 }
1141
1142 std::vector<MVT::ValueType> NodeTys;
1143 NodeTys.push_back(MVT::Other); // Returns a chain
1144 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1145
1146 SmallVector<SDOperand, 8> Ops;
1147 unsigned CallOpc = SPUISD::CALL;
1148
1149 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1150 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1151 // node so that legalize doesn't hack it.
1152 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1153 GlobalValue *GV = G->getGlobal();
1154 unsigned CalleeVT = Callee.getValueType();
Scott Michel9de5d0d2008-01-11 02:53:15 +00001155 SDOperand Zero = DAG.getConstant(0, PtrVT);
1156 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
Scott Michel266bc8f2007-12-04 22:23:35 +00001157
Scott Michel9de5d0d2008-01-11 02:53:15 +00001158 if (!ST->usingLargeMem()) {
1159 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1160 // style calls, otherwise, external symbols are BRASL calls. This assumes
1161 // that declared/defined symbols are in the same compilation unit and can
1162 // be reached through PC-relative jumps.
1163 //
1164 // NOTE:
1165 // This may be an unsafe assumption for JIT and really large compilation
1166 // units.
1167 if (GV->isDeclaration()) {
1168 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1169 } else {
1170 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1171 }
Scott Michel266bc8f2007-12-04 22:23:35 +00001172 } else {
Scott Michel9de5d0d2008-01-11 02:53:15 +00001173 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1174 // address pairs:
1175 Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
Scott Michel266bc8f2007-12-04 22:23:35 +00001176 }
1177 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1178 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
Scott Michel9de5d0d2008-01-11 02:53:15 +00001179 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
Scott Michel266bc8f2007-12-04 22:23:35 +00001180 // If this is an absolute destination address that appears to be a legal
1181 // local store address, use the munged value.
1182 Callee = SDOperand(Dest, 0);
Scott Michel9de5d0d2008-01-11 02:53:15 +00001183 }
Scott Michel266bc8f2007-12-04 22:23:35 +00001184
1185 Ops.push_back(Chain);
1186 Ops.push_back(Callee);
1187
1188 // Add argument registers to the end of the list so that they are known live
1189 // into the call.
1190 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1191 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1192 RegsToPass[i].second.getValueType()));
1193
1194 if (InFlag.Val)
1195 Ops.push_back(InFlag);
1196 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1197 InFlag = Chain.getValue(1);
1198
1199 SDOperand ResultVals[3];
1200 unsigned NumResults = 0;
1201 NodeTys.clear();
1202
1203 // If the call has results, copy the values out of the ret val registers.
1204 switch (Op.Val->getValueType(0)) {
1205 default: assert(0 && "Unexpected ret value!");
1206 case MVT::Other: break;
1207 case MVT::i32:
1208 if (Op.Val->getValueType(1) == MVT::i32) {
1209 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1210 ResultVals[0] = Chain.getValue(0);
1211 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1212 Chain.getValue(2)).getValue(1);
1213 ResultVals[1] = Chain.getValue(0);
1214 NumResults = 2;
1215 NodeTys.push_back(MVT::i32);
1216 } else {
1217 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1218 ResultVals[0] = Chain.getValue(0);
1219 NumResults = 1;
1220 }
1221 NodeTys.push_back(MVT::i32);
1222 break;
1223 case MVT::i64:
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1226 NumResults = 1;
1227 NodeTys.push_back(MVT::i64);
1228 break;
1229 case MVT::f32:
1230 case MVT::f64:
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1232 InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1234 NumResults = 1;
1235 NodeTys.push_back(Op.Val->getValueType(0));
1236 break;
1237 case MVT::v2f64:
1238 case MVT::v4f32:
1239 case MVT::v4i32:
1240 case MVT::v8i16:
1241 case MVT::v16i8:
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1243 InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1245 NumResults = 1;
1246 NodeTys.push_back(Op.Val->getValueType(0));
1247 break;
1248 }
1249
1250 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1251 DAG.getConstant(NumStackBytes, PtrVT));
1252 NodeTys.push_back(MVT::Other);
1253
1254 // If the function returns void, just return the chain.
1255 if (NumResults == 0)
1256 return Chain;
1257
1258 // Otherwise, merge everything together with a MERGE_VALUES node.
1259 ResultVals[NumResults++] = Chain;
1260 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1261 ResultVals, NumResults);
1262 return Res.getValue(Op.ResNo);
1263}
1264
1265static SDOperand
1266LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1267 SmallVector<CCValAssign, 16> RVLocs;
1268 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1269 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1270 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1271 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1272
1273 // If this is the first return lowered for this function, add the regs to the
1274 // liveout set for the function.
Chris Lattner84bc5422007-12-31 04:13:23 +00001275 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
Scott Michel266bc8f2007-12-04 22:23:35 +00001276 for (unsigned i = 0; i != RVLocs.size(); ++i)
Chris Lattner84bc5422007-12-31 04:13:23 +00001277 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
Scott Michel266bc8f2007-12-04 22:23:35 +00001278 }
1279
1280 SDOperand Chain = Op.getOperand(0);
1281 SDOperand Flag;
1282
1283 // Copy the result values into the output registers.
1284 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1285 CCValAssign &VA = RVLocs[i];
1286 assert(VA.isRegLoc() && "Can only return in registers!");
1287 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1288 Flag = Chain.getValue(1);
1289 }
1290
1291 if (Flag.Val)
1292 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1293 else
1294 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1295}
1296
1297
1298//===----------------------------------------------------------------------===//
1299// Vector related lowering:
1300//===----------------------------------------------------------------------===//
1301
1302static ConstantSDNode *
1303getVecImm(SDNode *N) {
1304 SDOperand OpVal(0, 0);
1305
1306 // Check to see if this buildvec has a single non-undef value in its elements.
1307 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1308 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1309 if (OpVal.Val == 0)
1310 OpVal = N->getOperand(i);
1311 else if (OpVal != N->getOperand(i))
1312 return 0;
1313 }
1314
1315 if (OpVal.Val != 0) {
1316 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1317 return CN;
1318 }
1319 }
1320
1321 return 0; // All UNDEF: use implicit def.; not Constant node
1322}
1323
1324/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1325/// and the value fits into an unsigned 18-bit constant, and if so, return the
1326/// constant
1327SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1328 MVT::ValueType ValueType) {
1329 if (ConstantSDNode *CN = getVecImm(N)) {
1330 uint64_t Value = CN->getValue();
1331 if (Value <= 0x3ffff)
1332 return DAG.getConstant(Value, ValueType);
1333 }
1334
1335 return SDOperand();
1336}
1337
1338/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1339/// and the value fits into a signed 16-bit constant, and if so, return the
1340/// constant
1341SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1342 MVT::ValueType ValueType) {
1343 if (ConstantSDNode *CN = getVecImm(N)) {
1344 if (ValueType == MVT::i32) {
1345 int Value = (int) CN->getValue();
1346 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1347
1348 if (Value == SExtValue)
1349 return DAG.getConstant(Value, ValueType);
1350 } else if (ValueType == MVT::i16) {
1351 short Value = (short) CN->getValue();
1352 int SExtValue = ((int) Value << 16) >> 16;
1353
1354 if (Value == (short) SExtValue)
1355 return DAG.getConstant(Value, ValueType);
1356 } else if (ValueType == MVT::i64) {
1357 int64_t Value = CN->getValue();
1358 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1359
1360 if (Value == SExtValue)
1361 return DAG.getConstant(Value, ValueType);
1362 }
1363 }
1364
1365 return SDOperand();
1366}
1367
1368/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1369/// and the value fits into a signed 10-bit constant, and if so, return the
1370/// constant
1371SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1372 MVT::ValueType ValueType) {
1373 if (ConstantSDNode *CN = getVecImm(N)) {
1374 int Value = (int) CN->getValue();
1375 if ((ValueType == MVT::i32 && isS10Constant(Value))
1376 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1377 return DAG.getConstant(Value, ValueType);
1378 }
1379
1380 return SDOperand();
1381}
1382
1383/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1384/// and the value fits into a signed 8-bit constant, and if so, return the
1385/// constant.
1386///
1387/// @note: The incoming vector is v16i8 because that's the only way we can load
1388/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1389/// same value.
1390SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1391 MVT::ValueType ValueType) {
1392 if (ConstantSDNode *CN = getVecImm(N)) {
1393 int Value = (int) CN->getValue();
1394 if (ValueType == MVT::i16
1395 && Value <= 0xffff /* truncated from uint64_t */
1396 && ((short) Value >> 8) == ((short) Value & 0xff))
1397 return DAG.getConstant(Value & 0xff, ValueType);
1398 else if (ValueType == MVT::i8
1399 && (Value & 0xff) == Value)
1400 return DAG.getConstant(Value, ValueType);
1401 }
1402
1403 return SDOperand();
1404}
1405
1406/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1407/// and the value fits into a signed 16-bit constant, and if so, return the
1408/// constant
1409SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1410 MVT::ValueType ValueType) {
1411 if (ConstantSDNode *CN = getVecImm(N)) {
1412 uint64_t Value = CN->getValue();
1413 if ((ValueType == MVT::i32
1414 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1415 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1416 return DAG.getConstant(Value >> 16, ValueType);
1417 }
1418
1419 return SDOperand();
1420}
1421
1422/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1423SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1424 if (ConstantSDNode *CN = getVecImm(N)) {
1425 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1426 }
1427
1428 return SDOperand();
1429}
1430
1431/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1432SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1435 }
1436
1437 return SDOperand();
1438}
1439
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.   Return true if this is not an array of constants, false if it is.
//
// Packing layout: the 128-bit vector is split across two uint64_t halves.
// VectorBits[0]/UndefBits[0] receive the first (lower-numbered) half of the
// elements, VectorBits[1]/UndefBits[1] the rest; within each half, earlier
// elements occupy the more-significant slots (big-endian element order).
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  // Every operand of a build_vector has the same element type.
  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // Which uint64_t half this element is in.
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Mark the element's bit positions as undef (an all-ones element mask).
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      // Integer constant: keep only the element's low EltBitSize bits.
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      // FP constant: contribute its IEEE bit pattern.
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //   VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1481
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
///
/// The equality tests below use a cross-masking trick so undefs never block a
/// match: each side is masked by the complement of the OTHER side's undef
/// bits.  A position that is undef in one half contributes zero bits there
/// (GetConstantBuildVectorBits records zeros under undefs), and the mask
/// zeroes the corresponding position of the other half, so the two sides
/// agree at every undef position.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit halves must agree (ignoring undefs) for any splat to exist.
  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // If the top and bottom 16-bit halves match (ignoring undefs), try
          // an even smaller splat; otherwise fall to the i32 splat below.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top and bottom bytes match (ignoring undefs), this is
              // an 8-bit splat.
              // NOTE(review): when MinSplatBits < 16 and the bytes differ,
              // no i16 splat is reported here and the function returns
              // false — presumably deliberate, since the v16i8 caller only
              // handles SplatSize == 1; confirm before relying on it.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              // Caller wants at least 16 bits: report the i16 splat.
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          // Caller wants at least 32 bits: report the i32 splat.
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      // Caller wants at least 64 bits: report the i64 splat.
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
1545
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  // GetConstantBuildVectorBits returns nonzero on failure; bail out unless
  // the vector is both fully constant and a splat of a single value.
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
    // NOTE(review): there is no diagnostic for unexpected vector types;
    // control deliberately(?) falls through into the v4f32 case, whose
    // SplatSize assertion fires only in debug builds -- confirm this
    // fall-through is intended.
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    // Build as v8i16 and bitcast back to the requested v16i8 type.
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      // SplatSize == 1: replicate the splatted byte into both halves.
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    // There is no 64-bit immediate splat; synthesize the value from the
    // 32-bit halves, using shufb byte codes for "special" halves (all
    // zeros, all ones, sign bit only) that shufb can generate directly.
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      // Build the 16-byte shufb control mask: for each 32-bit lane of the
      // result, either select bytes from HI32/LO32 or emit one of shufb's
      // "generate" codes (0x80 -> 0x00 byte, 0xc0 -> 0xff byte,
      // 0xe0 -> 0x80 byte) to synthesize a special half for free.
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          // NOTE(review): this inner 'val' shadows the outer 'val' holding
          // SplatBits; the outer value is not read after this point, so the
          // shadowing is harmless, but the inner variable deserves a rename.
          uint64_t val = 0;

          // Even word lanes (i & 1) == 0 hold the upper half of the i64,
          // odd lanes hold the lower half.
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            // Ordinary byte select: even lanes from the first operand,
            // odd lanes from the second (offset by 16).
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
1705
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // An undef second operand degenerates to shuffling within V1 alone.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;   // number of mask entries that index into V2
  unsigned V2Elt = 0;        // byte offset handed to INSERT_MASK below
  unsigned V2EltIdx0 = 0;    // first mask index that refers to V2
  unsigned CurrElt = 0;      // expected next index for the monotonic check
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Scan the permutation mask; the loop stops early once more than one
  // element comes from V2 or monotonicity is violated.
  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;   // an undef mask entry is treated as selecting element 0
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      // NOTE(review): (V2EltIdx0 - SrcElt) is zero or wraps around for
      // unsigned SrcElt > V2EltIdx0; the intended expression looks like it
      // should be (SrcElt - V2EltIdx0) -- confirm against the C*D compute
      // mask encoding before relying on non-zero V2 positions.
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // General case: spill the mask to a v16i8 BUILD_VECTOR of byte indices.
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      // Expand each element index into its constituent byte indices.
      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1802
1803static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1804 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1805
1806 if (Op0.Val->getOpcode() == ISD::Constant) {
1807 // For a constant, build the appropriate constant vector, which will
1808 // eventually simplify to a vector register load.
1809
1810 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1811 SmallVector<SDOperand, 16> ConstVecValues;
1812 MVT::ValueType VT;
1813 size_t n_copies;
1814
1815 // Create a constant vector:
1816 switch (Op.getValueType()) {
1817 default: assert(0 && "Unexpected constant value type in "
1818 "LowerSCALAR_TO_VECTOR");
1819 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1820 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1821 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1822 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1823 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1824 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1825 }
1826
1827 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1828 for (size_t j = 0; j < n_copies; ++j)
1829 ConstVecValues.push_back(CValue);
1830
1831 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1832 &ConstVecValues[0], ConstVecValues.size());
1833 } else {
1834 // Otherwise, copy the value from one register to another:
1835 switch (Op0.getValueType()) {
1836 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1837 case MVT::i8:
1838 case MVT::i16:
1839 case MVT::i32:
1840 case MVT::i64:
1841 case MVT::f32:
1842 case MVT::f64:
1843 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1844 }
1845 }
1846
1847 return SDOperand();
1848}
1849
//! Lower a vector ISD::MUL for the Cell SPU.
/*!
  SPU has no full-width vector multiply instruction, so each supported
  vector type is lowered to a sequence of 16-bit partial products that are
  shifted, selected (SELB) and summed back together.
 */
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    // Two mpyh cross terms (both operand orders) plus the mpyu low-half
    // product assemble the full 32-bit product per lane.
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): operand 0 of a MUL node is the first multiplicand, not
    // a chain token; using it as the chain for getCopyToReg looks suspect --
    // confirm against how copy-to-reg chains are formed elsewhere.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc expands to the byte-select mask used by SELB below;
    // pinning it in a virtual register lets it issue early (see NOTE above).
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    // High-half products, also pinned in a register:
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // Merge: low-half products vs. high-half products shifted left 16,
    // selected per-byte by the FSMBI mask.
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): as in the v8i16 case, Op.getOperand(0) is rA, not a
    // chain token -- confirm intended.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // Products of the low bytes of each halfword, viewed as v8i16:
    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic right shift by 8 sign-extends each halfword's high byte:
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // Products of the high bytes, shifted back into the high byte position:
    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222 produces the byte mask used to interleave even/odd
    // byte products via SELB; computed once and reused from a register.
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low 16 bits of each 32-bit lane of the low products:
    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Sign-extended upper halfwords of each 32-bit lane:
    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    // Merge the two upper-half partial products, again via the 0x2222 mask:
    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    // Shift the merged high products into the upper halfwords:
    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Recombine the low and high halves into the final byte products:
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
2002
//! Lower ISD::FDIV for f32 (and, despite the name, v4f32).
/*!
  Computes a/b as a * (1/b), where 1/b comes from the SPU reciprocal
  estimate refined by one floating-interpolate step, followed by a
  Newton-Raphson-style correction of the approximate quotient:

      BRcpl   = fi(b, frest(b))              -- refined 1/b estimate
      AxBRcpl = a * BRcpl                    -- first quotient approximation
      result  = AxBRcpl + BRcpl * (a - b * AxBRcpl)

  Intermediate values are pinned in virtual registers so that both uses
  observe the same computed value.
 */
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;   // VRegBR holds BRcpl; VRegC holds A * BRcpl

  // Scalar f32 uses the 32-bit FP register class, vector types the
  // full vector register class:
  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  // result = AxBRcpl + BRcpl * (A - B * AxBRcpl)
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2046
Scott Michel266bc8f2007-12-04 22:23:35 +00002047static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2048 unsigned VT = Op.getValueType();
2049 SDOperand N = Op.getOperand(0);
2050 SDOperand Elt = Op.getOperand(1);
2051 SDOperand ShufMask[16];
2052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2053
2054 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2055
2056 int EltNo = (int) C->getValue();
2057
2058 // sanity checks:
2059 if (VT == MVT::i8 && EltNo >= 16)
2060 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2061 else if (VT == MVT::i16 && EltNo >= 8)
2062 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2063 else if (VT == MVT::i32 && EltNo >= 4)
2064 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2065 else if (VT == MVT::i64 && EltNo >= 2)
2066 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2067
2068 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2069 // i32 and i64: Element 0 is the preferred slot
2070 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2071 }
2072
2073 // Need to generate shuffle mask and extract:
Scott Michel0e5665b2007-12-19 21:17:42 +00002074 int prefslot_begin = -1, prefslot_end = -1;
Scott Michel266bc8f2007-12-04 22:23:35 +00002075 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2076
2077 switch (VT) {
2078 case MVT::i8: {
2079 prefslot_begin = prefslot_end = 3;
2080 break;
2081 }
2082 case MVT::i16: {
2083 prefslot_begin = 2; prefslot_end = 3;
2084 break;
2085 }
2086 case MVT::i32: {
2087 prefslot_begin = 0; prefslot_end = 3;
2088 break;
2089 }
2090 case MVT::i64: {
2091 prefslot_begin = 0; prefslot_end = 7;
2092 break;
2093 }
2094 }
2095
Scott Michel0e5665b2007-12-19 21:17:42 +00002096 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2097 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2098
Scott Michel266bc8f2007-12-04 22:23:35 +00002099 for (int i = 0; i < 16; ++i) {
2100 // zero fill uppper part of preferred slot, don't care about the
2101 // other slots:
2102 unsigned int mask_val;
2103
2104 if (i <= prefslot_end) {
2105 mask_val =
2106 ((i < prefslot_begin)
2107 ? 0x80
2108 : elt_byte + (i - prefslot_begin));
2109
Scott Michel0e5665b2007-12-19 21:17:42 +00002110 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel266bc8f2007-12-04 22:23:35 +00002111 } else
2112 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2113 }
2114
2115 SDOperand ShufMaskVec =
2116 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2117 &ShufMask[0],
2118 sizeof(ShufMask) / sizeof(ShufMask[0]));
2119
2120 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2121 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2122 N, N, ShufMaskVec));
2123
2124}
2125
2126static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2127 SDOperand VecOp = Op.getOperand(0);
2128 SDOperand ValOp = Op.getOperand(1);
2129 SDOperand IdxOp = Op.getOperand(2);
2130 MVT::ValueType VT = Op.getValueType();
2131
2132 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2133 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2134
2135 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2136 // Use $2 because it's always 16-byte aligned and it's available:
2137 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2138
2139 SDOperand result =
2140 DAG.getNode(SPUISD::SHUFB, VT,
2141 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2142 VecOp,
2143 DAG.getNode(SPUISD::INSERT_MASK, VT,
2144 DAG.getNode(ISD::ADD, PtrVT,
2145 PtrBase,
2146 DAG.getConstant(CN->getValue(),
2147 PtrVT))));
2148
2149 return result;
2150}
2151
2152static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2153 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2154
2155 assert(Op.getValueType() == MVT::i8);
2156 switch (Opc) {
2157 default:
2158 assert(0 && "Unhandled i8 math operator");
2159 /*NOTREACHED*/
2160 break;
2161 case ISD::SUB: {
2162 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2163 // the result:
2164 SDOperand N1 = Op.getOperand(1);
2165 N0 = (N0.getOpcode() != ISD::Constant
2166 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2167 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2168 N1 = (N1.getOpcode() != ISD::Constant
2169 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2170 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2171 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2172 DAG.getNode(Opc, MVT::i16, N0, N1));
2173 }
2174 case ISD::ROTR:
2175 case ISD::ROTL: {
2176 SDOperand N1 = Op.getOperand(1);
2177 unsigned N1Opc;
2178 N0 = (N0.getOpcode() != ISD::Constant
2179 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2180 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2181 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2182 N1 = (N1.getOpcode() != ISD::Constant
2183 ? DAG.getNode(N1Opc, MVT::i16, N1)
2184 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2185 SDOperand ExpandArg =
2186 DAG.getNode(ISD::OR, MVT::i16, N0,
2187 DAG.getNode(ISD::SHL, MVT::i16,
2188 N0, DAG.getConstant(8, MVT::i16)));
2189 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2190 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2191 }
2192 case ISD::SRL:
2193 case ISD::SHL: {
2194 SDOperand N1 = Op.getOperand(1);
2195 unsigned N1Opc;
2196 N0 = (N0.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2198 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2199 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2200 N1 = (N1.getOpcode() != ISD::Constant
2201 ? DAG.getNode(N1Opc, MVT::i16, N1)
2202 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2203 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2204 DAG.getNode(Opc, MVT::i16, N0, N1));
2205 }
2206 case ISD::SRA: {
2207 SDOperand N1 = Op.getOperand(1);
2208 unsigned N1Opc;
2209 N0 = (N0.getOpcode() != ISD::Constant
2210 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2211 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2212 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2213 N1 = (N1.getOpcode() != ISD::Constant
2214 ? DAG.getNode(N1Opc, MVT::i16, N1)
2215 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2216 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2217 DAG.getNode(Opc, MVT::i16, N0, N1));
2218 }
2219 case ISD::MUL: {
2220 SDOperand N1 = Op.getOperand(1);
2221 unsigned N1Opc;
2222 N0 = (N0.getOpcode() != ISD::Constant
2223 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2224 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2225 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2226 N1 = (N1.getOpcode() != ISD::Constant
2227 ? DAG.getNode(N1Opc, MVT::i16, N1)
2228 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2229 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2230 DAG.getNode(Opc, MVT::i16, N0, N1));
2231 break;
2232 }
2233 }
2234
2235 return SDOperand();
2236}
2237
2238//! Lower byte immediate operations for v16i8 vectors:
2239static SDOperand
2240LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2241 SDOperand ConstVec;
2242 SDOperand Arg;
2243 MVT::ValueType VT = Op.getValueType();
2244
2245 ConstVec = Op.getOperand(0);
2246 Arg = Op.getOperand(1);
2247 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2248 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2249 ConstVec = ConstVec.getOperand(0);
2250 } else {
2251 ConstVec = Op.getOperand(1);
2252 Arg = Op.getOperand(0);
2253 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2254 ConstVec = ConstVec.getOperand(0);
2255 }
2256 }
2257 }
2258
2259 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2260 uint64_t VectorBits[2];
2261 uint64_t UndefBits[2];
2262 uint64_t SplatBits, SplatUndef;
2263 int SplatSize;
2264
2265 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2266 && isConstantSplat(VectorBits, UndefBits,
2267 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2268 SplatBits, SplatUndef, SplatSize)) {
2269 SDOperand tcVec[16];
2270 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2271 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2272
2273 // Turn the BUILD_VECTOR into a set of target constants:
2274 for (size_t i = 0; i < tcVecSize; ++i)
2275 tcVec[i] = tc;
2276
2277 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2278 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2279 }
2280 }
2281
2282 return SDOperand();
2283}
2284
2285//! Lower i32 multiplication
2286static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2287 unsigned Opc) {
2288 switch (VT) {
2289 default:
2290 cerr << "CellSPU: Unknown LowerMUL value type, got "
2291 << MVT::getValueTypeString(Op.getValueType())
2292 << "\n";
2293 abort();
2294 /*NOTREACHED*/
2295
2296 case MVT::i32: {
2297 SDOperand rA = Op.getOperand(0);
2298 SDOperand rB = Op.getOperand(1);
2299
2300 return DAG.getNode(ISD::ADD, MVT::i32,
2301 DAG.getNode(ISD::ADD, MVT::i32,
2302 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2303 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2304 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2305 }
2306 }
2307
2308 return SDOperand();
2309}
2310
2311//! Custom lowering for CTPOP (count population)
2312/*!
2313 Custom lowering code that counts the number ones in the input
2314 operand. SPU has such an instruction, but it counts the number of
2315 ones per byte, which then have to be accumulated.
2316*/
2317static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2318 unsigned VT = Op.getValueType();
2319 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2320
2321 switch (VT) {
2322 case MVT::i8: {
2323 SDOperand N = Op.getOperand(0);
2324 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2325
2326 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2327 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2328
2329 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2330 }
2331
2332 case MVT::i16: {
2333 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner84bc5422007-12-31 04:13:23 +00002334 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +00002335
Chris Lattner84bc5422007-12-31 04:13:23 +00002336 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +00002337
2338 SDOperand N = Op.getOperand(0);
2339 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2340 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2341 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2342
2343 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2344 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2345
2346 // CNTB_result becomes the chain to which all of the virtual registers
2347 // CNTB_reg, SUM1_reg become associated:
2348 SDOperand CNTB_result =
2349 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2350
2351 SDOperand CNTB_rescopy =
2352 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2353
2354 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2355
2356 return DAG.getNode(ISD::AND, MVT::i16,
2357 DAG.getNode(ISD::ADD, MVT::i16,
2358 DAG.getNode(ISD::SRL, MVT::i16,
2359 Tmp1, Shift1),
2360 Tmp1),
2361 Mask0);
2362 }
2363
2364 case MVT::i32: {
2365 MachineFunction &MF = DAG.getMachineFunction();
Chris Lattner84bc5422007-12-31 04:13:23 +00002366 MachineRegisterInfo &RegInfo = MF.getRegInfo();
Scott Michel266bc8f2007-12-04 22:23:35 +00002367
Chris Lattner84bc5422007-12-31 04:13:23 +00002368 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2369 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +00002370
2371 SDOperand N = Op.getOperand(0);
2372 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2373 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2374 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2375 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2376
2377 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2378 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2379
2380 // CNTB_result becomes the chain to which all of the virtual registers
2381 // CNTB_reg, SUM1_reg become associated:
2382 SDOperand CNTB_result =
2383 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2384
2385 SDOperand CNTB_rescopy =
2386 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2387
2388 SDOperand Comp1 =
2389 DAG.getNode(ISD::SRL, MVT::i32,
2390 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2391
2392 SDOperand Sum1 =
2393 DAG.getNode(ISD::ADD, MVT::i32,
2394 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2395
2396 SDOperand Sum1_rescopy =
2397 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2398
2399 SDOperand Comp2 =
2400 DAG.getNode(ISD::SRL, MVT::i32,
2401 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2402 Shift2);
2403 SDOperand Sum2 =
2404 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2405 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2406
2407 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2408 }
2409
2410 case MVT::i64:
2411 break;
2412 }
2413
2414 return SDOperand();
2415}
2416
2417/// LowerOperation - Provide custom lowering hooks for some operations.
2418///
2419SDOperand
2420SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2421{
2422 switch (Op.getOpcode()) {
2423 default: {
2424 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2425 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2426 cerr << "*Op.Val:\n";
2427 Op.Val->dump();
2428 abort();
2429 }
2430 case ISD::LOAD:
2431 case ISD::SEXTLOAD:
2432 case ISD::ZEXTLOAD:
2433 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2434 case ISD::STORE:
2435 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2436 case ISD::ConstantPool:
2437 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2438 case ISD::GlobalAddress:
2439 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2440 case ISD::JumpTable:
2441 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2442 case ISD::Constant:
2443 return LowerConstant(Op, DAG);
2444 case ISD::ConstantFP:
2445 return LowerConstantFP(Op, DAG);
2446 case ISD::FORMAL_ARGUMENTS:
2447 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2448 case ISD::CALL:
Scott Michel9de5d0d2008-01-11 02:53:15 +00002449 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
Scott Michel266bc8f2007-12-04 22:23:35 +00002450 case ISD::RET:
2451 return LowerRET(Op, DAG, getTargetMachine());
2452
2453 // i8 math ops:
2454 case ISD::SUB:
2455 case ISD::ROTR:
2456 case ISD::ROTL:
2457 case ISD::SRL:
2458 case ISD::SHL:
2459 case ISD::SRA:
2460 return LowerI8Math(Op, DAG, Op.getOpcode());
2461
2462 // Vector-related lowering.
2463 case ISD::BUILD_VECTOR:
2464 return LowerBUILD_VECTOR(Op, DAG);
2465 case ISD::SCALAR_TO_VECTOR:
2466 return LowerSCALAR_TO_VECTOR(Op, DAG);
2467 case ISD::VECTOR_SHUFFLE:
2468 return LowerVECTOR_SHUFFLE(Op, DAG);
2469 case ISD::EXTRACT_VECTOR_ELT:
2470 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2471 case ISD::INSERT_VECTOR_ELT:
2472 return LowerINSERT_VECTOR_ELT(Op, DAG);
2473
2474 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2475 case ISD::AND:
2476 case ISD::OR:
2477 case ISD::XOR:
2478 return LowerByteImmed(Op, DAG);
2479
2480 // Vector and i8 multiply:
2481 case ISD::MUL:
2482 if (MVT::isVector(Op.getValueType()))
2483 return LowerVectorMUL(Op, DAG);
2484 else if (Op.getValueType() == MVT::i8)
2485 return LowerI8Math(Op, DAG, Op.getOpcode());
2486 else
2487 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2488
2489 case ISD::FDIV:
2490 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2491 return LowerFDIVf32(Op, DAG);
2492// else if (Op.getValueType() == MVT::f64)
2493// return LowerFDIVf64(Op, DAG);
2494 else
2495 assert(0 && "Calling FDIV on unsupported MVT");
2496
2497 case ISD::CTPOP:
2498 return LowerCTPOP(Op, DAG);
2499 }
2500
2501 return SDOperand();
2502}
2503
2504//===----------------------------------------------------------------------===//
2505// Other Lowering Code
2506//===----------------------------------------------------------------------===//
2507
/// InsertAtEndOfBasicBlock - Target hook for expanding pseudo-instructions
/// with custom inserters.  The SPU backend currently defines no such
/// pseudo-instructions, so this is a no-op that returns the basic block
/// unchanged; MI is intentionally ignored.
MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}
2514
2515//===----------------------------------------------------------------------===//
2516// Target Optimization Hooks
2517//===----------------------------------------------------------------------===//
2518
2519SDOperand
2520SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2521{
2522#if 0
2523 TargetMachine &TM = getTargetMachine();
2524 SelectionDAG &DAG = DCI.DAG;
2525#endif
2526 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2527
2528 switch (N->getOpcode()) {
2529 default: break;
2530
2531 // Look for obvious optimizations for shift left:
2532 // a) Replace 0 << V with 0
2533 // b) Replace V << 0 with V
2534 //
2535 // N.B: llvm will generate an undef node if the shift amount is greater than
2536 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2537 case SPU::SHLIr32:
2538 case SPU::SHLHIr16:
2539 case SPU::SHLQBIIvec:
2540 case SPU::ROTHIr16:
2541 case SPU::ROTHIr16_i32:
2542 case SPU::ROTIr32:
2543 case SPU::ROTIr32_i16:
2544 case SPU::ROTQBYIvec:
2545 case SPU::ROTQBYBIvec:
2546 case SPU::ROTQBIIvec:
2547 case SPU::ROTHMIr16:
2548 case SPU::ROTMIr32:
2549 case SPU::ROTQMBYIvec: {
2550 if (N0.getOpcode() == ISD::Constant) {
2551 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2552 if (C->getValue() == 0) // 0 << V -> 0.
2553 return N0;
2554 }
2555 }
2556 SDOperand N1 = N->getOperand(1);
2557 if (N1.getOpcode() == ISD::Constant) {
2558 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2559 if (C->getValue() == 0) // V << 0 -> V
2560 return N1;
2561 }
2562 }
2563 break;
2564 }
2565 }
2566
2567 return SDOperand();
2568}
2569
2570//===----------------------------------------------------------------------===//
2571// Inline Assembly Support
2572//===----------------------------------------------------------------------===//
2573
2574/// getConstraintType - Given a constraint letter, return the type of
2575/// constraint it is for this target.
2576SPUTargetLowering::ConstraintType
2577SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2578 if (ConstraintLetter.size() == 1) {
2579 switch (ConstraintLetter[0]) {
2580 default: break;
2581 case 'b':
2582 case 'r':
2583 case 'f':
2584 case 'v':
2585 case 'y':
2586 return C_RegisterClass;
2587 }
2588 }
2589 return TargetLowering::getConstraintType(ConstraintLetter);
2590}
2591
2592std::pair<unsigned, const TargetRegisterClass*>
2593SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2594 MVT::ValueType VT) const
2595{
2596 if (Constraint.size() == 1) {
2597 // GCC RS6000 Constraint Letters
2598 switch (Constraint[0]) {
2599 case 'b': // R1-R31
2600 case 'r': // R0-R31
2601 if (VT == MVT::i64)
2602 return std::make_pair(0U, SPU::R64CRegisterClass);
2603 return std::make_pair(0U, SPU::R32CRegisterClass);
2604 case 'f':
2605 if (VT == MVT::f32)
2606 return std::make_pair(0U, SPU::R32FPRegisterClass);
2607 else if (VT == MVT::f64)
2608 return std::make_pair(0U, SPU::R64FPRegisterClass);
2609 break;
2610 case 'v':
2611 return std::make_pair(0U, SPU::GPRCRegisterClass);
2612 }
2613 }
2614
2615 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2616}
2617
2618void
2619SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2620 uint64_t Mask,
2621 uint64_t &KnownZero,
2622 uint64_t &KnownOne,
2623 const SelectionDAG &DAG,
2624 unsigned Depth ) const {
2625 KnownZero = 0;
2626 KnownOne = 0;
2627}
2628
// LowerAsmOperandForConstraint - Lower an inline-asm operand that matched
// constraint letter ConstraintLetter, appending the lowered operands to Ops.
// The SPU backend has no constraint letters needing special operand
// handling yet, so everything is delegated to the base class.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
2638
2639/// isLegalAddressImmediate - Return true if the integer value can be used
2640/// as the offset of the target addressing mode.
2641bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2642 // SPU's addresses are 256K:
2643 return (V > -(1 << 18) && V < (1 << 18) - 1);
2644}
2645
/// isLegalAddressImmediate - Global values are never legal as an immediate
/// addressing-mode offset on SPU; they must be materialized separately.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}