//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation.
//
// See README.txt for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };
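
  // The "preferred slot" is the portion of the 16-byte SPU register in which
  // a scalar of a given type naturally lives: the leftmost 4-byte word for
  // 32-bit and wider values, right-justified within that word for narrower
  // ones. prefslot_byte records the slot's starting byte offset, e.g.:
  //
  //   byte:  0  1  2  3  4 ... 15
  //   i8:            [x]          (prefslot_byte == 3)
  //   i16:        [x  x]          (prefslot_byte == 2)
  //   i32:   [x  x  x  x]         (prefslot_byte == 0)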

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Signed division by a power of two is considered cheap; don't expand it
  // into shift sequences:
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign- or zero-extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root, so expand FSQRT:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
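  // (A sketch of the resulting expansion: with the i1 held in an i32
  // register, sext_inreg(x, i1) becomes the shift pair (x << 31) >>s 31,
  // replicating the low bit across the word.)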

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
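//
// A sketch of the aligned case handled below: an i32 load from (base + 4)
// loads the whole 16-byte chunk at base, rotates it left by
// (offset & 0xf) - prefslot_byte = 4 - 0 = 4 bytes so the requested word
// lands in the preferred slot, then extracts the scalar with
// SPUISD::EXTRACT_ELT0(_CHAINED). For an i16 at (base + 6), the rotate
// amount is likewise 6 - 2 = 4 bytes, since an i16's preferred slot starts
// at byte 2.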
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else if (ExtType == ISD::ZEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          } else {
            assert(0 && "LowerLOAD: unexpected extension type");
            NewOpC = 0;
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
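//
// A sketch of the sequence constructed below: an i8 store to (base + 5)
// loads the enclosing 16-byte chunk, builds a shuffle control mask targeting
// byte 5 with SPUISD::INSERT_MASK, splices the new byte into the chunk with
// SPUISD::SHUFB, and stores the merged 16 bytes back: a read-modify-write of
// the containing quadword.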
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  unsigned alignment = SN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address: ptrOp forces the address into
    // a register, while basep is the actual D-form address, offs($reg).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
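//
// For example, an i64 constant K is materialized by building the v2i64
// splat <K, K> with BUILD_VECTOR and then extracting element 0 via
// SPUISD::EXTRACT_ELT0, which is exactly what the code below emits.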
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
 "float" immediates can be lowered as if they were unsigned 32-bit integers.
 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 target description.
 */
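//
// For example, the f32 immediate 1.0f corresponds to the bit pattern
// 0x3F800000, which the SPUISD::SFPConstant node materializes directly
// instead of going through a constant-pool load.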
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
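/// For example, an absolute address of 1024 (word-aligned and representable
/// in 18 signed bits) yields the immediate 256 (1024 >> 2); an address with
/// a nonzero low 2 bits, or one too large for 18 signed bits, returns null.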
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Flag);   // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
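
// For example, getVecImm accepts the v4i32 vector <7, undef, 7, 7> and
// returns the ConstantSDNode for 7; it rejects <7, 8, 7, 7> (two distinct
// values), and an all-undef vector yields null so it can become an implicit
// def instead.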

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
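
// The shift pairs above implement "fits in a signed 16-bit immediate": for
// i32, ((Value & 0xffff) << 16) >> 16 sign-extends the low halfword, so
// Value == -32768 (0xffff8000) survives the round trip and is accepted,
// while Value == 0x18000 does not and is rejected.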

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where the value's lower 16 bits are zero (i.e., it is a halfword
/// shifted into the upper 16 bits), and if so, return the upper halfword as
/// the constant
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}
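
// For example, a v4i32 splat of 0x00120000 passes the mask test above and
// yields the immediate 0x0012, i.e., the halfword that an ILHU (immediate
// load halfword upper) places in the upper 16 bits of each word.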

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2; // In the upper 64 bits of the 128-bit value?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
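
// For example, the v4i32 build_vector <A, B, C, D> packs in big-endian
// element order as VectorBits[0] == (A << 32) | B and
// VectorBits[1] == (C << 32) | D, with the corresponding bits of UndefBits
// set wherever an element is undef.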
1497
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits,
      // ignoring undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // To narrow below 32 bits, the top 16-bits must also match the
          // lower 16-bits, ignoring undefs.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // Likewise, to narrow to 8 bits the top 8-bits must match the
              // lower 8-bits, ignoring undefs.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                // The two bytes agree: an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

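// Worked example (illustrative): a v4i32 build vector of 0x01010101 yields
// Bits128[0] == Bits128[1] == 0x0101010101010101 with no undefs; the halves
// match at every width, so with MinSplatBits == 8 the routine reports
// SplatBits == 0x01, SplatSize == 1.  A model of the undef-free narrowing,
// where bits is one 64-bit half of the vector (helper name hypothetical):
#if 0
static bool is_splat8_model(uint64_t bits) {
  if (uint32_t(bits) != uint32_t(bits >> 32)) return false; // 32-bit halves
  uint32_t b32 = uint32_t(bits);
  if (uint16_t(b32) != uint16_t(b32 >> 16)) return false;   // 16-bit halves
  uint16_t b16 = uint16_t(b32);
  return uint8_t(b16) == uint8_t(b16 >> 8);                 // bytes
}
#endif
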
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
    // Don't silently fall through into the v4f32 case for a value type we
    // don't recognize:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, got "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

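// Worked example (illustrative): splatting the v2i64 constant
// 0x00000000_12345678.  The upper word is the special pattern 0, so only the
// lower word needs a register (LO32 splats 0x12345678 and HI32 aliases it).
// The byte mask then comes out, word by word, as
//   i = 0: 0x80 0x80 0x80 0x80   (SHUFB emits 0x00 for mask byte 0x80)
//   i = 1: 0x14 0x15 0x16 0x17   (bytes 4..7 of the second operand, LO32)
//   i = 2: 0x80 0x80 0x80 0x80
//   i = 3: 0x1c 0x1d 0x1e 0x1f   (bytes 12..15 of LO32)
// so a single SHUFB materializes the constant without a constant-pool load.
// A model of the mask computation for this upper_special case:
#if 0
static void v2i64_mask_model(unsigned char mask[16]) {
  for (int i = 0; i < 4; ++i)        // the four 32-bit words of the result
    for (int j = 0; j < 4; ++j)
      mask[i*4+j] = ((i & 1) == 0 ? 0x80              // upper half: zero byte
                                  : i * 4 + j + 16);  // lower half: from LO32
}
#endif
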
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate.  The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3).  If this is the case, then generate an
/// INSERT_MASK synthetic instruction.  Otherwise, spill V3 to the constant
/// pool.  In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion.  This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}

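// Illustrative sketch (not part of the build; helper name hypothetical) of
// the general-case byte expansion above for a v4i32 shuffle.  Element indices
// 4..7 refer to V2, whose bytes occupy positions 16..31 of the concatenated
// (V1, V2) pair that SHUFB indexes:
#if 0
static void expand_mask_v4i32_model(const unsigned elt[4],
                                    unsigned char bytes[16]) {
  const unsigned BytesPerElement = 4;
  for (unsigned i = 0; i != 4; ++i)
    for (unsigned j = 0; j != BytesPerElement; ++j)
      bytes[i*BytesPerElement + j] = elt[i]*BytesPerElement + j;
  // e.g. elt = {0, 1, 6, 3} yields bytes 0..3, 4..7, 24..27, 12..15.
}
#endif
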
static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

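// Reference models (illustrative, not part of the build; helper names are
// hypothetical) for the element-wise results the v8i16 and v16i8 lowerings
// above must produce.  The DAG plumbing is intricate, but the net effect per
// element is a plain multiply modulo the element width: MPY/MPYHH supply
// 16-bit partial products and SELB merges them under an FSMBI byte mask.
#if 0
static void mul_v8i16_model(const uint16_t a[8], const uint16_t b[8],
                            uint16_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = (uint16_t)(a[i] * b[i]);   // product modulo 2^16
}

static void mul_v16i8_model(const uint8_t a[16], const uint8_t b[16],
                            uint8_t out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = (uint8_t)(a[i] * b[i]);    // product modulo 2^8
}
#endif
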
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}

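// Illustrative scalar model (not part of the build; helper name hypothetical)
// of the f32 division above: a reciprocal estimate followed by one
// Newton-Raphson-style refinement.  Here 1.0f/B stands in for the
// FPRecipEst/FPInterp pair, which only approximates the reciprocal.
#if 0
static float fdiv_model(float A, float B) {
  float r  = 1.0f / B;            // BRcpl: refined reciprocal estimate of B
  float q0 = A * r;               // AxBRcpl: first quotient approximation
  return q0 + r * (A - B * q0);   // correct q0 by its scaled residual
}
#endif
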
// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}

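// Worked example (illustrative; helper name hypothetical): extracting element
// 3 of a v8i16.  Then elt_byte = 6 and the preferred slot spans bytes 2..3,
// so the loop above yields the repeating mask {0x80, 0x80, 6, 7, ...}: mask
// byte 0x80 makes SHUFB emit a zero byte, while bytes 6..7 route the element
// into the preferred slot that EXTRACT_ELT0 reads.
#if 0
static void extract_mask_i16_elt3_model(unsigned mask[16]) {
  for (int i = 0; i < 16; ++i) {
    int s = i % 4;                    // the pattern repeats every 4 bytes
    mask[i] = (s < 2) ? 0x80          // zero-fill above the preferred slot
                      : 6 + (s - 2);  // source bytes 6..7 of element 3
  }
}
#endif
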
static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  // dyn_cast, not cast, so that the assert below actually checks something:
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

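// Sketch (illustrative) of why the address computation above suffices: the
// C*D instructions that INSERT_MASK selects to derive their insertion control
// word from the low bits of the address operand, so adding the element index
// to the 16-byte-aligned $2 steers the mask to the intended slot.  In LLVM IR
// terms the node being lowered here is simply:
#if 0
  %r = insertelement <4 x i32> %v, i32 %s, i32 2   ; VecOp, ValOp, IdxOp = 2
#endif
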
static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}

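// Illustrative model (not part of the build; helper name hypothetical) of the
// ROTL/ROTR expansion above: replicating the byte into both halves of an i16
// makes a 16-bit rotate yield the 8-bit rotate in the low byte, for rotate
// counts 0..7.
#if 0
static uint8_t rotl8_model(uint8_t x, unsigned n) {    // n in [0, 7]
  unsigned doubled = x | (x << 8);                     // ExpandArg
  unsigned rot = ((doubled << n) | (doubled >> (16 - n))) & 0xffff;
  return (uint8_t)rot;                                 // TRUNCATE to i8
}
#endif
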
//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

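// Illustrative model (not part of the build; helper name hypothetical) of the
// decomposition used both here and in the v4i32 case of LowerVectorMUL: SPU
// multiplies 16-bit halves, so a full 32-bit product is assembled from three
// partial products.
#if 0
static uint32_t mul32_model(uint32_t a, uint32_t b) {
  uint32_t lo = (a & 0xffff) * (b & 0xffff);        // MPYU: lo(a) * lo(b)
  uint32_t h1 = ((a >> 16) * (b & 0xffff)) << 16;   // MPYH: hi(a) * lo(b)
  uint32_t h2 = ((b >> 16) * (a & 0xffff)) << 16;   // MPYH: hi(b) * lo(a)
  // hi(a)*hi(b) only affects bits >= 32, so it is dropped modulo 2^32:
  return h1 + h2 + lo;
}
#endif
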
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    // The popcount of an i16 can be as large as 16, which needs 5 bits; a
    // 0x0f mask would truncate a count of 16 to 0:
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which the virtual register CNTB_reg
    // becomes associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}

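// Illustrative model (not part of the build; helper name hypothetical) of the
// i32 path above: CNTB leaves one population count per byte, then two
// shift/add rounds fold the four counts together and the mask keeps the
// total (at most 32).
#if 0
static uint32_t ctpop32_model(uint32_t x) {
  // Per-byte population counts, standing in for CNTB:
  uint32_t cnt = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned c = 0;
    for (int b = 0; b < 8; ++b)
      c += (x >> (8*i + b)) & 1;
    cnt |= c << (8*i);
  }
  cnt += cnt >> 16;        // fold the upper halfword's counts into the lower
  cnt += cnt >> 8;         // fold the remaining two byte counts
  return cnt & 0xff;       // the total fits in one byte
}
#endif
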
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shift left:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V (the shifted operand,
          return N0;                    // not the zero shift amount)
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // The SPU's local store is 256K, so D-form offsets must fit in an 18-bit
  // signed immediate:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}