//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }
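
  // Illustrative note: an SPU register is 128 bits wide, and scalar
  // operations read and write a fixed "preferred slot" within that
  // quadword; prefslot_byte above records the byte offset of that slot
  // for each scalar type. For example, with the table as given:
  //
  //   getValueTypeMapEntry(MVT::i32)->prefslot_byte == 0
  //     (an i32 lives in bytes 0..3 of the quadword)
  //   getValueTypeMapEntry(MVT::i16)->prefslot_byte == 2
  //     (an i16 lives in bytes 2..3)
  //
  // The load lowering below rotates a 16-byte chunk so that the addressed
  // element lands in this slot.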

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
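
  // Background sketch (summarizing the SPU ISA from memory; treat the
  // details as assumptions rather than documentation): local-store
  // addresses come in three flavors:
  //
  //   D-form: register + small signed immediate displacement
  //   X-form: register + register
  //   A-form: absolute (immediate) local-store address
  //
  // Nodes accepted by isMemoryOperand() either already carry an address
  // (SPUISD::DFormAddr) or can be materialized directly as an immediate
  // address by the instruction selector.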
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Keep signed divides by powers of two as divides rather than
  // pre-expanding them to shift sequences.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root, so FSQRT is expanded (to a libcall)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }
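
    // Worked example: an i16 load from (base + 6), with prefslot_byte == 2
    // from valtype_map, yields c_offset = 6 and
    // c_rotamt = (6 & 0xf) - 2 = 4. Rotating the containing quadword left
    // by 4 bytes moves bytes 6..7 into bytes 2..3, the i16 preferred slot.
    // A negative c_rotamt is normalized by adding 16 before the rotate is
    // emitted below.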

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
                                  LN->getSrcValueOffset(), OpVT,
                                  LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            assert(ExtType == ISD::ZEXTLOAD
                   && "LowerLOAD: expected SEXTLOAD or ZEXTLOAD");
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
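  // Strategy sketch (illustrative values): a store of an i32 to (base + 4)
  // becomes, roughly,
  //   (1) load the enclosing 16-byte quadword,
  //   (2) build an insertion mask for byte offset 4 (SPUISD::INSERT_MASK),
  //   (3) SHUFB the scalar into bytes 4..7 of the loaded quadword,
  //   (4) store the whole quadword back.
  // The code below emits this load-modify-store sequence.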
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer,
    // to force a register load with the address; basep is the actual
    // dform addr offs($reg).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}
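
// Illustrative DAG shape: when the subtarget reports usingLargeMem(), a
// constant-pool reference CPI is materialized as
//   (add (SPUISD::Lo CPI, 0), (SPUISD::Hi CPI, 0))
// which the instruction selector is expected to fold into an immediate
// halfword pair (presumably an ILHU-style upper load combined with the
// lower halfword); in small-memory mode the bare target constant-pool
// node suffices.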

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
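
// Worked example: lowering the i64 constant 0x123456789abcdef0 produces
//   (SPUISD::EXTRACT_ELT0 (BUILD_VECTOR v2i64 C, C))
// with C = 0x123456789abcdef0. The splatted BUILD_VECTOR can then be
// matched by the vector-constant helpers below (get_vec_i16imm and
// friends), and EXTRACT_ELT0 pulls the scalar back out of the preferred
// slot.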

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
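
// Worked example: Addr = 0x1234 is accepted; its low two bits are zero and
// (0x1234 << 14) >> 14 == 0x1234, so the value survives the 18-bit
// sign-extension round trip, and the returned immediate is
// 0x1234 >> 2 == 0x48d. Addr = 0x1235 is rejected (low bits nonzero), as
// is any address that does not survive the round trip.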

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
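
// Worked example: for ValueType == MVT::i32 and a splat value of
// 0xffff8000 (-32768), SExtValue = ((0xffff8000 & 0xffff) << 16) >> 16
// reproduces 0xffff8000, so the value is accepted as a signed 16-bit
// immediate. A splat of 0x00018000 fails the round trip (it reconstructs
// to 0xffff8000) and is rejected.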

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff      /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where that value has only its upper 16 bits set (making it a
/// candidate for an immediate-load-halfword-upper), and if so, return the
/// constant shifted down into the low 16 bits
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
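
// Worked example: for a v4i32 build_vector
//   { 0x11111111, 0x22222222, undef, 0x44444444 }
// EltBitSize is 32 and the packing above produces
//   VectorBits[0] = 0x1111111122222222   (elements 0 and 1)
//   VectorBits[1] = 0x0000000044444444   (element 3; element 2 is undef)
//   UndefBits[1]  = 0xffffffff00000000   (element 2's lane)
// and the function returns false, since every element was constant or
// undef.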

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

1523 // If the top 16-bits are different than the lower 16-bits, ignoring
1524 // undefs, we have an i32 splat.
1525 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1526 if (MinSplatBits < 16) {
1527 // If the top 8-bits are different than the lower 8-bits, ignoring
1528 // undefs, we have an i16 splat.
1529 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1530 // Otherwise, we have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1533 SplatSize = 1;
1534 return true;
1535 }
1536 } else {
1537 SplatBits = Bits16;
1538 SplatUndef = Undef16;
1539 SplatSize = 2;
1540 return true;
1541 }
1542 }
1543 } else {
1544 SplatBits = Bits32;
1545 SplatUndef = Undef32;
1546 SplatSize = 4;
1547 return true;
1548 }
1549 }
1550 } else {
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1553 SplatSize = 8;
1554 return true;
1555 }
1556 }
1557
1558 return false; // Can't be a splat if two pieces don't match.
1559}
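
//! Worked example for isConstantSplat
/*!
  A minimal standalone sketch (not part of the lowering path) of the halving
  scheme above, with no undef bits: each step ORs the two halves together and
  compares them, so 0x0101...01 folds all the way down to an 8-bit splat.
  \code
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t Bits128[2] = { 0x0101010101010101ULL,
                                  0x0101010101010101ULL };
    assert(Bits128[0] == Bits128[1]);                    // 64-bit halves match

    uint64_t Bits64 = Bits128[0] | Bits128[1];
    uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
    uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);

    assert(uint32_t(Bits64) == uint32_t(Bits64 >> 32));  // 32-bit halves match
    assert(uint16_t(Bits32) == uint16_t(Bits32 >> 16));  // 16-bit halves match
    assert(uint8_t(Bits16) == uint8_t(Bits16 >> 8));     // 8-bit halves match

    uint8_t SplatBits = uint8_t(Bits16);
    assert(SplatBits == 0x01);           // SplatBits = 0x01, SplatSize = 1
    return 0;
  }
  \endcode
*/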

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
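
//! Sketch of the shufb control bytes used in the v2i64 case above
/*!
  A minimal standalone sketch (not part of the lowering path), assuming the
  documented shufb semantics: a control byte in 0x00-0x1F selects a byte from
  the 32-byte concatenation of the two source vectors, while the special
  patterns 0x80, 0xC0 and 0xE0 produce the constants 0x00, 0xFF and 0x80.
  \code
  #include <cassert>
  #include <cstdint>

  // One byte of a shufb-style shuffle, per the special patterns above.
  static uint8_t shufb_byte(uint8_t ctl,
                            const uint8_t a[16], const uint8_t b[16]) {
    if ((ctl & 0xc0) == 0x80) return 0x00;   // 10xxxxxx -> 0x00
    if ((ctl & 0xe0) == 0xc0) return 0xff;   // 110xxxxx -> 0xFF
    if ((ctl & 0xe0) == 0xe0) return 0x80;   // 111xxxxx -> 0x80
    unsigned idx = ctl & 0x1f;               // otherwise select from a:b
    return idx < 16 ? a[idx] : b[idx - 16];
  }

  int main() {
    uint8_t a[16], b[16];
    for (int i = 0; i < 16; ++i) { a[i] = i; b[i] = 16 + i; }
    assert(shufb_byte(0x80, a, b) == 0x00);
    assert(shufb_byte(0xc0, a, b) == 0xff);
    assert(shufb_byte(0xe0, a, b) == 0x80);
    assert(shufb_byte(0x03, a, b) == 3);     // byte 3 of the first input
    assert(shufb_byte(0x13, a, b) == 19);    // byte 3 of the second input
    return 0;
  }
  \endcode
*/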

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
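
//! Worked example of the per-byte mask expansion above
/*!
  A minimal standalone sketch (not part of the lowering path): for a v4i32
  shuffle mask (5, 1, 2, 3), each 4-byte element index expands to four byte
  indices SrcElt*4+j, giving the 16 shufb control bytes
  20,21,22,23, 4,5,6,7, 8,9,10,11, 12,13,14,15.
  \code
  #include <cassert>
  #include <cstdint>
  #include <vector>

  int main() {
    const unsigned PermMask[4] = { 5, 1, 2, 3 };  // element 0 comes from V2
    const unsigned BytesPerElement = 4;           // v4i32

    std::vector<uint8_t> ResultMask;
    for (unsigned i = 0; i != 4; ++i)
      for (unsigned j = 0; j != BytesPerElement; ++j)
        ResultMask.push_back(PermMask[i] * BytesPerElement + j);

    assert(ResultMask[0] == 20 && ResultMask[3] == 23);   // bytes of elt 5
    assert(ResultMask[4] == 4  && ResultMask[15] == 15);  // identity elsewhere
    return 0;
  }
  \endcode
*/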

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);     // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off the upper 16 bits of the 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
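
//! Sketch of the mpyh/mpyu decomposition used above
/*!
  A minimal standalone sketch (not part of the lowering path) of the identity
  behind the v4i32 (and scalar i32) lowering: SPU only multiplies 16-bit
  halfwords, so a full 32-bit product is assembled as
  mpyh(a,b) + mpyh(b,a) + mpyu(a,b), all modulo 2^32.
  \code
  #include <cassert>
  #include <cstdint>

  static uint32_t mpyh(uint32_t a, uint32_t b) {
    // high halfword of a times low halfword of b, shifted left 16
    return ((a >> 16) * (b & 0xffff)) << 16;
  }

  static uint32_t mpyu(uint32_t a, uint32_t b) {
    // low halfword of a times low halfword of b (full 32-bit product)
    return (a & 0xffff) * (b & 0xffff);
  }

  int main() {
    uint32_t a = 0x12345678, b = 0x9abcdef0;
    // (aH*bL + bH*aL) << 16 plus aL*bL equals a*b (mod 2^32); the aH*bH
    // term would land in bits 32..63 and drops out.
    assert(mpyh(a, b) + mpyh(b, a) + mpyu(a, b) == a * b);
    return 0;
  }
  \endcode
*/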

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  //   (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
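
//! Sketch of the reciprocal-estimate division scheme above
/*!
  A minimal standalone sketch (not part of the lowering path), assuming only
  that FPRecipEst/FPInterp together yield an approximate reciprocal x of B.
  The emitted DAG computes q = A*x and then refines it with the residual
  term q + x*(A - B*q), which cancels most of the estimate's error.
  \code
  #include <cassert>
  #include <cmath>

  int main() {
    float A = 355.0f, B = 113.0f;
    float x = 1.0f / B;
    x = x + x * 1e-3f;              // stand-in for an imprecise estimate

    float q = A * x;                // first approximation of A/B
    float r = q + x * (A - B * q);  // one refinement step, as in the DAG

    assert(std::fabs(r - A / B) < std::fabs(q - A / B));  // refined is closer
    return 0;
  }
  \endcode
*/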

// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}
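
//! Worked example of the extraction shuffle mask above
/*!
  A minimal standalone sketch (not part of the lowering path): extracting
  element 2 of a v4i32 gives elt_byte = 8 and preferred slot bytes 0..3, so
  the first four control bytes are 8,9,10,11 (the wanted word moved into the
  preferred slot) and the pattern repeats across the remaining slots.
  \code
  #include <cassert>

  int main() {
    const int EltNo = 2, EltBits = 32;
    const int elt_byte = EltNo * EltBits / 8;        // 8
    const int prefslot_begin = 0, prefslot_end = 3;  // i32 preferred slot

    unsigned ShufMask[16];
    for (int i = 0; i < 16; ++i) {
      if (i <= prefslot_end)
        ShufMask[i] = (i < prefslot_begin)
                        ? 0x80
                        : elt_byte + (i - prefslot_begin);
      else
        ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
    }

    assert(ShufMask[0] == 8 && ShufMask[3] == 11);   // bytes of element 2
    assert(ShufMask[4] == ShufMask[0]);              // pattern repeats
    return 0;
  }
  \endcode
*/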

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}
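
//! Sketch of the 8-bit rotate trick above
/*!
  A minimal standalone sketch (not part of the lowering path) of the
  ROTL/ROTR expansion: the byte is zero-extended to 16 bits, OR'd with
  itself shifted left 8 (two adjacent copies), rotated as an i16, and
  truncated. Because the doubled pattern has period 8, a 16-bit rotate of
  it is an 8-bit rotate.
  \code
  #include <cassert>
  #include <cstdint>

  static uint16_t rotl16(uint16_t v, unsigned amt) {
    amt &= 15;
    return amt ? uint16_t((v << amt) | (v >> (16 - amt))) : v;
  }

  static uint8_t rotl8_via_i16(uint8_t v, unsigned amt) {
    uint16_t expand = uint16_t(v | (uint16_t(v) << 8));  // two copies
    return uint8_t(rotl16(expand, amt));                 // truncate
  }

  static uint8_t rotl8_ref(uint8_t v, unsigned amt) {    // reference rotate
    amt &= 7;
    return amt ? uint8_t((v << amt) | (v >> (8 - amt))) : v;
  }

  int main() {
    for (unsigned v = 0; v < 256; ++v)
      for (unsigned amt = 0; amt < 8; ++amt)
        assert(rotl8_via_i16(uint8_t(v), amt) == rotl8_ref(uint8_t(v), amt));
    return 0;
  }
  \endcode
*/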

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    // The accumulated count can be as large as 16, which needs 5 bits:
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which the virtual register
    // CNTB_reg becomes associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}
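
//! Worked example of the CNTB accumulation above
/*!
  A minimal standalone sketch (not part of the lowering path): cntb leaves
  one per-byte population count in each byte of the word, and the i32 case
  folds them with two shift-and-add steps before masking, since the total
  (at most 32) fits in the low byte.
  \code
  #include <cassert>
  #include <cstdint>

  // Per-byte popcount, packed into the corresponding bytes (cntb-like).
  static uint32_t cntb(uint32_t x) {
    uint32_t r = 0;
    for (int byte = 0; byte < 4; ++byte) {
      uint32_t b = (x >> (8 * byte)) & 0xff, n = 0;
      for (int bit = 0; bit < 8; ++bit)
        n += (b >> bit) & 1;
      r |= n << (8 * byte);
    }
    return r;
  }

  int main() {
    uint32_t x = 0xf0f0ffffu;                  // popcount 24
    uint32_t counts = cntb(x);                 // 0x04040808
    uint32_t sum1 = (counts >> 16) + counts;   // fold upper halfword in
    uint32_t sum2 = (sum1 >> 8) + sum1;        // fold remaining byte in
    assert((sum2 & 0xff) == 24);               // total in the low byte
    return 0;
  }
  \endcode
*/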

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shifts and rotates:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: LLVM will generate an undef node if the shift amount is greater
  // than 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 constraint letters (carried over from the PowerPC port)
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's local store is 256K, so D-form offsets must fit in 18 bits:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
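
//! Sketch of the 18-bit offset range check above
/*!
  A minimal standalone sketch (not part of the lowering path): with the
  bounds above, the accepted offsets are the open interval
  (-262144, 262143), i.e. -262143 through 262142 inclusive.
  \code
  #include <cassert>

  static bool isLegalOffset(long long V) {
    return V > -(1 << 18) && V < (1 << 18) - 1;
  }

  int main() {
    assert(isLegalOffset(-262143) && !isLegalOffset(-262144));
    assert(isLegalOffset(262142) && !isLegalOffset(262143));
    return 0;
  }
  \endcode
*/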

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}