blob: 2ab4841c64ed42ac4fc56c3a92bef9ec2a74d2ee [file] [log] [blame]
Scott Michel266bc8f2007-12-04 22:23:35 +00001//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by a team from the Computer Systems Research
Scott Michel2466c372007-12-05 01:40:25 +00006// Department at The Aerospace Corporation and is distributed under the
7// University of Illinois Open Source License. See LICENSE.TXT for details.
Scott Michel266bc8f2007-12-04 22:23:35 +00008//
9//===----------------------------------------------------------------------===//
10//
11// This file implements the SPUTargetLowering class.
12//
13//===----------------------------------------------------------------------===//
14
15#include "SPURegisterNames.h"
16#include "SPUISelLowering.h"
17#include "SPUTargetMachine.h"
18#include "llvm/ADT/VectorExtras.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SSARegMap.h"
26#include "llvm/Constants.h"
27#include "llvm/Function.h"
28#include "llvm/Intrinsics.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32
33#include <map>
34
35using namespace llvm;
36
37// Used in getTargetNodeName() below
38namespace {
39 std::map<unsigned, const char *> node_names;
40
41 //! MVT::ValueType mapping to useful data for Cell SPU
42 struct valtype_map_s {
43 const MVT::ValueType valtype;
44 const int prefslot_byte;
45 };
46
47 const valtype_map_s valtype_map[] = {
48 { MVT::i1, 3 },
49 { MVT::i8, 3 },
50 { MVT::i16, 2 },
51 { MVT::i32, 0 },
52 { MVT::f32, 0 },
53 { MVT::i64, 0 },
54 { MVT::f64, 0 },
55 { MVT::i128, 0 }
56 };
57
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59
60 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
61 const valtype_map_s *retval = 0;
62
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
66 break;
67 }
68 }
69
70#ifndef NDEBUG
71 if (retval == 0) {
72 cerr << "getValueTypeMapEntry returns NULL for "
73 << MVT::getValueTypeString(VT)
74 << "\n";
75 abort();
76 }
77#endif
78
79 return retval;
80 }
81
82 //! Predicate that returns true if operand is a memory target
83 /*!
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
87 address.
88 */
89 bool isMemoryOperand(const SDOperand &Op)
90 {
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
105 }
106}
107
108SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
109 : TargetLowering(TM),
110 SPUTM(TM)
111{
112 // Fold away setcc operations if possible.
113 setPow2DivIsCheap();
114
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
118
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel504c3692007-12-17 22:32:34 +0000122 // NOTE: Ignore the previous note. For now. :-)
123 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
124 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
125 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
126 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
127 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
128 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000129 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
130
131 // SPU has no sign or zero extended loads for i1, i8, i16:
132 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
133 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
134 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
135 setStoreXAction(MVT::i1, Custom);
136
137 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
140 setStoreXAction(MVT::i8, Custom);
141
142 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
144 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
145
146 // SPU constant load actions are custom lowered:
147 setOperationAction(ISD::Constant, MVT::i64, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
149 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
150
151 // SPU's loads and stores have to be custom lowered:
152 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
153 ++sctype) {
154 setOperationAction(ISD::LOAD, sctype, Custom);
155 setOperationAction(ISD::STORE, sctype, Custom);
156 }
157
158 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
159 // into BR_CCs. BR_CC instructions are custom selected in
160 // SPUDAGToDAGISel.
161 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
162
163 // Expand the jumptable branches
164 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
165 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
166 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
167
168 // SPU has no intrinsics for these particular operations:
169 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
170 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
171 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
172
173 // PowerPC has no SREM/UREM instructions
174 setOperationAction(ISD::SREM, MVT::i32, Expand);
175 setOperationAction(ISD::UREM, MVT::i32, Expand);
176 setOperationAction(ISD::SREM, MVT::i64, Expand);
177 setOperationAction(ISD::UREM, MVT::i64, Expand);
178
179 // We don't support sin/cos/sqrt/fmod
180 setOperationAction(ISD::FSIN , MVT::f64, Expand);
181 setOperationAction(ISD::FCOS , MVT::f64, Expand);
182 setOperationAction(ISD::FREM , MVT::f64, Expand);
183 setOperationAction(ISD::FSIN , MVT::f32, Expand);
184 setOperationAction(ISD::FCOS , MVT::f32, Expand);
185 setOperationAction(ISD::FREM , MVT::f32, Expand);
186
187 // If we're enabling GP optimizations, use hardware square root
188 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
189 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
190
191 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
192 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
193
194 // SPU can do rotate right and left, so legalize it... but customize for i8
195 // because instructions don't exist.
196 setOperationAction(ISD::ROTR, MVT::i32, Legal);
197 setOperationAction(ISD::ROTR, MVT::i16, Legal);
198 setOperationAction(ISD::ROTR, MVT::i8, Custom);
199 setOperationAction(ISD::ROTL, MVT::i32, Legal);
200 setOperationAction(ISD::ROTL, MVT::i16, Legal);
201 setOperationAction(ISD::ROTL, MVT::i8, Custom);
202 // SPU has no native version of shift left/right for i8
203 setOperationAction(ISD::SHL, MVT::i8, Custom);
204 setOperationAction(ISD::SRL, MVT::i8, Custom);
205 setOperationAction(ISD::SRA, MVT::i8, Custom);
206
207 // Custom lower i32 multiplications
208 setOperationAction(ISD::MUL, MVT::i32, Custom);
209
210 // Need to custom handle (some) common i8 math ops
211 setOperationAction(ISD::SUB, MVT::i8, Custom);
212 setOperationAction(ISD::MUL, MVT::i8, Custom);
213
214 // SPU does not have BSWAP. It does have i32 support CTLZ.
215 // CTPOP has to be custom lowered.
216 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
217 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
218
219 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
222 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
223
224 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
225 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
226
227 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
228
229 // SPU does not have select or setcc
230 setOperationAction(ISD::SELECT, MVT::i1, Expand);
231 setOperationAction(ISD::SELECT, MVT::i8, Expand);
232 setOperationAction(ISD::SELECT, MVT::i16, Expand);
233 setOperationAction(ISD::SELECT, MVT::i32, Expand);
234 setOperationAction(ISD::SELECT, MVT::i64, Expand);
235 setOperationAction(ISD::SELECT, MVT::f32, Expand);
236 setOperationAction(ISD::SELECT, MVT::f64, Expand);
237
238 setOperationAction(ISD::SETCC, MVT::i1, Expand);
239 setOperationAction(ISD::SETCC, MVT::i8, Expand);
240 setOperationAction(ISD::SETCC, MVT::i16, Expand);
241 setOperationAction(ISD::SETCC, MVT::i32, Expand);
242 setOperationAction(ISD::SETCC, MVT::i64, Expand);
243 setOperationAction(ISD::SETCC, MVT::f32, Expand);
244 setOperationAction(ISD::SETCC, MVT::f64, Expand);
245
246 // SPU has a legal FP -> signed INT instruction
247 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
250 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
251
252 // FDIV on SPU requires custom lowering
253 setOperationAction(ISD::FDIV, MVT::f32, Custom);
254 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
255
256 // SPU has [U|S]INT_TO_FP
257 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
268 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
269 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
270
271 // We cannot sextinreg(i1). Expand to shifts.
272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
273
274 // Support label based line numbers.
275 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
276 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
277
278 // We want to legalize GlobalAddress and ConstantPool nodes into the
279 // appropriate instructions to materialize the address.
280 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
283 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
284 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
287 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
288
289 // RET must be custom lowered, to meet ABI requirements
290 setOperationAction(ISD::RET, MVT::Other, Custom);
291
292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
293 setOperationAction(ISD::VASTART , MVT::Other, Custom);
294
295 // Use the default implementation.
296 setOperationAction(ISD::VAARG , MVT::Other, Expand);
297 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
298 setOperationAction(ISD::VAEND , MVT::Other, Expand);
299 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
300 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
302 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
303
304 // Cell SPU has instructions for converting between i64 and fp.
305 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
306 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307
308 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
309 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
310
311 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
313
314 // First set operation action for all vector types to expand. Then we
315 // will selectively turn on ones that can be effectively codegen'd.
316 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
322
323 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
324 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
325 // add/sub are legal for all supported vector VT's.
326 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
327 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
328 // mul has to be custom lowered.
329 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
330
331 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
336 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
337
338 // These operations need to be expanded:
339 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
343 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
344
345 // Custom lower build_vector, constant pool spills, insert and
346 // extract vector elements:
347 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
352 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
353 }
354
355 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
356 setOperationAction(ISD::AND, MVT::v16i8, Custom);
357 setOperationAction(ISD::OR, MVT::v16i8, Custom);
358 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
360
361 setSetCCResultType(MVT::i32);
362 setShiftAmountType(MVT::i32);
363 setSetCCResultContents(ZeroOrOneSetCCResult);
364
365 setStackPointerRegisterToSaveRestore(SPU::R1);
366
367 // We have target-specific dag combine patterns for the following nodes:
368 // e.g., setTargetDAGCombine(ISD::SUB);
369
370 computeRegisterProperties();
371}
372
373const char *
374SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375{
376 if (node_names.empty()) {
377 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
378 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
379 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
380 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
417 }
418
419 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420
421 return ((i != node_names.end()) ? i->second : 0);
422}
423
424//===----------------------------------------------------------------------===//
425// Calling convention code:
426//===----------------------------------------------------------------------===//
427
428#include "SPUGenCallingConv.inc"
429
430//===----------------------------------------------------------------------===//
431// LowerOperation implementation
432//===----------------------------------------------------------------------===//
433
434/// Custom lower loads for CellSPU
435/*!
436 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
437 within a 16-byte block, we have to rotate to extract the requested element.
438 */
439static SDOperand
440LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
441 LoadSDNode *LN = cast<LoadSDNode>(Op);
442 SDOperand basep = LN->getBasePtr();
443 SDOperand the_chain = LN->getChain();
444 MVT::ValueType VT = LN->getLoadedVT();
445 MVT::ValueType OpVT = Op.Val->getValueType(0);
446 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
447 ISD::LoadExtType ExtType = LN->getExtensionType();
448 unsigned alignment = LN->getAlignment();
449 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
450 SDOperand Ops[8];
451
452 // For an extending load of an i1 variable, just call it i8 (or whatever we
453 // were passed) and make it zero-extended:
454 if (VT == MVT::i1) {
455 VT = OpVT;
456 ExtType = ISD::ZEXTLOAD;
457 }
458
459 switch (LN->getAddressingMode()) {
460 case ISD::UNINDEXED: {
461 SDOperand result;
462 SDOperand rot_op, rotamt;
463 SDOperand ptrp;
464 int c_offset;
465 int c_rotamt;
466
467 // The vector type we really want to be when we load the 16-byte chunk
468 MVT::ValueType vecVT, opVecVT;
469
470 if (VT != MVT::i1)
471 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
472 else
473 vecVT = MVT::v16i8;
474
475 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
476
477 if (basep.getOpcode() == ISD::ADD) {
478 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
479
480 assert(CN != NULL
481 && "LowerLOAD: ISD::ADD operand 1 is not constant");
482
483 c_offset = (int) CN->getValue();
484 c_rotamt = (int) (c_offset & 0xf);
485
486 // Adjust the rotation amount to ensure that the final result ends up in
487 // the preferred slot:
488 c_rotamt -= vtm->prefslot_byte;
489 ptrp = basep.getOperand(0);
490 } else {
491 c_offset = 0;
492 c_rotamt = -vtm->prefslot_byte;
493 ptrp = basep;
494 }
495
496 if (alignment == 16) {
497 // 16-byte aligned load into preferred slot, no rotation
498 if (c_rotamt == 0) {
499 if (isMemoryOperand(ptrp))
500 // Return unchanged
501 return SDOperand();
502 else {
503 // Return modified D-Form address for pointer:
504 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
505 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
506 if (VT == OpVT)
507 return DAG.getLoad(VT, LN->getChain(), ptrp,
508 LN->getSrcValue(), LN->getSrcValueOffset(),
509 LN->isVolatile(), 16);
510 else
511 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
512 LN->getSrcValueOffset(), OpVT,
513 LN->isVolatile(), 16);
514 }
515 } else {
516 // Need to rotate...
517 if (c_rotamt < 0)
518 c_rotamt += 16;
519 // Realign the base pointer, with a D-Form address
520 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
521 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
522 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
523 else
524 basep = ptrp;
525
526 // Rotate the load:
527 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
528 LN->getSrcValue(), LN->getSrcValueOffset(),
529 LN->isVolatile(), 16);
530 the_chain = rot_op.getValue(1);
531 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
532
533 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
534 Ops[0] = the_chain;
535 Ops[1] = rot_op;
536 Ops[2] = rotamt;
537
538 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
539 the_chain = result.getValue(1);
540
541 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
542 SDVTList scalarvts;
543 Ops[0] = the_chain;
544 Ops[1] = result;
545 if (OpVT == VT) {
546 scalarvts = DAG.getVTList(VT, MVT::Other);
547 } else {
548 scalarvts = DAG.getVTList(OpVT, MVT::Other);
549 }
550
551 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
552 result);
553 Ops[0] = the_chain;
554 Ops[1] = result;
555 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
556 the_chain = result.getValue(1);
557 } else {
558 // Handle the sign and zero-extending loads for i1 and i8:
559 unsigned NewOpC;
560
561 if (ExtType == ISD::SEXTLOAD) {
562 NewOpC = (OpVT == MVT::i1
563 ? SPUISD::EXTRACT_I1_SEXT
564 : SPUISD::EXTRACT_I8_SEXT);
565 } else if (ExtType == ISD::ZEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_ZEXT
568 : SPUISD::EXTRACT_I8_ZEXT);
569 }
570
571 result = DAG.getNode(NewOpC, OpVT, result);
572 }
573
574 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
575 SDOperand retops[2] = { result, the_chain };
576
577 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
578 return result;
579 /*UNREACHED*/
580 }
581 } else {
582 // Misaligned 16-byte load:
583 if (basep.getOpcode() == ISD::LOAD) {
584 LN = cast<LoadSDNode>(basep);
585 if (LN->getAlignment() == 16) {
586 // We can verify that we're really loading from a 16-byte aligned
587 // chunk. Encapsulate basep as a D-Form address and return a new
588 // load:
589 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
590 DAG.getConstant(0, PtrVT));
591 if (OpVT == VT)
592 return DAG.getLoad(VT, LN->getChain(), basep,
593 LN->getSrcValue(), LN->getSrcValueOffset(),
594 LN->isVolatile(), 16);
595 else
596 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 OpVT, LN->isVolatile(), 16);
599 }
600 }
601
602 // Catch all other cases where we can't guarantee that we have a
603 // 16-byte aligned entity, which means resorting to an X-form
604 // address scheme:
605
606 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
607 SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
608 SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
609
610 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
611
612 SDOperand alignLoad =
613 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
614 LN->getSrcValue(), LN->getSrcValueOffset(),
615 LN->isVolatile(), 16);
616
617 SDOperand insertEltOp =
618 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
619
620 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
621 alignLoad,
622 alignLoad,
623 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
624
625 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
626
627 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
628 SDOperand retops[2] = { result, the_chain };
629
630 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
631 return result;
632 }
633 break;
634 }
635 case ISD::PRE_INC:
636 case ISD::PRE_DEC:
637 case ISD::POST_INC:
638 case ISD::POST_DEC:
639 case ISD::LAST_INDEXED_MODE:
640 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
641 "UNINDEXED\n";
642 cerr << (unsigned) LN->getAddressingMode() << "\n";
643 abort();
644 /*NOTREACHED*/
645 }
646
647 return SDOperand();
648}
649
650/// Custom lower stores for CellSPU
651/*!
652 All CellSPU stores are aligned to 16-byte boundaries, so for elements
653 within a 16-byte block, we have to generate a shuffle to insert the
654 requested element into its place, then store the resulting block.
655 */
656static SDOperand
657LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
658 StoreSDNode *SN = cast<StoreSDNode>(Op);
659 SDOperand Value = SN->getValue();
660 MVT::ValueType VT = Value.getValueType();
661 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
662 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
663 SDOperand the_chain = SN->getChain();
Chris Lattner4d321c52007-12-05 18:32:18 +0000664 //unsigned alignment = SN->getAlignment();
665 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
Scott Michel266bc8f2007-12-04 22:23:35 +0000666
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
669 SDOperand basep = SN->getBasePtr();
670 SDOperand ptrOp;
671 int offset;
672
Scott Michel9999e682007-12-19 07:35:06 +0000673 if (basep.getOpcode() == ISD::FrameIndex) {
674 // FrameIndex nodes are always properly aligned. Really.
675 return SDOperand();
676 }
677
Scott Michel266bc8f2007-12-04 22:23:35 +0000678 if (basep.getOpcode() == ISD::ADD) {
679 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
680 assert(CN != NULL
681 && "LowerSTORE: ISD::ADD operand 1 is not constant");
682 offset = unsigned(CN->getValue());
683 ptrOp = basep.getOperand(0);
684 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
685 << offset
686 << "\n");
687 } else {
688 ptrOp = basep;
689 offset = 0;
690 }
691
692 // The vector type we really want to load from the 16-byte chunk, except
693 // in the case of MVT::i1, which has to be v16i8.
694 unsigned vecVT, stVecVT;
695
696 if (StVT != MVT::i1)
697 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
698 else
699 stVecVT = MVT::v16i8;
700 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
701
Scott Michel9999e682007-12-19 07:35:06 +0000702 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
703 // the actual dform addr offs($reg).
704 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
705 DAG.getConstant((offset & ~0xf), PtrVT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000706
707 // Create the 16-byte aligned vector load
708 SDOperand alignLoad =
709 DAG.getLoad(vecVT, the_chain, basep,
710 SN->getSrcValue(), SN->getSrcValueOffset(),
711 SN->isVolatile(), 16);
712 the_chain = alignLoad.getValue(1);
713
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
715 SDOperand theValue = SN->getValue();
716 SDOperand result;
717
718 if (StVT != VT
719 && (theValue.getOpcode() == ISD::AssertZext
720 || theValue.getOpcode() == ISD::AssertSext)) {
721 // Drill down and get the value for zero- and sign-extended
722 // quantities
723 theValue = theValue.getOperand(0);
724 }
725
726 SDOperand insertEltOp =
727 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
728 DAG.getNode(SPUISD::DFormAddr, PtrVT,
729 ptrOp,
730 DAG.getConstant((offset & 0xf), PtrVT)));
731
732 result = DAG.getNode(SPUISD::SHUFB, vecVT,
733 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
734 alignLoad,
735 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
736
737 result = DAG.getStore(the_chain, result, basep,
738 LN->getSrcValue(), LN->getSrcValueOffset(),
739 LN->isVolatile(), LN->getAlignment());
740
741 return result;
742 /*UNREACHED*/
743 }
744 case ISD::PRE_INC:
745 case ISD::PRE_DEC:
746 case ISD::POST_INC:
747 case ISD::POST_DEC:
748 case ISD::LAST_INDEXED_MODE:
749 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
750 "UNINDEXED\n";
751 cerr << (unsigned) SN->getAddressingMode() << "\n";
752 abort();
753 /*NOTREACHED*/
754 }
755
756 return SDOperand();
757}
758
759/// Generate the address of a constant pool entry.
760static SDOperand
761LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
762 MVT::ValueType PtrVT = Op.getValueType();
763 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
764 Constant *C = CP->getConstVal();
765 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
766 const TargetMachine &TM = DAG.getTarget();
767 SDOperand Zero = DAG.getConstant(0, PtrVT);
768
769 if (TM.getRelocationModel() == Reloc::Static) {
770 if (!ST->usingLargeMem()) {
771 // Just return the SDOperand with the constant pool address in it.
772 return CPI;
773 } else {
774 // Generate hi/lo address pair
775 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
776 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
777
778 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
779 }
780 }
781
782 assert(0 &&
783 "LowerConstantPool: Relocation model other than static not supported.");
784 return SDOperand();
785}
786
787static SDOperand
788LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
789 MVT::ValueType PtrVT = Op.getValueType();
790 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
791 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
792 SDOperand Zero = DAG.getConstant(0, PtrVT);
793 const TargetMachine &TM = DAG.getTarget();
794
795 if (TM.getRelocationModel() == Reloc::Static) {
796 if (!ST->usingLargeMem()) {
797 // Just return the SDOperand with the jump table address in it.
798 return JTI;
799 } else {
800 // Generate hi/lo address pair
801 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
802 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
803
804 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
805 }
806 }
807
808 assert(0 &&
809 "LowerJumpTable: Relocation model other than static not supported.");
810 return SDOperand();
811}
812
813static SDOperand
814LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
815 MVT::ValueType PtrVT = Op.getValueType();
816 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
817 GlobalValue *GV = GSDN->getGlobal();
818 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
819 SDOperand Zero = DAG.getConstant(0, PtrVT);
820 const TargetMachine &TM = DAG.getTarget();
821
822 if (TM.getRelocationModel() == Reloc::Static) {
823 if (!ST->usingLargeMem()) {
824 // Generate a local store address
825 return GA;
826 } else {
827 // Generate hi/lo address pair
828 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
829 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
830
831 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
832 }
833 } else {
834 cerr << "LowerGlobalAddress: Relocation model other than static not "
835 << "supported.\n";
836 abort();
837 /*NOTREACHED*/
838 }
839
840 return SDOperand();
841}
842
843//! Custom lower i64 integer constants
844/*!
845 This code inserts all of the necessary juggling that needs to occur to load
846 a 64-bit constant into a register.
847 */
848static SDOperand
849LowerConstant(SDOperand Op, SelectionDAG &DAG) {
850 unsigned VT = Op.getValueType();
851 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
852
853 if (VT == MVT::i64) {
854 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
855 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
856 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
857
858 } else {
859 cerr << "LowerConstant: unhandled constant type "
860 << MVT::getValueTypeString(VT)
861 << "\n";
862 abort();
863 /*NOTREACHED*/
864 }
865
866 return SDOperand();
867}
868
869//! Custom lower single precision floating point constants
870/*!
871 "float" immediates can be lowered as if they were unsigned 32-bit integers.
872 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
873 target description.
874 */
875static SDOperand
876LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
877 unsigned VT = Op.getValueType();
878 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
879
880 assert((FP != 0) &&
881 "LowerConstantFP: Node is not ConstantFPSDNode");
882
Scott Michel266bc8f2007-12-04 22:23:35 +0000883 if (VT == MVT::f32) {
Scott Michel170783a2007-12-19 20:15:47 +0000884 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel266bc8f2007-12-04 22:23:35 +0000885 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel170783a2007-12-19 20:15:47 +0000886 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000887 } else if (VT == MVT::f64) {
Scott Michel170783a2007-12-19 20:15:47 +0000888 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel266bc8f2007-12-04 22:23:35 +0000889 return DAG.getNode(ISD::BIT_CONVERT, VT,
890 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
891 }
892
893 return SDOperand();
894}
895
896static SDOperand
897LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
898{
899 MachineFunction &MF = DAG.getMachineFunction();
900 MachineFrameInfo *MFI = MF.getFrameInfo();
901 SSARegMap *RegMap = MF.getSSARegMap();
902 SmallVector<SDOperand, 8> ArgValues;
903 SDOperand Root = Op.getOperand(0);
904 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
905
906 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
907 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
908
909 unsigned ArgOffset = SPUFrameInfo::minStackSize();
910 unsigned ArgRegIdx = 0;
911 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
912
913 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
914
915 // Add DAG nodes to load the arguments or copy them out of registers.
916 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
917 SDOperand ArgVal;
918 bool needsLoad = false;
919 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
920 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
921
922 switch (ObjectVT) {
923 default: {
924 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
925 << MVT::getValueTypeString(ObjectVT)
926 << "\n";
927 abort();
928 }
929 case MVT::i8:
930 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Scott Michel504c3692007-12-17 22:32:34 +0000931 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000932 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
933 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
934 ++ArgRegIdx;
935 } else {
936 needsLoad = true;
937 }
938 break;
939 case MVT::i16:
940 if (!isVarArg && ArgRegIdx < NumArgRegs) {
941 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
942 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
943 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
944 ++ArgRegIdx;
945 } else {
946 needsLoad = true;
947 }
948 break;
949 case MVT::i32:
950 if (!isVarArg && ArgRegIdx < NumArgRegs) {
951 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
952 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
953 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
954 ++ArgRegIdx;
955 } else {
956 needsLoad = true;
957 }
958 break;
959 case MVT::i64:
960 if (!isVarArg && ArgRegIdx < NumArgRegs) {
961 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
962 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
963 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
964 ++ArgRegIdx;
965 } else {
966 needsLoad = true;
967 }
968 break;
969 case MVT::f32:
970 if (!isVarArg && ArgRegIdx < NumArgRegs) {
971 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
972 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
973 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
974 ++ArgRegIdx;
975 } else {
976 needsLoad = true;
977 }
978 break;
979 case MVT::f64:
980 if (!isVarArg && ArgRegIdx < NumArgRegs) {
981 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
982 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
983 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
984 ++ArgRegIdx;
985 } else {
986 needsLoad = true;
987 }
988 break;
989 case MVT::v2f64:
990 case MVT::v4f32:
991 case MVT::v4i32:
992 case MVT::v8i16:
993 case MVT::v16i8:
994 if (!isVarArg && ArgRegIdx < NumArgRegs) {
995 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
996 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
997 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
998 ++ArgRegIdx;
999 } else {
1000 needsLoad = true;
1001 }
1002 break;
1003 }
1004
1005 // We need to load the argument to a virtual register if we determined above
1006 // that we ran out of physical registers of the appropriate type
1007 if (needsLoad) {
1008 // If the argument is actually used, emit a load from the right stack
1009 // slot.
1010 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1011 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1012 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1013 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1014 } else {
1015 // Don't emit a dead load.
1016 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1017 }
1018
1019 ArgOffset += StackSlotSize;
1020 }
1021
1022 ArgValues.push_back(ArgVal);
1023 }
1024
1025 // If the function takes variable number of arguments, make a frame index for
1026 // the start of the first vararg value... for expansion of llvm.va_start.
1027 if (isVarArg) {
1028 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1029 ArgOffset);
1030 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1031 // If this function is vararg, store any remaining integer argument regs to
1032 // their spots on the stack so that they may be loaded by deferencing the
1033 // result of va_next.
1034 SmallVector<SDOperand, 8> MemOps;
1035 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1036 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1037 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1038 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1039 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1040 MemOps.push_back(Store);
1041 // Increment the address by four for the next argument to store
1042 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1043 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1044 }
1045 if (!MemOps.empty())
1046 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1047 }
1048
1049 ArgValues.push_back(Root);
1050
1051 // Return the new list of results.
1052 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1053 Op.Val->value_end());
1054 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1055}
1056
1057/// isLSAAddress - Return the immediate to use if the specified
1058/// value is representable as a LSA address.
1059static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1060 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1061 if (!C) return 0;
1062
1063 int Addr = C->getValue();
1064 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1065 (Addr << 14 >> 14) != Addr)
1066 return 0; // Top 14 bits have to be sext of immediate.
1067
1068 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1069}
1070
1071static
1072SDOperand
1073LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1074 SDOperand Chain = Op.getOperand(0);
1075#if 0
1076 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1077 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1078#endif
1079 SDOperand Callee = Op.getOperand(4);
1080 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1081 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1082 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1083 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1084
1085 // Handy pointer type
1086 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1087
1088 // Accumulate how many bytes are to be pushed on the stack, including the
1089 // linkage area, and parameter passing area. According to the SPU ABI,
1090 // we minimally need space for [LR] and [SP]
1091 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1092
1093 // Set up a copy of the stack pointer for use loading and storing any
1094 // arguments that may not fit in the registers available for argument
1095 // passing.
1096 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1097
1098 // Figure out which arguments are going to go in registers, and which in
1099 // memory.
1100 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1101 unsigned ArgRegIdx = 0;
1102
1103 // Keep track of registers passing arguments
1104 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1105 // And the arguments passed on the stack
1106 SmallVector<SDOperand, 8> MemOpChains;
1107
1108 for (unsigned i = 0; i != NumOps; ++i) {
1109 SDOperand Arg = Op.getOperand(5+2*i);
1110
1111 // PtrOff will be used to store the current argument to the stack if a
1112 // register cannot be found for it.
1113 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1114 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1115
1116 switch (Arg.getValueType()) {
1117 default: assert(0 && "Unexpected ValueType for argument!");
1118 case MVT::i32:
1119 case MVT::i64:
1120 case MVT::i128:
1121 if (ArgRegIdx != NumArgRegs) {
1122 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1123 } else {
1124 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1125 ArgOffset += StackSlotSize;
1126 }
1127 break;
1128 case MVT::f32:
1129 case MVT::f64:
1130 if (ArgRegIdx != NumArgRegs) {
1131 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1132 } else {
1133 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1134 ArgOffset += StackSlotSize;
1135 }
1136 break;
1137 case MVT::v4f32:
1138 case MVT::v4i32:
1139 case MVT::v8i16:
1140 case MVT::v16i8:
1141 if (ArgRegIdx != NumArgRegs) {
1142 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1143 } else {
1144 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1145 ArgOffset += StackSlotSize;
1146 }
1147 break;
1148 }
1149 }
1150
1151 // Update number of stack bytes actually used, insert a call sequence start
1152 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1153 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1154
1155 if (!MemOpChains.empty()) {
1156 // Adjust the stack pointer for the stack arguments.
1157 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1158 &MemOpChains[0], MemOpChains.size());
1159 }
1160
1161 // Build a sequence of copy-to-reg nodes chained together with token chain
1162 // and flag operands which copy the outgoing args into the appropriate regs.
1163 SDOperand InFlag;
1164 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1165 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1166 InFlag);
1167 InFlag = Chain.getValue(1);
1168 }
1169
1170 std::vector<MVT::ValueType> NodeTys;
1171 NodeTys.push_back(MVT::Other); // Returns a chain
1172 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1173
1174 SmallVector<SDOperand, 8> Ops;
1175 unsigned CallOpc = SPUISD::CALL;
1176
1177 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1178 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1179 // node so that legalize doesn't hack it.
1180 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1181 GlobalValue *GV = G->getGlobal();
1182 unsigned CalleeVT = Callee.getValueType();
1183
1184 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1185 // style calls, otherwise, external symbols are BRASL calls.
1186 // NOTE:
1187 // This may be an unsafe assumption for JIT and really large compilation
1188 // units.
1189 if (GV->isDeclaration()) {
1190 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1191 } else {
1192 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1193 DAG.getTargetGlobalAddress(GV, CalleeVT),
1194 DAG.getConstant(0, PtrVT));
1195 }
1196 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1197 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1198 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1199 // If this is an absolute destination address that appears to be a legal
1200 // local store address, use the munged value.
1201 Callee = SDOperand(Dest, 0);
1202
1203 Ops.push_back(Chain);
1204 Ops.push_back(Callee);
1205
1206 // Add argument registers to the end of the list so that they are known live
1207 // into the call.
1208 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1209 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1210 RegsToPass[i].second.getValueType()));
1211
1212 if (InFlag.Val)
1213 Ops.push_back(InFlag);
1214 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1215 InFlag = Chain.getValue(1);
1216
1217 SDOperand ResultVals[3];
1218 unsigned NumResults = 0;
1219 NodeTys.clear();
1220
1221 // If the call has results, copy the values out of the ret val registers.
1222 switch (Op.Val->getValueType(0)) {
1223 default: assert(0 && "Unexpected ret value!");
1224 case MVT::Other: break;
1225 case MVT::i32:
1226 if (Op.Val->getValueType(1) == MVT::i32) {
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1228 ResultVals[0] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1230 Chain.getValue(2)).getValue(1);
1231 ResultVals[1] = Chain.getValue(0);
1232 NumResults = 2;
1233 NodeTys.push_back(MVT::i32);
1234 } else {
1235 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1236 ResultVals[0] = Chain.getValue(0);
1237 NumResults = 1;
1238 }
1239 NodeTys.push_back(MVT::i32);
1240 break;
1241 case MVT::i64:
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1243 ResultVals[0] = Chain.getValue(0);
1244 NumResults = 1;
1245 NodeTys.push_back(MVT::i64);
1246 break;
1247 case MVT::f32:
1248 case MVT::f64:
1249 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1250 InFlag).getValue(1);
1251 ResultVals[0] = Chain.getValue(0);
1252 NumResults = 1;
1253 NodeTys.push_back(Op.Val->getValueType(0));
1254 break;
1255 case MVT::v2f64:
1256 case MVT::v4f32:
1257 case MVT::v4i32:
1258 case MVT::v8i16:
1259 case MVT::v16i8:
1260 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1261 InFlag).getValue(1);
1262 ResultVals[0] = Chain.getValue(0);
1263 NumResults = 1;
1264 NodeTys.push_back(Op.Val->getValueType(0));
1265 break;
1266 }
1267
1268 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1269 DAG.getConstant(NumStackBytes, PtrVT));
1270 NodeTys.push_back(MVT::Other);
1271
1272 // If the function returns void, just return the chain.
1273 if (NumResults == 0)
1274 return Chain;
1275
1276 // Otherwise, merge everything together with a MERGE_VALUES node.
1277 ResultVals[NumResults++] = Chain;
1278 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1279 ResultVals, NumResults);
1280 return Res.getValue(Op.ResNo);
1281}
1282
1283static SDOperand
1284LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1285 SmallVector<CCValAssign, 16> RVLocs;
1286 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1287 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1288 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1289 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1290
1291 // If this is the first return lowered for this function, add the regs to the
1292 // liveout set for the function.
1293 if (DAG.getMachineFunction().liveout_empty()) {
1294 for (unsigned i = 0; i != RVLocs.size(); ++i)
1295 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1296 }
1297
1298 SDOperand Chain = Op.getOperand(0);
1299 SDOperand Flag;
1300
1301 // Copy the result values into the output registers.
1302 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1303 CCValAssign &VA = RVLocs[i];
1304 assert(VA.isRegLoc() && "Can only return in registers!");
1305 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1306 Flag = Chain.getValue(1);
1307 }
1308
1309 if (Flag.Val)
1310 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1311 else
1312 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1313}
1314
1315
1316//===----------------------------------------------------------------------===//
1317// Vector related lowering:
1318//===----------------------------------------------------------------------===//
1319
1320static ConstantSDNode *
1321getVecImm(SDNode *N) {
1322 SDOperand OpVal(0, 0);
1323
1324 // Check to see if this buildvec has a single non-undef value in its elements.
1325 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1326 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1327 if (OpVal.Val == 0)
1328 OpVal = N->getOperand(i);
1329 else if (OpVal != N->getOperand(i))
1330 return 0;
1331 }
1332
1333 if (OpVal.Val != 0) {
1334 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1335 return CN;
1336 }
1337 }
1338
1339 return 0; // All UNDEF: use implicit def.; not Constant node
1340}
1341
1342/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1343/// and the value fits into an unsigned 18-bit constant, and if so, return the
1344/// constant
1345SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1346 MVT::ValueType ValueType) {
1347 if (ConstantSDNode *CN = getVecImm(N)) {
1348 uint64_t Value = CN->getValue();
1349 if (Value <= 0x3ffff)
1350 return DAG.getConstant(Value, ValueType);
1351 }
1352
1353 return SDOperand();
1354}
1355
1356/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1357/// and the value fits into a signed 16-bit constant, and if so, return the
1358/// constant
1359SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1360 MVT::ValueType ValueType) {
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 if (ValueType == MVT::i32) {
1363 int Value = (int) CN->getValue();
1364 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1365
1366 if (Value == SExtValue)
1367 return DAG.getConstant(Value, ValueType);
1368 } else if (ValueType == MVT::i16) {
1369 short Value = (short) CN->getValue();
1370 int SExtValue = ((int) Value << 16) >> 16;
1371
1372 if (Value == (short) SExtValue)
1373 return DAG.getConstant(Value, ValueType);
1374 } else if (ValueType == MVT::i64) {
1375 int64_t Value = CN->getValue();
1376 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1377
1378 if (Value == SExtValue)
1379 return DAG.getConstant(Value, ValueType);
1380 }
1381 }
1382
1383 return SDOperand();
1384}
1385
1386/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1387/// and the value fits into a signed 10-bit constant, and if so, return the
1388/// constant
1389SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1390 MVT::ValueType ValueType) {
1391 if (ConstantSDNode *CN = getVecImm(N)) {
1392 int Value = (int) CN->getValue();
1393 if ((ValueType == MVT::i32 && isS10Constant(Value))
1394 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1395 return DAG.getConstant(Value, ValueType);
1396 }
1397
1398 return SDOperand();
1399}
1400
1401/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1402/// and the value fits into a signed 8-bit constant, and if so, return the
1403/// constant.
1404///
1405/// @note: The incoming vector is v16i8 because that's the only way we can load
1406/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1407/// same value.
1408SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1409 MVT::ValueType ValueType) {
1410 if (ConstantSDNode *CN = getVecImm(N)) {
1411 int Value = (int) CN->getValue();
1412 if (ValueType == MVT::i16
1413 && Value <= 0xffff /* truncated from uint64_t */
1414 && ((short) Value >> 8) == ((short) Value & 0xff))
1415 return DAG.getConstant(Value & 0xff, ValueType);
1416 else if (ValueType == MVT::i8
1417 && (Value & 0xff) == Value)
1418 return DAG.getConstant(Value, ValueType);
1419 }
1420
1421 return SDOperand();
1422}
1423
1424/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1425/// and the value fits into a signed 16-bit constant, and if so, return the
1426/// constant
1427SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1428 MVT::ValueType ValueType) {
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 uint64_t Value = CN->getValue();
1431 if ((ValueType == MVT::i32
1432 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1433 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1434 return DAG.getConstant(Value >> 16, ValueType);
1435 }
1436
1437 return SDOperand();
1438}
1439
1440/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1441SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1444 }
1445
1446 return SDOperand();
1447}
1448
1449/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1450SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1451 if (ConstantSDNode *CN = getVecImm(N)) {
1452 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1453 }
1454
1455 return SDOperand();
1456}
1457
1458// If this is a vector of constants or undefs, get the bits. A bit in
1459// UndefBits is set if the corresponding element of the vector is an
1460// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1461// zero. Return true if this is not an array of constants, false if it is.
1462//
1463static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1464 uint64_t UndefBits[2]) {
1465 // Start with zero'd results.
1466 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1467
1468 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1469 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1470 SDOperand OpVal = BV->getOperand(i);
1471
1472 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1473 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1474
1475 uint64_t EltBits = 0;
1476 if (OpVal.getOpcode() == ISD::UNDEF) {
1477 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1478 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1479 continue;
1480 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1481 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1482 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1483 const APFloat &apf = CN->getValueAPF();
1484 EltBits = (CN->getValueType(0) == MVT::f32
1485 ? FloatToBits(apf.convertToFloat())
1486 : DoubleToBits(apf.convertToDouble()));
1487 } else {
1488 // Nonconstant element.
1489 return true;
1490 }
1491
1492 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1493 }
1494
1495 //printf("%llx %llx %llx %llx\n",
1496 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1497 return false;
1498}
1499
1500/// If this is a splat (repetition) of a value across the whole vector, return
1501/// the smallest size that splats it. For example, "0x01010101010101..." is a
1502/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1503/// SplatSize = 1 byte.
1504static bool isConstantSplat(const uint64_t Bits128[2],
1505 const uint64_t Undef128[2],
1506 int MinSplatBits,
1507 uint64_t &SplatBits, uint64_t &SplatUndef,
1508 int &SplatSize) {
1509 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1510 // the same as the lower 64-bits, ignoring undefs.
1511 uint64_t Bits64 = Bits128[0] | Bits128[1];
1512 uint64_t Undef64 = Undef128[0] & Undef128[1];
1513 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1514 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1515 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1516 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1517
1518 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1519 if (MinSplatBits < 64) {
1520
1521 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1522 // undefs.
1523 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1524 if (MinSplatBits < 32) {
1525
1526 // If the top 16-bits are different than the lower 16-bits, ignoring
1527 // undefs, we have an i32 splat.
1528 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1529 if (MinSplatBits < 16) {
1530 // If the top 8-bits are different than the lower 8-bits, ignoring
1531 // undefs, we have an i16 splat.
1532 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1533 // Otherwise, we have an 8-bit splat.
1534 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1535 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1536 SplatSize = 1;
1537 return true;
1538 }
1539 } else {
1540 SplatBits = Bits16;
1541 SplatUndef = Undef16;
1542 SplatSize = 2;
1543 return true;
1544 }
1545 }
1546 } else {
1547 SplatBits = Bits32;
1548 SplatUndef = Undef32;
1549 SplatSize = 4;
1550 return true;
1551 }
1552 }
1553 } else {
1554 SplatBits = Bits128[0];
1555 SplatUndef = Undef128[0];
1556 SplatSize = 8;
1557 return true;
1558 }
1559 }
1560
1561 return false; // Can't be a splat if two pieces don't match.
1562}
1563
1564// If this is a case we can't handle, return null and let the default
1565// expansion code take care of it. If we CAN select this case, and if it
1566// selects to a single instruction, return Op. Otherwise, if we can codegen
1567// this case more efficiently than a constant pool load, lower it to the
1568// sequence of ops that should be used.
1569static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1570 MVT::ValueType VT = Op.getValueType();
1571 // If this is a vector of constants or undefs, get the bits. A bit in
1572 // UndefBits is set if the corresponding element of the vector is an
1573 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1574 // zero.
1575 uint64_t VectorBits[2];
1576 uint64_t UndefBits[2];
1577 uint64_t SplatBits, SplatUndef;
1578 int SplatSize;
1579 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1580 || !isConstantSplat(VectorBits, UndefBits,
1581 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1582 SplatBits, SplatUndef, SplatSize))
1583 return SDOperand(); // Not a constant vector, not a splat.
1584
1585 switch (VT) {
1586 default:
1587 case MVT::v4f32: {
1588 uint32_t Value32 = SplatBits;
1589 assert(SplatSize == 4
1590 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1591 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1592 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1593 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1594 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1595 break;
1596 }
1597 case MVT::v2f64: {
1598 uint64_t f64val = SplatBits;
1599 assert(SplatSize == 8
1600 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1601 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1602 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1603 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1604 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1605 break;
1606 }
1607 case MVT::v16i8: {
1608 // 8-bit constants have to be expanded to 16-bits
1609 unsigned short Value16 = SplatBits | (SplatBits << 8);
1610 SDOperand Ops[8];
1611 for (int i = 0; i < 8; ++i)
1612 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1613 return DAG.getNode(ISD::BIT_CONVERT, VT,
1614 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1615 }
1616 case MVT::v8i16: {
1617 unsigned short Value16;
1618 if (SplatSize == 2)
1619 Value16 = (unsigned short) (SplatBits & 0xffff);
1620 else
1621 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1622 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1623 SDOperand Ops[8];
1624 for (int i = 0; i < 8; ++i) Ops[i] = T;
1625 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1626 }
1627 case MVT::v4i32: {
1628 unsigned int Value = SplatBits;
1629 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1631 }
1632 case MVT::v2i64: {
1633 uint64_t val = SplatBits;
1634 uint32_t upper = uint32_t(val >> 32);
1635 uint32_t lower = uint32_t(val);
1636
1637 if (val != 0) {
1638 SDOperand LO32;
1639 SDOperand HI32;
1640 SmallVector<SDOperand, 16> ShufBytes;
1641 SDOperand Result;
1642 bool upper_special, lower_special;
1643
1644 // NOTE: This code creates common-case shuffle masks that can be easily
1645 // detected as common expressions. It is not attempting to create highly
1646 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1647
1648 // Detect if the upper or lower half is a special shuffle mask pattern:
1649 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1650 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1651
1652 // Create lower vector if not a special pattern
1653 if (!lower_special) {
1654 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1655 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1656 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1657 LO32C, LO32C, LO32C, LO32C));
1658 }
1659
1660 // Create upper vector if not a special pattern
1661 if (!upper_special) {
1662 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1663 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 HI32C, HI32C, HI32C, HI32C));
1666 }
1667
1668 // If either upper or lower are special, then the two input operands are
1669 // the same (basically, one of them is a "don't care")
1670 if (lower_special)
1671 LO32 = HI32;
1672 if (upper_special)
1673 HI32 = LO32;
1674 if (lower_special && upper_special) {
1675 // Unhappy situation... both upper and lower are special, so punt with
1676 // a target constant:
1677 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1678 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1679 Zero, Zero);
1680 }
1681
1682 for (int i = 0; i < 4; ++i) {
1683 for (int j = 0; j < 4; ++j) {
1684 SDOperand V;
1685 bool process_upper, process_lower;
1686 uint64_t val;
1687
1688 process_upper = (upper_special && (i & 1) == 0);
1689 process_lower = (lower_special && (i & 1) == 1);
1690
1691 if (process_upper || process_lower) {
1692 if ((process_upper && upper == 0)
1693 || (process_lower && lower == 0))
1694 val = 0x80;
1695 else if ((process_upper && upper == 0xffffffff)
1696 || (process_lower && lower == 0xffffffff))
1697 val = 0xc0;
1698 else if ((process_upper && upper == 0x80000000)
1699 || (process_lower && lower == 0x80000000))
1700 val = (j == 0 ? 0xe0 : 0x80);
1701 } else
1702 val = i * 4 + j + ((i & 1) * 16);
1703
1704 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1705 }
1706 }
1707
1708 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1709 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1710 &ShufBytes[0], ShufBytes.size()));
1711 } else {
1712 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1713 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1714 return DAG.getNode(ISD::BIT_CONVERT, VT,
1715 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1716 Zero, Zero, Zero, Zero));
1717 }
1718 }
1719 }
1720
1721 return SDOperand();
1722}
1723
1724/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1725/// which the Cell can operate. The code inspects V3 to ascertain whether the
1726/// permutation vector, V3, is monotonically increasing with one "exception"
1727/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1728/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1729/// In either case, the net result is going to eventually invoke SHUFB to
1730/// permute/shuffle the bytes from V1 and V2.
1731/// \note
1732/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1733/// control word for byte/halfword/word insertion. This takes care of a single
1734/// element move from V2 into V1.
1735/// \note
1736/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1737static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1738 SDOperand V1 = Op.getOperand(0);
1739 SDOperand V2 = Op.getOperand(1);
1740 SDOperand PermMask = Op.getOperand(2);
1741
1742 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1743
1744 // If we have a single element being moved from V1 to V2, this can be handled
1745 // using the C*[DX] compute mask instructions, but the vector elements have
1746 // to be monotonically increasing with one exception element.
1747 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1748 unsigned EltsFromV2 = 0;
1749 unsigned V2Elt = 0;
1750 unsigned V2EltIdx0 = 0;
1751 unsigned CurrElt = 0;
1752 bool monotonic = true;
1753 if (EltVT == MVT::i8)
1754 V2EltIdx0 = 16;
1755 else if (EltVT == MVT::i16)
1756 V2EltIdx0 = 8;
1757 else if (EltVT == MVT::i32)
1758 V2EltIdx0 = 4;
1759 else
1760 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1761
1762 for (unsigned i = 0, e = PermMask.getNumOperands();
1763 EltsFromV2 <= 1 && monotonic && i != e;
1764 ++i) {
1765 unsigned SrcElt;
1766 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1767 SrcElt = 0;
1768 else
1769 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1770
1771 if (SrcElt >= V2EltIdx0) {
1772 ++EltsFromV2;
1773 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1774 } else if (CurrElt != SrcElt) {
1775 monotonic = false;
1776 }
1777
1778 ++CurrElt;
1779 }
1780
1781 if (EltsFromV2 == 1 && monotonic) {
1782 // Compute mask and shuffle
1783 MachineFunction &MF = DAG.getMachineFunction();
1784 SSARegMap *RegMap = MF.getSSARegMap();
1785 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1786 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1787 // Initialize temporary register to 0
1788 SDOperand InitTempReg =
1789 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1790 // Copy register's contents as index in INSERT_MASK:
1791 SDOperand ShufMaskOp =
1792 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1793 DAG.getTargetConstant(V2Elt, MVT::i32),
1794 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1795 // Use shuffle mask in SHUFB synthetic instruction:
1796 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1797 } else {
1798 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1799 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1800
1801 SmallVector<SDOperand, 16> ResultMask;
1802 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1803 unsigned SrcElt;
1804 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1805 SrcElt = 0;
1806 else
1807 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1808
1809 for (unsigned j = 0; j != BytesPerElement; ++j) {
1810 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1811 MVT::i8));
1812 }
1813 }
1814
1815 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1816 &ResultMask[0], ResultMask.size());
1817 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1818 }
1819}
1820
1821static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1822 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1823
1824 if (Op0.Val->getOpcode() == ISD::Constant) {
1825 // For a constant, build the appropriate constant vector, which will
1826 // eventually simplify to a vector register load.
1827
1828 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1829 SmallVector<SDOperand, 16> ConstVecValues;
1830 MVT::ValueType VT;
1831 size_t n_copies;
1832
1833 // Create a constant vector:
1834 switch (Op.getValueType()) {
1835 default: assert(0 && "Unexpected constant value type in "
1836 "LowerSCALAR_TO_VECTOR");
1837 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1838 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1839 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1840 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1841 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1842 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1843 }
1844
1845 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1846 for (size_t j = 0; j < n_copies; ++j)
1847 ConstVecValues.push_back(CValue);
1848
1849 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1850 &ConstVecValues[0], ConstVecValues.size());
1851 } else {
1852 // Otherwise, copy the value from one register to another:
1853 switch (Op0.getValueType()) {
1854 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1855 case MVT::i8:
1856 case MVT::i16:
1857 case MVT::i32:
1858 case MVT::i64:
1859 case MVT::f32:
1860 case MVT::f64:
1861 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1862 }
1863 }
1864
1865 return SDOperand();
1866}
1867
1868static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1869 switch (Op.getValueType()) {
1870 case MVT::v4i32: {
1871 SDOperand rA = Op.getOperand(0);
1872 SDOperand rB = Op.getOperand(1);
1873 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1874 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1875 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1876 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1877
1878 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1879 break;
1880 }
1881
1882 // Multiply two v8i16 vectors (pipeline friendly version):
1883 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1884 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1885 // c) Use SELB to select upper and lower halves from the intermediate results
1886 //
1887 // NOTE: We really want to move the FSMBI to earlier to actually get the
1888 // dual-issue. This code does manage to do this, even if it's a little on
1889 // the wacky side
1890 case MVT::v8i16: {
1891 MachineFunction &MF = DAG.getMachineFunction();
1892 SSARegMap *RegMap = MF.getSSARegMap();
1893 SDOperand Chain = Op.getOperand(0);
1894 SDOperand rA = Op.getOperand(0);
1895 SDOperand rB = Op.getOperand(1);
1896 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1897 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1898
1899 SDOperand FSMBOp =
1900 DAG.getCopyToReg(Chain, FSMBIreg,
1901 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1902 DAG.getConstant(0xcccc, MVT::i32)));
1903
1904 SDOperand HHProd =
1905 DAG.getCopyToReg(FSMBOp, HiProdReg,
1906 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1907
1908 SDOperand HHProd_v4i32 =
1909 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1910 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1911
1912 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1913 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1914 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1915 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1916 HHProd_v4i32,
1917 DAG.getConstant(16, MVT::i16))),
1918 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1919 }
1920
1921 // This M00sE is N@stI! (apologies to Monty Python)
1922 //
1923 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1924 // is to break it all apart, sign extend, and reassemble the various
1925 // intermediate products.
1926 case MVT::v16i8: {
1927 MachineFunction &MF = DAG.getMachineFunction();
1928 SSARegMap *RegMap = MF.getSSARegMap();
1929 SDOperand Chain = Op.getOperand(0);
1930 SDOperand rA = Op.getOperand(0);
1931 SDOperand rB = Op.getOperand(1);
1932 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1933 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1934
1935 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1937 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1938
1939 SDOperand LLProd =
1940 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1941 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1942 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1943
1944 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1945
1946 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1947
1948 SDOperand LHProd =
1949 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1950 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1951
1952 SDOperand FSMBdef_2222 =
1953 DAG.getCopyToReg(Chain, FSMBreg_2222,
1954 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1955 DAG.getConstant(0x2222, MVT::i32)));
1956
1957 SDOperand FSMBuse_2222 =
1958 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1959
1960 SDOperand LoProd_1 =
1961 DAG.getCopyToReg(Chain, LoProd_reg,
1962 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1963 FSMBuse_2222));
1964
1965 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1966
1967 SDOperand LoProd =
1968 DAG.getNode(ISD::AND, MVT::v4i32,
1969 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1970 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1971 LoProdMask, LoProdMask,
1972 LoProdMask, LoProdMask));
1973
1974 SDOperand rAH =
1975 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1977
1978 SDOperand rBH =
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1981
1982 SDOperand HLProd =
1983 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1986
1987 SDOperand HHProd_1 =
1988 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1992 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1993
1994 SDOperand HHProd =
1995 DAG.getCopyToReg(Chain, HiProd_reg,
1996 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1997 HLProd,
1998 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1999 FSMBuse_2222));
2000
2001 SDOperand HiProd =
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2003 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2004
2005 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2006 DAG.getNode(ISD::OR, MVT::v4i32,
2007 LoProd, HiProd));
2008 }
2009
2010 default:
2011 cerr << "CellSPU: Unknown vector multiplication, got "
2012 << MVT::getValueTypeString(Op.getValueType())
2013 << "\n";
2014 abort();
2015 /*NOTREACHED*/
2016 }
2017
2018 return SDOperand();
2019}
2020
2021static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2022 MachineFunction &MF = DAG.getMachineFunction();
2023 SSARegMap *RegMap = MF.getSSARegMap();
2024
2025 SDOperand A = Op.getOperand(0);
2026 SDOperand B = Op.getOperand(1);
2027 unsigned VT = Op.getValueType();
2028
2029 unsigned VRegBR, VRegC;
2030
2031 if (VT == MVT::f32) {
2032 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2033 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2034 } else {
2035 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2036 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2037 }
2038 // TODO: make sure we're feeding FPInterp the right arguments
2039 // Right now: fi B, frest(B)
2040
2041 // Computes BRcpl =
2042 // (Floating Interpolate (FP Reciprocal Estimate B))
2043 SDOperand BRcpl =
2044 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2045 DAG.getNode(SPUISD::FPInterp, VT, B,
2046 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2047
2048 // Computes A * BRcpl and stores in a temporary register
2049 SDOperand AxBRcpl =
2050 DAG.getCopyToReg(BRcpl, VRegC,
2051 DAG.getNode(ISD::FMUL, VT, A,
2052 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2053 // What's the Chain variable do? It's magic!
2054 // TODO: set Chain = Op(0).getEntryNode()
2055
2056 return DAG.getNode(ISD::FADD, VT,
2057 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2058 DAG.getNode(ISD::FMUL, VT,
2059 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2060 DAG.getNode(ISD::FSUB, VT, A,
2061 DAG.getNode(ISD::FMUL, VT, B,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2063}
2064
2065// Expands double-precision FDIV
2066// Expects two doubles as inputs X and Y, does a floating point
2067// reciprocal estimate, and three iterations of Newton-Raphson
2068// to increase accuracy.
2069//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2070// MachineFunction &MF = DAG.getMachineFunction();
2071// SSARegMap *RegMap = MF.getSSARegMap();
2072//
2073// SDOperand X = Op.getOperand(0);
2074// SDOperand Y = Op.getOperand(1);
2075//}
2076
2077static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2078 unsigned VT = Op.getValueType();
2079 SDOperand N = Op.getOperand(0);
2080 SDOperand Elt = Op.getOperand(1);
2081 SDOperand ShufMask[16];
2082 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2083
2084 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2085
2086 int EltNo = (int) C->getValue();
2087
2088 // sanity checks:
2089 if (VT == MVT::i8 && EltNo >= 16)
2090 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2091 else if (VT == MVT::i16 && EltNo >= 8)
2092 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2093 else if (VT == MVT::i32 && EltNo >= 4)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2095 else if (VT == MVT::i64 && EltNo >= 2)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2097
2098 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2099 // i32 and i64: Element 0 is the preferred slot
2100 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2101 }
2102
2103 // Need to generate shuffle mask and extract:
Scott Michel0e5665b2007-12-19 21:17:42 +00002104 int prefslot_begin = -1, prefslot_end = -1;
Scott Michel266bc8f2007-12-04 22:23:35 +00002105 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2106
2107 switch (VT) {
2108 case MVT::i8: {
2109 prefslot_begin = prefslot_end = 3;
2110 break;
2111 }
2112 case MVT::i16: {
2113 prefslot_begin = 2; prefslot_end = 3;
2114 break;
2115 }
2116 case MVT::i32: {
2117 prefslot_begin = 0; prefslot_end = 3;
2118 break;
2119 }
2120 case MVT::i64: {
2121 prefslot_begin = 0; prefslot_end = 7;
2122 break;
2123 }
2124 }
2125
Scott Michel0e5665b2007-12-19 21:17:42 +00002126 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2127 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2128
Scott Michel266bc8f2007-12-04 22:23:35 +00002129 for (int i = 0; i < 16; ++i) {
2130 // zero fill uppper part of preferred slot, don't care about the
2131 // other slots:
2132 unsigned int mask_val;
2133
2134 if (i <= prefslot_end) {
2135 mask_val =
2136 ((i < prefslot_begin)
2137 ? 0x80
2138 : elt_byte + (i - prefslot_begin));
2139
Scott Michel0e5665b2007-12-19 21:17:42 +00002140 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel266bc8f2007-12-04 22:23:35 +00002141 } else
2142 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2143 }
2144
2145 SDOperand ShufMaskVec =
2146 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2147 &ShufMask[0],
2148 sizeof(ShufMask) / sizeof(ShufMask[0]));
2149
2150 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2151 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2152 N, N, ShufMaskVec));
2153
2154}
2155
2156static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2157 SDOperand VecOp = Op.getOperand(0);
2158 SDOperand ValOp = Op.getOperand(1);
2159 SDOperand IdxOp = Op.getOperand(2);
2160 MVT::ValueType VT = Op.getValueType();
2161
2162 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2163 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2164
2165 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2166 // Use $2 because it's always 16-byte aligned and it's available:
2167 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2168
2169 SDOperand result =
2170 DAG.getNode(SPUISD::SHUFB, VT,
2171 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2172 VecOp,
2173 DAG.getNode(SPUISD::INSERT_MASK, VT,
2174 DAG.getNode(ISD::ADD, PtrVT,
2175 PtrBase,
2176 DAG.getConstant(CN->getValue(),
2177 PtrVT))));
2178
2179 return result;
2180}
2181
2182static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2183 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2184
2185 assert(Op.getValueType() == MVT::i8);
2186 switch (Opc) {
2187 default:
2188 assert(0 && "Unhandled i8 math operator");
2189 /*NOTREACHED*/
2190 break;
2191 case ISD::SUB: {
2192 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2193 // the result:
2194 SDOperand N1 = Op.getOperand(1);
2195 N0 = (N0.getOpcode() != ISD::Constant
2196 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2197 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2198 N1 = (N1.getOpcode() != ISD::Constant
2199 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2200 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2201 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202 DAG.getNode(Opc, MVT::i16, N0, N1));
2203 }
2204 case ISD::ROTR:
2205 case ISD::ROTL: {
2206 SDOperand N1 = Op.getOperand(1);
2207 unsigned N1Opc;
2208 N0 = (N0.getOpcode() != ISD::Constant
2209 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2211 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2212 N1 = (N1.getOpcode() != ISD::Constant
2213 ? DAG.getNode(N1Opc, MVT::i16, N1)
2214 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2215 SDOperand ExpandArg =
2216 DAG.getNode(ISD::OR, MVT::i16, N0,
2217 DAG.getNode(ISD::SHL, MVT::i16,
2218 N0, DAG.getConstant(8, MVT::i16)));
2219 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2220 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2221 }
2222 case ISD::SRL:
2223 case ISD::SHL: {
2224 SDOperand N1 = Op.getOperand(1);
2225 unsigned N1Opc;
2226 N0 = (N0.getOpcode() != ISD::Constant
2227 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2228 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2229 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2230 N1 = (N1.getOpcode() != ISD::Constant
2231 ? DAG.getNode(N1Opc, MVT::i16, N1)
2232 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2233 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2234 DAG.getNode(Opc, MVT::i16, N0, N1));
2235 }
2236 case ISD::SRA: {
2237 SDOperand N1 = Op.getOperand(1);
2238 unsigned N1Opc;
2239 N0 = (N0.getOpcode() != ISD::Constant
2240 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2241 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2242 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2243 N1 = (N1.getOpcode() != ISD::Constant
2244 ? DAG.getNode(N1Opc, MVT::i16, N1)
2245 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2246 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2247 DAG.getNode(Opc, MVT::i16, N0, N1));
2248 }
2249 case ISD::MUL: {
2250 SDOperand N1 = Op.getOperand(1);
2251 unsigned N1Opc;
2252 N0 = (N0.getOpcode() != ISD::Constant
2253 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2254 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2255 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2256 N1 = (N1.getOpcode() != ISD::Constant
2257 ? DAG.getNode(N1Opc, MVT::i16, N1)
2258 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2259 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2260 DAG.getNode(Opc, MVT::i16, N0, N1));
2261 break;
2262 }
2263 }
2264
2265 return SDOperand();
2266}
2267
2268//! Lower byte immediate operations for v16i8 vectors:
2269static SDOperand
2270LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2271 SDOperand ConstVec;
2272 SDOperand Arg;
2273 MVT::ValueType VT = Op.getValueType();
2274
2275 ConstVec = Op.getOperand(0);
2276 Arg = Op.getOperand(1);
2277 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2278 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2279 ConstVec = ConstVec.getOperand(0);
2280 } else {
2281 ConstVec = Op.getOperand(1);
2282 Arg = Op.getOperand(0);
2283 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284 ConstVec = ConstVec.getOperand(0);
2285 }
2286 }
2287 }
2288
2289 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2290 uint64_t VectorBits[2];
2291 uint64_t UndefBits[2];
2292 uint64_t SplatBits, SplatUndef;
2293 int SplatSize;
2294
2295 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2296 && isConstantSplat(VectorBits, UndefBits,
2297 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2298 SplatBits, SplatUndef, SplatSize)) {
2299 SDOperand tcVec[16];
2300 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2301 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2302
2303 // Turn the BUILD_VECTOR into a set of target constants:
2304 for (size_t i = 0; i < tcVecSize; ++i)
2305 tcVec[i] = tc;
2306
2307 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2308 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2309 }
2310 }
2311
2312 return SDOperand();
2313}
2314
2315//! Lower i32 multiplication
2316static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2317 unsigned Opc) {
2318 switch (VT) {
2319 default:
2320 cerr << "CellSPU: Unknown LowerMUL value type, got "
2321 << MVT::getValueTypeString(Op.getValueType())
2322 << "\n";
2323 abort();
2324 /*NOTREACHED*/
2325
2326 case MVT::i32: {
2327 SDOperand rA = Op.getOperand(0);
2328 SDOperand rB = Op.getOperand(1);
2329
2330 return DAG.getNode(ISD::ADD, MVT::i32,
2331 DAG.getNode(ISD::ADD, MVT::i32,
2332 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2333 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2334 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2335 }
2336 }
2337
2338 return SDOperand();
2339}
2340
2341//! Custom lowering for CTPOP (count population)
2342/*!
2343 Custom lowering code that counts the number ones in the input
2344 operand. SPU has such an instruction, but it counts the number of
2345 ones per byte, which then have to be accumulated.
2346*/
2347static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2348 unsigned VT = Op.getValueType();
2349 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2350
2351 switch (VT) {
2352 case MVT::i8: {
2353 SDOperand N = Op.getOperand(0);
2354 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2355
2356 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2357 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2358
2359 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2360 }
2361
2362 case MVT::i16: {
2363 MachineFunction &MF = DAG.getMachineFunction();
2364 SSARegMap *RegMap = MF.getSSARegMap();
2365
2366 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2367
2368 SDOperand N = Op.getOperand(0);
2369 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2370 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2371 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2372
2373 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2374 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2375
2376 // CNTB_result becomes the chain to which all of the virtual registers
2377 // CNTB_reg, SUM1_reg become associated:
2378 SDOperand CNTB_result =
2379 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2380
2381 SDOperand CNTB_rescopy =
2382 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2383
2384 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2385
2386 return DAG.getNode(ISD::AND, MVT::i16,
2387 DAG.getNode(ISD::ADD, MVT::i16,
2388 DAG.getNode(ISD::SRL, MVT::i16,
2389 Tmp1, Shift1),
2390 Tmp1),
2391 Mask0);
2392 }
2393
2394 case MVT::i32: {
2395 MachineFunction &MF = DAG.getMachineFunction();
2396 SSARegMap *RegMap = MF.getSSARegMap();
2397
2398 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2399 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2400
2401 SDOperand N = Op.getOperand(0);
2402 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2403 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2404 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2405 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2406
2407 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2408 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2409
2410 // CNTB_result becomes the chain to which all of the virtual registers
2411 // CNTB_reg, SUM1_reg become associated:
2412 SDOperand CNTB_result =
2413 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2414
2415 SDOperand CNTB_rescopy =
2416 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2417
2418 SDOperand Comp1 =
2419 DAG.getNode(ISD::SRL, MVT::i32,
2420 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2421
2422 SDOperand Sum1 =
2423 DAG.getNode(ISD::ADD, MVT::i32,
2424 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2425
2426 SDOperand Sum1_rescopy =
2427 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2428
2429 SDOperand Comp2 =
2430 DAG.getNode(ISD::SRL, MVT::i32,
2431 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2432 Shift2);
2433 SDOperand Sum2 =
2434 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2435 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2436
2437 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2438 }
2439
2440 case MVT::i64:
2441 break;
2442 }
2443
2444 return SDOperand();
2445}
2446
2447/// LowerOperation - Provide custom lowering hooks for some operations.
2448///
2449SDOperand
2450SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2451{
2452 switch (Op.getOpcode()) {
2453 default: {
2454 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2455 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2456 cerr << "*Op.Val:\n";
2457 Op.Val->dump();
2458 abort();
2459 }
2460 case ISD::LOAD:
2461 case ISD::SEXTLOAD:
2462 case ISD::ZEXTLOAD:
2463 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2464 case ISD::STORE:
2465 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::ConstantPool:
2467 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::GlobalAddress:
2469 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::JumpTable:
2471 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2472 case ISD::Constant:
2473 return LowerConstant(Op, DAG);
2474 case ISD::ConstantFP:
2475 return LowerConstantFP(Op, DAG);
2476 case ISD::FORMAL_ARGUMENTS:
2477 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2478 case ISD::CALL:
2479 return LowerCALL(Op, DAG);
2480 case ISD::RET:
2481 return LowerRET(Op, DAG, getTargetMachine());
2482
2483 // i8 math ops:
2484 case ISD::SUB:
2485 case ISD::ROTR:
2486 case ISD::ROTL:
2487 case ISD::SRL:
2488 case ISD::SHL:
2489 case ISD::SRA:
2490 return LowerI8Math(Op, DAG, Op.getOpcode());
2491
2492 // Vector-related lowering.
2493 case ISD::BUILD_VECTOR:
2494 return LowerBUILD_VECTOR(Op, DAG);
2495 case ISD::SCALAR_TO_VECTOR:
2496 return LowerSCALAR_TO_VECTOR(Op, DAG);
2497 case ISD::VECTOR_SHUFFLE:
2498 return LowerVECTOR_SHUFFLE(Op, DAG);
2499 case ISD::EXTRACT_VECTOR_ELT:
2500 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2501 case ISD::INSERT_VECTOR_ELT:
2502 return LowerINSERT_VECTOR_ELT(Op, DAG);
2503
2504 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2505 case ISD::AND:
2506 case ISD::OR:
2507 case ISD::XOR:
2508 return LowerByteImmed(Op, DAG);
2509
2510 // Vector and i8 multiply:
2511 case ISD::MUL:
2512 if (MVT::isVector(Op.getValueType()))
2513 return LowerVectorMUL(Op, DAG);
2514 else if (Op.getValueType() == MVT::i8)
2515 return LowerI8Math(Op, DAG, Op.getOpcode());
2516 else
2517 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2518
2519 case ISD::FDIV:
2520 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2521 return LowerFDIVf32(Op, DAG);
2522// else if (Op.getValueType() == MVT::f64)
2523// return LowerFDIVf64(Op, DAG);
2524 else
2525 assert(0 && "Calling FDIV on unsupported MVT");
2526
2527 case ISD::CTPOP:
2528 return LowerCTPOP(Op, DAG);
2529 }
2530
2531 return SDOperand();
2532}
2533
2534//===----------------------------------------------------------------------===//
2535// Other Lowering Code
2536//===----------------------------------------------------------------------===//
2537
2538MachineBasicBlock *
2539SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2540 MachineBasicBlock *BB)
2541{
2542 return BB;
2543}
2544
2545//===----------------------------------------------------------------------===//
2546// Target Optimization Hooks
2547//===----------------------------------------------------------------------===//
2548
2549SDOperand
2550SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2551{
2552#if 0
2553 TargetMachine &TM = getTargetMachine();
2554 SelectionDAG &DAG = DCI.DAG;
2555#endif
2556 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2557
2558 switch (N->getOpcode()) {
2559 default: break;
2560
2561 // Look for obvious optimizations for shift left:
2562 // a) Replace 0 << V with 0
2563 // b) Replace V << 0 with V
2564 //
2565 // N.B: llvm will generate an undef node if the shift amount is greater than
2566 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2567 case SPU::SHLIr32:
2568 case SPU::SHLHIr16:
2569 case SPU::SHLQBIIvec:
2570 case SPU::ROTHIr16:
2571 case SPU::ROTHIr16_i32:
2572 case SPU::ROTIr32:
2573 case SPU::ROTIr32_i16:
2574 case SPU::ROTQBYIvec:
2575 case SPU::ROTQBYBIvec:
2576 case SPU::ROTQBIIvec:
2577 case SPU::ROTHMIr16:
2578 case SPU::ROTMIr32:
2579 case SPU::ROTQMBYIvec: {
2580 if (N0.getOpcode() == ISD::Constant) {
2581 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2582 if (C->getValue() == 0) // 0 << V -> 0.
2583 return N0;
2584 }
2585 }
2586 SDOperand N1 = N->getOperand(1);
2587 if (N1.getOpcode() == ISD::Constant) {
2588 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2589 if (C->getValue() == 0) // V << 0 -> V
2590 return N1;
2591 }
2592 }
2593 break;
2594 }
2595 }
2596
2597 return SDOperand();
2598}
2599
2600//===----------------------------------------------------------------------===//
2601// Inline Assembly Support
2602//===----------------------------------------------------------------------===//
2603
2604/// getConstraintType - Given a constraint letter, return the type of
2605/// constraint it is for this target.
2606SPUTargetLowering::ConstraintType
2607SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2608 if (ConstraintLetter.size() == 1) {
2609 switch (ConstraintLetter[0]) {
2610 default: break;
2611 case 'b':
2612 case 'r':
2613 case 'f':
2614 case 'v':
2615 case 'y':
2616 return C_RegisterClass;
2617 }
2618 }
2619 return TargetLowering::getConstraintType(ConstraintLetter);
2620}
2621
2622std::pair<unsigned, const TargetRegisterClass*>
2623SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2624 MVT::ValueType VT) const
2625{
2626 if (Constraint.size() == 1) {
2627 // GCC RS6000 Constraint Letters
2628 switch (Constraint[0]) {
2629 case 'b': // R1-R31
2630 case 'r': // R0-R31
2631 if (VT == MVT::i64)
2632 return std::make_pair(0U, SPU::R64CRegisterClass);
2633 return std::make_pair(0U, SPU::R32CRegisterClass);
2634 case 'f':
2635 if (VT == MVT::f32)
2636 return std::make_pair(0U, SPU::R32FPRegisterClass);
2637 else if (VT == MVT::f64)
2638 return std::make_pair(0U, SPU::R64FPRegisterClass);
2639 break;
2640 case 'v':
2641 return std::make_pair(0U, SPU::GPRCRegisterClass);
2642 }
2643 }
2644
2645 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2646}
2647
2648void
2649SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2650 uint64_t Mask,
2651 uint64_t &KnownZero,
2652 uint64_t &KnownOne,
2653 const SelectionDAG &DAG,
2654 unsigned Depth ) const {
2655 KnownZero = 0;
2656 KnownOne = 0;
2657}
2658
2659// LowerAsmOperandForConstraint
2660void
2661SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2662 char ConstraintLetter,
2663 std::vector<SDOperand> &Ops,
2664 SelectionDAG &DAG) {
2665 // Default, for the time being, to the base class handler
2666 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2667}
2668
2669/// isLegalAddressImmediate - Return true if the integer value can be used
2670/// as the offset of the target addressing mode.
2671bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2672 // SPU's addresses are 256K:
2673 return (V > -(1 << 18) && V < (1 << 18) - 1);
2674}
2675
2676bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2677 return false;
2678}