blob: 27119f2cc72e71b48e5177846b85dca24c6b8b33 [file] [log] [blame]
Scott Michel266bc8f2007-12-04 22:23:35 +00001//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by a team from the Computer Systems Research
Scott Michel2466c372007-12-05 01:40:25 +00006// Department at The Aerospace Corporation and is distributed under the
7// University of Illinois Open Source License. See LICENSE.TXT for details.
Scott Michel266bc8f2007-12-04 22:23:35 +00008//
9//===----------------------------------------------------------------------===//
10//
11// This file implements the SPUTargetLowering class.
12//
13//===----------------------------------------------------------------------===//
14
15#include "SPURegisterNames.h"
16#include "SPUISelLowering.h"
17#include "SPUTargetMachine.h"
18#include "llvm/ADT/VectorExtras.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SSARegMap.h"
26#include "llvm/Constants.h"
27#include "llvm/Function.h"
28#include "llvm/Intrinsics.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32
33#include <map>
34
35using namespace llvm;
36
37// Used in getTargetNodeName() below
38namespace {
39 std::map<unsigned, const char *> node_names;
40
41 //! MVT::ValueType mapping to useful data for Cell SPU
42 struct valtype_map_s {
43 const MVT::ValueType valtype;
44 const int prefslot_byte;
45 };
46
47 const valtype_map_s valtype_map[] = {
48 { MVT::i1, 3 },
49 { MVT::i8, 3 },
50 { MVT::i16, 2 },
51 { MVT::i32, 0 },
52 { MVT::f32, 0 },
53 { MVT::i64, 0 },
54 { MVT::f64, 0 },
55 { MVT::i128, 0 }
56 };
57
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59
60 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
61 const valtype_map_s *retval = 0;
62
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
66 break;
67 }
68 }
69
70#ifndef NDEBUG
71 if (retval == 0) {
72 cerr << "getValueTypeMapEntry returns NULL for "
73 << MVT::getValueTypeString(VT)
74 << "\n";
75 abort();
76 }
77#endif
78
79 return retval;
80 }
81
82 //! Predicate that returns true if operand is a memory target
83 /*!
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
87 address.
88 */
89 bool isMemoryOperand(const SDOperand &Op)
90 {
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
105 }
106}
107
108SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
109 : TargetLowering(TM),
110 SPUTM(TM)
111{
112 // Fold away setcc operations if possible.
113 setPow2DivIsCheap();
114
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
118
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel504c3692007-12-17 22:32:34 +0000122 // NOTE: Ignore the previous note. For now. :-)
123 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
124 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
125 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
126 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
127 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
128 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000129 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
130
131 // SPU has no sign or zero extended loads for i1, i8, i16:
132 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
133 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
134 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
135 setStoreXAction(MVT::i1, Custom);
136
137 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
140 setStoreXAction(MVT::i8, Custom);
141
142 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
144 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
145
146 // SPU constant load actions are custom lowered:
147 setOperationAction(ISD::Constant, MVT::i64, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
149 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
150
151 // SPU's loads and stores have to be custom lowered:
152 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
153 ++sctype) {
154 setOperationAction(ISD::LOAD, sctype, Custom);
155 setOperationAction(ISD::STORE, sctype, Custom);
156 }
157
158 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
159 // into BR_CCs. BR_CC instructions are custom selected in
160 // SPUDAGToDAGISel.
161 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
162
163 // Expand the jumptable branches
164 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
165 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
166 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
167
168 // SPU has no intrinsics for these particular operations:
169 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
170 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
171 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
172
173 // PowerPC has no SREM/UREM instructions
174 setOperationAction(ISD::SREM, MVT::i32, Expand);
175 setOperationAction(ISD::UREM, MVT::i32, Expand);
176 setOperationAction(ISD::SREM, MVT::i64, Expand);
177 setOperationAction(ISD::UREM, MVT::i64, Expand);
178
179 // We don't support sin/cos/sqrt/fmod
180 setOperationAction(ISD::FSIN , MVT::f64, Expand);
181 setOperationAction(ISD::FCOS , MVT::f64, Expand);
182 setOperationAction(ISD::FREM , MVT::f64, Expand);
183 setOperationAction(ISD::FSIN , MVT::f32, Expand);
184 setOperationAction(ISD::FCOS , MVT::f32, Expand);
185 setOperationAction(ISD::FREM , MVT::f32, Expand);
186
187 // If we're enabling GP optimizations, use hardware square root
188 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
189 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
190
191 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
192 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
193
194 // SPU can do rotate right and left, so legalize it... but customize for i8
195 // because instructions don't exist.
196 setOperationAction(ISD::ROTR, MVT::i32, Legal);
197 setOperationAction(ISD::ROTR, MVT::i16, Legal);
198 setOperationAction(ISD::ROTR, MVT::i8, Custom);
199 setOperationAction(ISD::ROTL, MVT::i32, Legal);
200 setOperationAction(ISD::ROTL, MVT::i16, Legal);
201 setOperationAction(ISD::ROTL, MVT::i8, Custom);
202 // SPU has no native version of shift left/right for i8
203 setOperationAction(ISD::SHL, MVT::i8, Custom);
204 setOperationAction(ISD::SRL, MVT::i8, Custom);
205 setOperationAction(ISD::SRA, MVT::i8, Custom);
206
207 // Custom lower i32 multiplications
208 setOperationAction(ISD::MUL, MVT::i32, Custom);
209
210 // Need to custom handle (some) common i8 math ops
211 setOperationAction(ISD::SUB, MVT::i8, Custom);
212 setOperationAction(ISD::MUL, MVT::i8, Custom);
213
214 // SPU does not have BSWAP. It does have i32 support CTLZ.
215 // CTPOP has to be custom lowered.
216 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
217 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
218
219 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
222 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
223
224 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
225 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
226
227 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
228
229 // SPU does not have select or setcc
230 setOperationAction(ISD::SELECT, MVT::i1, Expand);
231 setOperationAction(ISD::SELECT, MVT::i8, Expand);
232 setOperationAction(ISD::SELECT, MVT::i16, Expand);
233 setOperationAction(ISD::SELECT, MVT::i32, Expand);
234 setOperationAction(ISD::SELECT, MVT::i64, Expand);
235 setOperationAction(ISD::SELECT, MVT::f32, Expand);
236 setOperationAction(ISD::SELECT, MVT::f64, Expand);
237
238 setOperationAction(ISD::SETCC, MVT::i1, Expand);
239 setOperationAction(ISD::SETCC, MVT::i8, Expand);
240 setOperationAction(ISD::SETCC, MVT::i16, Expand);
241 setOperationAction(ISD::SETCC, MVT::i32, Expand);
242 setOperationAction(ISD::SETCC, MVT::i64, Expand);
243 setOperationAction(ISD::SETCC, MVT::f32, Expand);
244 setOperationAction(ISD::SETCC, MVT::f64, Expand);
245
246 // SPU has a legal FP -> signed INT instruction
247 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
250 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
251
252 // FDIV on SPU requires custom lowering
253 setOperationAction(ISD::FDIV, MVT::f32, Custom);
254 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
255
256 // SPU has [U|S]INT_TO_FP
257 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265
Scott Michel86c041f2007-12-20 00:44:13 +0000266 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
267 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
268 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
269 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
Scott Michel266bc8f2007-12-04 22:23:35 +0000270
271 // We cannot sextinreg(i1). Expand to shifts.
272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
273
274 // Support label based line numbers.
275 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
276 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
277
278 // We want to legalize GlobalAddress and ConstantPool nodes into the
279 // appropriate instructions to materialize the address.
280 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
283 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
284 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
287 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
288
289 // RET must be custom lowered, to meet ABI requirements
290 setOperationAction(ISD::RET, MVT::Other, Custom);
291
292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
293 setOperationAction(ISD::VASTART , MVT::Other, Custom);
294
295 // Use the default implementation.
296 setOperationAction(ISD::VAARG , MVT::Other, Expand);
297 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
298 setOperationAction(ISD::VAEND , MVT::Other, Expand);
299 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
300 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
302 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
303
304 // Cell SPU has instructions for converting between i64 and fp.
305 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
306 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307
308 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
309 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
310
311 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
313
314 // First set operation action for all vector types to expand. Then we
315 // will selectively turn on ones that can be effectively codegen'd.
316 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
322
323 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
324 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
325 // add/sub are legal for all supported vector VT's.
326 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
327 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
328 // mul has to be custom lowered.
329 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
330
331 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
336 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
337
338 // These operations need to be expanded:
339 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
343 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
344
345 // Custom lower build_vector, constant pool spills, insert and
346 // extract vector elements:
347 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
352 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
353 }
354
355 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
356 setOperationAction(ISD::AND, MVT::v16i8, Custom);
357 setOperationAction(ISD::OR, MVT::v16i8, Custom);
358 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
360
361 setSetCCResultType(MVT::i32);
362 setShiftAmountType(MVT::i32);
363 setSetCCResultContents(ZeroOrOneSetCCResult);
364
365 setStackPointerRegisterToSaveRestore(SPU::R1);
366
367 // We have target-specific dag combine patterns for the following nodes:
368 // e.g., setTargetDAGCombine(ISD::SUB);
369
370 computeRegisterProperties();
371}
372
373const char *
374SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375{
376 if (node_names.empty()) {
377 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
378 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
379 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
380 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
417 }
418
419 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420
421 return ((i != node_names.end()) ? i->second : 0);
422}
423
424//===----------------------------------------------------------------------===//
425// Calling convention code:
426//===----------------------------------------------------------------------===//
427
428#include "SPUGenCallingConv.inc"
429
430//===----------------------------------------------------------------------===//
431// LowerOperation implementation
432//===----------------------------------------------------------------------===//
433
434/// Custom lower loads for CellSPU
435/*!
436 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
437 within a 16-byte block, we have to rotate to extract the requested element.
438 */
439static SDOperand
440LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
441 LoadSDNode *LN = cast<LoadSDNode>(Op);
442 SDOperand basep = LN->getBasePtr();
443 SDOperand the_chain = LN->getChain();
Scott Michel86c041f2007-12-20 00:44:13 +0000444 MVT::ValueType BasepOpc = basep.Val->getOpcode();
Scott Michel266bc8f2007-12-04 22:23:35 +0000445 MVT::ValueType VT = LN->getLoadedVT();
446 MVT::ValueType OpVT = Op.Val->getValueType(0);
447 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
448 ISD::LoadExtType ExtType = LN->getExtensionType();
449 unsigned alignment = LN->getAlignment();
450 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
451 SDOperand Ops[8];
452
Scott Michel86c041f2007-12-20 00:44:13 +0000453 if (BasepOpc == ISD::FrameIndex) {
454 // Loading from a frame index is always properly aligned. Always.
455 return SDOperand();
456 }
457
Scott Michel266bc8f2007-12-04 22:23:35 +0000458 // For an extending load of an i1 variable, just call it i8 (or whatever we
459 // were passed) and make it zero-extended:
460 if (VT == MVT::i1) {
461 VT = OpVT;
462 ExtType = ISD::ZEXTLOAD;
463 }
464
465 switch (LN->getAddressingMode()) {
466 case ISD::UNINDEXED: {
467 SDOperand result;
468 SDOperand rot_op, rotamt;
469 SDOperand ptrp;
470 int c_offset;
471 int c_rotamt;
472
473 // The vector type we really want to be when we load the 16-byte chunk
474 MVT::ValueType vecVT, opVecVT;
475
Scott Michel86c041f2007-12-20 00:44:13 +0000476 vecVT = MVT::v16i8;
Scott Michel266bc8f2007-12-04 22:23:35 +0000477 if (VT != MVT::i1)
478 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
Scott Michel266bc8f2007-12-04 22:23:35 +0000479 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
480
481 if (basep.getOpcode() == ISD::ADD) {
482 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
483
484 assert(CN != NULL
485 && "LowerLOAD: ISD::ADD operand 1 is not constant");
486
487 c_offset = (int) CN->getValue();
488 c_rotamt = (int) (c_offset & 0xf);
489
490 // Adjust the rotation amount to ensure that the final result ends up in
491 // the preferred slot:
492 c_rotamt -= vtm->prefslot_byte;
493 ptrp = basep.getOperand(0);
494 } else {
495 c_offset = 0;
496 c_rotamt = -vtm->prefslot_byte;
497 ptrp = basep;
498 }
499
500 if (alignment == 16) {
501 // 16-byte aligned load into preferred slot, no rotation
502 if (c_rotamt == 0) {
503 if (isMemoryOperand(ptrp))
504 // Return unchanged
505 return SDOperand();
506 else {
507 // Return modified D-Form address for pointer:
508 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
509 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
510 if (VT == OpVT)
511 return DAG.getLoad(VT, LN->getChain(), ptrp,
512 LN->getSrcValue(), LN->getSrcValueOffset(),
513 LN->isVolatile(), 16);
514 else
515 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
516 LN->getSrcValueOffset(), OpVT,
517 LN->isVolatile(), 16);
518 }
519 } else {
520 // Need to rotate...
521 if (c_rotamt < 0)
522 c_rotamt += 16;
523 // Realign the base pointer, with a D-Form address
524 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
525 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
526 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
527 else
528 basep = ptrp;
529
530 // Rotate the load:
531 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
532 LN->getSrcValue(), LN->getSrcValueOffset(),
533 LN->isVolatile(), 16);
534 the_chain = rot_op.getValue(1);
535 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
536
537 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
538 Ops[0] = the_chain;
539 Ops[1] = rot_op;
540 Ops[2] = rotamt;
541
542 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
543 the_chain = result.getValue(1);
544
545 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
546 SDVTList scalarvts;
547 Ops[0] = the_chain;
548 Ops[1] = result;
549 if (OpVT == VT) {
550 scalarvts = DAG.getVTList(VT, MVT::Other);
551 } else {
552 scalarvts = DAG.getVTList(OpVT, MVT::Other);
553 }
554
555 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
556 result);
557 Ops[0] = the_chain;
558 Ops[1] = result;
559 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
560 the_chain = result.getValue(1);
561 } else {
562 // Handle the sign and zero-extending loads for i1 and i8:
563 unsigned NewOpC;
564
565 if (ExtType == ISD::SEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_SEXT
568 : SPUISD::EXTRACT_I8_SEXT);
Chris Lattner52ec3752007-12-22 22:47:03 +0000569 } else {
570 assert(ExtType == ISD::ZEXTLOAD);
Scott Michel266bc8f2007-12-04 22:23:35 +0000571 NewOpC = (OpVT == MVT::i1
572 ? SPUISD::EXTRACT_I1_ZEXT
573 : SPUISD::EXTRACT_I8_ZEXT);
574 }
575
576 result = DAG.getNode(NewOpC, OpVT, result);
577 }
578
579 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
580 SDOperand retops[2] = { result, the_chain };
581
582 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
583 return result;
584 /*UNREACHED*/
585 }
586 } else {
587 // Misaligned 16-byte load:
588 if (basep.getOpcode() == ISD::LOAD) {
589 LN = cast<LoadSDNode>(basep);
590 if (LN->getAlignment() == 16) {
591 // We can verify that we're really loading from a 16-byte aligned
592 // chunk. Encapsulate basep as a D-Form address and return a new
593 // load:
594 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
595 DAG.getConstant(0, PtrVT));
596 if (OpVT == VT)
597 return DAG.getLoad(VT, LN->getChain(), basep,
598 LN->getSrcValue(), LN->getSrcValueOffset(),
599 LN->isVolatile(), 16);
600 else
601 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
602 LN->getSrcValue(), LN->getSrcValueOffset(),
603 OpVT, LN->isVolatile(), 16);
604 }
605 }
606
607 // Catch all other cases where we can't guarantee that we have a
608 // 16-byte aligned entity, which means resorting to an X-form
609 // address scheme:
610
611 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
Scott Michel86c041f2007-12-20 00:44:13 +0000612 SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
613 SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
Scott Michel266bc8f2007-12-04 22:23:35 +0000614
615 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
616
617 SDOperand alignLoad =
618 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
619 LN->getSrcValue(), LN->getSrcValueOffset(),
620 LN->isVolatile(), 16);
621
622 SDOperand insertEltOp =
623 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
624
625 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
626 alignLoad,
627 alignLoad,
628 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
629
630 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
631
632 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
633 SDOperand retops[2] = { result, the_chain };
634
635 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
636 return result;
637 }
638 break;
639 }
640 case ISD::PRE_INC:
641 case ISD::PRE_DEC:
642 case ISD::POST_INC:
643 case ISD::POST_DEC:
644 case ISD::LAST_INDEXED_MODE:
645 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646 "UNINDEXED\n";
647 cerr << (unsigned) LN->getAddressingMode() << "\n";
648 abort();
649 /*NOTREACHED*/
650 }
651
652 return SDOperand();
653}
654
655/// Custom lower stores for CellSPU
656/*!
657 All CellSPU stores are aligned to 16-byte boundaries, so for elements
658 within a 16-byte block, we have to generate a shuffle to insert the
659 requested element into its place, then store the resulting block.
660 */
661static SDOperand
662LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
663 StoreSDNode *SN = cast<StoreSDNode>(Op);
664 SDOperand Value = SN->getValue();
665 MVT::ValueType VT = Value.getValueType();
666 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
667 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
668 SDOperand the_chain = SN->getChain();
Chris Lattner4d321c52007-12-05 18:32:18 +0000669 //unsigned alignment = SN->getAlignment();
670 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
Scott Michel266bc8f2007-12-04 22:23:35 +0000671
672 switch (SN->getAddressingMode()) {
673 case ISD::UNINDEXED: {
674 SDOperand basep = SN->getBasePtr();
675 SDOperand ptrOp;
676 int offset;
677
Scott Michel9999e682007-12-19 07:35:06 +0000678 if (basep.getOpcode() == ISD::FrameIndex) {
679 // FrameIndex nodes are always properly aligned. Really.
680 return SDOperand();
681 }
682
Scott Michel266bc8f2007-12-04 22:23:35 +0000683 if (basep.getOpcode() == ISD::ADD) {
684 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
685 assert(CN != NULL
686 && "LowerSTORE: ISD::ADD operand 1 is not constant");
687 offset = unsigned(CN->getValue());
688 ptrOp = basep.getOperand(0);
689 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
690 << offset
691 << "\n");
692 } else {
693 ptrOp = basep;
694 offset = 0;
695 }
696
697 // The vector type we really want to load from the 16-byte chunk, except
698 // in the case of MVT::i1, which has to be v16i8.
699 unsigned vecVT, stVecVT;
700
701 if (StVT != MVT::i1)
702 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
703 else
704 stVecVT = MVT::v16i8;
705 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
706
Scott Michel9999e682007-12-19 07:35:06 +0000707 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
708 // the actual dform addr offs($reg).
709 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
710 DAG.getConstant((offset & ~0xf), PtrVT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000711
712 // Create the 16-byte aligned vector load
713 SDOperand alignLoad =
714 DAG.getLoad(vecVT, the_chain, basep,
715 SN->getSrcValue(), SN->getSrcValueOffset(),
716 SN->isVolatile(), 16);
717 the_chain = alignLoad.getValue(1);
718
719 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
720 SDOperand theValue = SN->getValue();
721 SDOperand result;
722
723 if (StVT != VT
724 && (theValue.getOpcode() == ISD::AssertZext
725 || theValue.getOpcode() == ISD::AssertSext)) {
726 // Drill down and get the value for zero- and sign-extended
727 // quantities
728 theValue = theValue.getOperand(0);
729 }
730
731 SDOperand insertEltOp =
732 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
733 DAG.getNode(SPUISD::DFormAddr, PtrVT,
734 ptrOp,
735 DAG.getConstant((offset & 0xf), PtrVT)));
736
737 result = DAG.getNode(SPUISD::SHUFB, vecVT,
738 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
739 alignLoad,
740 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
741
742 result = DAG.getStore(the_chain, result, basep,
743 LN->getSrcValue(), LN->getSrcValueOffset(),
744 LN->isVolatile(), LN->getAlignment());
745
746 return result;
747 /*UNREACHED*/
748 }
749 case ISD::PRE_INC:
750 case ISD::PRE_DEC:
751 case ISD::POST_INC:
752 case ISD::POST_DEC:
753 case ISD::LAST_INDEXED_MODE:
754 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
755 "UNINDEXED\n";
756 cerr << (unsigned) SN->getAddressingMode() << "\n";
757 abort();
758 /*NOTREACHED*/
759 }
760
761 return SDOperand();
762}
763
764/// Generate the address of a constant pool entry.
765static SDOperand
766LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
767 MVT::ValueType PtrVT = Op.getValueType();
768 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
769 Constant *C = CP->getConstVal();
770 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
771 const TargetMachine &TM = DAG.getTarget();
772 SDOperand Zero = DAG.getConstant(0, PtrVT);
773
774 if (TM.getRelocationModel() == Reloc::Static) {
775 if (!ST->usingLargeMem()) {
776 // Just return the SDOperand with the constant pool address in it.
777 return CPI;
778 } else {
779 // Generate hi/lo address pair
780 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
781 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
782
783 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
784 }
785 }
786
787 assert(0 &&
788 "LowerConstantPool: Relocation model other than static not supported.");
789 return SDOperand();
790}
791
792static SDOperand
793LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
794 MVT::ValueType PtrVT = Op.getValueType();
795 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
796 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
797 SDOperand Zero = DAG.getConstant(0, PtrVT);
798 const TargetMachine &TM = DAG.getTarget();
799
800 if (TM.getRelocationModel() == Reloc::Static) {
801 if (!ST->usingLargeMem()) {
802 // Just return the SDOperand with the jump table address in it.
803 return JTI;
804 } else {
805 // Generate hi/lo address pair
806 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
807 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
808
809 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
810 }
811 }
812
813 assert(0 &&
814 "LowerJumpTable: Relocation model other than static not supported.");
815 return SDOperand();
816}
817
818static SDOperand
819LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
820 MVT::ValueType PtrVT = Op.getValueType();
821 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
822 GlobalValue *GV = GSDN->getGlobal();
823 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
824 SDOperand Zero = DAG.getConstant(0, PtrVT);
825 const TargetMachine &TM = DAG.getTarget();
826
827 if (TM.getRelocationModel() == Reloc::Static) {
828 if (!ST->usingLargeMem()) {
829 // Generate a local store address
830 return GA;
831 } else {
832 // Generate hi/lo address pair
833 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
834 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
835
836 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
837 }
838 } else {
839 cerr << "LowerGlobalAddress: Relocation model other than static not "
840 << "supported.\n";
841 abort();
842 /*NOTREACHED*/
843 }
844
845 return SDOperand();
846}
847
848//! Custom lower i64 integer constants
849/*!
850 This code inserts all of the necessary juggling that needs to occur to load
851 a 64-bit constant into a register.
852 */
853static SDOperand
854LowerConstant(SDOperand Op, SelectionDAG &DAG) {
855 unsigned VT = Op.getValueType();
856 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
857
858 if (VT == MVT::i64) {
859 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
860 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
861 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
862
863 } else {
864 cerr << "LowerConstant: unhandled constant type "
865 << MVT::getValueTypeString(VT)
866 << "\n";
867 abort();
868 /*NOTREACHED*/
869 }
870
871 return SDOperand();
872}
873
874//! Custom lower single precision floating point constants
875/*!
876 "float" immediates can be lowered as if they were unsigned 32-bit integers.
877 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
878 target description.
879 */
880static SDOperand
881LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
882 unsigned VT = Op.getValueType();
883 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
884
885 assert((FP != 0) &&
886 "LowerConstantFP: Node is not ConstantFPSDNode");
887
Scott Michel266bc8f2007-12-04 22:23:35 +0000888 if (VT == MVT::f32) {
Scott Michel170783a2007-12-19 20:15:47 +0000889 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel266bc8f2007-12-04 22:23:35 +0000890 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel170783a2007-12-19 20:15:47 +0000891 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000892 } else if (VT == MVT::f64) {
Scott Michel170783a2007-12-19 20:15:47 +0000893 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel266bc8f2007-12-04 22:23:35 +0000894 return DAG.getNode(ISD::BIT_CONVERT, VT,
895 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
896 }
897
898 return SDOperand();
899}
900
901static SDOperand
902LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
903{
904 MachineFunction &MF = DAG.getMachineFunction();
905 MachineFrameInfo *MFI = MF.getFrameInfo();
906 SSARegMap *RegMap = MF.getSSARegMap();
907 SmallVector<SDOperand, 8> ArgValues;
908 SDOperand Root = Op.getOperand(0);
909 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
910
911 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
912 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
913
914 unsigned ArgOffset = SPUFrameInfo::minStackSize();
915 unsigned ArgRegIdx = 0;
916 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
917
918 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
919
920 // Add DAG nodes to load the arguments or copy them out of registers.
921 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
922 SDOperand ArgVal;
923 bool needsLoad = false;
924 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
926
927 switch (ObjectVT) {
928 default: {
929 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
930 << MVT::getValueTypeString(ObjectVT)
931 << "\n";
932 abort();
933 }
934 case MVT::i8:
935 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Scott Michel504c3692007-12-17 22:32:34 +0000936 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000937 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
938 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
939 ++ArgRegIdx;
940 } else {
941 needsLoad = true;
942 }
943 break;
944 case MVT::i16:
945 if (!isVarArg && ArgRegIdx < NumArgRegs) {
946 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
947 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
948 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
949 ++ArgRegIdx;
950 } else {
951 needsLoad = true;
952 }
953 break;
954 case MVT::i32:
955 if (!isVarArg && ArgRegIdx < NumArgRegs) {
956 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
957 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
958 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
959 ++ArgRegIdx;
960 } else {
961 needsLoad = true;
962 }
963 break;
964 case MVT::i64:
965 if (!isVarArg && ArgRegIdx < NumArgRegs) {
966 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
967 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
969 ++ArgRegIdx;
970 } else {
971 needsLoad = true;
972 }
973 break;
974 case MVT::f32:
975 if (!isVarArg && ArgRegIdx < NumArgRegs) {
976 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
977 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
978 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
979 ++ArgRegIdx;
980 } else {
981 needsLoad = true;
982 }
983 break;
984 case MVT::f64:
985 if (!isVarArg && ArgRegIdx < NumArgRegs) {
986 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
987 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
989 ++ArgRegIdx;
990 } else {
991 needsLoad = true;
992 }
993 break;
994 case MVT::v2f64:
995 case MVT::v4f32:
996 case MVT::v4i32:
997 case MVT::v8i16:
998 case MVT::v16i8:
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1001 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1003 ++ArgRegIdx;
1004 } else {
1005 needsLoad = true;
1006 }
1007 break;
1008 }
1009
1010 // We need to load the argument to a virtual register if we determined above
1011 // that we ran out of physical registers of the appropriate type
1012 if (needsLoad) {
1013 // If the argument is actually used, emit a load from the right stack
1014 // slot.
1015 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1016 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1017 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1018 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1019 } else {
1020 // Don't emit a dead load.
1021 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1022 }
1023
1024 ArgOffset += StackSlotSize;
1025 }
1026
1027 ArgValues.push_back(ArgVal);
1028 }
1029
1030 // If the function takes variable number of arguments, make a frame index for
1031 // the start of the first vararg value... for expansion of llvm.va_start.
1032 if (isVarArg) {
1033 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1034 ArgOffset);
1035 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1036 // If this function is vararg, store any remaining integer argument regs to
1037 // their spots on the stack so that they may be loaded by deferencing the
1038 // result of va_next.
1039 SmallVector<SDOperand, 8> MemOps;
1040 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1041 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1042 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1043 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1044 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1045 MemOps.push_back(Store);
1046 // Increment the address by four for the next argument to store
1047 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1048 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1049 }
1050 if (!MemOps.empty())
1051 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1052 }
1053
1054 ArgValues.push_back(Root);
1055
1056 // Return the new list of results.
1057 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1058 Op.Val->value_end());
1059 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1060}
1061
1062/// isLSAAddress - Return the immediate to use if the specified
1063/// value is representable as a LSA address.
1064static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1065 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1066 if (!C) return 0;
1067
1068 int Addr = C->getValue();
1069 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1070 (Addr << 14 >> 14) != Addr)
1071 return 0; // Top 14 bits have to be sext of immediate.
1072
1073 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1074}
1075
1076static
1077SDOperand
1078LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1079 SDOperand Chain = Op.getOperand(0);
1080#if 0
1081 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1082 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1083#endif
1084 SDOperand Callee = Op.getOperand(4);
1085 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1086 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1087 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1088 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1089
1090 // Handy pointer type
1091 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1092
1093 // Accumulate how many bytes are to be pushed on the stack, including the
1094 // linkage area, and parameter passing area. According to the SPU ABI,
1095 // we minimally need space for [LR] and [SP]
1096 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1097
1098 // Set up a copy of the stack pointer for use loading and storing any
1099 // arguments that may not fit in the registers available for argument
1100 // passing.
1101 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1102
1103 // Figure out which arguments are going to go in registers, and which in
1104 // memory.
1105 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1106 unsigned ArgRegIdx = 0;
1107
1108 // Keep track of registers passing arguments
1109 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1110 // And the arguments passed on the stack
1111 SmallVector<SDOperand, 8> MemOpChains;
1112
1113 for (unsigned i = 0; i != NumOps; ++i) {
1114 SDOperand Arg = Op.getOperand(5+2*i);
1115
1116 // PtrOff will be used to store the current argument to the stack if a
1117 // register cannot be found for it.
1118 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1119 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1120
1121 switch (Arg.getValueType()) {
1122 default: assert(0 && "Unexpected ValueType for argument!");
1123 case MVT::i32:
1124 case MVT::i64:
1125 case MVT::i128:
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128 } else {
1129 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1131 }
1132 break;
1133 case MVT::f32:
1134 case MVT::f64:
1135 if (ArgRegIdx != NumArgRegs) {
1136 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 } else {
1138 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139 ArgOffset += StackSlotSize;
1140 }
1141 break;
1142 case MVT::v4f32:
1143 case MVT::v4i32:
1144 case MVT::v8i16:
1145 case MVT::v16i8:
1146 if (ArgRegIdx != NumArgRegs) {
1147 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1148 } else {
1149 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1150 ArgOffset += StackSlotSize;
1151 }
1152 break;
1153 }
1154 }
1155
1156 // Update number of stack bytes actually used, insert a call sequence start
1157 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1158 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1159
1160 if (!MemOpChains.empty()) {
1161 // Adjust the stack pointer for the stack arguments.
1162 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1163 &MemOpChains[0], MemOpChains.size());
1164 }
1165
1166 // Build a sequence of copy-to-reg nodes chained together with token chain
1167 // and flag operands which copy the outgoing args into the appropriate regs.
1168 SDOperand InFlag;
1169 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1170 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1171 InFlag);
1172 InFlag = Chain.getValue(1);
1173 }
1174
1175 std::vector<MVT::ValueType> NodeTys;
1176 NodeTys.push_back(MVT::Other); // Returns a chain
1177 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1178
1179 SmallVector<SDOperand, 8> Ops;
1180 unsigned CallOpc = SPUISD::CALL;
1181
1182 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1183 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1184 // node so that legalize doesn't hack it.
1185 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1186 GlobalValue *GV = G->getGlobal();
1187 unsigned CalleeVT = Callee.getValueType();
1188
1189 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1190 // style calls, otherwise, external symbols are BRASL calls.
1191 // NOTE:
1192 // This may be an unsafe assumption for JIT and really large compilation
1193 // units.
1194 if (GV->isDeclaration()) {
1195 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1196 } else {
1197 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1198 DAG.getTargetGlobalAddress(GV, CalleeVT),
1199 DAG.getConstant(0, PtrVT));
1200 }
1201 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1202 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1203 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1204 // If this is an absolute destination address that appears to be a legal
1205 // local store address, use the munged value.
1206 Callee = SDOperand(Dest, 0);
1207
1208 Ops.push_back(Chain);
1209 Ops.push_back(Callee);
1210
1211 // Add argument registers to the end of the list so that they are known live
1212 // into the call.
1213 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1214 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1215 RegsToPass[i].second.getValueType()));
1216
1217 if (InFlag.Val)
1218 Ops.push_back(InFlag);
1219 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1220 InFlag = Chain.getValue(1);
1221
1222 SDOperand ResultVals[3];
1223 unsigned NumResults = 0;
1224 NodeTys.clear();
1225
1226 // If the call has results, copy the values out of the ret val registers.
1227 switch (Op.Val->getValueType(0)) {
1228 default: assert(0 && "Unexpected ret value!");
1229 case MVT::Other: break;
1230 case MVT::i32:
1231 if (Op.Val->getValueType(1) == MVT::i32) {
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1235 Chain.getValue(2)).getValue(1);
1236 ResultVals[1] = Chain.getValue(0);
1237 NumResults = 2;
1238 NodeTys.push_back(MVT::i32);
1239 } else {
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1242 NumResults = 1;
1243 }
1244 NodeTys.push_back(MVT::i32);
1245 break;
1246 case MVT::i64:
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1249 NumResults = 1;
1250 NodeTys.push_back(MVT::i64);
1251 break;
1252 case MVT::f32:
1253 case MVT::f64:
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1255 InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1257 NumResults = 1;
1258 NodeTys.push_back(Op.Val->getValueType(0));
1259 break;
1260 case MVT::v2f64:
1261 case MVT::v4f32:
1262 case MVT::v4i32:
1263 case MVT::v8i16:
1264 case MVT::v16i8:
1265 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1266 InFlag).getValue(1);
1267 ResultVals[0] = Chain.getValue(0);
1268 NumResults = 1;
1269 NodeTys.push_back(Op.Val->getValueType(0));
1270 break;
1271 }
1272
1273 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1274 DAG.getConstant(NumStackBytes, PtrVT));
1275 NodeTys.push_back(MVT::Other);
1276
1277 // If the function returns void, just return the chain.
1278 if (NumResults == 0)
1279 return Chain;
1280
1281 // Otherwise, merge everything together with a MERGE_VALUES node.
1282 ResultVals[NumResults++] = Chain;
1283 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1284 ResultVals, NumResults);
1285 return Res.getValue(Op.ResNo);
1286}
1287
1288static SDOperand
1289LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1290 SmallVector<CCValAssign, 16> RVLocs;
1291 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1292 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1293 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1294 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1295
1296 // If this is the first return lowered for this function, add the regs to the
1297 // liveout set for the function.
1298 if (DAG.getMachineFunction().liveout_empty()) {
1299 for (unsigned i = 0; i != RVLocs.size(); ++i)
1300 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1301 }
1302
1303 SDOperand Chain = Op.getOperand(0);
1304 SDOperand Flag;
1305
1306 // Copy the result values into the output registers.
1307 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1308 CCValAssign &VA = RVLocs[i];
1309 assert(VA.isRegLoc() && "Can only return in registers!");
1310 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1311 Flag = Chain.getValue(1);
1312 }
1313
1314 if (Flag.Val)
1315 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1316 else
1317 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1318}
1319
1320
1321//===----------------------------------------------------------------------===//
1322// Vector related lowering:
1323//===----------------------------------------------------------------------===//
1324
1325static ConstantSDNode *
1326getVecImm(SDNode *N) {
1327 SDOperand OpVal(0, 0);
1328
1329 // Check to see if this buildvec has a single non-undef value in its elements.
1330 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1331 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1332 if (OpVal.Val == 0)
1333 OpVal = N->getOperand(i);
1334 else if (OpVal != N->getOperand(i))
1335 return 0;
1336 }
1337
1338 if (OpVal.Val != 0) {
1339 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1340 return CN;
1341 }
1342 }
1343
1344 return 0; // All UNDEF: use implicit def.; not Constant node
1345}
1346
1347/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1348/// and the value fits into an unsigned 18-bit constant, and if so, return the
1349/// constant
1350SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1351 MVT::ValueType ValueType) {
1352 if (ConstantSDNode *CN = getVecImm(N)) {
1353 uint64_t Value = CN->getValue();
1354 if (Value <= 0x3ffff)
1355 return DAG.getConstant(Value, ValueType);
1356 }
1357
1358 return SDOperand();
1359}
1360
1361/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1362/// and the value fits into a signed 16-bit constant, and if so, return the
1363/// constant
1364SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 MVT::ValueType ValueType) {
1366 if (ConstantSDNode *CN = getVecImm(N)) {
1367 if (ValueType == MVT::i32) {
1368 int Value = (int) CN->getValue();
1369 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1370
1371 if (Value == SExtValue)
1372 return DAG.getConstant(Value, ValueType);
1373 } else if (ValueType == MVT::i16) {
1374 short Value = (short) CN->getValue();
1375 int SExtValue = ((int) Value << 16) >> 16;
1376
1377 if (Value == (short) SExtValue)
1378 return DAG.getConstant(Value, ValueType);
1379 } else if (ValueType == MVT::i64) {
1380 int64_t Value = CN->getValue();
1381 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1382
1383 if (Value == SExtValue)
1384 return DAG.getConstant(Value, ValueType);
1385 }
1386 }
1387
1388 return SDOperand();
1389}
1390
1391/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1392/// and the value fits into a signed 10-bit constant, and if so, return the
1393/// constant
1394SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1395 MVT::ValueType ValueType) {
1396 if (ConstantSDNode *CN = getVecImm(N)) {
1397 int Value = (int) CN->getValue();
1398 if ((ValueType == MVT::i32 && isS10Constant(Value))
1399 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1400 return DAG.getConstant(Value, ValueType);
1401 }
1402
1403 return SDOperand();
1404}
1405
1406/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1407/// and the value fits into a signed 8-bit constant, and if so, return the
1408/// constant.
1409///
1410/// @note: The incoming vector is v16i8 because that's the only way we can load
1411/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412/// same value.
1413SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 MVT::ValueType ValueType) {
1415 if (ConstantSDNode *CN = getVecImm(N)) {
1416 int Value = (int) CN->getValue();
1417 if (ValueType == MVT::i16
1418 && Value <= 0xffff /* truncated from uint64_t */
1419 && ((short) Value >> 8) == ((short) Value & 0xff))
1420 return DAG.getConstant(Value & 0xff, ValueType);
1421 else if (ValueType == MVT::i8
1422 && (Value & 0xff) == Value)
1423 return DAG.getConstant(Value, ValueType);
1424 }
1425
1426 return SDOperand();
1427}
1428
1429/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1430/// and the value fits into a signed 16-bit constant, and if so, return the
1431/// constant
1432SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 MVT::ValueType ValueType) {
1434 if (ConstantSDNode *CN = getVecImm(N)) {
1435 uint64_t Value = CN->getValue();
1436 if ((ValueType == MVT::i32
1437 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1438 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1439 return DAG.getConstant(Value >> 16, ValueType);
1440 }
1441
1442 return SDOperand();
1443}
1444
1445/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1446SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1447 if (ConstantSDNode *CN = getVecImm(N)) {
1448 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1449 }
1450
1451 return SDOperand();
1452}
1453
1454/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1455SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1456 if (ConstantSDNode *CN = getVecImm(N)) {
1457 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1458 }
1459
1460 return SDOperand();
1461}
1462
1463// If this is a vector of constants or undefs, get the bits. A bit in
1464// UndefBits is set if the corresponding element of the vector is an
1465// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1466// zero. Return true if this is not an array of constants, false if it is.
1467//
1468static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1469 uint64_t UndefBits[2]) {
1470 // Start with zero'd results.
1471 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472
1473 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1474 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1475 SDOperand OpVal = BV->getOperand(i);
1476
1477 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1478 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479
1480 uint64_t EltBits = 0;
1481 if (OpVal.getOpcode() == ISD::UNDEF) {
1482 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1483 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 continue;
1485 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1486 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1487 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1488 const APFloat &apf = CN->getValueAPF();
1489 EltBits = (CN->getValueType(0) == MVT::f32
1490 ? FloatToBits(apf.convertToFloat())
1491 : DoubleToBits(apf.convertToDouble()));
1492 } else {
1493 // Nonconstant element.
1494 return true;
1495 }
1496
1497 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1498 }
1499
1500 //printf("%llx %llx %llx %llx\n",
1501 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1502 return false;
1503}
1504
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Fold the 128-bit value down to 64-, 32- and 16-bit candidates up front.
  // Undefs never prevent a splat from matching: their bits read as zero.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit halves must agree (ignoring undefs) for any splat at all.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false; // Can't be a splat if two pieces don't match.

  if (MinSplatBits < 64) {
    // Do the two 32-bit halves agree, ignoring undefs?
    if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
      if (MinSplatBits < 32) {
        // Do the two 16-bit halves agree, ignoring undefs?
        if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
          if (MinSplatBits < 16) {
            // Do the two bytes agree, ignoring undefs?
            if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
              // 8-bit splat.
              SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
              SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
              SplatSize = 1;
              return true;
            }
            // NOTE(review): when MinSplatBits < 16 and the two bytes differ,
            // control falls through and no 16-bit splat is reported even
            // though one exists at this point — confirm this is intended.
          } else {
            // 16-bit splat.
            SplatBits = Bits16;
            SplatUndef = Undef16;
            SplatSize = 2;
            return true;
          }
        }
      } else {
        // 32-bit splat.
        SplatBits = Bits32;
        SplatUndef = Undef32;
        SplatSize = 4;
        return true;
      }
    }
  } else {
    // 64-bit splat.
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize = 8;
    return true;
  }

  return false;
}
1568
1569// If this is a case we can't handle, return null and let the default
1570// expansion code take care of it. If we CAN select this case, and if it
1571// selects to a single instruction, return Op. Otherwise, if we can codegen
1572// this case more efficiently than a constant pool load, lower it to the
1573// sequence of ops that should be used.
1574static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1575 MVT::ValueType VT = Op.getValueType();
1576 // If this is a vector of constants or undefs, get the bits. A bit in
1577 // UndefBits is set if the corresponding element of the vector is an
1578 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1579 // zero.
1580 uint64_t VectorBits[2];
1581 uint64_t UndefBits[2];
1582 uint64_t SplatBits, SplatUndef;
1583 int SplatSize;
1584 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1585 || !isConstantSplat(VectorBits, UndefBits,
1586 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1587 SplatBits, SplatUndef, SplatSize))
1588 return SDOperand(); // Not a constant vector, not a splat.
1589
1590 switch (VT) {
1591 default:
1592 case MVT::v4f32: {
1593 uint32_t Value32 = SplatBits;
1594 assert(SplatSize == 4
1595 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1598 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1599 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1600 break;
1601 }
1602 case MVT::v2f64: {
1603 uint64_t f64val = SplatBits;
1604 assert(SplatSize == 8
1605 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1606 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1608 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1609 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1610 break;
1611 }
1612 case MVT::v16i8: {
1613 // 8-bit constants have to be expanded to 16-bits
1614 unsigned short Value16 = SplatBits | (SplatBits << 8);
1615 SDOperand Ops[8];
1616 for (int i = 0; i < 8; ++i)
1617 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1618 return DAG.getNode(ISD::BIT_CONVERT, VT,
1619 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1620 }
1621 case MVT::v8i16: {
1622 unsigned short Value16;
1623 if (SplatSize == 2)
1624 Value16 = (unsigned short) (SplatBits & 0xffff);
1625 else
1626 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1627 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1628 SDOperand Ops[8];
1629 for (int i = 0; i < 8; ++i) Ops[i] = T;
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1631 }
1632 case MVT::v4i32: {
1633 unsigned int Value = SplatBits;
1634 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1635 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1636 }
1637 case MVT::v2i64: {
1638 uint64_t val = SplatBits;
1639 uint32_t upper = uint32_t(val >> 32);
1640 uint32_t lower = uint32_t(val);
1641
1642 if (val != 0) {
1643 SDOperand LO32;
1644 SDOperand HI32;
1645 SmallVector<SDOperand, 16> ShufBytes;
1646 SDOperand Result;
1647 bool upper_special, lower_special;
1648
1649 // NOTE: This code creates common-case shuffle masks that can be easily
1650 // detected as common expressions. It is not attempting to create highly
1651 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1652
1653 // Detect if the upper or lower half is a special shuffle mask pattern:
1654 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1655 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1656
1657 // Create lower vector if not a special pattern
1658 if (!lower_special) {
1659 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1660 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 LO32C, LO32C, LO32C, LO32C));
1663 }
1664
1665 // Create upper vector if not a special pattern
1666 if (!upper_special) {
1667 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1668 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1669 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1670 HI32C, HI32C, HI32C, HI32C));
1671 }
1672
1673 // If either upper or lower are special, then the two input operands are
1674 // the same (basically, one of them is a "don't care")
1675 if (lower_special)
1676 LO32 = HI32;
1677 if (upper_special)
1678 HI32 = LO32;
1679 if (lower_special && upper_special) {
1680 // Unhappy situation... both upper and lower are special, so punt with
1681 // a target constant:
1682 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1683 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1684 Zero, Zero);
1685 }
1686
1687 for (int i = 0; i < 4; ++i) {
1688 for (int j = 0; j < 4; ++j) {
1689 SDOperand V;
1690 bool process_upper, process_lower;
Chris Lattner52ec3752007-12-22 22:47:03 +00001691 uint64_t val = 0;
Scott Michel266bc8f2007-12-04 22:23:35 +00001692
1693 process_upper = (upper_special && (i & 1) == 0);
1694 process_lower = (lower_special && (i & 1) == 1);
1695
1696 if (process_upper || process_lower) {
1697 if ((process_upper && upper == 0)
1698 || (process_lower && lower == 0))
1699 val = 0x80;
1700 else if ((process_upper && upper == 0xffffffff)
1701 || (process_lower && lower == 0xffffffff))
1702 val = 0xc0;
1703 else if ((process_upper && upper == 0x80000000)
1704 || (process_lower && lower == 0x80000000))
1705 val = (j == 0 ? 0xe0 : 0x80);
1706 } else
1707 val = i * 4 + j + ((i & 1) * 16);
1708
1709 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1710 }
1711 }
1712
1713 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1714 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1715 &ShufBytes[0], ShufBytes.size()));
1716 } else {
1717 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1718 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1719 return DAG.getNode(ISD::BIT_CONVERT, VT,
1720 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1721 Zero, Zero, Zero, Zero));
1722 }
1723 }
1724 }
1725
1726 return SDOperand();
1727}
1728
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // An undef second operand shuffles like V1 with itself.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;     // # of mask entries that index into V2
  unsigned V2Elt = 0;          // byte offset handed to INSERT_MASK below
  unsigned V2EltIdx0 = 0;      // first mask index that refers to V2
  unsigned CurrElt = 0;        // expected next index if the mask is monotonic
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Scan the permutation mask; stop early once more than one element comes
  // from V2 or the V1 indices stop being consecutive.
  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      // NOTE(review): SrcElt >= V2EltIdx0 here, so (V2EltIdx0 - SrcElt)
      // wraps around as an unsigned value before the shift; confirm whether
      // (SrcElt - V2EltIdx0) << 2 was intended before relying on V2Elt.
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      // Expand each element index into its constituent byte indices:
      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1825
1826static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1827 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1828
1829 if (Op0.Val->getOpcode() == ISD::Constant) {
1830 // For a constant, build the appropriate constant vector, which will
1831 // eventually simplify to a vector register load.
1832
1833 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1834 SmallVector<SDOperand, 16> ConstVecValues;
1835 MVT::ValueType VT;
1836 size_t n_copies;
1837
1838 // Create a constant vector:
1839 switch (Op.getValueType()) {
1840 default: assert(0 && "Unexpected constant value type in "
1841 "LowerSCALAR_TO_VECTOR");
1842 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1848 }
1849
1850 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1851 for (size_t j = 0; j < n_copies; ++j)
1852 ConstVecValues.push_back(CValue);
1853
1854 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855 &ConstVecValues[0], ConstVecValues.size());
1856 } else {
1857 // Otherwise, copy the value from one register to another:
1858 switch (Op0.getValueType()) {
1859 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1860 case MVT::i8:
1861 case MVT::i16:
1862 case MVT::i32:
1863 case MVT::i64:
1864 case MVT::f32:
1865 case MVT::f64:
1866 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1867 }
1868 }
1869
1870 return SDOperand();
1871}
1872
//! Custom-lower vector multiplications, which SPU has no single
//! instruction for; each vector type is decomposed into the partial
//! products the hardware can form (MPYH/MPYU/MPYHH/MPY) and recombined.
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  // 32-bit lanes: mul = mpyh(a,b) + mpyh(b,a) + mpyu(a,b), combining the
  // 16x16 partial products available on the hardware.
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    // NOTE(review): Chain and rA both read operand 0 -- MUL carries no chain
    // operand, so the multiplicand doubles as the chain anchor for the
    // CopyToReg nodes below; confirm this is intentional.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc expands to a byte mask selecting the upper halfword of
    // each 32-bit lane; it feeds the SELB below.
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    // Product of the upper halfwords, parked in a virtual register.
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // Select, per byte, between the low product and the high product
    // shifted into position (step c above).
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This is messy, but unavoidable:
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    // NOTE(review): as in the v8i16 case, operand 0 serves both as the
    // multiplicand and as the chain anchor for the CopyToReg nodes.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    // Products of the low bytes of each halfword lane:
    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic shifts sign-extend the high byte of each halfword down:
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // Products of the high bytes, shifted back into the high byte lane:
    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222 yields the byte mask used to interleave low/high byte
    // products in the two SELBs below.
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low halfword of each 32-bit lane of the low products:
    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Now the same dance for the upper halfwords of each 32-bit lane:
    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Merge the low and high halves back into one v16i8 result:
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
2025
//! Expand single-precision FDIV (scalar f32 and v4f32).
/*!
  Uses SPU's reciprocal estimate plus interpolation, then one refinement
  step of the quotient:

    BRcpl   = fi(B, frest(B))                 ~ 1/B
    AxBRcpl = A * BRcpl                       first-cut quotient
    result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)
 */
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  // Scalar f32 lives in the 32-bit FP register class; v4f32 needs a full
  // vector register.
  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // The CopyToReg results above serve as the chains threading the register
  // writes to the CopyFromReg reads below.
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2069
2070// Expands double-precision FDIV
2071// Expects two doubles as inputs X and Y, does a floating point
2072// reciprocal estimate, and three iterations of Newton-Raphson
2073// to increase accuracy.
2074//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2075// MachineFunction &MF = DAG.getMachineFunction();
2076// SSARegMap *RegMap = MF.getSSARegMap();
2077//
2078// SDOperand X = Op.getOperand(0);
2079// SDOperand Y = Op.getOperand(1);
2080//}
2081
2082static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2083 unsigned VT = Op.getValueType();
2084 SDOperand N = Op.getOperand(0);
2085 SDOperand Elt = Op.getOperand(1);
2086 SDOperand ShufMask[16];
2087 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2088
2089 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2090
2091 int EltNo = (int) C->getValue();
2092
2093 // sanity checks:
2094 if (VT == MVT::i8 && EltNo >= 16)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2096 else if (VT == MVT::i16 && EltNo >= 8)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2098 else if (VT == MVT::i32 && EltNo >= 4)
2099 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2100 else if (VT == MVT::i64 && EltNo >= 2)
2101 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2102
2103 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2104 // i32 and i64: Element 0 is the preferred slot
2105 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2106 }
2107
2108 // Need to generate shuffle mask and extract:
Scott Michel0e5665b2007-12-19 21:17:42 +00002109 int prefslot_begin = -1, prefslot_end = -1;
Scott Michel266bc8f2007-12-04 22:23:35 +00002110 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2111
2112 switch (VT) {
2113 case MVT::i8: {
2114 prefslot_begin = prefslot_end = 3;
2115 break;
2116 }
2117 case MVT::i16: {
2118 prefslot_begin = 2; prefslot_end = 3;
2119 break;
2120 }
2121 case MVT::i32: {
2122 prefslot_begin = 0; prefslot_end = 3;
2123 break;
2124 }
2125 case MVT::i64: {
2126 prefslot_begin = 0; prefslot_end = 7;
2127 break;
2128 }
2129 }
2130
Scott Michel0e5665b2007-12-19 21:17:42 +00002131 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2132 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2133
Scott Michel266bc8f2007-12-04 22:23:35 +00002134 for (int i = 0; i < 16; ++i) {
2135 // zero fill uppper part of preferred slot, don't care about the
2136 // other slots:
2137 unsigned int mask_val;
2138
2139 if (i <= prefslot_end) {
2140 mask_val =
2141 ((i < prefslot_begin)
2142 ? 0x80
2143 : elt_byte + (i - prefslot_begin));
2144
Scott Michel0e5665b2007-12-19 21:17:42 +00002145 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel266bc8f2007-12-04 22:23:35 +00002146 } else
2147 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2148 }
2149
2150 SDOperand ShufMaskVec =
2151 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2152 &ShufMask[0],
2153 sizeof(ShufMask) / sizeof(ShufMask[0]));
2154
2155 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2156 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2157 N, N, ShufMaskVec));
2158
2159}
2160
2161static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2162 SDOperand VecOp = Op.getOperand(0);
2163 SDOperand ValOp = Op.getOperand(1);
2164 SDOperand IdxOp = Op.getOperand(2);
2165 MVT::ValueType VT = Op.getValueType();
2166
2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2169
2170 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2171 // Use $2 because it's always 16-byte aligned and it's available:
2172 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2173
2174 SDOperand result =
2175 DAG.getNode(SPUISD::SHUFB, VT,
2176 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2177 VecOp,
2178 DAG.getNode(SPUISD::INSERT_MASK, VT,
2179 DAG.getNode(ISD::ADD, PtrVT,
2180 PtrBase,
2181 DAG.getConstant(CN->getValue(),
2182 PtrVT))));
2183
2184 return result;
2185}
2186
2187static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2188 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2189
2190 assert(Op.getValueType() == MVT::i8);
2191 switch (Opc) {
2192 default:
2193 assert(0 && "Unhandled i8 math operator");
2194 /*NOTREACHED*/
2195 break;
2196 case ISD::SUB: {
2197 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2198 // the result:
2199 SDOperand N1 = Op.getOperand(1);
2200 N0 = (N0.getOpcode() != ISD::Constant
2201 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2202 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2203 N1 = (N1.getOpcode() != ISD::Constant
2204 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2205 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2206 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2207 DAG.getNode(Opc, MVT::i16, N0, N1));
2208 }
2209 case ISD::ROTR:
2210 case ISD::ROTL: {
2211 SDOperand N1 = Op.getOperand(1);
2212 unsigned N1Opc;
2213 N0 = (N0.getOpcode() != ISD::Constant
2214 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2215 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2216 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2217 N1 = (N1.getOpcode() != ISD::Constant
2218 ? DAG.getNode(N1Opc, MVT::i16, N1)
2219 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2220 SDOperand ExpandArg =
2221 DAG.getNode(ISD::OR, MVT::i16, N0,
2222 DAG.getNode(ISD::SHL, MVT::i16,
2223 N0, DAG.getConstant(8, MVT::i16)));
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2226 }
2227 case ISD::SRL:
2228 case ISD::SHL: {
2229 SDOperand N1 = Op.getOperand(1);
2230 unsigned N1Opc;
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2234 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2235 N1 = (N1.getOpcode() != ISD::Constant
2236 ? DAG.getNode(N1Opc, MVT::i16, N1)
2237 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2238 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2239 DAG.getNode(Opc, MVT::i16, N0, N1));
2240 }
2241 case ISD::SRA: {
2242 SDOperand N1 = Op.getOperand(1);
2243 unsigned N1Opc;
2244 N0 = (N0.getOpcode() != ISD::Constant
2245 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2246 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2247 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2248 N1 = (N1.getOpcode() != ISD::Constant
2249 ? DAG.getNode(N1Opc, MVT::i16, N1)
2250 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2251 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2252 DAG.getNode(Opc, MVT::i16, N0, N1));
2253 }
2254 case ISD::MUL: {
2255 SDOperand N1 = Op.getOperand(1);
2256 unsigned N1Opc;
2257 N0 = (N0.getOpcode() != ISD::Constant
2258 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2259 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2260 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2261 N1 = (N1.getOpcode() != ISD::Constant
2262 ? DAG.getNode(N1Opc, MVT::i16, N1)
2263 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2266 break;
2267 }
2268 }
2269
2270 return SDOperand();
2271}
2272
2273//! Lower byte immediate operations for v16i8 vectors:
2274static SDOperand
2275LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2276 SDOperand ConstVec;
2277 SDOperand Arg;
2278 MVT::ValueType VT = Op.getValueType();
2279
2280 ConstVec = Op.getOperand(0);
2281 Arg = Op.getOperand(1);
2282 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2283 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284 ConstVec = ConstVec.getOperand(0);
2285 } else {
2286 ConstVec = Op.getOperand(1);
2287 Arg = Op.getOperand(0);
2288 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2289 ConstVec = ConstVec.getOperand(0);
2290 }
2291 }
2292 }
2293
2294 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2295 uint64_t VectorBits[2];
2296 uint64_t UndefBits[2];
2297 uint64_t SplatBits, SplatUndef;
2298 int SplatSize;
2299
2300 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2301 && isConstantSplat(VectorBits, UndefBits,
2302 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2303 SplatBits, SplatUndef, SplatSize)) {
2304 SDOperand tcVec[16];
2305 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2306 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2307
2308 // Turn the BUILD_VECTOR into a set of target constants:
2309 for (size_t i = 0; i < tcVecSize; ++i)
2310 tcVec[i] = tc;
2311
2312 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2313 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2314 }
2315 }
2316
2317 return SDOperand();
2318}
2319
2320//! Lower i32 multiplication
2321static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2322 unsigned Opc) {
2323 switch (VT) {
2324 default:
2325 cerr << "CellSPU: Unknown LowerMUL value type, got "
2326 << MVT::getValueTypeString(Op.getValueType())
2327 << "\n";
2328 abort();
2329 /*NOTREACHED*/
2330
2331 case MVT::i32: {
2332 SDOperand rA = Op.getOperand(0);
2333 SDOperand rB = Op.getOperand(1);
2334
2335 return DAG.getNode(ISD::ADD, MVT::i32,
2336 DAG.getNode(ISD::ADD, MVT::i32,
2337 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2338 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2339 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2340 }
2341 }
2342
2343 return SDOperand();
2344}
2345
2346//! Custom lowering for CTPOP (count population)
2347/*!
2348 Custom lowering code that counts the number ones in the input
2349 operand. SPU has such an instruction, but it counts the number of
2350 ones per byte, which then have to be accumulated.
2351*/
2352static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2353 unsigned VT = Op.getValueType();
2354 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2355
2356 switch (VT) {
2357 case MVT::i8: {
2358 SDOperand N = Op.getOperand(0);
2359 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2360
2361 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2362 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2363
2364 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2365 }
2366
2367 case MVT::i16: {
2368 MachineFunction &MF = DAG.getMachineFunction();
2369 SSARegMap *RegMap = MF.getSSARegMap();
2370
2371 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2372
2373 SDOperand N = Op.getOperand(0);
2374 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2375 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2376 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2377
2378 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2379 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2380
2381 // CNTB_result becomes the chain to which all of the virtual registers
2382 // CNTB_reg, SUM1_reg become associated:
2383 SDOperand CNTB_result =
2384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2385
2386 SDOperand CNTB_rescopy =
2387 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2388
2389 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2390
2391 return DAG.getNode(ISD::AND, MVT::i16,
2392 DAG.getNode(ISD::ADD, MVT::i16,
2393 DAG.getNode(ISD::SRL, MVT::i16,
2394 Tmp1, Shift1),
2395 Tmp1),
2396 Mask0);
2397 }
2398
2399 case MVT::i32: {
2400 MachineFunction &MF = DAG.getMachineFunction();
2401 SSARegMap *RegMap = MF.getSSARegMap();
2402
2403 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2404 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2405
2406 SDOperand N = Op.getOperand(0);
2407 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2408 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2409 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2410 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2411
2412 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2413 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2414
2415 // CNTB_result becomes the chain to which all of the virtual registers
2416 // CNTB_reg, SUM1_reg become associated:
2417 SDOperand CNTB_result =
2418 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2419
2420 SDOperand CNTB_rescopy =
2421 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2422
2423 SDOperand Comp1 =
2424 DAG.getNode(ISD::SRL, MVT::i32,
2425 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2426
2427 SDOperand Sum1 =
2428 DAG.getNode(ISD::ADD, MVT::i32,
2429 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2430
2431 SDOperand Sum1_rescopy =
2432 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2433
2434 SDOperand Comp2 =
2435 DAG.getNode(ISD::SRL, MVT::i32,
2436 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2437 Shift2);
2438 SDOperand Sum2 =
2439 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2440 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2441
2442 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2443 }
2444
2445 case MVT::i64:
2446 break;
2447 }
2448
2449 return SDOperand();
2450}
2451
2452/// LowerOperation - Provide custom lowering hooks for some operations.
2453///
2454SDOperand
2455SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2456{
2457 switch (Op.getOpcode()) {
2458 default: {
2459 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2460 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2461 cerr << "*Op.Val:\n";
2462 Op.Val->dump();
2463 abort();
2464 }
2465 case ISD::LOAD:
2466 case ISD::SEXTLOAD:
2467 case ISD::ZEXTLOAD:
2468 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2469 case ISD::STORE:
2470 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2471 case ISD::ConstantPool:
2472 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2473 case ISD::GlobalAddress:
2474 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2475 case ISD::JumpTable:
2476 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2477 case ISD::Constant:
2478 return LowerConstant(Op, DAG);
2479 case ISD::ConstantFP:
2480 return LowerConstantFP(Op, DAG);
2481 case ISD::FORMAL_ARGUMENTS:
2482 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2483 case ISD::CALL:
2484 return LowerCALL(Op, DAG);
2485 case ISD::RET:
2486 return LowerRET(Op, DAG, getTargetMachine());
2487
2488 // i8 math ops:
2489 case ISD::SUB:
2490 case ISD::ROTR:
2491 case ISD::ROTL:
2492 case ISD::SRL:
2493 case ISD::SHL:
2494 case ISD::SRA:
2495 return LowerI8Math(Op, DAG, Op.getOpcode());
2496
2497 // Vector-related lowering.
2498 case ISD::BUILD_VECTOR:
2499 return LowerBUILD_VECTOR(Op, DAG);
2500 case ISD::SCALAR_TO_VECTOR:
2501 return LowerSCALAR_TO_VECTOR(Op, DAG);
2502 case ISD::VECTOR_SHUFFLE:
2503 return LowerVECTOR_SHUFFLE(Op, DAG);
2504 case ISD::EXTRACT_VECTOR_ELT:
2505 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2506 case ISD::INSERT_VECTOR_ELT:
2507 return LowerINSERT_VECTOR_ELT(Op, DAG);
2508
2509 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2510 case ISD::AND:
2511 case ISD::OR:
2512 case ISD::XOR:
2513 return LowerByteImmed(Op, DAG);
2514
2515 // Vector and i8 multiply:
2516 case ISD::MUL:
2517 if (MVT::isVector(Op.getValueType()))
2518 return LowerVectorMUL(Op, DAG);
2519 else if (Op.getValueType() == MVT::i8)
2520 return LowerI8Math(Op, DAG, Op.getOpcode());
2521 else
2522 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2523
2524 case ISD::FDIV:
2525 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2526 return LowerFDIVf32(Op, DAG);
2527// else if (Op.getValueType() == MVT::f64)
2528// return LowerFDIVf64(Op, DAG);
2529 else
2530 assert(0 && "Calling FDIV on unsupported MVT");
2531
2532 case ISD::CTPOP:
2533 return LowerCTPOP(Op, DAG);
2534 }
2535
2536 return SDOperand();
2537}
2538
2539//===----------------------------------------------------------------------===//
2540// Other Lowering Code
2541//===----------------------------------------------------------------------===//
2542
/// InsertAtEndOfBasicBlock - No SPU pseudo-instructions currently require
/// custom MachineBasicBlock expansion, so the block is returned unmodified.
MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}
2549
2550//===----------------------------------------------------------------------===//
2551// Target Optimization Hooks
2552//===----------------------------------------------------------------------===//
2553
2554SDOperand
2555SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2556{
2557#if 0
2558 TargetMachine &TM = getTargetMachine();
2559 SelectionDAG &DAG = DCI.DAG;
2560#endif
2561 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2562
2563 switch (N->getOpcode()) {
2564 default: break;
2565
2566 // Look for obvious optimizations for shift left:
2567 // a) Replace 0 << V with 0
2568 // b) Replace V << 0 with V
2569 //
2570 // N.B: llvm will generate an undef node if the shift amount is greater than
2571 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2572 case SPU::SHLIr32:
2573 case SPU::SHLHIr16:
2574 case SPU::SHLQBIIvec:
2575 case SPU::ROTHIr16:
2576 case SPU::ROTHIr16_i32:
2577 case SPU::ROTIr32:
2578 case SPU::ROTIr32_i16:
2579 case SPU::ROTQBYIvec:
2580 case SPU::ROTQBYBIvec:
2581 case SPU::ROTQBIIvec:
2582 case SPU::ROTHMIr16:
2583 case SPU::ROTMIr32:
2584 case SPU::ROTQMBYIvec: {
2585 if (N0.getOpcode() == ISD::Constant) {
2586 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2587 if (C->getValue() == 0) // 0 << V -> 0.
2588 return N0;
2589 }
2590 }
2591 SDOperand N1 = N->getOperand(1);
2592 if (N1.getOpcode() == ISD::Constant) {
2593 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2594 if (C->getValue() == 0) // V << 0 -> V
2595 return N1;
2596 }
2597 }
2598 break;
2599 }
2600 }
2601
2602 return SDOperand();
2603}
2604
2605//===----------------------------------------------------------------------===//
2606// Inline Assembly Support
2607//===----------------------------------------------------------------------===//
2608
2609/// getConstraintType - Given a constraint letter, return the type of
2610/// constraint it is for this target.
2611SPUTargetLowering::ConstraintType
2612SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2613 if (ConstraintLetter.size() == 1) {
2614 switch (ConstraintLetter[0]) {
2615 default: break;
2616 case 'b':
2617 case 'r':
2618 case 'f':
2619 case 'v':
2620 case 'y':
2621 return C_RegisterClass;
2622 }
2623 }
2624 return TargetLowering::getConstraintType(ConstraintLetter);
2625}
2626
2627std::pair<unsigned, const TargetRegisterClass*>
2628SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2629 MVT::ValueType VT) const
2630{
2631 if (Constraint.size() == 1) {
2632 // GCC RS6000 Constraint Letters
2633 switch (Constraint[0]) {
2634 case 'b': // R1-R31
2635 case 'r': // R0-R31
2636 if (VT == MVT::i64)
2637 return std::make_pair(0U, SPU::R64CRegisterClass);
2638 return std::make_pair(0U, SPU::R32CRegisterClass);
2639 case 'f':
2640 if (VT == MVT::f32)
2641 return std::make_pair(0U, SPU::R32FPRegisterClass);
2642 else if (VT == MVT::f64)
2643 return std::make_pair(0U, SPU::R64FPRegisterClass);
2644 break;
2645 case 'v':
2646 return std::make_pair(0U, SPU::GPRCRegisterClass);
2647 }
2648 }
2649
2650 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2651}
2652
2653void
2654SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2655 uint64_t Mask,
2656 uint64_t &KnownZero,
2657 uint64_t &KnownOne,
2658 const SelectionDAG &DAG,
2659 unsigned Depth ) const {
2660 KnownZero = 0;
2661 KnownOne = 0;
2662}
2663
// LowerAsmOperandForConstraint - Lower an inline-asm operand for the
// given single-letter constraint. The SPU defines no target-specific
// constraint lowering yet, so everything is delegated to the base class.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
2673
2674/// isLegalAddressImmediate - Return true if the integer value can be used
2675/// as the offset of the target addressing mode.
2676bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2677 // SPU's addresses are 256K:
2678 return (V > -(1 << 18) && V < (1 << 18) - 1);
2679}
2680
// A global value is never directly usable as an address-mode immediate
// on the SPU; require it to be materialized into a register first.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}