//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

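  // The "preferred slot" is the byte offset within a 16-byte SPU register
  // where a scalar of the given type is kept: byte 3 for i8 (and i1),
  // bytes 2-3 for i16, and bytes 0-3 for word-sized and wider types.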
  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Signed division by a power of two is cheap on SPU; don't expand it.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root instruction; expand it.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8,  Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1,  Expand);
  setOperationAction(ISD::SELECT, MVT::i8,  Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Expand);
  setOperationAction(ISD::SETCC, MVT::i8,  Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,        MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,       MVT::Other, Expand);
  setOperationAction(ISD::VAEND,        MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
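  // (e.g., an i64 pair becomes (zext(Hi) << 32) | zext(Lo))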
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE , (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
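//
// Illustrative example (following the rotation math below, not from the
// original source): an i16 load from offset 6 within a quadword loads the
// whole 16-byte chunk, then rotates it left by 6 - 2 == 4 bytes so the
// halfword lands in its preferred slot (bytes 2-3).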
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  // Note: this is a node opcode, not a value type, so declare it as unsigned:
  unsigned BasepOpc = basep.Val->getOpcode();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  if (BasepOpc == ISD::FrameIndex) {
    // Loading from a frame index is always properly aligned. Always.
    return SDOperand();
  }

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    vecVT = MVT::v16i8;
    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
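      // (e.g., an i32 at quadword offset 4 has preferred slot 0, so
      // c_rotamt == 4: rotating the chunk left by 4 bytes moves the word
      // into bytes 0-3.)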
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
562 // Handle the sign and zero-extending loads for i1 and i8:
563 unsigned NewOpC;
564
565 if (ExtType == ISD::SEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_SEXT
568 : SPUISD::EXTRACT_I8_SEXT);
569 } else if (ExtType == ISD::ZEXTLOAD) {
570 NewOpC = (OpVT == MVT::i1
571 ? SPUISD::EXTRACT_I1_ZEXT
572 : SPUISD::EXTRACT_I8_ZEXT);
573 }
574
575 result = DAG.getNode(NewOpC, OpVT, result);
576 }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
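//
// Sketch of the lowering below, as a read-modify-write sequence:
//   chunk  = load 16 bytes at (base & ~0xf)
//   mask   = INSERT_MASK(base & 0xf)     -- shuffle control for the slot
//   chunk' = SHUFB(scalar_to_vector(value), chunk, mask)
//   store chunk' back to (base & ~0xf)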
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::FrameIndex) {
      // FrameIndex nodes are always properly aligned. Really.
      return SDOperand();
    }

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the raw pointer;
    // basep becomes the actual D-form address, offs($reg)):
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
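/// In large-memory mode the address is produced as a Hi/Lo pair; this
/// presumably selects to an immediate-load pair (ILHU/IOHL style) later.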
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
  This code inserts all of the necessary juggling that needs to occur to load
  a 64-bit constant into a register.
 */
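//
// For example, lowering the i64 constant 0x1122334455667788 builds the
// v2i64 splat <0x1122334455667788, 0x1122334455667788> and then extracts
// element 0 back into a scalar register.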
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
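/// As written, the address must be word aligned (low two bits zero) and
/// fit in a signed 18-bit field; the immediate returned is Addr >> 2.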
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Tie the stack-argument stores together so they are emitted before the
    // call.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Flag);   // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls, otherwise, external symbols are BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
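    // (BRSL is the PC-relative branch-and-set-link form; BRASL branches
    // to an absolute local-store address.)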
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Do not truncate to unsigned here; this is a full 64-bit constant.
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the second uint64_t half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1503
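//! Worked example of the bit packing in GetConstantBuildVectorBits
/*!
  For a v4i32 build vector (e == 4), elements 0..1 land in VectorBits[0]
  and elements 2..3 in VectorBits[1], with the lower-numbered element in
  the more significant half of each uint64_t (big-endian lane order). A
  standalone sketch of the fully-defined (no undef) case:

  \code
  #include <cstdint>

  static void pack4x32(const uint32_t Elt[4], uint64_t Bits[2]) {
    Bits[0] = (uint64_t(Elt[0]) << 32) | Elt[1]; // PartNo 0: slots 1, 0
    Bits[1] = (uint64_t(Elt[2]) << 32) | Elt[3]; // PartNo 1: slots 1, 0
  }
  \endcode
 */
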
1504/// If this is a splat (repetition) of a value across the whole vector, return
1505/// the smallest size that splats it. For example, "0x01010101010101..." is a
1506/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507/// SplatSize = 1 byte.
1508static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1510 int MinSplatBits,
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1512 int &SplatSize) {
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1521
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1524
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1526 // undefs.
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1529
1530           // If the top 16 bits are the same as the lower 16 bits (ignoring
1531           // undefs), the splat may be narrower than 32 bits.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534             // If the top 8 bits are the same as the lower 8 bits (ignoring
1535             // undefs), we have an 8-bit splat.
1536             if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1538 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1539 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1540 SplatSize = 1;
1541 return true;
1542 }
1543 } else {
1544 SplatBits = Bits16;
1545 SplatUndef = Undef16;
1546 SplatSize = 2;
1547 return true;
1548 }
1549 }
1550 } else {
1551 SplatBits = Bits32;
1552 SplatUndef = Undef32;
1553 SplatSize = 4;
1554 return true;
1555 }
1556 }
1557 } else {
1558 SplatBits = Bits128[0];
1559 SplatUndef = Undef128[0];
1560 SplatSize = 8;
1561 return true;
1562 }
1563 }
1564
1565 return false; // Can't be a splat if two pieces don't match.
1566}
1567
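//! Worked example of the splat-narrowing logic in isConstantSplat
/*!
  Ignoring undefs, the routine keeps halving the pattern while both halves
  agree. A standalone model of the fully-defined case (assumes no undef
  lanes; returns the splat size in bytes, or 16 when the two 64-bit halves
  disagree):

  \code
  #include <cstdint>

  static int splatSizeBytes(uint64_t Hi, uint64_t Lo) {
    if (Hi != Lo) return 16;
    if (uint32_t(Lo) != uint32_t(Lo >> 32)) return 8;
    uint32_t B32 = uint32_t(Lo);
    if (uint16_t(B32) != uint16_t(B32 >> 16)) return 4;
    uint16_t B16 = uint16_t(B32);
    if (uint8_t(B16) != uint8_t(B16 >> 8)) return 2;
    return 1;
  }
  // splatSizeBytes(0x0101010101010101, 0x0101010101010101) == 1
  // splatSizeBytes(0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef) == 4
  \endcode
 */
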
1568// If this is a case we can't handle, return null and let the default
1569// expansion code take care of it. If we CAN select this case, and if it
1570// selects to a single instruction, return Op. Otherwise, if we can codegen
1571// this case more efficiently than a constant pool load, lower it to the
1572// sequence of ops that should be used.
1573static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1574 MVT::ValueType VT = Op.getValueType();
1575 // If this is a vector of constants or undefs, get the bits. A bit in
1576 // UndefBits is set if the corresponding element of the vector is an
1577 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1578 // zero.
1579 uint64_t VectorBits[2];
1580 uint64_t UndefBits[2];
1581 uint64_t SplatBits, SplatUndef;
1582 int SplatSize;
1583 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1584 || !isConstantSplat(VectorBits, UndefBits,
1585 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1586 SplatBits, SplatUndef, SplatSize))
1587 return SDOperand(); // Not a constant vector, not a splat.
1588
1589 switch (VT) {
1590 default:
1591 case MVT::v4f32: {
1592 uint32_t Value32 = SplatBits;
1593 assert(SplatSize == 4
1594 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1599 break;
1600 }
1601 case MVT::v2f64: {
1602 uint64_t f64val = SplatBits;
1603 assert(SplatSize == 8
1604 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1605 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1607 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1609 break;
1610 }
1611 case MVT::v16i8: {
1612 // 8-bit constants have to be expanded to 16-bits
1613 unsigned short Value16 = SplatBits | (SplatBits << 8);
1614 SDOperand Ops[8];
1615 for (int i = 0; i < 8; ++i)
1616 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617 return DAG.getNode(ISD::BIT_CONVERT, VT,
1618 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1619 }
1620 case MVT::v8i16: {
1621 unsigned short Value16;
1622 if (SplatSize == 2)
1623 Value16 = (unsigned short) (SplatBits & 0xffff);
1624 else
1625 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1627 SDOperand Ops[8];
1628 for (int i = 0; i < 8; ++i) Ops[i] = T;
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1630 }
1631 case MVT::v4i32: {
1632 unsigned int Value = SplatBits;
1633 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1634 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1635 }
1636 case MVT::v2i64: {
1637 uint64_t val = SplatBits;
1638 uint32_t upper = uint32_t(val >> 32);
1639 uint32_t lower = uint32_t(val);
1640
1641 if (val != 0) {
1642 SDOperand LO32;
1643 SDOperand HI32;
1644 SmallVector<SDOperand, 16> ShufBytes;
1645 SDOperand Result;
1646 bool upper_special, lower_special;
1647
1648 // NOTE: This code creates common-case shuffle masks that can be easily
1649 // detected as common expressions. It is not attempting to create highly
1650 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1651
1652 // Detect if the upper or lower half is a special shuffle mask pattern:
1653 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1654 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1655
1656 // Create lower vector if not a special pattern
1657 if (!lower_special) {
1658 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1659 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1660 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1661 LO32C, LO32C, LO32C, LO32C));
1662 }
1663
1664 // Create upper vector if not a special pattern
1665 if (!upper_special) {
1666 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1667 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669 HI32C, HI32C, HI32C, HI32C));
1670 }
1671
1672 // If either upper or lower are special, then the two input operands are
1673 // the same (basically, one of them is a "don't care")
1674 if (lower_special)
1675 LO32 = HI32;
1676 if (upper_special)
1677 HI32 = LO32;
1678 if (lower_special && upper_special) {
1679 // Unhappy situation... both upper and lower are special, so punt with
1680 // a target constant:
1681 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1682 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1683 Zero, Zero);
1684 }
1685
1686 for (int i = 0; i < 4; ++i) {
1687 for (int j = 0; j < 4; ++j) {
1688 SDOperand V;
1689 bool process_upper, process_lower;
1690 uint64_t val;
1691
1692 process_upper = (upper_special && (i & 1) == 0);
1693 process_lower = (lower_special && (i & 1) == 1);
1694
1695 if (process_upper || process_lower) {
1696 if ((process_upper && upper == 0)
1697 || (process_lower && lower == 0))
1698 val = 0x80;
1699 else if ((process_upper && upper == 0xffffffff)
1700 || (process_lower && lower == 0xffffffff))
1701 val = 0xc0;
1702 else if ((process_upper && upper == 0x80000000)
1703 || (process_lower && lower == 0x80000000))
1704 val = (j == 0 ? 0xe0 : 0x80);
1705 } else
1706 val = i * 4 + j + ((i & 1) * 16);
1707
1708 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1709 }
1710 }
1711
1712 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1713 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1714 &ShufBytes[0], ShufBytes.size()));
1715 } else {
1716 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1717 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1718 return DAG.getNode(ISD::BIT_CONVERT, VT,
1719 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720 Zero, Zero, Zero, Zero));
1721 }
1722 }
1723 }
1724
1725 return SDOperand();
1726}
1727
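//! Note on the v2i64 shuffle control bytes above
/*!
  The magic mask values rely on shufb's "generated constant" encodings: a
  control byte of the form 10xxxxxx yields 0x00, 110xxxxx yields 0xff, and
  111xxxxx yields 0x80. Hence 0x80 synthesizes a zero word, 0xc0 an
  all-ones word, and 0xe0 followed by 0x80s the word 0x80000000, without
  materializing a second source vector. A sketch of that rule (assuming a
  control byte >= 0x80; smaller values index the 32 source bytes):

  \code
  #include <cstdint>

  static uint8_t shufbGeneratedByte(uint8_t Ctl) {
    if (Ctl >= 0xe0) return 0x80;  // 111xxxxx -> 0x80
    if (Ctl >= 0xc0) return 0xff;  // 110xxxxx -> 0xff
    return 0x00;                   // 10xxxxxx -> 0x00
  }
  \endcode
 */
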
1728/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729/// which the Cell can operate. The code inspects V3 to ascertain whether the
1730/// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1732 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1735 /// \note
1736 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1737 /// generate the control word for byte/halfword/word insertion. This takes
1738 /// care of a single element move from V2 into V1.
1739 /// \note
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1741static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1742 SDOperand V1 = Op.getOperand(0);
1743 SDOperand V2 = Op.getOperand(1);
1744 SDOperand PermMask = Op.getOperand(2);
1745
1746 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1747
1748 // If we have a single element being moved from V1 to V2, this can be handled
1749 // using the C*[DX] compute mask instructions, but the vector elements have
1750 // to be monotonically increasing with one exception element.
1751 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1752 unsigned EltsFromV2 = 0;
1753 unsigned V2Elt = 0;
1754 unsigned V2EltIdx0 = 0;
1755 unsigned CurrElt = 0;
1756 bool monotonic = true;
1757 if (EltVT == MVT::i8)
1758 V2EltIdx0 = 16;
1759 else if (EltVT == MVT::i16)
1760 V2EltIdx0 = 8;
1761 else if (EltVT == MVT::i32)
1762 V2EltIdx0 = 4;
1763 else
1764 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1765
1766 for (unsigned i = 0, e = PermMask.getNumOperands();
1767 EltsFromV2 <= 1 && monotonic && i != e;
1768 ++i) {
1769 unsigned SrcElt;
1770 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1771 SrcElt = 0;
1772 else
1773 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1774
1775 if (SrcElt >= V2EltIdx0) {
1776 ++EltsFromV2;
1777 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778 } else if (CurrElt != SrcElt) {
1779 monotonic = false;
1780 }
1781
1782 ++CurrElt;
1783 }
1784
1785 if (EltsFromV2 == 1 && monotonic) {
1786 // Compute mask and shuffle
1787 MachineFunction &MF = DAG.getMachineFunction();
1788 SSARegMap *RegMap = MF.getSSARegMap();
1789 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1790 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791 // Initialize temporary register to 0
1792 SDOperand InitTempReg =
1793 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794 // Copy register's contents as index in INSERT_MASK:
1795 SDOperand ShufMaskOp =
1796 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1797 DAG.getTargetConstant(V2Elt, MVT::i32),
1798 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799 // Use shuffle mask in SHUFB synthetic instruction:
1800 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1801 } else {
1802 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1803 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1804
1805 SmallVector<SDOperand, 16> ResultMask;
1806 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1807 unsigned SrcElt;
1808 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1809 SrcElt = 0;
1810 else
1811 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1812
1813 for (unsigned j = 0; j != BytesPerElement; ++j) {
1814 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1815 MVT::i8));
1816 }
1817 }
1818
1819 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1820 &ResultMask[0], ResultMask.size());
1821 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1822 }
1823}
1824
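//! Example of the monotonicity test above
/*!
  For a v4i32 shuffle (V2EltIdx0 == 4), the mask (0, 1, 6, 3) is monotonic
  with exactly one element drawn from V2 (index 6) and takes the C*D
  insert-mask path; (0, 2, 1, 3) breaks monotonicity and falls through to
  the general SHUFB mask. A hedged, standalone model of the test:

  \code
  #include <cstddef>

  // True if Mask is the identity except for at most one element sourced
  // from the second vector (indices >= N).
  static bool isInsertShuffle(const unsigned Mask[], size_t N) {
    unsigned FromV2 = 0;
    for (size_t i = 0; i < N; ++i) {
      if (Mask[i] >= N) ++FromV2;            // element comes from V2
      else if (Mask[i] != i) return false;   // otherwise must be identity
    }
    return FromV2 == 1;
  }
  \endcode
 */
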
1825static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1826 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1827
1828 if (Op0.Val->getOpcode() == ISD::Constant) {
1829 // For a constant, build the appropriate constant vector, which will
1830 // eventually simplify to a vector register load.
1831
1832 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1833 SmallVector<SDOperand, 16> ConstVecValues;
1834 MVT::ValueType VT;
1835 size_t n_copies;
1836
1837 // Create a constant vector:
1838 switch (Op.getValueType()) {
1839 default: assert(0 && "Unexpected constant value type in "
1840 "LowerSCALAR_TO_VECTOR");
1841 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1842 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1843 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1844 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1845 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1846 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1847 }
1848
1849 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1850 for (size_t j = 0; j < n_copies; ++j)
1851 ConstVecValues.push_back(CValue);
1852
1853 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1854 &ConstVecValues[0], ConstVecValues.size());
1855 } else {
1856 // Otherwise, copy the value from one register to another:
1857 switch (Op0.getValueType()) {
1858 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1859 case MVT::i8:
1860 case MVT::i16:
1861 case MVT::i32:
1862 case MVT::i64:
1863 case MVT::f32:
1864 case MVT::f64:
1865 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1866 }
1867 }
1868
1869 return SDOperand();
1870}
1871
1872static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1873 switch (Op.getValueType()) {
1874 case MVT::v4i32: {
1875 SDOperand rA = Op.getOperand(0);
1876 SDOperand rB = Op.getOperand(1);
1877 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1878 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1879 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1880 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1881
1882 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1883 break;
1884 }
1885
1886 // Multiply two v8i16 vectors (pipeline friendly version):
1887 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1888 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1889 // c) Use SELB to select upper and lower halves from the intermediate results
1890 //
1891 // NOTE: We really want to move the FSMBI to earlier to actually get the
1892 // dual-issue. This code does manage to do this, even if it's a little on
1893 // the wacky side
1894 case MVT::v8i16: {
1895 MachineFunction &MF = DAG.getMachineFunction();
1896 SSARegMap *RegMap = MF.getSSARegMap();
1897 SDOperand Chain = Op.getOperand(0);
1898 SDOperand rA = Op.getOperand(0);
1899 SDOperand rB = Op.getOperand(1);
1900 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1901 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1902
1903 SDOperand FSMBOp =
1904 DAG.getCopyToReg(Chain, FSMBIreg,
1905 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1906 DAG.getConstant(0xcccc, MVT::i32)));
1907
1908 SDOperand HHProd =
1909 DAG.getCopyToReg(FSMBOp, HiProdReg,
1910 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1911
1912 SDOperand HHProd_v4i32 =
1913 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1914 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1915
1916 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1917 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1918 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1919 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1920 HHProd_v4i32,
1921 DAG.getConstant(16, MVT::i16))),
1922 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1923 }
1924
1925 // This M00sE is N@stI! (apologies to Monty Python)
1926 //
1927 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1928 // is to break it all apart, sign extend, and reassemble the various
1929 // intermediate products.
1930 case MVT::v16i8: {
1931 MachineFunction &MF = DAG.getMachineFunction();
1932 SSARegMap *RegMap = MF.getSSARegMap();
1933 SDOperand Chain = Op.getOperand(0);
1934 SDOperand rA = Op.getOperand(0);
1935 SDOperand rB = Op.getOperand(1);
1936 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1937 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1938
1939 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1940 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1941 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1942
1943 SDOperand LLProd =
1944 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1945 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1947
1948 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1949
1950 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1951
1952 SDOperand LHProd =
1953 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1954 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1955
1956 SDOperand FSMBdef_2222 =
1957 DAG.getCopyToReg(Chain, FSMBreg_2222,
1958 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1959 DAG.getConstant(0x2222, MVT::i32)));
1960
1961 SDOperand FSMBuse_2222 =
1962 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1963
1964 SDOperand LoProd_1 =
1965 DAG.getCopyToReg(Chain, LoProd_reg,
1966 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1967 FSMBuse_2222));
1968
1969 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1970
1971 SDOperand LoProd =
1972 DAG.getNode(ISD::AND, MVT::v4i32,
1973 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1974 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1975 LoProdMask, LoProdMask,
1976 LoProdMask, LoProdMask));
1977
1978 SDOperand rAH =
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1981
1982 SDOperand rBH =
1983 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1985
1986 SDOperand HLProd =
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1990
1991 SDOperand HHProd_1 =
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1993 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1996 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1997
1998 SDOperand HHProd =
1999 DAG.getCopyToReg(Chain, HiProd_reg,
2000 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2001 HLProd,
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2003 FSMBuse_2222));
2004
2005 SDOperand HiProd =
2006 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2007 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2008
2009 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2010 DAG.getNode(ISD::OR, MVT::v4i32,
2011 LoProd, HiProd));
2012 }
2013
2014 default:
2015 cerr << "CellSPU: Unknown vector multiplication, got "
2016 << MVT::getValueTypeString(Op.getValueType())
2017 << "\n";
2018 abort();
2019 /*NOTREACHED*/
2020 }
2021
2022 return SDOperand();
2023}
2024
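//! Arithmetic behind the v4i32 decomposition above
/*!
  SPU multiplies 16-bit halfwords only, so a 32-bit product is assembled
  from three partial products: mpyu gives lo(a)*lo(b), and each mpyh gives
  hi(x)*lo(y) shifted into the upper halfword; the hi(a)*hi(b) term shifts
  entirely out of the low 32 bits. A runnable scalar model (helper names
  are hypothetical):

  \code
  #include <cstdint>

  static uint32_t mpyu(uint32_t a, uint32_t b) {  // lo16(a) * lo16(b)
    return (a & 0xffff) * (b & 0xffff);
  }
  static uint32_t mpyh(uint32_t a, uint32_t b) {  // hi16(a) * lo16(b) << 16
    return ((a >> 16) * (b & 0xffff)) << 16;
  }
  static uint32_t mul32(uint32_t a, uint32_t b) {
    return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);  // wraps mod 2^32
  }
  \endcode
 */
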
2025static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2026 MachineFunction &MF = DAG.getMachineFunction();
2027 SSARegMap *RegMap = MF.getSSARegMap();
2028
2029 SDOperand A = Op.getOperand(0);
2030 SDOperand B = Op.getOperand(1);
2031 unsigned VT = Op.getValueType();
2032
2033 unsigned VRegBR, VRegC;
2034
2035 if (VT == MVT::f32) {
2036 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2037 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2038 } else {
2039 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2040 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2041 }
2042 // TODO: make sure we're feeding FPInterp the right arguments
2043 // Right now: fi B, frest(B)
2044
2045 // Computes BRcpl =
2046 // (Floating Interpolate (FP Reciprocal Estimate B))
2047 SDOperand BRcpl =
2048 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2049 DAG.getNode(SPUISD::FPInterp, VT, B,
2050 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2051
2052 // Computes A * BRcpl and stores in a temporary register
2053 SDOperand AxBRcpl =
2054 DAG.getCopyToReg(BRcpl, VRegC,
2055 DAG.getNode(ISD::FMUL, VT, A,
2056 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2057 // What's the Chain variable do? It's magic!
2058 // TODO: set Chain = Op(0).getEntryNode()
2059
2060 return DAG.getNode(ISD::FADD, VT,
2061 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2062 DAG.getNode(ISD::FMUL, VT,
2063 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2064 DAG.getNode(ISD::FSUB, VT, A,
2065 DAG.getNode(ISD::FMUL, VT, B,
2066 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2067}
2068
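//! Numerics behind LowerFDIVf32
/*!
  frest/fi produce a refined reciprocal estimate r ~= 1/B; the lowering
  then forms q0 = A*r and applies one Newton-Raphson correction,
  q1 = q0 + r*(A - B*q0). A standalone float model of that recurrence
  (the exact reciprocal below merely stands in for frest + fi):

  \code
  static float fdiv_nr(float A, float B) {
    float r  = 1.0f / B;           // placeholder for frest + fi estimate
    float q0 = A * r;              // initial quotient estimate
    return q0 + r * (A - B * q0);  // one Newton-Raphson step
  }
  \endcode
 */
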
2069// Expands double-precision FDIV
2070// Expects two doubles as inputs X and Y, does a floating point
2071// reciprocal estimate, and three iterations of Newton-Raphson
2072// to increase accuracy.
2073//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2074// MachineFunction &MF = DAG.getMachineFunction();
2075// SSARegMap *RegMap = MF.getSSARegMap();
2076//
2077// SDOperand X = Op.getOperand(0);
2078// SDOperand Y = Op.getOperand(1);
2079//}
2080
2081static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2082 unsigned VT = Op.getValueType();
2083 SDOperand N = Op.getOperand(0);
2084 SDOperand Elt = Op.getOperand(1);
2085 SDOperand ShufMask[16];
2086 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2087
2088 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2089
2090 int EltNo = (int) C->getValue();
2091
2092 // sanity checks:
2093 if (VT == MVT::i8 && EltNo >= 16)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2095 else if (VT == MVT::i16 && EltNo >= 8)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2097   else if (VT == MVT::i32 && EltNo >= 4)
2098     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2099   else if (VT == MVT::i64 && EltNo >= 2)
2100     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2101
2102 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2103 // i32 and i64: Element 0 is the preferred slot
2104 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2105 }
2106
2107 // Need to generate shuffle mask and extract:
2108   int prefslot_begin = -1, prefslot_end = -1;
2109   int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2110
2111 switch (VT) {
2112 case MVT::i8: {
2113 prefslot_begin = prefslot_end = 3;
2114 break;
2115 }
2116 case MVT::i16: {
2117 prefslot_begin = 2; prefslot_end = 3;
2118 break;
2119 }
2120 case MVT::i32: {
2121 prefslot_begin = 0; prefslot_end = 3;
2122 break;
2123 }
2124 case MVT::i64: {
2125 prefslot_begin = 0; prefslot_end = 7;
2126 break;
2127 }
2128 }
2129
2130   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2131 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2132
2133   for (int i = 0; i < 16; ++i) {
2134     // zero-fill the upper part of the preferred slot; don't care about the
2135     // other slots:
2136 unsigned int mask_val;
2137
2138 if (i <= prefslot_end) {
2139 mask_val =
2140 ((i < prefslot_begin)
2141 ? 0x80
2142 : elt_byte + (i - prefslot_begin));
2143
2144       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2145     } else
2146 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2147 }
2148
2149 SDOperand ShufMaskVec =
2150 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2151 &ShufMask[0],
2152 sizeof(ShufMask) / sizeof(ShufMask[0]));
2153
2154 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2155 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2156 N, N, ShufMaskVec));
2157
2158}
2159
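//! Example shuffle masks produced above
/*!
  Extracting element 2 of a v4i32 (elt_byte == 8, preferred slot bytes
  0..3) yields the control bytes (8, 9, 10, 11) in the preferred slot,
  rotating the wanted word into place; the remaining twelve bytes repeat
  that pattern and are don't-cares. Extracting element 1 of a v8i16
  (elt_byte == 2, preferred slot bytes 2..3) yields (0x80, 0x80, 2, 3),
  zero-filling the upper part of the slot.
 */
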
2160static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2161 SDOperand VecOp = Op.getOperand(0);
2162 SDOperand ValOp = Op.getOperand(1);
2163 SDOperand IdxOp = Op.getOperand(2);
2164 MVT::ValueType VT = Op.getValueType();
2165
2166 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2167 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2168
2169 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2170 // Use $2 because it's always 16-byte aligned and it's available:
2171 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2172
2173 SDOperand result =
2174 DAG.getNode(SPUISD::SHUFB, VT,
2175 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2176 VecOp,
2177 DAG.getNode(SPUISD::INSERT_MASK, VT,
2178 DAG.getNode(ISD::ADD, PtrVT,
2179 PtrBase,
2180 DAG.getConstant(CN->getValue(),
2181 PtrVT))));
2182
2183 return result;
2184}
2185
2186static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2187 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2188
2189 assert(Op.getValueType() == MVT::i8);
2190 switch (Opc) {
2191 default:
2192 assert(0 && "Unhandled i8 math operator");
2193 /*NOTREACHED*/
2194 break;
2195 case ISD::SUB: {
2196 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2197 // the result:
2198 SDOperand N1 = Op.getOperand(1);
2199 N0 = (N0.getOpcode() != ISD::Constant
2200 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2201 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2202 N1 = (N1.getOpcode() != ISD::Constant
2203 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2204 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2205 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2206 DAG.getNode(Opc, MVT::i16, N0, N1));
2207 }
2208 case ISD::ROTR:
2209 case ISD::ROTL: {
2210 SDOperand N1 = Op.getOperand(1);
2211 unsigned N1Opc;
2212 N0 = (N0.getOpcode() != ISD::Constant
2213 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2214 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2215 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2216 N1 = (N1.getOpcode() != ISD::Constant
2217 ? DAG.getNode(N1Opc, MVT::i16, N1)
2218 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2219 SDOperand ExpandArg =
2220 DAG.getNode(ISD::OR, MVT::i16, N0,
2221 DAG.getNode(ISD::SHL, MVT::i16,
2222 N0, DAG.getConstant(8, MVT::i16)));
2223 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2225 }
2226 case ISD::SRL:
2227 case ISD::SHL: {
2228 SDOperand N1 = Op.getOperand(1);
2229 unsigned N1Opc;
2230 N0 = (N0.getOpcode() != ISD::Constant
2231 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2232 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2233 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2234 N1 = (N1.getOpcode() != ISD::Constant
2235 ? DAG.getNode(N1Opc, MVT::i16, N1)
2236 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2237 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238 DAG.getNode(Opc, MVT::i16, N0, N1));
2239 }
2240 case ISD::SRA: {
2241 SDOperand N1 = Op.getOperand(1);
2242 unsigned N1Opc;
2243 N0 = (N0.getOpcode() != ISD::Constant
2244 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2245 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2246 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2247 N1 = (N1.getOpcode() != ISD::Constant
2248 ? DAG.getNode(N1Opc, MVT::i16, N1)
2249 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2250 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2251 DAG.getNode(Opc, MVT::i16, N0, N1));
2252 }
2253 case ISD::MUL: {
2254 SDOperand N1 = Op.getOperand(1);
2255 unsigned N1Opc;
2256 N0 = (N0.getOpcode() != ISD::Constant
2257 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2258 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2259 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2260 N1 = (N1.getOpcode() != ISD::Constant
2261 ? DAG.getNode(N1Opc, MVT::i16, N1)
2262 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2263 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2264 DAG.getNode(Opc, MVT::i16, N0, N1));
2265 break;
2266 }
2267 }
2268
2269 return SDOperand();
2270}
2271
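//! Model of the i8 rotate expansion above
/*!
  SPU has no 8-bit rotate, so the byte is duplicated into both halves of
  an i16 (N0 | (N0 << 8)); rotating that 16-bit value makes bits that wrap
  out of the low byte reappear from the duplicated copy, and the result is
  truncated back to i8. A runnable sketch (hypothetical helper):

  \code
  #include <cstdint>

  static uint8_t rotl8_via_i16(uint8_t x, unsigned amt) {
    amt &= 15;                                       // i16 rotate amount
    uint16_t dup = uint16_t(x) | uint16_t(uint16_t(x) << 8); // both halves
    uint16_t rot = uint16_t((dup << amt) | (dup >> ((16 - amt) & 15)));
    return uint8_t(rot);                             // truncate to i8
  }
  // rotl8_via_i16(0x81, 1) == 0x03
  \endcode
 */
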
2272//! Lower byte immediate operations for v16i8 vectors:
2273static SDOperand
2274LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2275 SDOperand ConstVec;
2276 SDOperand Arg;
2277 MVT::ValueType VT = Op.getValueType();
2278
2279 ConstVec = Op.getOperand(0);
2280 Arg = Op.getOperand(1);
2281 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2282 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2283 ConstVec = ConstVec.getOperand(0);
2284 } else {
2285 ConstVec = Op.getOperand(1);
2286 Arg = Op.getOperand(0);
2287 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2288 ConstVec = ConstVec.getOperand(0);
2289 }
2290 }
2291 }
2292
2293 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2294 uint64_t VectorBits[2];
2295 uint64_t UndefBits[2];
2296 uint64_t SplatBits, SplatUndef;
2297 int SplatSize;
2298
2299 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2300 && isConstantSplat(VectorBits, UndefBits,
2301 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2302 SplatBits, SplatUndef, SplatSize)) {
2303 SDOperand tcVec[16];
2304 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2305 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2306
2307 // Turn the BUILD_VECTOR into a set of target constants:
2308 for (size_t i = 0; i < tcVecSize; ++i)
2309 tcVec[i] = tc;
2310
2311 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2312 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2313 }
2314 }
2315
2316 return SDOperand();
2317}
2318
2319//! Lower i32 multiplication
2320static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2321 unsigned Opc) {
2322 switch (VT) {
2323 default:
2324 cerr << "CellSPU: Unknown LowerMUL value type, got "
2325 << MVT::getValueTypeString(Op.getValueType())
2326 << "\n";
2327 abort();
2328 /*NOTREACHED*/
2329
2330 case MVT::i32: {
2331 SDOperand rA = Op.getOperand(0);
2332 SDOperand rB = Op.getOperand(1);
2333
2334 return DAG.getNode(ISD::ADD, MVT::i32,
2335 DAG.getNode(ISD::ADD, MVT::i32,
2336 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2337 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2338 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2339 }
2340 }
2341
2342 return SDOperand();
2343}
2344
2345//! Custom lowering for CTPOP (count population)
2346/*!
2347 Custom lowering code that counts the number ones in the input
2348 operand. SPU has such an instruction, but it counts the number of
2349 ones per byte, which then have to be accumulated.
2350*/
2351static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2352 unsigned VT = Op.getValueType();
2353 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2354
2355 switch (VT) {
2356 case MVT::i8: {
2357 SDOperand N = Op.getOperand(0);
2358 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2359
2360 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2361 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2362
2363 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2364 }
2365
2366 case MVT::i16: {
2367 MachineFunction &MF = DAG.getMachineFunction();
2368 SSARegMap *RegMap = MF.getSSARegMap();
2369
2370 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2371
2372 SDOperand N = Op.getOperand(0);
2373 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2374 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2375 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2376
2377 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2378 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2379
2380     // CNTB_result becomes the chain to which the virtual register
2381     // CNTB_reg becomes associated:
2382 SDOperand CNTB_result =
2383 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2384
2385 SDOperand CNTB_rescopy =
2386 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2387
2388 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2389
2390 return DAG.getNode(ISD::AND, MVT::i16,
2391 DAG.getNode(ISD::ADD, MVT::i16,
2392 DAG.getNode(ISD::SRL, MVT::i16,
2393 Tmp1, Shift1),
2394 Tmp1),
2395 Mask0);
2396 }
2397
2398 case MVT::i32: {
2399 MachineFunction &MF = DAG.getMachineFunction();
2400 SSARegMap *RegMap = MF.getSSARegMap();
2401
2402 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2403 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2404
2405 SDOperand N = Op.getOperand(0);
2406 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2407 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2408 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2409 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2410
2411 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2412 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2413
2414 // CNTB_result becomes the chain to which all of the virtual registers
2415 // CNTB_reg, SUM1_reg become associated:
2416 SDOperand CNTB_result =
2417 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2418
2419 SDOperand CNTB_rescopy =
2420 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2421
2422 SDOperand Comp1 =
2423 DAG.getNode(ISD::SRL, MVT::i32,
2424 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2425
2426 SDOperand Sum1 =
2427 DAG.getNode(ISD::ADD, MVT::i32,
2428 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2429
2430 SDOperand Sum1_rescopy =
2431 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2432
2433 SDOperand Comp2 =
2434 DAG.getNode(ISD::SRL, MVT::i32,
2435 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2436 Shift2);
2437 SDOperand Sum2 =
2438 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2439 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2440
2441 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2442 }
2443
2444 case MVT::i64:
2445 break;
2446 }
2447
2448 return SDOperand();
2449}
2450
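//! Scalar model of the i32 CTPOP expansion above
/*!
  CNTB counts ones per byte; the i32 lowering then folds the four byte
  counts together with two shift/add steps and masks off the result. A
  self-contained model (cntb below is a stand-in for the SPU instruction):

  \code
  #include <cstdint>

  static uint32_t cntb(uint32_t v) {             // per-byte population count
    uint32_t r = 0;
    for (int b = 0; b < 4; ++b) {
      uint32_t byte = (v >> (8 * b)) & 0xff, c = 0;
      while (byte) { c += byte & 1; byte >>= 1; }
      r |= c << (8 * b);
    }
    return r;
  }
  static uint32_t ctpop32(uint32_t v) {
    uint32_t s1 = cntb(v);
    uint32_t s2 = (s1 >> 16) + s1;               // fold upper halfword in
    uint32_t s3 = (s2 >> 8) + s2;                // fold second byte in
    return s3 & 0xff;                            // count fits in 8 bits
  }
  \endcode
 */
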
2451/// LowerOperation - Provide custom lowering hooks for some operations.
2452///
2453SDOperand
2454SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2455{
2456 switch (Op.getOpcode()) {
2457 default: {
2458 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2459 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2460 cerr << "*Op.Val:\n";
2461 Op.Val->dump();
2462 abort();
2463 }
2464 case ISD::LOAD:
2465 case ISD::SEXTLOAD:
2466 case ISD::ZEXTLOAD:
2467 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::STORE:
2469 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::ConstantPool:
2471 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2472 case ISD::GlobalAddress:
2473 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2474 case ISD::JumpTable:
2475 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2476 case ISD::Constant:
2477 return LowerConstant(Op, DAG);
2478 case ISD::ConstantFP:
2479 return LowerConstantFP(Op, DAG);
2480 case ISD::FORMAL_ARGUMENTS:
2481 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2482 case ISD::CALL:
2483 return LowerCALL(Op, DAG);
2484 case ISD::RET:
2485 return LowerRET(Op, DAG, getTargetMachine());
2486
2487 // i8 math ops:
2488 case ISD::SUB:
2489 case ISD::ROTR:
2490 case ISD::ROTL:
2491 case ISD::SRL:
2492 case ISD::SHL:
2493 case ISD::SRA:
2494 return LowerI8Math(Op, DAG, Op.getOpcode());
2495
2496 // Vector-related lowering.
2497 case ISD::BUILD_VECTOR:
2498 return LowerBUILD_VECTOR(Op, DAG);
2499 case ISD::SCALAR_TO_VECTOR:
2500 return LowerSCALAR_TO_VECTOR(Op, DAG);
2501 case ISD::VECTOR_SHUFFLE:
2502 return LowerVECTOR_SHUFFLE(Op, DAG);
2503 case ISD::EXTRACT_VECTOR_ELT:
2504 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2505 case ISD::INSERT_VECTOR_ELT:
2506 return LowerINSERT_VECTOR_ELT(Op, DAG);
2507
2508 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2509 case ISD::AND:
2510 case ISD::OR:
2511 case ISD::XOR:
2512 return LowerByteImmed(Op, DAG);
2513
2514 // Vector and i8 multiply:
2515 case ISD::MUL:
2516 if (MVT::isVector(Op.getValueType()))
2517 return LowerVectorMUL(Op, DAG);
2518 else if (Op.getValueType() == MVT::i8)
2519 return LowerI8Math(Op, DAG, Op.getOpcode());
2520 else
2521 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2522
2523 case ISD::FDIV:
2524 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2525 return LowerFDIVf32(Op, DAG);
2526// else if (Op.getValueType() == MVT::f64)
2527// return LowerFDIVf64(Op, DAG);
2528 else
2529 assert(0 && "Calling FDIV on unsupported MVT");
2530
2531 case ISD::CTPOP:
2532 return LowerCTPOP(Op, DAG);
2533 }
2534
2535 return SDOperand();
2536}
2537
2538//===----------------------------------------------------------------------===//
2539// Other Lowering Code
2540//===----------------------------------------------------------------------===//
2541
2542MachineBasicBlock *
2543SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2544 MachineBasicBlock *BB)
2545{
2546 return BB;
2547}
2548
2549//===----------------------------------------------------------------------===//
2550// Target Optimization Hooks
2551//===----------------------------------------------------------------------===//
2552
2553SDOperand
2554SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2555{
2556#if 0
2557 TargetMachine &TM = getTargetMachine();
2558 SelectionDAG &DAG = DCI.DAG;
2559#endif
2560 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2561
2562 switch (N->getOpcode()) {
2563 default: break;
2564
2565 // Look for obvious optimizations for shift left:
2566 // a) Replace 0 << V with 0
2567 // b) Replace V << 0 with V
2568 //
2569   // N.B.: llvm will generate an undef node if the shift amount is greater than
2570 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2571 case SPU::SHLIr32:
2572 case SPU::SHLHIr16:
2573 case SPU::SHLQBIIvec:
2574 case SPU::ROTHIr16:
2575 case SPU::ROTHIr16_i32:
2576 case SPU::ROTIr32:
2577 case SPU::ROTIr32_i16:
2578 case SPU::ROTQBYIvec:
2579 case SPU::ROTQBYBIvec:
2580 case SPU::ROTQBIIvec:
2581 case SPU::ROTHMIr16:
2582 case SPU::ROTMIr32:
2583 case SPU::ROTQMBYIvec: {
2584 if (N0.getOpcode() == ISD::Constant) {
2585 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2586 if (C->getValue() == 0) // 0 << V -> 0.
2587 return N0;
2588 }
2589 }
2590 SDOperand N1 = N->getOperand(1);
2591 if (N1.getOpcode() == ISD::Constant) {
2592 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2593 if (C->getValue() == 0) // V << 0 -> V
2594 return N1;
2595 }
2596 }
2597 break;
2598 }
2599 }
2600
2601 return SDOperand();
2602}
2603
2604//===----------------------------------------------------------------------===//
2605// Inline Assembly Support
2606//===----------------------------------------------------------------------===//
2607
2608/// getConstraintType - Given a constraint letter, return the type of
2609/// constraint it is for this target.
2610SPUTargetLowering::ConstraintType
2611SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2612 if (ConstraintLetter.size() == 1) {
2613 switch (ConstraintLetter[0]) {
2614 default: break;
2615 case 'b':
2616 case 'r':
2617 case 'f':
2618 case 'v':
2619 case 'y':
2620 return C_RegisterClass;
2621 }
2622 }
2623 return TargetLowering::getConstraintType(ConstraintLetter);
2624}
2625
2626std::pair<unsigned, const TargetRegisterClass*>
2627SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2628 MVT::ValueType VT) const
2629{
2630 if (Constraint.size() == 1) {
2631 // GCC RS6000 Constraint Letters
2632 switch (Constraint[0]) {
2633 case 'b': // R1-R31
2634 case 'r': // R0-R31
2635 if (VT == MVT::i64)
2636 return std::make_pair(0U, SPU::R64CRegisterClass);
2637 return std::make_pair(0U, SPU::R32CRegisterClass);
2638 case 'f':
2639 if (VT == MVT::f32)
2640 return std::make_pair(0U, SPU::R32FPRegisterClass);
2641 else if (VT == MVT::f64)
2642 return std::make_pair(0U, SPU::R64FPRegisterClass);
2643 break;
2644 case 'v':
2645 return std::make_pair(0U, SPU::GPRCRegisterClass);
2646 }
2647 }
2648
2649 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2650}
2651
2652void
2653SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2654 uint64_t Mask,
2655 uint64_t &KnownZero,
2656 uint64_t &KnownOne,
2657 const SelectionDAG &DAG,
2658 unsigned Depth ) const {
2659 KnownZero = 0;
2660 KnownOne = 0;
2661}
2662
2663// LowerAsmOperandForConstraint
2664void
2665SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2666 char ConstraintLetter,
2667 std::vector<SDOperand> &Ops,
2668 SelectionDAG &DAG) {
2669 // Default, for the time being, to the base class handler
2670 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2671}
2672
2673/// isLegalAddressImmediate - Return true if the integer value can be used
2674/// as the offset of the target addressing mode.
2675bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2676 // SPU's addresses are 256K:
2677   // SPU's local store is 256K, so offsets must fit in a signed 18-bit range:
2678}
2679
2680bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2681 return false;
2682}