//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
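
  // Editorial note (not in the original source): the "preferred slot" is
  // where SPU scalar instructions expect a scalar to live within a 128-bit
  // register. Per the table above, sub-word scalars sit right-justified in
  // the first 32-bit word of the register, so an i8 occupies byte 3, an i16
  // bytes 2..3, and an i32 bytes 0..3; 64-bit and wider types start at
  // byte 0. The prefslot_byte field is the rotation target LowerLOAD uses
  // below when extracting a scalar from a 16-byte line.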

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
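
  // For example (an editorial note, not original source text): a
  // (TargetGlobalAddress ...) node counts as a memory target here, while a
  // pointer produced by a plain (add reg, reg) does not, which is why the
  // lowering code below wraps such pointers in a SPUISD::DFormAddr node
  // before using them as load/store addresses.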
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT into a library call.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8,  Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1,  Expand);
  setOperationAction(ISD::SELECT, MVT::i8,  Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Expand);
  setOperationAction(ISD::SETCC, MVT::i8,  Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION,  MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
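  // For instance (an editorial sketch, not original source text), the
  // legalizer rewrites (build_pair lo:i32, hi:i32):i64 roughly as
  //   (or (shl (anyext hi), 32), (zext lo))
  // using only operations that are already legal or expandable here.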

  // First, set operation actions for all supported vector types; then
  // selectively turn on the ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR,     (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}
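
// A usage sketch (editorial, not original source text): after construction,
// the SelectionDAG legalizer consults the tables set up above, conceptually
//
//   SPUTargetLowering TLI(TM);
//   if (TLI.getOperationAction(ISD::MUL, MVT::i32) == TargetLowering::Custom) {
//     // The legalizer calls back into SPUTargetLowering::LowerOperation(),
//     // which dispatches to the LowerXXX() routines defined below.
//   }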

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for
  elements within a 16-byte block, we have to rotate to extract the requested
  element.
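
  Worked example (editorial note, not in the original comment): an i32 load
  from byte offset 0x7 within its 16-byte line loads the entire line as
  v16i8, then rotates it left by (0x7 & 0xf) - 0 == 7 bytes (0 being the
  i32 preferred-slot byte), so the requested word moves from bytes 7..10
  into bytes 0..3, where EXTRACT_ELT0 can pick it out.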
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          // Only sign- and zero-extending loads reach this branch; assert so
          // that NewOpC cannot be used uninitialized.
          assert((ExtType == ISD::SEXTLOAD || ExtType == ISD::ZEXTLOAD)
                 && "LowerLOAD: unexpected load extension type");

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
  All CellSPU stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to generate a shuffle to insert the
  requested element into its place, then store the resulting block.
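
  Worked example (editorial note, not in the original comment): storing an
  i32 at byte offset 4 of its 16-byte line becomes a read-modify-write
  sequence: load the whole line as v4i32, build a shuffle control with
  INSERT_MASK for byte offset 4, SHUFB the scalar into element 1 of the
  loaded line, and store the rebuilt 16 bytes back.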
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::FrameIndex) {
      // FrameIndex nodes are always properly aligned. Really.
      return SDOperand();
    }

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
    // the actual d-form address offs($reg)).
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
  This code performs the juggling needed to materialize a 64-bit constant in
  a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                   // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
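
// Worked example (editorial sketch, not original source text): the checks
// accept any word-aligned address that fits in an 18-bit signed immediate.
// Addr = 0x1000 passes both tests and yields the word address 0x400
// (0x1000 >> 2); Addr = 0x1002 fails the alignment test, and Addr = 0x40000
// fails the sign-extension test, so both return 0.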

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Merge the stores for the stack arguments into one token factor.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where only the upper 16 bits of that value are set (the low 16 bits
/// are zero), and if so, return the upper halfword as a constant suitable for
/// ILHU.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Return the full 64-bit value; casting to unsigned here would silently
    // truncate the upper 32 bits.
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
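
// Worked example (editorial sketch, not original source text): for a v4i32
// build_vector <1, 2, undef, 4>, EltBitSize == 32 and e == 4, so elements
// 0-1 pack into VectorBits[0] and elements 2-3 into VectorBits[1], higher
// slot first:
//
//   VectorBits[0] == 0x0000000100000002
//   VectorBits[1] == 0x0000000000000004
//   UndefBits[1]  == 0xffffffff00000000   (element 2 was undef)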

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits,
      // ignoring undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the upper 16 bits equal the lower 16 bits (ignoring undefs),
          // the splat may be narrower still; otherwise it is an i32 splat,
          // handled in the else branch below.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the upper 8 bits equal the lower 8 bits (ignoring undefs),
              // we have an 8-bit splat; otherwise it is an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
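
// Illustrative example: the 128-bit pattern of sixteen 0xAB bytes halves to
// the same 64-bit value on both sides, then to 0xABABABAB at 32 bits,
// 0xABAB at 16 bits, and finally 0xAB at 8 bits, so isConstantSplat reports
// SplatBits = 0xAB and SplatSize = 1. Undef elements never block a match:
// their bits are masked out of every comparison and are reported back
// through SplatUndef instead.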

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }
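
      // Background for the mask byte values chosen below (a summary of the
      // shufb control byte encoding from the SPU ISA, included for
      // reference): a control byte of the form 10xxxxxx produces 0x00 in the
      // result byte, 110xxxxx produces 0xff, and 111xxxxx produces 0x80.
      // Hence 0x80 synthesizes a zero byte, 0xc0 an all-ones byte, and 0xe0
      // the sign-bit byte 0x80, which is why 0, 0xffffffff and 0x80000000
      // are the three "special" 32-bit patterns handled here; any other
      // control byte selects that numbered byte from the 32 concatenated
      // bytes of the two input registers.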

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V2 into V1, this can be
  // handled using the C*[DX] compute mask instructions, but the vector
  // elements have to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
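
// Illustrative example (not from the original source): for a v4i32 shuffle
// with permutation mask (0, 1, 6, 3), elements 0, 1 and 3 come from V1 in
// increasing order and exactly one element (index 6, i.e., element 2 of V2)
// comes from V2, so the "monotonic with one exception" path fires and a
// single C*D-generated insertion mask plus one shufb performs the move. A
// mask such as (3, 2, 1, 0), by contrast, falls through to the general path,
// which expands every element index into its component byte indices and
// feeds the resulting v16i8 constant to shufb directly.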

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                   // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}
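
// For example (illustrative): SCALAR_TO_VECTOR of the i32 constant 42 with a
// v4i32 result becomes BUILD_VECTOR <42, 42, 42, 42>, which later folds into
// a single splatted-constant register load. Element 0 is all that
// SCALAR_TO_VECTOR actually guarantees; the extra copies are harmless. A
// non-constant scalar instead becomes one SPUISD::PROMOTE_SCALAR node, which
// moves the value into the preferred slot of a vector register.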

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }
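
  // Why the three multiplies above suffice (a sketch, using the usual
  // 16-bit decomposition): write a = (a_hi << 16) + a_lo and
  // b = (b_hi << 16) + b_lo. Then, modulo 2^32,
  //
  //   a * b = ((a_hi * b_lo) << 16) + ((a_lo * b_hi) << 16) + a_lo * b_lo
  //
  // since the (a_hi * b_hi) << 32 term vanishes. SPU's mpyh computes a
  // halfword product shifted into the upper half and mpyu the unsigned
  // lower-halfword product, so mpyh(a,b) + mpyh(b,a) + mpyu(a,b) yields the
  // full 32-bit product in each element.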

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }
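
  // A note on the 0xcccc mask above (explanatory, summarizing the SPU ISA):
  // fsmbi expands each bit of its 16-bit immediate into one byte of the
  // result (1 -> 0xff, 0 -> 0x00). 0xcccc is 1100110011001100 in binary, so
  // the mask covers bytes 0-1, 4-5, 8-9 and 12-13: the even halfword of
  // every 32-bit group. selb then takes those bytes from the shifted
  // high-half products and the remaining bytes from the low-half products,
  // interleaving all eight 16-bit results in a single select.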

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  //   (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
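
// The arithmetic above is one Newton-Raphson refinement step, sketched here
// for reference: with r = fi(B, frest(B)) ~= 1/B and the initial quotient
// q0 = A * r, the value returned is
//
//   q1 = q0 + r * (A - B * q0)
//
// The residual A - B*q0 measures how far q0 is from the true quotient, and
// scaling it by the reciprocal estimate roughly doubles the number of
// correct mantissa bits per step.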

// Expands double-precision FDIV.
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "Unhandled value type in LowerEXTRACT_VECTOR_ELT");
    // fall through so prefslot_begin/prefslot_end stay initialized in
    // release builds:
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}
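
// Worked example (illustrative): extracting element 1 of a v4i32 vector.
// elt_byte = 1 * 32 / 8 = 4, and the i32 preferred slot spans bytes 0-3, so
// the mask's first four bytes become 0x04 0x05 0x06 0x07; the same pattern
// is then repeated across the remaining slots, whose contents are
// don't-cares. The shufb therefore rotates element 1 into the preferred
// slot, where EXTRACT_ELT0 can read it out as a scalar.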

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}
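
// Background (a summary of the SPU ISA, for reference): the c[bhw]d
// instructions that INSERT_MASK selects down to take a register-plus-offset
// address and produce a shuffle control word that steers one byte, halfword
// or word into the slot that address would occupy within a quadword. Adding
// the element index to the 16-byte-aligned $2 supplies those low address
// bits; the resulting mask routes ValOp's element into place while the
// remaining bytes pass VecOp through unchanged.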

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}
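
// Why ROTR/ROTL duplicate the byte before rotating (explanatory note): after
// zero-extending the i8 operand, ExpandArg ORs it with itself shifted left
// by 8, so the 16-bit register holds the byte twice (e.g., 0xAB becomes
// 0xABAB). Because that doubled pattern is periodic with period 8, any
// 16-bit rotate of it leaves the low byte equal to the correspondingly
// rotated 8-bit value, and the final TRUNCATE extracts exactly that byte.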

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    // Same mpyh/mpyh/mpyu decomposition as the v4i32 case in LowerVectorMUL,
    // applied to a single 32-bit product:
    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}
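
/*!
  The i32 accumulation above, written out (for reference): cntb leaves one
  per-byte popcount in each byte; counting 0xF00F0FF0, for instance, gives
  the byte counts 0x04040404. The two shift/add rounds then fold the lanes
  together so the low byte accumulates all four counts (16 here), and the
  final AND with 0xff discards the partial sums left in the upper bytes.
  A standalone scalar model of the same sequence (illustrative only; the
  helper name and the use of GCC's __builtin_popcount are ours, not part of
  this backend):

  \code
  unsigned spu_ctpop32_model(unsigned x) {
    unsigned counts = 0;
    for (int i = 0; i < 4; ++i)        // models cntb: popcount of each byte
      counts |= __builtin_popcount((x >> (8 * i)) & 0xff) << (8 * i);
    counts += counts >> 16;            // fold the upper halfword's counts in
    counts += counts >> 8;             // fold the remaining byte's count in
    return counts & 0xff;              // the low byte now holds the total
  }
  \endcode
*/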

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");
    // Keep an unsupported FDIV from falling through into the CTPOP case
    // when asserts are compiled out:
    return SDOperand();

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shifts and rotates:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters (carried over from the PowerPC backend)
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's local store is 256K, so a valid offset is an 18-bit signed
  // immediate:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}