//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };
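
  // Background note (SPU architecture): registers are 128 bits wide, and a
  // scalar value occupies a fixed "preferred slot" within the register
  // (byte 3 for i8, bytes 2-3 for i16, bytes 0-3 for i32/f32, and so on).
  // prefslot_byte records that slot's byte offset; the load/store lowering
  // below uses it to compute byte-rotation amounts.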

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine
  // when we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
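
  // (All of these classes name the same 128-bit-wide SPU registers; they
  // differ in the value type each carries, not in physical register size.)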

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }
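
  // Loads and stores are custom lowered because the SPU's local store is
  // only quadword (16-byte) addressable: a scalar access must load or store
  // the containing 16-byte chunk and rotate or shuffle the element into or
  // out of position. See LowerLOAD and LowerSTORE below.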

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT for both f32 and f64.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE , (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up
      // in the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }
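
    // Illustrative example: an i32 at byte offset 4 within its 16-byte
    // chunk has c_rotamt = 4 - 0 = 4, so the quadword is rotated left by
    // four bytes to land the word in the preferred slot (bytes 0-3). An
    // i16 at offset 6 likewise yields c_rotamt = 6 - 2 = 4.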

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT,
                               (OpVT == VT ? vecVT : opVecVT), result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts,
                               Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            // A non-extending load has VT == OpVT, and EXTLOAD is handled in
            // the branch above, so only a zero-extending load can reach this
            // point; assert so NewOpC cannot be used uninitialized.
            assert(ExtType == ISD::ZEXTLOAD
                   && "LowerLOAD: unexpected extension type");
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT,
                                       insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer, to
    // force a register load with the address; basep is the actual D-Form
    // address, offs($reg)).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

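    // INSERT_MASK builds a shuffle-control vector from the low four bits of
    // the address (presumably selected to the SPU's cbd/chd/cwd "generate
    // controls for insertion" forms); SHUFB then merges the scalar into the
    // loaded quadword at exactly that byte position.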
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

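  // Two cases apply here: in small-memory mode the constant pool address is
  // assumed to fit in an A-form immediate and is returned directly; in
  // large-memory mode it is built as a Hi/Lo pair (presumably materialized
  // as an immediate-load-high/or-immediate-low sequence) and summed.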
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

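  // Incoming arguments are assigned to the SPU argument registers (see
  // SPURegisterInfo::getArgRegs()) in order; anything past NumArgRegs lives
  // in a fixed stack slot of StackSlotSize bytes. Note that in a vararg
  // function, every formal argument is taken from the stack instead.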
  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.

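  // In other words: the address must be word aligned and fit in 18 signed
  // bits. The returned constant is the address in words, with the two
  // implicit zero bits dropped.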
  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def; not a Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                  /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where only the upper 16 bits of each element are set (i.e., an
/// ILHU candidate), and if so, return the upper-halfword constant.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

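    // Elements are packed big-endian style: element 0 lands in the most
    // significant slot of VectorBits[0]/UndefBits[0].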
    unsigned PartNo = i >= e/2;              // In the upper 64-bit half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1516
1517 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1518 if (MinSplatBits < 64) {
1519
1520 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1521 // undefs.
1522 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1523 if (MinSplatBits < 32) {
1524
          // If the top 16 bits match the lower 16 bits (ignoring undefs),
          // the splat may be narrower still; otherwise we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8 bits match the lower 8 bits (ignoring undefs),
              // we have an 8-bit splat; otherwise an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // We have an 8-bit splat.
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
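
// Worked example (our annotation): Bits128 = { 0x0101010101010101,
// 0x0101010101010101 } with no undefs passes every narrowing check above
// and yields SplatBits = 0x01, SplatSize = 1.  With MinSplatBits = 32 the
// narrowing stops early and we instead report SplatBits = 0x01010101,
// SplatSize = 4.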

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be
      // easily detected as common expressions.  It is not attempting to
      // create highly specialized masks to replace any and all 0's, 0xff's
      // and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands
      // are the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt
        // with a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

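      // Control-byte semantics of shufb, per our reading of the SPU ISA
      // (hedged reference): 10xxxxxx selects the constant 0x00, 110xxxxx
      // selects 0xff, 111xxxxx selects 0x80, and anything else selects byte
      // (c & 0x1f) of the 32-byte concatenation of the two source
      // registers.  Hence 0x80 below produces a zero byte, 0xc0 produces
      // 0xff, and 0xe0 produces 0x80.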
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate.  The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3).  If this is the case, then generate an
/// INSERT_MASK synthetic instruction.  Otherwise, spill V3 to the constant
/// pool.  In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion.  This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be
  // handled using the C*[DX] compute mask instructions, but the vector
  // elements have to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

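  // Illustrative example (our annotation, values invented): for v4i32,
  // V2EltIdx0 == 4, so the mask (0, 1, 5, 3) is monotonic with exactly one
  // element (index 5, i.e. V2's element 1) drawn from V2 and takes the
  // compute-mask path below, while (0, 2, 1, 3) is not monotonic and falls
  // through to the general SHUFB lowering.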
  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the VECTOR_SHUFFLE mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                    // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
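    // Sketch of the arithmetic (our annotation): the SPU only has
    // 16 x 16 -> 32 multiplies, and modulo 2^32,
    //   a * b = ((a_hi * b_lo + b_hi * a_lo) << 16) + a_lo * b_lo
    // MPYH(x, y) computes (x_hi * y_lo) << 16 per element and MPYU computes
    // x_lo * y_lo, so the two adds below accumulate all three partial
    // products.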
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate
  //    results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue.  This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

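    // FSMBI ("form select mask for bytes, immediate" -- our gloss) expands
    // each bit of its 16-bit immediate into a full byte of the result
    // (1 -> 0xff, 0 -> 0x00).  0xcccc == 1100110011001100b therefore yields
    // the byte pattern ff,ff,00,00 in each word: a SELB mask that keeps the
    // high halfword of every 32-bit group.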
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

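    // FSMBI again (see the v8i16 case above): 0x2222 == 0010001000100010b
    // expands to the byte pattern 00,00,ff,00 per word, i.e. a SELB mask
    // selecting the third byte of every word of the intermediate products.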
    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

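  // The math, sketched (our annotation): let r = FPInterp(B, FPRecipEst(B)),
  // an estimate of 1/B.  The code below computes q0 = A * r and returns one
  // Newton-Raphson refinement of the quotient,
  //   q1 = q0 + r * (A - B * q0)
  // which roughly squares the relative error of the reciprocal estimate.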
  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}

// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  default:
    // Guard against reading prefslot_begin/prefslot_end uninitialized:
    assert(0 && "LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  }

  for (int i = 0; i < 16; ++i) {
    // Zero-fill the upper part of the preferred slot; don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

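  // Worked example (our annotation): extracting element 2 of a v4i32 gives
  // elt_byte == 8 and the mask bytes (8, 9, 10, 11) repeated four times, so
  // the SHUFB below moves word 2 into the preferred slot (bytes 0-3), where
  // EXTRACT_ELT0 can read it.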
  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  // Use dyn_cast so the assert below actually checks something:
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);     // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
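    // The trick here (our annotation): ExpandArg replicates the byte into
    // both halves of the i16, so a 16-bit rotate behaves exactly like an
    // 8-bit rotate once the result is truncated back to i8.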
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    break;
  }
  }

  return SDOperand();
}

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

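    // Same partial-product decomposition as the v4i32 case of LowerVectorMUL
    // above (our annotation):
    //   a * b = ((a_hi * b_lo + b_hi * a_lo) << 16) + a_lo * b_lo (mod 2^32)
    // realized as MPYH(a,b) + MPYH(b,a) + MPYU(a,b).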
    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand.  SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16); // popcount can be 16
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

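    // Accumulation sketch (our annotation): CNTB leaves a per-byte popcount
    // b3|b2|b1|b0 in the word.  Adding the word to itself shifted right by
    // 16 and then by 8 folds all four byte counts into the low byte (each
    // partial sum is at most 32, so no byte overflows), and the final AND
    // with 0xff discards the garbage left in the upper bytes.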
    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32),
                  Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");
    break;  // Don't fall through into the CTPOP case in NDEBUG builds.

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);     // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shift left:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: LLVM will generate an undef node if the shift amount is greater
  // than 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)        // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)        // V << 0 -> V: return the value, N0.
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // Constraint letters borrowed from the GCC RS6000 (PowerPC) backend:
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // The SPU's local store is 256KB, so a legal address offset must fit
  // within +/- 2^18:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}