//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }
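
  // Illustrative note: an SPU register is 128 bits wide, and scalar
  // operations read and write a fixed "preferred slot" within that
  // quadword; prefslot_byte above records the byte offset of that slot
  // for each scalar type. For example, with the table as given:
  //
  //   getValueTypeMapEntry(MVT::i32)->prefslot_byte == 0
  //     (an i32 lives in bytes 0..3 of the quadword)
  //   getValueTypeMapEntry(MVT::i16)->prefslot_byte == 2
  //     (an i16 lives in bytes 2..3)
  //
  // The load lowering below rotates a 16-byte chunk so that the addressed
  // element lands in this slot.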

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
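
  // Background sketch (summarizing the SPU ISA from memory; treat the
  // details as assumptions rather than documentation): local-store
  // addresses come in three flavors:
  //
  //   D-form: register + small signed immediate displacement
  //   X-form: register + register
  //   A-form: absolute (immediate) local-store address
  //
  // Nodes accepted by isMemoryOperand() either already carry an address
  // (SPUISD::DFormAddr) or can be materialized directly as an immediate
  // address by the instruction selector.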
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Keep signed divides by powers of two as divides rather than
  // pre-expanding them to shift sequences.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root, so FSQRT is expanded (to a libcall)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }
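
    // Worked example: an i16 load from (base + 6), with prefslot_byte == 2
    // from valtype_map, yields c_offset = 6 and
    // c_rotamt = (6 & 0xf) - 2 = 4. Rotating the containing quadword left
    // by 4 bytes moves bytes 6..7 into bytes 2..3, the i16 preferred slot.
    // A negative c_rotamt is normalized by adding 16 before the rotate is
    // emitted below.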

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
                                  LN->getSrcValueOffset(), OpVT,
                                  LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            assert(ExtType == ISD::ZEXTLOAD
                   && "LowerLOAD: expected SEXTLOAD or ZEXTLOAD");
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
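  // Strategy sketch (illustrative values): a store of an i32 to (base + 4)
  // becomes, roughly,
  //   (1) load the enclosing 16-byte quadword,
  //   (2) build an insertion mask for byte offset 4 (SPUISD::INSERT_MASK),
  //   (3) SHUFB the scalar into bytes 4..7 of the loaded quadword,
  //   (4) store the whole quadword back.
  // The code below emits this load-modify-store sequence.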
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer,
    // to force a register load with the address; basep is the actual
    // dform addr offs($reg).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}
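
// Illustrative DAG shape: when the subtarget reports usingLargeMem(), a
// constant-pool reference CPI is materialized as
//   (add (SPUISD::Lo CPI, 0), (SPUISD::Hi CPI, 0))
// which the instruction selector is expected to fold into an immediate
// halfword pair (presumably an ILHU-style upper load combined with the
// lower halfword); in small-memory mode the bare target constant-pool
// node suffices.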

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
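
// Worked example: lowering the i64 constant 0x123456789abcdef0 produces
//   (SPUISD::EXTRACT_ELT0 (BUILD_VECTOR v2i64 C, C))
// with C = 0x123456789abcdef0. The splatted BUILD_VECTOR can then be
// matched by the vector-constant helpers below (get_vec_i16imm and
// friends), and EXTRACT_ELT0 pulls the scalar back out of the preferred
// slot.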

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
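
// Worked example: Addr = 0x1234 is accepted; its low two bits are zero and
// (0x1234 << 14) >> 14 == 0x1234, so the value survives the 18-bit
// sign-extension round trip, and the returned immediate is
// 0x1234 >> 2 == 0x48d. Addr = 0x1235 is rejected (low bits nonzero), as
// is any address that does not survive the round trip.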

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
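
// Worked example: for ValueType == MVT::i32 and a splat value of
// 0xffff8000 (-32768), SExtValue = ((0xffff8000 & 0xffff) << 16) >> 16
// reproduces 0xffff8000, so the value is accepted as a signed 16-bit
// immediate. A splat of 0x00018000 fails the round trip (it reconstructs
// to 0xffff8000) and is rejected.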

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff      /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where that value has only its upper 16 bits set (making it a
/// candidate for an immediate-load-halfword-upper), and if so, return the
/// constant shifted down into the low 16 bits
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
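
// Worked example: for a v4i32 build_vector
//   { 0x11111111, 0x22222222, undef, 0x44444444 }
// EltBitSize is 32 and the packing above produces
//   VectorBits[0] = 0x1111111122222222   (elements 0 and 1)
//   VectorBits[1] = 0x0000000044444444   (element 3; element 2 is undef)
//   UndefBits[1]  = 0xffffffff00000000   (element 2's lane)
// and the function returns false, since every element was constant or
// undef.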

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

1523 // If the top 16-bits are different than the lower 16-bits, ignoring
1524 // undefs, we have an i32 splat.
1525 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1526 if (MinSplatBits < 16) {
1527 // If the top 8-bits are different than the lower 8-bits, ignoring
1528 // undefs, we have an i16 splat.
1529 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1530 // Otherwise, we have an 8-bit splat.
1531 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1532 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1533 SplatSize = 1;
1534 return true;
1535 }
1536 } else {
1537 SplatBits = Bits16;
1538 SplatUndef = Undef16;
1539 SplatSize = 2;
1540 return true;
1541 }
1542 }
1543 } else {
1544 SplatBits = Bits32;
1545 SplatUndef = Undef32;
1546 SplatSize = 4;
1547 return true;
1548 }
1549 }
1550 } else {
1551 SplatBits = Bits128[0];
1552 SplatUndef = Undef128[0];
1553 SplatSize = 8;
1554 return true;
1555 }
1556 }
1557
1558 return false; // Can't be a splat if two pieces don't match.
1559}
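
//! Worked example for isConstantSplat
/*!
  A minimal standalone sketch (not part of the lowering path) of the halving
  scheme above, with no undef bits: each step ORs the two halves together and
  compares them, so 0x0101...01 folds all the way down to an 8-bit splat.
  \code
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t Bits128[2] = { 0x0101010101010101ULL,
                                  0x0101010101010101ULL };
    assert(Bits128[0] == Bits128[1]);                    // 64-bit halves match

    uint64_t Bits64 = Bits128[0] | Bits128[1];
    uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
    uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);

    assert(uint32_t(Bits64) == uint32_t(Bits64 >> 32));  // 32-bit halves match
    assert(uint16_t(Bits32) == uint16_t(Bits32 >> 16));  // 16-bit halves match
    assert(uint8_t(Bits16) == uint8_t(Bits16 >> 8));     // 8-bit halves match

    uint8_t SplatBits = uint8_t(Bits16);
    assert(SplatBits == 0x01);           // SplatBits = 0x01, SplatSize = 1
    return 0;
  }
  \endcode
*/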

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
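
//! Sketch of the shufb control bytes used in the v2i64 case above
/*!
  A minimal standalone sketch (not part of the lowering path), assuming the
  documented shufb semantics: a control byte in 0x00-0x1F selects a byte from
  the 32-byte concatenation of the two source vectors, while the special
  patterns 0x80, 0xC0 and 0xE0 produce the constants 0x00, 0xFF and 0x80.
  \code
  #include <cassert>
  #include <cstdint>

  // One byte of a shufb-style shuffle, per the special patterns above.
  static uint8_t shufb_byte(uint8_t ctl,
                            const uint8_t a[16], const uint8_t b[16]) {
    if ((ctl & 0xc0) == 0x80) return 0x00;   // 10xxxxxx -> 0x00
    if ((ctl & 0xe0) == 0xc0) return 0xff;   // 110xxxxx -> 0xFF
    if ((ctl & 0xe0) == 0xe0) return 0x80;   // 111xxxxx -> 0x80
    unsigned idx = ctl & 0x1f;               // otherwise select from a:b
    return idx < 16 ? a[idx] : b[idx - 16];
  }

  int main() {
    uint8_t a[16], b[16];
    for (int i = 0; i < 16; ++i) { a[i] = i; b[i] = 16 + i; }
    assert(shufb_byte(0x80, a, b) == 0x00);
    assert(shufb_byte(0xc0, a, b) == 0xff);
    assert(shufb_byte(0xe0, a, b) == 0x80);
    assert(shufb_byte(0x03, a, b) == 3);     // byte 3 of the first input
    assert(shufb_byte(0x13, a, b) == 19);    // byte 3 of the second input
    return 0;
  }
  \endcode
*/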

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
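
//! Worked example of the per-byte mask expansion above
/*!
  A minimal standalone sketch (not part of the lowering path): for a v4i32
  shuffle mask (5, 1, 2, 3), each 4-byte element index expands to four byte
  indices SrcElt*4+j, giving the 16 shufb control bytes
  20,21,22,23, 4,5,6,7, 8,9,10,11, 12,13,14,15.
  \code
  #include <cassert>
  #include <cstdint>
  #include <vector>

  int main() {
    const unsigned PermMask[4] = { 5, 1, 2, 3 };  // element 0 comes from V2
    const unsigned BytesPerElement = 4;           // v4i32

    std::vector<uint8_t> ResultMask;
    for (unsigned i = 0; i != 4; ++i)
      for (unsigned j = 0; j != BytesPerElement; ++j)
        ResultMask.push_back(PermMask[i] * BytesPerElement + j);

    assert(ResultMask[0] == 20 && ResultMask[3] == 23);   // bytes of elt 5
    assert(ResultMask[4] == 4  && ResultMask[15] == 15);  // identity elsewhere
    return 0;
  }
  \endcode
*/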

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);     // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off the upper 16 bits of the 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
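
//! Sketch of the mpyh/mpyu decomposition used above
/*!
  A minimal standalone sketch (not part of the lowering path) of the identity
  behind the v4i32 (and scalar i32) lowering: SPU only multiplies 16-bit
  halfwords, so a full 32-bit product is assembled as
  mpyh(a,b) + mpyh(b,a) + mpyu(a,b), all modulo 2^32.
  \code
  #include <cassert>
  #include <cstdint>

  static uint32_t mpyh(uint32_t a, uint32_t b) {
    // high halfword of a times low halfword of b, shifted left 16
    return ((a >> 16) * (b & 0xffff)) << 16;
  }

  static uint32_t mpyu(uint32_t a, uint32_t b) {
    // low halfword of a times low halfword of b (full 32-bit product)
    return (a & 0xffff) * (b & 0xffff);
  }

  int main() {
    uint32_t a = 0x12345678, b = 0x9abcdef0;
    // (aH*bL + bH*aL) << 16 plus aL*bL equals a*b (mod 2^32); the aH*bH
    // term would land in bits 32..63 and drops out.
    assert(mpyh(a, b) + mpyh(b, a) + mpyu(a, b) == a * b);
    return 0;
  }
  \endcode
*/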

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  //   (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
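
//! Sketch of the reciprocal-estimate division scheme above
/*!
  A minimal standalone sketch (not part of the lowering path), assuming only
  that FPRecipEst/FPInterp together yield an approximate reciprocal x of B.
  The emitted DAG computes q = A*x and then refines it with the residual
  term q + x*(A - B*q), which cancels most of the estimate's error.
  \code
  #include <cassert>
  #include <cmath>

  int main() {
    float A = 355.0f, B = 113.0f;
    float x = 1.0f / B;
    x = x + x * 1e-3f;              // stand-in for an imprecise estimate

    float q = A * x;                // first approximation of A/B
    float r = q + x * (A - B * q);  // one refinement step, as in the DAG

    assert(std::fabs(r - A / B) < std::fabs(q - A / B));  // refined is closer
    return 0;
  }
  \endcode
*/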

// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}
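
//! Worked example of the extraction shuffle mask above
/*!
  A minimal standalone sketch (not part of the lowering path): extracting
  element 2 of a v4i32 gives elt_byte = 8 and preferred slot bytes 0..3, so
  the first four control bytes are 8,9,10,11 (the wanted word moved into the
  preferred slot) and the pattern repeats across the remaining slots.
  \code
  #include <cassert>

  int main() {
    const int EltNo = 2, EltBits = 32;
    const int elt_byte = EltNo * EltBits / 8;        // 8
    const int prefslot_begin = 0, prefslot_end = 3;  // i32 preferred slot

    unsigned ShufMask[16];
    for (int i = 0; i < 16; ++i) {
      if (i <= prefslot_end)
        ShufMask[i] = (i < prefslot_begin)
                        ? 0x80
                        : elt_byte + (i - prefslot_begin);
      else
        ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
    }

    assert(ShufMask[0] == 8 && ShufMask[3] == 11);   // bytes of element 2
    assert(ShufMask[4] == ShufMask[0]);              // pattern repeats
    return 0;
  }
  \endcode
*/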

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}
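
//! Sketch of the 8-bit rotate trick above
/*!
  A minimal standalone sketch (not part of the lowering path) of the
  ROTL/ROTR expansion: the byte is zero-extended to 16 bits, OR'd with
  itself shifted left 8 (two adjacent copies), rotated as an i16, and
  truncated. Because the doubled pattern has period 8, a 16-bit rotate of
  it is an 8-bit rotate.
  \code
  #include <cassert>
  #include <cstdint>

  static uint16_t rotl16(uint16_t v, unsigned amt) {
    amt &= 15;
    return amt ? uint16_t((v << amt) | (v >> (16 - amt))) : v;
  }

  static uint8_t rotl8_via_i16(uint8_t v, unsigned amt) {
    uint16_t expand = uint16_t(v | (uint16_t(v) << 8));  // two copies
    return uint8_t(rotl16(expand, amt));                 // truncate
  }

  static uint8_t rotl8_ref(uint8_t v, unsigned amt) {    // reference rotate
    amt &= 7;
    return amt ? uint8_t((v << amt) | (v >> (8 - amt))) : v;
  }

  int main() {
    for (unsigned v = 0; v < 256; ++v)
      for (unsigned amt = 0; amt < 8; ++amt)
        assert(rotl8_via_i16(uint8_t(v), amt) == rotl8_ref(uint8_t(v), amt));
    return 0;
  }
  \endcode
*/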

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    // The accumulated count can be as large as 16, which needs 5 bits:
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which the virtual register
    // CNTB_reg becomes associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}
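
//! Worked example of the CNTB accumulation above
/*!
  A minimal standalone sketch (not part of the lowering path): cntb leaves
  one per-byte population count in each byte of the word, and the i32 case
  folds them with two shift-and-add steps before masking, since the total
  (at most 32) fits in the low byte.
  \code
  #include <cassert>
  #include <cstdint>

  // Per-byte popcount, packed into the corresponding bytes (cntb-like).
  static uint32_t cntb(uint32_t x) {
    uint32_t r = 0;
    for (int byte = 0; byte < 4; ++byte) {
      uint32_t b = (x >> (8 * byte)) & 0xff, n = 0;
      for (int bit = 0; bit < 8; ++bit)
        n += (b >> bit) & 1;
      r |= n << (8 * byte);
    }
    return r;
  }

  int main() {
    uint32_t x = 0xf0f0ffffu;                  // popcount 24
    uint32_t counts = cntb(x);                 // 0x04040808
    uint32_t sum1 = (counts >> 16) + counts;   // fold upper halfword in
    uint32_t sum2 = (sum1 >> 8) + sum1;        // fold remaining byte in
    assert((sum2 & 0xff) == 24);               // total in the low byte
    return 0;
  }
  \endcode
*/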

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shifts and rotates:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: LLVM will generate an undef node if the shift amount is greater
  // than 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 constraint letters (carried over from the PowerPC port)
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's local store is 256K, so D-form offsets must fit in 18 bits:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
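
//! Sketch of the 18-bit offset range check above
/*!
  A minimal standalone sketch (not part of the lowering path): with the
  bounds above, the accepted offsets are the open interval
  (-262144, 262143), i.e. -262143 through 262142 inclusive.
  \code
  #include <cassert>

  static bool isLegalOffset(long long V) {
    return V > -(1 << 18) && V < (1 << 18) - 1;
  }

  int main() {
    assert(isLegalOffset(-262143) && !isLegalOffset(-262144));
    assert(isLegalOffset(262142) && !isLegalOffset(262143));
    return 0;
  }
  \endcode
*/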

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}