//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation.
//
// See README.txt for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };
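
  // The "preferred slot" is the portion of the 16-byte SPU register in which
  // a scalar of a given type naturally lives: the leftmost 4-byte word for
  // 32-bit and wider values, right-justified within that word for narrower
  // ones. prefslot_byte records the slot's starting byte offset, e.g.:
  //
  //   byte:  0  1  2  3  4 ... 15
  //   i8:            [x]          (prefslot_byte == 3)
  //   i16:        [x  x]          (prefslot_byte == 2)
  //   i32:   [x  x  x  x]         (prefslot_byte == 0)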

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Signed division by a power of two is considered cheap; don't expand it
  // into shift sequences:
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign- or zero-extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root, so expand FSQRT:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
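  // (A sketch of the resulting expansion: with the i1 held in an i32
  // register, sext_inreg(x, i1) becomes the shift pair (x << 31) >>s 31,
  // replicating the low bit across the word.)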

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
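//
// A sketch of the aligned case handled below: an i32 load from (base + 4)
// loads the whole 16-byte chunk at base, rotates it left by
// (offset & 0xf) - prefslot_byte = 4 - 0 = 4 bytes so the requested word
// lands in the preferred slot, then extracts the scalar with
// SPUISD::EXTRACT_ELT0(_CHAINED). For an i16 at (base + 6), the rotate
// amount is likewise 6 - 2 = 4 bytes, since an i16's preferred slot starts
// at byte 2.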
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else if (ExtType == ISD::ZEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          } else {
            assert(0 && "LowerLOAD: unexpected extension type");
            NewOpC = 0;
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
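//
// A sketch of the sequence constructed below: an i8 store to (base + 5)
// loads the enclosing 16-byte chunk, builds a shuffle control mask targeting
// byte 5 with SPUISD::INSERT_MASK, splices the new byte into the chunk with
// SPUISD::SHUFB, and stores the merged 16 bytes back: a read-modify-write of
// the containing quadword.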
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  unsigned alignment = SN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address: ptrOp forces the address into
    // a register, while basep is the actual D-form address, offs($reg).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
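//
// For example, an i64 constant K is materialized by building the v2i64
// splat <K, K> with BUILD_VECTOR and then extracting element 0 via
// SPUISD::EXTRACT_ELT0, which is exactly what the code below emits.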
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
 "float" immediates can be lowered as if they were unsigned 32-bit integers.
 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 target description.
 */
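//
// For example, the f32 immediate 1.0f corresponds to the bit pattern
// 0x3F800000, which the SPUISD::SFPConstant node materializes directly
// instead of going through a constant-pool load.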
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
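/// For example, an absolute address of 1024 (word-aligned and representable
/// in 18 signed bits) yields the immediate 256 (1024 >> 2); an address with
/// a nonzero low 2 bits, or one too large for 18 signed bits, returns null.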
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);  // Returns a chain
  NodeTys.push_back(MVT::Flag);   // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
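
// For example, getVecImm accepts the v4i32 vector <7, undef, 7, 7> and
// returns the ConstantSDNode for 7; it rejects <7, 8, 7, 7> (two distinct
// values), and an all-undef vector yields null so it can become an implicit
// def instead.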

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
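
// The shift pairs above implement "fits in a signed 16-bit immediate": for
// i32, ((Value & 0xffff) << 16) >> 16 sign-extends the low halfword, so
// Value == -32768 (0xffff8000) survives the round trip and is accepted,
// while Value == 0x18000 does not and is rejected.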

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where the value's lower 16 bits are zero (i.e., it is a halfword
/// shifted into the upper 16 bits), and if so, return the upper halfword as
/// the constant
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}
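
// For example, a v4i32 splat of 0x00120000 passes the mask test above and
// yields the immediate 0x0012, i.e., the halfword that an ILHU (immediate
// load halfword upper) places in the upper 16 bits of each word.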

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2; // In the upper 64 bits of the 128-bit value?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
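
// For example, the v4i32 build_vector <A, B, C, D> packs in big-endian
// element order as VectorBits[0] == (A << 32) | B and
// VectorBits[1] == (C << 32) | D, with the corresponding bits of UndefBits
// set wherever an element is undef.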
1497
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits,
      // ignoring undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // To narrow below 32 bits, the top 16-bits must also match the
          // lower 16-bits, ignoring undefs.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // Likewise, to narrow to 8 bits the top 8-bits must match the
              // lower 8-bits, ignoring undefs.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                // The two bytes agree: an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

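// Worked example (illustrative): a v4i32 build vector of 0x01010101 yields
// Bits128[0] == Bits128[1] == 0x0101010101010101 with no undefs; the halves
// match at every width, so with MinSplatBits == 8 the routine reports
// SplatBits == 0x01, SplatSize == 1.  A model of the undef-free narrowing,
// where bits is one 64-bit half of the vector (helper name hypothetical):
#if 0
static bool is_splat8_model(uint64_t bits) {
  if (uint32_t(bits) != uint32_t(bits >> 32)) return false; // 32-bit halves
  uint32_t b32 = uint32_t(bits);
  if (uint16_t(b32) != uint16_t(b32 >> 16)) return false;   // 16-bit halves
  uint16_t b16 = uint16_t(b32);
  return uint8_t(b16) == uint8_t(b16 >> 8);                 // bytes
}
#endif
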
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
    // Don't silently fall through into the v4f32 case for a value type we
    // don't recognize:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, got "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

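// Worked example (illustrative): splatting the v2i64 constant
// 0x00000000_12345678.  The upper word is the special pattern 0, so only the
// lower word needs a register (LO32 splats 0x12345678 and HI32 aliases it).
// The byte mask then comes out, word by word, as
//   i = 0: 0x80 0x80 0x80 0x80   (SHUFB emits 0x00 for mask byte 0x80)
//   i = 1: 0x14 0x15 0x16 0x17   (bytes 4..7 of the second operand, LO32)
//   i = 2: 0x80 0x80 0x80 0x80
//   i = 3: 0x1c 0x1d 0x1e 0x1f   (bytes 12..15 of LO32)
// so a single SHUFB materializes the constant without a constant-pool load.
// A model of the mask computation for this upper_special case:
#if 0
static void v2i64_mask_model(unsigned char mask[16]) {
  for (int i = 0; i < 4; ++i)        // the four 32-bit words of the result
    for (int j = 0; j < 4; ++j)
      mask[i*4+j] = ((i & 1) == 0 ? 0x80              // upper half: zero byte
                                  : i * 4 + j + 16);  // lower half: from LO32
}
#endif
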
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate.  The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3).  If this is the case, then generate an
/// INSERT_MASK synthetic instruction.  Otherwise, spill V3 to the constant
/// pool.  In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion.  This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}

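// Illustrative sketch (not part of the build; helper name hypothetical) of
// the general-case byte expansion above for a v4i32 shuffle.  Element indices
// 4..7 refer to V2, whose bytes occupy positions 16..31 of the concatenated
// (V1, V2) pair that SHUFB indexes:
#if 0
static void expand_mask_v4i32_model(const unsigned elt[4],
                                    unsigned char bytes[16]) {
  const unsigned BytesPerElement = 4;
  for (unsigned i = 0; i != 4; ++i)
    for (unsigned j = 0; j != BytesPerElement; ++j)
      bytes[i*BytesPerElement + j] = elt[i]*BytesPerElement + j;
  // e.g. elt = {0, 1, 6, 3} yields bytes 0..3, 4..7, 24..27, 12..15.
}
#endif
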
static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

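// Reference models (illustrative, not part of the build; helper names are
// hypothetical) for the element-wise results the v8i16 and v16i8 lowerings
// above must produce.  The DAG plumbing is intricate, but the net effect per
// element is a plain multiply modulo the element width: MPY/MPYHH supply
// 16-bit partial products and SELB merges them under an FSMBI byte mask.
#if 0
static void mul_v8i16_model(const uint16_t a[8], const uint16_t b[8],
                            uint16_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = (uint16_t)(a[i] * b[i]);   // product modulo 2^16
}

static void mul_v16i8_model(const uint8_t a[16], const uint8_t b[16],
                            uint8_t out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = (uint8_t)(a[i] * b[i]);    // product modulo 2^8
}
#endif
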
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}

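// Illustrative scalar model (not part of the build; helper name hypothetical)
// of the f32 division above: a reciprocal estimate followed by one
// Newton-Raphson-style refinement.  Here 1.0f/B stands in for the
// FPRecipEst/FPInterp pair, which only approximates the reciprocal.
#if 0
static float fdiv_model(float A, float B) {
  float r  = 1.0f / B;            // BRcpl: refined reciprocal estimate of B
  float q0 = A * r;               // AxBRcpl: first quotient approximation
  return q0 + r * (A - B * q0);   // correct q0 by its scaled residual
}
#endif
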
// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}

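// Worked example (illustrative; helper name hypothetical): extracting element
// 3 of a v8i16.  Then elt_byte = 6 and the preferred slot spans bytes 2..3,
// so the loop above yields the repeating mask {0x80, 0x80, 6, 7, ...}: mask
// byte 0x80 makes SHUFB emit a zero byte, while bytes 6..7 route the element
// into the preferred slot that EXTRACT_ELT0 reads.
#if 0
static void extract_mask_i16_elt3_model(unsigned mask[16]) {
  for (int i = 0; i < 16; ++i) {
    int s = i % 4;                    // the pattern repeats every 4 bytes
    mask[i] = (s < 2) ? 0x80          // zero-fill above the preferred slot
                      : 6 + (s - 2);  // source bytes 6..7 of element 3
  }
}
#endif
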
static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  // dyn_cast, not cast, so that the assert below actually checks something:
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

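// Sketch (illustrative) of why the address computation above suffices: the
// C*D instructions that INSERT_MASK selects to derive their insertion control
// word from the low bits of the address operand, so adding the element index
// to the 16-byte-aligned $2 steers the mask to the intended slot.  In LLVM IR
// terms the node being lowered here is simply:
#if 0
  %r = insertelement <4 x i32> %v, i32 %s, i32 2   ; VecOp, ValOp, IdxOp = 2
#endif
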
static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}

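// Illustrative model (not part of the build; helper name hypothetical) of the
// ROTL/ROTR expansion above: replicating the byte into both halves of an i16
// makes a 16-bit rotate yield the 8-bit rotate in the low byte, for rotate
// counts 0..7.
#if 0
static uint8_t rotl8_model(uint8_t x, unsigned n) {    // n in [0, 7]
  unsigned doubled = x | (x << 8);                     // ExpandArg
  unsigned rot = ((doubled << n) | (doubled >> (16 - n))) & 0xffff;
  return (uint8_t)rot;                                 // TRUNCATE to i8
}
#endif
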
//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

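// Illustrative model (not part of the build; helper name hypothetical) of the
// decomposition used both here and in the v4i32 case of LowerVectorMUL: SPU
// multiplies 16-bit halves, so a full 32-bit product is assembled from three
// partial products.
#if 0
static uint32_t mul32_model(uint32_t a, uint32_t b) {
  uint32_t lo = (a & 0xffff) * (b & 0xffff);        // MPYU: lo(a) * lo(b)
  uint32_t h1 = ((a >> 16) * (b & 0xffff)) << 16;   // MPYH: hi(a) * lo(b)
  uint32_t h2 = ((b >> 16) * (a & 0xffff)) << 16;   // MPYH: hi(b) * lo(a)
  // hi(a)*hi(b) only affects bits >= 32, so it is dropped modulo 2^32:
  return h1 + h2 + lo;
}
#endif
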
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    // The popcount of an i16 can be as large as 16, which needs 5 bits; a
    // 0x0f mask would truncate a count of 16 to 0:
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which the virtual register CNTB_reg
    // becomes associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}

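// Illustrative model (not part of the build; helper name hypothetical) of the
// i32 path above: CNTB leaves one population count per byte, then two
// shift/add rounds fold the four counts together and the mask keeps the
// total (at most 32).
#if 0
static uint32_t ctpop32_model(uint32_t x) {
  // Per-byte population counts, standing in for CNTB:
  uint32_t cnt = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned c = 0;
    for (int b = 0; b < 8; ++b)
      c += (x >> (8*i + b)) & 1;
    cnt |= c << (8*i);
  }
  cnt += cnt >> 16;        // fold the upper halfword's counts into the lower
  cnt += cnt >> 8;         // fold the remaining two byte counts
  return cnt & 0xff;       // the total fits in one byte
}
#endif
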
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shift left:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V (the shifted operand,
          return N0;                    // not the zero shift amount)
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // The SPU's local store is 256K, so D-form offsets must fit in an 18-bit
  // signed immediate:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}