Blame - lib/Target/CellSPU/SPUISelLowering.cpp - fp2-dev/platform/external/llvm

blob: 6b6af64baf6b1f6ecaed849920442924e2612c26 [file] [log] [blame]

Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	1	//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by a team from the Computer Systems Research
Scott Michel	2466c37	2007-12-05 01:40:25 +0000	[diff] [blame]	6	// Department at The Aerospace Corporation and is distributed under the
				7	// University of Illinois Open Source License. See LICENSE.TXT for details.
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	8	//
				9	//===----------------------------------------------------------------------===//
				10	//
				11	// This file implements the SPUTargetLowering class.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "SPURegisterNames.h"
				16	#include "SPUISelLowering.h"
				17	#include "SPUTargetMachine.h"
				18	#include "llvm/ADT/VectorExtras.h"
				19	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				20	#include "llvm/CodeGen/CallingConvLower.h"
				21	#include "llvm/CodeGen/MachineFrameInfo.h"
				22	#include "llvm/CodeGen/MachineFunction.h"
				23	#include "llvm/CodeGen/MachineInstrBuilder.h"
				24	#include "llvm/CodeGen/SelectionDAG.h"
				25	#include "llvm/CodeGen/SSARegMap.h"
				26	#include "llvm/Constants.h"
				27	#include "llvm/Function.h"
				28	#include "llvm/Intrinsics.h"
				29	#include "llvm/Support/Debug.h"
				30	#include "llvm/Support/MathExtras.h"
				31	#include "llvm/Target/TargetOptions.h"
				32
				33	#include <map>
				34
				35	using namespace llvm;
				36
				37	// Used in getTargetNodeName() below
				38	namespace {
				39	std::map<unsigned, const char *> node_names;
				40
				41	//! MVT::ValueType mapping to useful data for Cell SPU
				42	struct valtype_map_s {
				43	const MVT::ValueType valtype;
				44	const int prefslot_byte;
				45	};
				46
				47	const valtype_map_s valtype_map[] = {
				48	{ MVT::i1, 3 },
				49	{ MVT::i8, 3 },
				50	{ MVT::i16, 2 },
				51	{ MVT::i32, 0 },
				52	{ MVT::f32, 0 },
				53	{ MVT::i64, 0 },
				54	{ MVT::f64, 0 },
				55	{ MVT::i128, 0 }
				56	};
				57
				58	const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
				59
				60	const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
				61	const valtype_map_s *retval = 0;
				62
				63	for (size_t i = 0; i < n_valtype_map; ++i) {
				64	if (valtype_map[i].valtype == VT) {
				65	retval = valtype_map + i;
				66	break;
				67	}
				68	}
				69
				70	#ifndef NDEBUG
				71	if (retval == 0) {
				72	cerr << "getValueTypeMapEntry returns NULL for "
				73	<< MVT::getValueTypeString(VT)
				74	<< "\n";
				75	abort();
				76	}
				77	#endif
				78
				79	return retval;
				80	}
				81
				82	//! Predicate that returns true if operand is a memory target
				83	/*!
				84	\arg Op Operand to test
				85	\return true if the operand is a memory target (i.e., global
				86	address, external symbol, constant pool) or an existing D-Form
				87	address.
				88	*/
				89	bool isMemoryOperand(const SDOperand &Op)
				90	{
				91	const unsigned Opc = Op.getOpcode();
				92	return (Opc == ISD::GlobalAddress
				93	\|\| Opc == ISD::GlobalTLSAddress
				94	\|\| Opc == ISD::FrameIndex
				95	\|\| Opc == ISD::JumpTable
				96	\|\| Opc == ISD::ConstantPool
				97	\|\| Opc == ISD::ExternalSymbol
				98	\|\| Opc == ISD::TargetGlobalAddress
				99	\|\| Opc == ISD::TargetGlobalTLSAddress
				100	\|\| Opc == ISD::TargetFrameIndex
				101	\|\| Opc == ISD::TargetJumpTable
				102	\|\| Opc == ISD::TargetConstantPool
				103	\|\| Opc == ISD::TargetExternalSymbol
				104	\|\| Opc == SPUISD::DFormAddr);
				105	}
				106	}
				107
				108	SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
				109	: TargetLowering(TM),
				110	SPUTM(TM)
				111	{
				112	// Fold away setcc operations if possible.
				113	setPow2DivIsCheap();
				114
				115	// Use _setjmp/_longjmp instead of setjmp/longjmp.
				116	setUseUnderscoreSetJmp(true);
				117	setUseUnderscoreLongJmp(true);
				118
				119	// Set up the SPU's register classes:
				120	// NOTE: i8 register class is not registered because we cannot determine when
				121	// we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel	504c369	2007-12-17 22:32:34 +0000	[diff] [blame]	122	// NOTE: Ignore the previous note. For now. :-)
				123	addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
				124	addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
				125	addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
				126	addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
				127	addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
				128	addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	129	addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
				130
				131	// SPU has no sign or zero extended loads for i1, i8, i16:
				132	setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
				133	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
				134	setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
				135	setStoreXAction(MVT::i1, Custom);
				136
				137	setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
				138	setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
				139	setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
				140	setStoreXAction(MVT::i8, Custom);
				141
				142	setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
				143	setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
				144	setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
				145
				146	// SPU constant load actions are custom lowered:
				147	setOperationAction(ISD::Constant, MVT::i64, Custom);
				148	setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
				149	setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
				150
				151	// SPU's loads and stores have to be custom lowered:
				152	for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
				153	++sctype) {
				154	setOperationAction(ISD::LOAD, sctype, Custom);
				155	setOperationAction(ISD::STORE, sctype, Custom);
				156	}
				157
				158	// SPU supports BRCOND, although DAGCombine will convert BRCONDs
				159	// into BR_CCs. BR_CC instructions are custom selected in
				160	// SPUDAGToDAGISel.
				161	setOperationAction(ISD::BRCOND, MVT::Other, Legal);
				162
				163	// Expand the jumptable branches
				164	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
				165	setOperationAction(ISD::BR_CC, MVT::Other, Expand);
				166	setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
				167
				168	// SPU has no intrinsics for these particular operations:
				169	setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
				170	setOperationAction(ISD::MEMSET, MVT::Other, Expand);
				171	setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
				172
				173	// PowerPC has no SREM/UREM instructions
				174	setOperationAction(ISD::SREM, MVT::i32, Expand);
				175	setOperationAction(ISD::UREM, MVT::i32, Expand);
				176	setOperationAction(ISD::SREM, MVT::i64, Expand);
				177	setOperationAction(ISD::UREM, MVT::i64, Expand);
				178
				179	// We don't support sin/cos/sqrt/fmod
				180	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				181	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				182	setOperationAction(ISD::FREM , MVT::f64, Expand);
				183	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				184	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				185	setOperationAction(ISD::FREM , MVT::f32, Expand);
				186
				187	// If we're enabling GP optimizations, use hardware square root
				188	setOperationAction(ISD::FSQRT, MVT::f64, Expand);
				189	setOperationAction(ISD::FSQRT, MVT::f32, Expand);
				190
				191	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				192	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				193
				194	// SPU can do rotate right and left, so legalize it... but customize for i8
				195	// because instructions don't exist.
				196	setOperationAction(ISD::ROTR, MVT::i32, Legal);
				197	setOperationAction(ISD::ROTR, MVT::i16, Legal);
				198	setOperationAction(ISD::ROTR, MVT::i8, Custom);
				199	setOperationAction(ISD::ROTL, MVT::i32, Legal);
				200	setOperationAction(ISD::ROTL, MVT::i16, Legal);
				201	setOperationAction(ISD::ROTL, MVT::i8, Custom);
				202	// SPU has no native version of shift left/right for i8
				203	setOperationAction(ISD::SHL, MVT::i8, Custom);
				204	setOperationAction(ISD::SRL, MVT::i8, Custom);
				205	setOperationAction(ISD::SRA, MVT::i8, Custom);
				206
				207	// Custom lower i32 multiplications
				208	setOperationAction(ISD::MUL, MVT::i32, Custom);
				209
				210	// Need to custom handle (some) common i8 math ops
				211	setOperationAction(ISD::SUB, MVT::i8, Custom);
				212	setOperationAction(ISD::MUL, MVT::i8, Custom);
				213
				214	// SPU does not have BSWAP. It does have i32 support CTLZ.
				215	// CTPOP has to be custom lowered.
				216	setOperationAction(ISD::BSWAP, MVT::i32, Expand);
				217	setOperationAction(ISD::BSWAP, MVT::i64, Expand);
				218
				219	setOperationAction(ISD::CTPOP, MVT::i8, Custom);
				220	setOperationAction(ISD::CTPOP, MVT::i16, Custom);
				221	setOperationAction(ISD::CTPOP, MVT::i32, Custom);
				222	setOperationAction(ISD::CTPOP, MVT::i64, Custom);
				223
				224	setOperationAction(ISD::CTTZ , MVT::i32, Expand);
				225	setOperationAction(ISD::CTTZ , MVT::i64, Expand);
				226
				227	setOperationAction(ISD::CTLZ , MVT::i32, Legal);
				228
				229	// SPU does not have select or setcc
				230	setOperationAction(ISD::SELECT, MVT::i1, Expand);
				231	setOperationAction(ISD::SELECT, MVT::i8, Expand);
				232	setOperationAction(ISD::SELECT, MVT::i16, Expand);
				233	setOperationAction(ISD::SELECT, MVT::i32, Expand);
				234	setOperationAction(ISD::SELECT, MVT::i64, Expand);
				235	setOperationAction(ISD::SELECT, MVT::f32, Expand);
				236	setOperationAction(ISD::SELECT, MVT::f64, Expand);
				237
				238	setOperationAction(ISD::SETCC, MVT::i1, Expand);
				239	setOperationAction(ISD::SETCC, MVT::i8, Expand);
				240	setOperationAction(ISD::SETCC, MVT::i16, Expand);
				241	setOperationAction(ISD::SETCC, MVT::i32, Expand);
				242	setOperationAction(ISD::SETCC, MVT::i64, Expand);
				243	setOperationAction(ISD::SETCC, MVT::f32, Expand);
				244	setOperationAction(ISD::SETCC, MVT::f64, Expand);
				245
				246	// SPU has a legal FP -> signed INT instruction
				247	setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
				248	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				249	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
				250	setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
				251
				252	// FDIV on SPU requires custom lowering
				253	setOperationAction(ISD::FDIV, MVT::f32, Custom);
				254	//setOperationAction(ISD::FDIV, MVT::f64, Custom);
				255
				256	// SPU has [U\|S]INT_TO_FP
				257	setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
				258	setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
				259	setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
				260	setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
				261	setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
				262	setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
				263	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				264	setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
				265
Scott Michel	86c041f	2007-12-20 00:44:13 +0000	[diff] [blame]	266	setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
				267	setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
				268	setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
				269	setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	270
				271	// We cannot sextinreg(i1). Expand to shifts.
				272	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
				273
				274	// Support label based line numbers.
				275	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				276	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				277
				278	// We want to legalize GlobalAddress and ConstantPool nodes into the
				279	// appropriate instructions to materialize the address.
				280	setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
				281	setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
				282	setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
				283	setOperationAction(ISD::JumpTable, MVT::i32, Custom);
				284	setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
				285	setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
				286	setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
				287	setOperationAction(ISD::JumpTable, MVT::i64, Custom);
				288
				289	// RET must be custom lowered, to meet ABI requirements
				290	setOperationAction(ISD::RET, MVT::Other, Custom);
				291
				292	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				293	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				294
				295	// Use the default implementation.
				296	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				297	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				298	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				299	setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
				300	setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
				301	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
				302	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
				303
				304	// Cell SPU has instructions for converting between i64 and fp.
				305	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				306	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				307
				308	// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
				309	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
				310
				311	// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
				312	setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
				313
				314	// First set operation action for all vector types to expand. Then we
				315	// will selectively turn on ones that can be effectively codegen'd.
				316	addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
				317	addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
				318	addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
				319	addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
				320	addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
				321	addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
				322
				323	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				324	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				325	// add/sub are legal for all supported vector VT's.
				326	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
				327	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
				328	// mul has to be custom lowered.
				329	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
				330
				331	setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
				332	setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
				333	setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
				334	setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
				335	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
				336	setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
				337
				338	// These operations need to be expanded:
				339	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				340	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				341	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				342	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				343	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
				344
				345	// Custom lower build_vector, constant pool spills, insert and
				346	// extract vector elements:
				347	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				348	setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
				349	setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
				350	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				351	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				352	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				353	}
				354
				355	setOperationAction(ISD::MUL, MVT::v16i8, Custom);
				356	setOperationAction(ISD::AND, MVT::v16i8, Custom);
				357	setOperationAction(ISD::OR, MVT::v16i8, Custom);
				358	setOperationAction(ISD::XOR, MVT::v16i8, Custom);
				359	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
				360
				361	setSetCCResultType(MVT::i32);
				362	setShiftAmountType(MVT::i32);
				363	setSetCCResultContents(ZeroOrOneSetCCResult);
				364
				365	setStackPointerRegisterToSaveRestore(SPU::R1);
				366
				367	// We have target-specific dag combine patterns for the following nodes:
				368	// e.g., setTargetDAGCombine(ISD::SUB);
				369
				370	computeRegisterProperties();
				371	}
				372
				373	const char *
				374	SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
				375	{
				376	if (node_names.empty()) {
				377	node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
				378	node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
				379	node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
				380	node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
				381	node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
				382	node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
				383	node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
				384	node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
				385	node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
				386	node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
				387	node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
				388	node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
				389	node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
				390	node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
				391	node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
				392	node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
				393	node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
				394	node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
				395	node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
				396	node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
				397	node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
				398	node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
				399	node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
				400	node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
				401	node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
				402	node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
				403	node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
				404	node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
				405	"SPUISD::ROTBYTES_RIGHT_Z";
				406	node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
				407	"SPUISD::ROTBYTES_RIGHT_S";
				408	node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
				409	node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
				410	"SPUISD::ROTBYTES_LEFT_CHAINED";
				411	node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
				412	node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
				413	node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
				414	node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
				415	node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
				416	node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
				417	}
				418
				419	std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
				420
				421	return ((i != node_names.end()) ? i->second : 0);
				422	}
				423
				424	//===----------------------------------------------------------------------===//
				425	// Calling convention code:
				426	//===----------------------------------------------------------------------===//
				427
				428	#include "SPUGenCallingConv.inc"
				429
				430	//===----------------------------------------------------------------------===//
				431	// LowerOperation implementation
				432	//===----------------------------------------------------------------------===//
				433
				434	/// Custom lower loads for CellSPU
				435	/*!
				436	All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
				437	within a 16-byte block, we have to rotate to extract the requested element.
				438	*/
				439	static SDOperand
				440	LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				441	LoadSDNode *LN = cast<LoadSDNode>(Op);
				442	SDOperand basep = LN->getBasePtr();
				443	SDOperand the_chain = LN->getChain();
Scott Michel	86c041f	2007-12-20 00:44:13 +0000	[diff] [blame]	444	MVT::ValueType BasepOpc = basep.Val->getOpcode();
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	445	MVT::ValueType VT = LN->getLoadedVT();
				446	MVT::ValueType OpVT = Op.Val->getValueType(0);
				447	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				448	ISD::LoadExtType ExtType = LN->getExtensionType();
				449	unsigned alignment = LN->getAlignment();
				450	const valtype_map_s *vtm = getValueTypeMapEntry(VT);
				451	SDOperand Ops[8];
				452
Scott Michel	86c041f	2007-12-20 00:44:13 +0000	[diff] [blame]	453	if (BasepOpc == ISD::FrameIndex) {
				454	// Loading from a frame index is always properly aligned. Always.
				455	return SDOperand();
				456	}
				457
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	458	// For an extending load of an i1 variable, just call it i8 (or whatever we
				459	// were passed) and make it zero-extended:
				460	if (VT == MVT::i1) {
				461	VT = OpVT;
				462	ExtType = ISD::ZEXTLOAD;
				463	}
				464
				465	switch (LN->getAddressingMode()) {
				466	case ISD::UNINDEXED: {
				467	SDOperand result;
				468	SDOperand rot_op, rotamt;
				469	SDOperand ptrp;
				470	int c_offset;
				471	int c_rotamt;
				472
				473	// The vector type we really want to be when we load the 16-byte chunk
				474	MVT::ValueType vecVT, opVecVT;
				475
Scott Michel	86c041f	2007-12-20 00:44:13 +0000	[diff] [blame]	476	vecVT = MVT::v16i8;
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	477	if (VT != MVT::i1)
				478	vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	479	opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
				480
				481	if (basep.getOpcode() == ISD::ADD) {
				482	const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
				483
				484	assert(CN != NULL
				485	&& "LowerLOAD: ISD::ADD operand 1 is not constant");
				486
				487	c_offset = (int) CN->getValue();
				488	c_rotamt = (int) (c_offset & 0xf);
				489
				490	// Adjust the rotation amount to ensure that the final result ends up in
				491	// the preferred slot:
				492	c_rotamt -= vtm->prefslot_byte;
				493	ptrp = basep.getOperand(0);
				494	} else {
				495	c_offset = 0;
				496	c_rotamt = -vtm->prefslot_byte;
				497	ptrp = basep;
				498	}
				499
				500	if (alignment == 16) {
				501	// 16-byte aligned load into preferred slot, no rotation
				502	if (c_rotamt == 0) {
				503	if (isMemoryOperand(ptrp))
				504	// Return unchanged
				505	return SDOperand();
				506	else {
				507	// Return modified D-Form address for pointer:
				508	ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
				509	ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
				510	if (VT == OpVT)
				511	return DAG.getLoad(VT, LN->getChain(), ptrp,
				512	LN->getSrcValue(), LN->getSrcValueOffset(),
				513	LN->isVolatile(), 16);
				514	else
				515	return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
				516	LN->getSrcValueOffset(), OpVT,
				517	LN->isVolatile(), 16);
				518	}
				519	} else {
				520	// Need to rotate...
				521	if (c_rotamt < 0)
				522	c_rotamt += 16;
				523	// Realign the base pointer, with a D-Form address
				524	if ((c_offset & ~0xf) != 0 \|\| !isMemoryOperand(ptrp))
				525	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
				526	ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
				527	else
				528	basep = ptrp;
				529
				530	// Rotate the load:
				531	rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
				532	LN->getSrcValue(), LN->getSrcValueOffset(),
				533	LN->isVolatile(), 16);
				534	the_chain = rot_op.getValue(1);
				535	rotamt = DAG.getConstant(c_rotamt, MVT::i16);
				536
				537	SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
				538	Ops[0] = the_chain;
				539	Ops[1] = rot_op;
				540	Ops[2] = rotamt;
				541
				542	result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
				543	the_chain = result.getValue(1);
				544
				545	if (VT == OpVT \|\| ExtType == ISD::EXTLOAD) {
				546	SDVTList scalarvts;
				547	Ops[0] = the_chain;
				548	Ops[1] = result;
				549	if (OpVT == VT) {
				550	scalarvts = DAG.getVTList(VT, MVT::Other);
				551	} else {
				552	scalarvts = DAG.getVTList(OpVT, MVT::Other);
				553	}
				554
				555	result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
				556	result);
				557	Ops[0] = the_chain;
				558	Ops[1] = result;
				559	result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
				560	the_chain = result.getValue(1);
				561	} else {
				562	// Handle the sign and zero-extending loads for i1 and i8:
				563	unsigned NewOpC;
				564
				565	if (ExtType == ISD::SEXTLOAD) {
				566	NewOpC = (OpVT == MVT::i1
				567	? SPUISD::EXTRACT_I1_SEXT
				568	: SPUISD::EXTRACT_I8_SEXT);
				569	} else if (ExtType == ISD::ZEXTLOAD) {
				570	NewOpC = (OpVT == MVT::i1
				571	? SPUISD::EXTRACT_I1_ZEXT
				572	: SPUISD::EXTRACT_I8_ZEXT);
				573	}
				574
				575	result = DAG.getNode(NewOpC, OpVT, result);
				576	}
				577
				578	SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
				579	SDOperand retops[2] = { result, the_chain };
				580
				581	result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
				582	return result;
				583	/UNREACHED/
				584	}
				585	} else {
				586	// Misaligned 16-byte load:
				587	if (basep.getOpcode() == ISD::LOAD) {
				588	LN = cast<LoadSDNode>(basep);
				589	if (LN->getAlignment() == 16) {
				590	// We can verify that we're really loading from a 16-byte aligned
				591	// chunk. Encapsulate basep as a D-Form address and return a new
				592	// load:
				593	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
				594	DAG.getConstant(0, PtrVT));
				595	if (OpVT == VT)
				596	return DAG.getLoad(VT, LN->getChain(), basep,
				597	LN->getSrcValue(), LN->getSrcValueOffset(),
				598	LN->isVolatile(), 16);
				599	else
				600	return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
				601	LN->getSrcValue(), LN->getSrcValueOffset(),
				602	OpVT, LN->isVolatile(), 16);
				603	}
				604	}
				605
				606	// Catch all other cases where we can't guarantee that we have a
				607	// 16-byte aligned entity, which means resorting to an X-form
				608	// address scheme:
				609
				610	SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
Scott Michel	86c041f	2007-12-20 00:44:13 +0000	[diff] [blame]	611	SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
				612	SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	613
				614	ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
				615
				616	SDOperand alignLoad =
				617	DAG.getLoad(opVecVT, LN->getChain(), ptrp,
				618	LN->getSrcValue(), LN->getSrcValueOffset(),
				619	LN->isVolatile(), 16);
				620
				621	SDOperand insertEltOp =
				622	DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
				623
				624	result = DAG.getNode(SPUISD::SHUFB, opVecVT,
				625	alignLoad,
				626	alignLoad,
				627	DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
				628
				629	result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
				630
				631	SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
				632	SDOperand retops[2] = { result, the_chain };
				633
				634	result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
				635	return result;
				636	}
				637	break;
				638	}
				639	case ISD::PRE_INC:
				640	case ISD::PRE_DEC:
				641	case ISD::POST_INC:
				642	case ISD::POST_DEC:
				643	case ISD::LAST_INDEXED_MODE:
				644	cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
				645	"UNINDEXED\n";
				646	cerr << (unsigned) LN->getAddressingMode() << "\n";
				647	abort();
				648	/NOTREACHED/
				649	}
				650
				651	return SDOperand();
				652	}
				653
				654	/// Custom lower stores for CellSPU
				655	/*!
				656	All CellSPU stores are aligned to 16-byte boundaries, so for elements
				657	within a 16-byte block, we have to generate a shuffle to insert the
				658	requested element into its place, then store the resulting block.
				659	*/
				660	static SDOperand
				661	LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				662	StoreSDNode *SN = cast<StoreSDNode>(Op);
				663	SDOperand Value = SN->getValue();
				664	MVT::ValueType VT = Value.getValueType();
				665	MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
				666	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				667	SDOperand the_chain = SN->getChain();
Chris Lattner	4d321c5	2007-12-05 18:32:18 +0000	[diff] [blame]	668	//unsigned alignment = SN->getAlignment();
				669	//const valtype_map_s *vtm = getValueTypeMapEntry(VT);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	670
				671	switch (SN->getAddressingMode()) {
				672	case ISD::UNINDEXED: {
				673	SDOperand basep = SN->getBasePtr();
				674	SDOperand ptrOp;
				675	int offset;
				676
Scott Michel	9999e68	2007-12-19 07:35:06 +0000	[diff] [blame]	677	if (basep.getOpcode() == ISD::FrameIndex) {
				678	// FrameIndex nodes are always properly aligned. Really.
				679	return SDOperand();
				680	}
				681
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	682	if (basep.getOpcode() == ISD::ADD) {
				683	const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
				684	assert(CN != NULL
				685	&& "LowerSTORE: ISD::ADD operand 1 is not constant");
				686	offset = unsigned(CN->getValue());
				687	ptrOp = basep.getOperand(0);
				688	DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
				689	<< offset
				690	<< "\n");
				691	} else {
				692	ptrOp = basep;
				693	offset = 0;
				694	}
				695
				696	// The vector type we really want to load from the 16-byte chunk, except
				697	// in the case of MVT::i1, which has to be v16i8.
				698	unsigned vecVT, stVecVT;
				699
				700	if (StVT != MVT::i1)
				701	stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
				702	else
				703	stVecVT = MVT::v16i8;
				704	vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
				705
Scott Michel	9999e68	2007-12-19 07:35:06 +0000	[diff] [blame]	706	// Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
				707	// the actual dform addr offs($reg).
				708	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
				709	DAG.getConstant((offset & ~0xf), PtrVT));
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	710
				711	// Create the 16-byte aligned vector load
				712	SDOperand alignLoad =
				713	DAG.getLoad(vecVT, the_chain, basep,
				714	SN->getSrcValue(), SN->getSrcValueOffset(),
				715	SN->isVolatile(), 16);
				716	the_chain = alignLoad.getValue(1);
				717
				718	LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
				719	SDOperand theValue = SN->getValue();
				720	SDOperand result;
				721
				722	if (StVT != VT
				723	&& (theValue.getOpcode() == ISD::AssertZext
				724	\|\| theValue.getOpcode() == ISD::AssertSext)) {
				725	// Drill down and get the value for zero- and sign-extended
				726	// quantities
				727	theValue = theValue.getOperand(0);
				728	}
				729
				730	SDOperand insertEltOp =
				731	DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
				732	DAG.getNode(SPUISD::DFormAddr, PtrVT,
				733	ptrOp,
				734	DAG.getConstant((offset & 0xf), PtrVT)));
				735
				736	result = DAG.getNode(SPUISD::SHUFB, vecVT,
				737	DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
				738	alignLoad,
				739	DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
				740
				741	result = DAG.getStore(the_chain, result, basep,
				742	LN->getSrcValue(), LN->getSrcValueOffset(),
				743	LN->isVolatile(), LN->getAlignment());
				744
				745	return result;
				746	/UNREACHED/
				747	}
				748	case ISD::PRE_INC:
				749	case ISD::PRE_DEC:
				750	case ISD::POST_INC:
				751	case ISD::POST_DEC:
				752	case ISD::LAST_INDEXED_MODE:
				753	cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
				754	"UNINDEXED\n";
				755	cerr << (unsigned) SN->getAddressingMode() << "\n";
				756	abort();
				757	/NOTREACHED/
				758	}
				759
				760	return SDOperand();
				761	}
				762
				763	/// Generate the address of a constant pool entry.
				764	static SDOperand
				765	LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				766	MVT::ValueType PtrVT = Op.getValueType();
				767	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				768	Constant *C = CP->getConstVal();
				769	SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
				770	const TargetMachine &TM = DAG.getTarget();
				771	SDOperand Zero = DAG.getConstant(0, PtrVT);
				772
				773	if (TM.getRelocationModel() == Reloc::Static) {
				774	if (!ST->usingLargeMem()) {
				775	// Just return the SDOperand with the constant pool address in it.
				776	return CPI;
				777	} else {
				778	// Generate hi/lo address pair
				779	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
				780	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
				781
				782	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				783	}
				784	}
				785
				786	assert(0 &&
				787	"LowerConstantPool: Relocation model other than static not supported.");
				788	return SDOperand();
				789	}
				790
				791	static SDOperand
				792	LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				793	MVT::ValueType PtrVT = Op.getValueType();
				794	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				795	SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
				796	SDOperand Zero = DAG.getConstant(0, PtrVT);
				797	const TargetMachine &TM = DAG.getTarget();
				798
				799	if (TM.getRelocationModel() == Reloc::Static) {
				800	if (!ST->usingLargeMem()) {
				801	// Just return the SDOperand with the jump table address in it.
				802	return JTI;
				803	} else {
				804	// Generate hi/lo address pair
				805	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
				806	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
				807
				808	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				809	}
				810	}
				811
				812	assert(0 &&
				813	"LowerJumpTable: Relocation model other than static not supported.");
				814	return SDOperand();
				815	}
				816
				817	static SDOperand
				818	LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				819	MVT::ValueType PtrVT = Op.getValueType();
				820	GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
				821	GlobalValue *GV = GSDN->getGlobal();
				822	SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
				823	SDOperand Zero = DAG.getConstant(0, PtrVT);
				824	const TargetMachine &TM = DAG.getTarget();
				825
				826	if (TM.getRelocationModel() == Reloc::Static) {
				827	if (!ST->usingLargeMem()) {
				828	// Generate a local store address
				829	return GA;
				830	} else {
				831	// Generate hi/lo address pair
				832	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
				833	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
				834
				835	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				836	}
				837	} else {
				838	cerr << "LowerGlobalAddress: Relocation model other than static not "
				839	<< "supported.\n";
				840	abort();
				841	/NOTREACHED/
				842	}
				843
				844	return SDOperand();
				845	}
				846
				847	//! Custom lower i64 integer constants
				848	/*!
				849	This code inserts all of the necessary juggling that needs to occur to load
				850	a 64-bit constant into a register.
				851	*/
				852	static SDOperand
				853	LowerConstant(SDOperand Op, SelectionDAG &DAG) {
				854	unsigned VT = Op.getValueType();
				855	ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
				856
				857	if (VT == MVT::i64) {
				858	SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
				859	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
				860	DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
				861
				862	} else {
				863	cerr << "LowerConstant: unhandled constant type "
				864	<< MVT::getValueTypeString(VT)
				865	<< "\n";
				866	abort();
				867	/NOTREACHED/
				868	}
				869
				870	return SDOperand();
				871	}
				872
				873	//! Custom lower single precision floating point constants
				874	/*!
				875	"float" immediates can be lowered as if they were unsigned 32-bit integers.
				876	The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
				877	target description.
				878	*/
				879	static SDOperand
				880	LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
				881	unsigned VT = Op.getValueType();
				882	ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
				883
				884	assert((FP != 0) &&
				885	"LowerConstantFP: Node is not ConstantFPSDNode");
				886
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	887	if (VT == MVT::f32) {
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	888	float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	889	return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	890	DAG.getTargetConstantFP(targetConst, VT));
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	891	} else if (VT == MVT::f64) {
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	892	uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	893	return DAG.getNode(ISD::BIT_CONVERT, VT,
				894	LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
				895	}
				896
				897	return SDOperand();
				898	}
				899
				900	static SDOperand
				901	LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
				902	{
				903	MachineFunction &MF = DAG.getMachineFunction();
				904	MachineFrameInfo *MFI = MF.getFrameInfo();
				905	SSARegMap *RegMap = MF.getSSARegMap();
				906	SmallVector<SDOperand, 8> ArgValues;
				907	SDOperand Root = Op.getOperand(0);
				908	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				909
				910	const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
				911	const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
				912
				913	unsigned ArgOffset = SPUFrameInfo::minStackSize();
				914	unsigned ArgRegIdx = 0;
				915	unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
				916
				917	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				918
				919	// Add DAG nodes to load the arguments or copy them out of registers.
				920	for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
				921	SDOperand ArgVal;
				922	bool needsLoad = false;
				923	MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
				924	unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
				925
				926	switch (ObjectVT) {
				927	default: {
				928	cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
				929	<< MVT::getValueTypeString(ObjectVT)
				930	<< "\n";
				931	abort();
				932	}
				933	case MVT::i8:
				934	if (!isVarArg && ArgRegIdx < NumArgRegs) {
Scott Michel	504c369	2007-12-17 22:32:34 +0000	[diff] [blame]	935	unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	936	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				937	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
				938	++ArgRegIdx;
				939	} else {
				940	needsLoad = true;
				941	}
				942	break;
				943	case MVT::i16:
				944	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				945	unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
				946	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				947	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
				948	++ArgRegIdx;
				949	} else {
				950	needsLoad = true;
				951	}
				952	break;
				953	case MVT::i32:
				954	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				955	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				956	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				957	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
				958	++ArgRegIdx;
				959	} else {
				960	needsLoad = true;
				961	}
				962	break;
				963	case MVT::i64:
				964	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				965	unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
				966	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				967	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				968	++ArgRegIdx;
				969	} else {
				970	needsLoad = true;
				971	}
				972	break;
				973	case MVT::f32:
				974	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				975	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				976	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				977	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
				978	++ArgRegIdx;
				979	} else {
				980	needsLoad = true;
				981	}
				982	break;
				983	case MVT::f64:
				984	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				985	unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
				986	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				987	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
				988	++ArgRegIdx;
				989	} else {
				990	needsLoad = true;
				991	}
				992	break;
				993	case MVT::v2f64:
				994	case MVT::v4f32:
				995	case MVT::v4i32:
				996	case MVT::v8i16:
				997	case MVT::v16i8:
				998	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				999	unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1000	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				1001	ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
				1002	++ArgRegIdx;
				1003	} else {
				1004	needsLoad = true;
				1005	}
				1006	break;
				1007	}
				1008
				1009	// We need to load the argument to a virtual register if we determined above
				1010	// that we ran out of physical registers of the appropriate type
				1011	if (needsLoad) {
				1012	// If the argument is actually used, emit a load from the right stack
				1013	// slot.
				1014	if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
				1015	int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
				1016	SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
				1017	ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
				1018	} else {
				1019	// Don't emit a dead load.
				1020	ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
				1021	}
				1022
				1023	ArgOffset += StackSlotSize;
				1024	}
				1025
				1026	ArgValues.push_back(ArgVal);
				1027	}
				1028
				1029	// If the function takes variable number of arguments, make a frame index for
				1030	// the start of the first vararg value... for expansion of llvm.va_start.
				1031	if (isVarArg) {
				1032	VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
				1033	ArgOffset);
				1034	SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
				1035	// If this function is vararg, store any remaining integer argument regs to
				1036	// their spots on the stack so that they may be loaded by deferencing the
				1037	// result of va_next.
				1038	SmallVector<SDOperand, 8> MemOps;
				1039	for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
				1040	unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
				1041	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				1042	SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
				1043	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1044	MemOps.push_back(Store);
				1045	// Increment the address by four for the next argument to store
				1046	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
				1047	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1048	}
				1049	if (!MemOps.empty())
				1050	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
				1051	}
				1052
				1053	ArgValues.push_back(Root);
				1054
				1055	// Return the new list of results.
				1056	std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
				1057	Op.Val->value_end());
				1058	return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
				1059	}
				1060
				1061	/// isLSAAddress - Return the immediate to use if the specified
				1062	/// value is representable as a LSA address.
				1063	static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
				1064	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
				1065	if (!C) return 0;
				1066
				1067	int Addr = C->getValue();
				1068	if ((Addr & 3) != 0 \|\| // Low 2 bits are implicitly zero.
				1069	(Addr << 14 >> 14) != Addr)
				1070	return 0; // Top 14 bits have to be sext of immediate.
				1071
				1072	return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
				1073	}
				1074
				1075	static
				1076	SDOperand
				1077	LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				1078	SDOperand Chain = Op.getOperand(0);
				1079	#if 0
				1080	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1081	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1082	#endif
				1083	SDOperand Callee = Op.getOperand(4);
				1084	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				1085	unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
				1086	const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
				1087	const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
				1088
				1089	// Handy pointer type
				1090	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1091
				1092	// Accumulate how many bytes are to be pushed on the stack, including the
				1093	// linkage area, and parameter passing area. According to the SPU ABI,
				1094	// we minimally need space for [LR] and [SP]
				1095	unsigned NumStackBytes = SPUFrameInfo::minStackSize();
				1096
				1097	// Set up a copy of the stack pointer for use loading and storing any
				1098	// arguments that may not fit in the registers available for argument
				1099	// passing.
				1100	SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
				1101
				1102	// Figure out which arguments are going to go in registers, and which in
				1103	// memory.
				1104	unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
				1105	unsigned ArgRegIdx = 0;
				1106
				1107	// Keep track of registers passing arguments
				1108	std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
				1109	// And the arguments passed on the stack
				1110	SmallVector<SDOperand, 8> MemOpChains;
				1111
				1112	for (unsigned i = 0; i != NumOps; ++i) {
				1113	SDOperand Arg = Op.getOperand(5+2*i);
				1114
				1115	// PtrOff will be used to store the current argument to the stack if a
				1116	// register cannot be found for it.
				1117	SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
				1118	PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
				1119
				1120	switch (Arg.getValueType()) {
				1121	default: assert(0 && "Unexpected ValueType for argument!");
				1122	case MVT::i32:
				1123	case MVT::i64:
				1124	case MVT::i128:
				1125	if (ArgRegIdx != NumArgRegs) {
				1126	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1127	} else {
				1128	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1129	ArgOffset += StackSlotSize;
				1130	}
				1131	break;
				1132	case MVT::f32:
				1133	case MVT::f64:
				1134	if (ArgRegIdx != NumArgRegs) {
				1135	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1136	} else {
				1137	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1138	ArgOffset += StackSlotSize;
				1139	}
				1140	break;
				1141	case MVT::v4f32:
				1142	case MVT::v4i32:
				1143	case MVT::v8i16:
				1144	case MVT::v16i8:
				1145	if (ArgRegIdx != NumArgRegs) {
				1146	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1147	} else {
				1148	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1149	ArgOffset += StackSlotSize;
				1150	}
				1151	break;
				1152	}
				1153	}
				1154
				1155	// Update number of stack bytes actually used, insert a call sequence start
				1156	NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
				1157	Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
				1158
				1159	if (!MemOpChains.empty()) {
				1160	// Adjust the stack pointer for the stack arguments.
				1161	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1162	&MemOpChains[0], MemOpChains.size());
				1163	}
				1164
				1165	// Build a sequence of copy-to-reg nodes chained together with token chain
				1166	// and flag operands which copy the outgoing args into the appropriate regs.
				1167	SDOperand InFlag;
				1168	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1169	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1170	InFlag);
				1171	InFlag = Chain.getValue(1);
				1172	}
				1173
				1174	std::vector<MVT::ValueType> NodeTys;
				1175	NodeTys.push_back(MVT::Other); // Returns a chain
				1176	NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
				1177
				1178	SmallVector<SDOperand, 8> Ops;
				1179	unsigned CallOpc = SPUISD::CALL;
				1180
				1181	// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
				1182	// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
				1183	// node so that legalize doesn't hack it.
				1184	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1185	GlobalValue *GV = G->getGlobal();
				1186	unsigned CalleeVT = Callee.getValueType();
				1187
				1188	// Turn calls to targets that are defined (i.e., have bodies) into BRSL
				1189	// style calls, otherwise, external symbols are BRASL calls.
				1190	// NOTE:
				1191	// This may be an unsafe assumption for JIT and really large compilation
				1192	// units.
				1193	if (GV->isDeclaration()) {
				1194	Callee = DAG.getGlobalAddress(GV, CalleeVT);
				1195	} else {
				1196	Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
				1197	DAG.getTargetGlobalAddress(GV, CalleeVT),
				1198	DAG.getConstant(0, PtrVT));
				1199	}
				1200	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1201	Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
				1202	else if (SDNode *Dest = isLSAAddress(Callee, DAG))
				1203	// If this is an absolute destination address that appears to be a legal
				1204	// local store address, use the munged value.
				1205	Callee = SDOperand(Dest, 0);
				1206
				1207	Ops.push_back(Chain);
				1208	Ops.push_back(Callee);
				1209
				1210	// Add argument registers to the end of the list so that they are known live
				1211	// into the call.
				1212	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1213	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1214	RegsToPass[i].second.getValueType()));
				1215
				1216	if (InFlag.Val)
				1217	Ops.push_back(InFlag);
				1218	Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
				1219	InFlag = Chain.getValue(1);
				1220
				1221	SDOperand ResultVals[3];
				1222	unsigned NumResults = 0;
				1223	NodeTys.clear();
				1224
				1225	// If the call has results, copy the values out of the ret val registers.
				1226	switch (Op.Val->getValueType(0)) {
				1227	default: assert(0 && "Unexpected ret value!");
				1228	case MVT::Other: break;
				1229	case MVT::i32:
				1230	if (Op.Val->getValueType(1) == MVT::i32) {
				1231	Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
				1232	ResultVals[0] = Chain.getValue(0);
				1233	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
				1234	Chain.getValue(2)).getValue(1);
				1235	ResultVals[1] = Chain.getValue(0);
				1236	NumResults = 2;
				1237	NodeTys.push_back(MVT::i32);
				1238	} else {
				1239	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
				1240	ResultVals[0] = Chain.getValue(0);
				1241	NumResults = 1;
				1242	}
				1243	NodeTys.push_back(MVT::i32);
				1244	break;
				1245	case MVT::i64:
				1246	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
				1247	ResultVals[0] = Chain.getValue(0);
				1248	NumResults = 1;
				1249	NodeTys.push_back(MVT::i64);
				1250	break;
				1251	case MVT::f32:
				1252	case MVT::f64:
				1253	Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
				1254	InFlag).getValue(1);
				1255	ResultVals[0] = Chain.getValue(0);
				1256	NumResults = 1;
				1257	NodeTys.push_back(Op.Val->getValueType(0));
				1258	break;
				1259	case MVT::v2f64:
				1260	case MVT::v4f32:
				1261	case MVT::v4i32:
				1262	case MVT::v8i16:
				1263	case MVT::v16i8:
				1264	Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
				1265	InFlag).getValue(1);
				1266	ResultVals[0] = Chain.getValue(0);
				1267	NumResults = 1;
				1268	NodeTys.push_back(Op.Val->getValueType(0));
				1269	break;
				1270	}
				1271
				1272	Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
				1273	DAG.getConstant(NumStackBytes, PtrVT));
				1274	NodeTys.push_back(MVT::Other);
				1275
				1276	// If the function returns void, just return the chain.
				1277	if (NumResults == 0)
				1278	return Chain;
				1279
				1280	// Otherwise, merge everything together with a MERGE_VALUES node.
				1281	ResultVals[NumResults++] = Chain;
				1282	SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
				1283	ResultVals, NumResults);
				1284	return Res.getValue(Op.ResNo);
				1285	}
				1286
				1287	static SDOperand
				1288	LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
				1289	SmallVector<CCValAssign, 16> RVLocs;
				1290	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				1291	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				1292	CCState CCInfo(CC, isVarArg, TM, RVLocs);
				1293	CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
				1294
				1295	// If this is the first return lowered for this function, add the regs to the
				1296	// liveout set for the function.
				1297	if (DAG.getMachineFunction().liveout_empty()) {
				1298	for (unsigned i = 0; i != RVLocs.size(); ++i)
				1299	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				1300	}
				1301
				1302	SDOperand Chain = Op.getOperand(0);
				1303	SDOperand Flag;
				1304
				1305	// Copy the result values into the output registers.
				1306	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				1307	CCValAssign &VA = RVLocs[i];
				1308	assert(VA.isRegLoc() && "Can only return in registers!");
				1309	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
				1310	Flag = Chain.getValue(1);
				1311	}
				1312
				1313	if (Flag.Val)
				1314	return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
				1315	else
				1316	return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
				1317	}
				1318
				1319
				1320	//===----------------------------------------------------------------------===//
				1321	// Vector related lowering:
				1322	//===----------------------------------------------------------------------===//
				1323
				1324	static ConstantSDNode *
				1325	getVecImm(SDNode *N) {
				1326	SDOperand OpVal(0, 0);
				1327
				1328	// Check to see if this buildvec has a single non-undef value in its elements.
				1329	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1330	if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
				1331	if (OpVal.Val == 0)
				1332	OpVal = N->getOperand(i);
				1333	else if (OpVal != N->getOperand(i))
				1334	return 0;
				1335	}
				1336
				1337	if (OpVal.Val != 0) {
				1338	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				1339	return CN;
				1340	}
				1341	}
				1342
				1343	return 0; // All UNDEF: use implicit def.; not Constant node
				1344	}
				1345
				1346	/// get_vec_i18imm - Test if this vector is a vector filled with the same value
				1347	/// and the value fits into an unsigned 18-bit constant, and if so, return the
				1348	/// constant
				1349	SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
				1350	MVT::ValueType ValueType) {
				1351	if (ConstantSDNode *CN = getVecImm(N)) {
				1352	uint64_t Value = CN->getValue();
				1353	if (Value <= 0x3ffff)
				1354	return DAG.getConstant(Value, ValueType);
				1355	}
				1356
				1357	return SDOperand();
				1358	}
				1359
				1360	/// get_vec_i16imm - Test if this vector is a vector filled with the same value
				1361	/// and the value fits into a signed 16-bit constant, and if so, return the
				1362	/// constant
				1363	SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
				1364	MVT::ValueType ValueType) {
				1365	if (ConstantSDNode *CN = getVecImm(N)) {
				1366	if (ValueType == MVT::i32) {
				1367	int Value = (int) CN->getValue();
				1368	int SExtValue = ((Value & 0xffff) << 16) >> 16;
				1369
				1370	if (Value == SExtValue)
				1371	return DAG.getConstant(Value, ValueType);
				1372	} else if (ValueType == MVT::i16) {
				1373	short Value = (short) CN->getValue();
				1374	int SExtValue = ((int) Value << 16) >> 16;
				1375
				1376	if (Value == (short) SExtValue)
				1377	return DAG.getConstant(Value, ValueType);
				1378	} else if (ValueType == MVT::i64) {
				1379	int64_t Value = CN->getValue();
				1380	int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
				1381
				1382	if (Value == SExtValue)
				1383	return DAG.getConstant(Value, ValueType);
				1384	}
				1385	}
				1386
				1387	return SDOperand();
				1388	}
				1389
				1390	/// get_vec_i10imm - Test if this vector is a vector filled with the same value
				1391	/// and the value fits into a signed 10-bit constant, and if so, return the
				1392	/// constant
				1393	SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
				1394	MVT::ValueType ValueType) {
				1395	if (ConstantSDNode *CN = getVecImm(N)) {
				1396	int Value = (int) CN->getValue();
				1397	if ((ValueType == MVT::i32 && isS10Constant(Value))
				1398	\|\| (ValueType == MVT::i16 && isS10Constant((short) Value)))
				1399	return DAG.getConstant(Value, ValueType);
				1400	}
				1401
				1402	return SDOperand();
				1403	}
				1404
				1405	/// get_vec_i8imm - Test if this vector is a vector filled with the same value
				1406	/// and the value fits into a signed 8-bit constant, and if so, return the
				1407	/// constant.
				1408	///
				1409	/// @note: The incoming vector is v16i8 because that's the only way we can load
				1410	/// constant vectors. Thus, we test to see if the upper and lower bytes are the
				1411	/// same value.
				1412	SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
				1413	MVT::ValueType ValueType) {
				1414	if (ConstantSDNode *CN = getVecImm(N)) {
				1415	int Value = (int) CN->getValue();
				1416	if (ValueType == MVT::i16
				1417	&& Value <= 0xffff /* truncated from uint64_t */
				1418	&& ((short) Value >> 8) == ((short) Value & 0xff))
				1419	return DAG.getConstant(Value & 0xff, ValueType);
				1420	else if (ValueType == MVT::i8
				1421	&& (Value & 0xff) == Value)
				1422	return DAG.getConstant(Value, ValueType);
				1423	}
				1424
				1425	return SDOperand();
				1426	}
				1427
				1428	/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
				1429	/// and the value fits into a signed 16-bit constant, and if so, return the
				1430	/// constant
				1431	SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
				1432	MVT::ValueType ValueType) {
				1433	if (ConstantSDNode *CN = getVecImm(N)) {
				1434	uint64_t Value = CN->getValue();
				1435	if ((ValueType == MVT::i32
				1436	&& ((unsigned) Value & 0xffff0000) == (unsigned) Value)
				1437	\|\| (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
				1438	return DAG.getConstant(Value >> 16, ValueType);
				1439	}
				1440
				1441	return SDOperand();
				1442	}
				1443
				1444	/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
				1445	SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
				1446	if (ConstantSDNode *CN = getVecImm(N)) {
				1447	return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
				1448	}
				1449
				1450	return SDOperand();
				1451	}
				1452
				1453	/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
				1454	SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
				1455	if (ConstantSDNode *CN = getVecImm(N)) {
				1456	return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
				1457	}
				1458
				1459	return SDOperand();
				1460	}
				1461
				1462	// If this is a vector of constants or undefs, get the bits. A bit in
				1463	// UndefBits is set if the corresponding element of the vector is an
				1464	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				1465	// zero. Return true if this is not an array of constants, false if it is.
				1466	//
				1467	static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
				1468	uint64_t UndefBits[2]) {
				1469	// Start with zero'd results.
				1470	VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
				1471
				1472	unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
				1473	for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
				1474	SDOperand OpVal = BV->getOperand(i);
				1475
				1476	unsigned PartNo = i >= e/2; // In the upper 128 bits?
				1477	unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
				1478
				1479	uint64_t EltBits = 0;
				1480	if (OpVal.getOpcode() == ISD::UNDEF) {
				1481	uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
				1482	UndefBits[PartNo] \|= EltUndefBits << (SlotNo*EltBitSize);
				1483	continue;
				1484	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				1485	EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
				1486	} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
				1487	const APFloat &apf = CN->getValueAPF();
				1488	EltBits = (CN->getValueType(0) == MVT::f32
				1489	? FloatToBits(apf.convertToFloat())
				1490	: DoubleToBits(apf.convertToDouble()));
				1491	} else {
				1492	// Nonconstant element.
				1493	return true;
				1494	}
				1495
				1496	VectorBits[PartNo] \|= EltBits << (SlotNo*EltBitSize);
				1497	}
				1498
				1499	//printf("%llx %llx %llx %llx\n",
				1500	// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
				1501	return false;
				1502	}
				1503
				1504	/// If this is a splat (repetition) of a value across the whole vector, return
				1505	/// the smallest size that splats it. For example, "0x01010101010101..." is a
				1506	/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
				1507	/// SplatSize = 1 byte.
				1508	static bool isConstantSplat(const uint64_t Bits128[2],
				1509	const uint64_t Undef128[2],
				1510	int MinSplatBits,
				1511	uint64_t &SplatBits, uint64_t &SplatUndef,
				1512	int &SplatSize) {
				1513	// Don't let undefs prevent splats from matching. See if the top 64-bits are
				1514	// the same as the lower 64-bits, ignoring undefs.
				1515	uint64_t Bits64 = Bits128[0] \| Bits128[1];
				1516	uint64_t Undef64 = Undef128[0] & Undef128[1];
				1517	uint32_t Bits32 = uint32_t(Bits64) \| uint32_t(Bits64 >> 32);
				1518	uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
				1519	uint16_t Bits16 = uint16_t(Bits32) \| uint16_t(Bits32 >> 16);
				1520	uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
				1521
				1522	if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
				1523	if (MinSplatBits < 64) {
				1524
				1525	// Check that the top 32-bits are the same as the lower 32-bits, ignoring
				1526	// undefs.
				1527	if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
				1528	if (MinSplatBits < 32) {
				1529
				1530	// If the top 16-bits are different than the lower 16-bits, ignoring
				1531	// undefs, we have an i32 splat.
				1532	if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
				1533	if (MinSplatBits < 16) {
				1534	// If the top 8-bits are different than the lower 8-bits, ignoring
				1535	// undefs, we have an i16 splat.
				1536	if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
				1537	// Otherwise, we have an 8-bit splat.
				1538	SplatBits = uint8_t(Bits16) \| uint8_t(Bits16 >> 8);
				1539	SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
				1540	SplatSize = 1;
				1541	return true;
				1542	}
				1543	} else {
				1544	SplatBits = Bits16;
				1545	SplatUndef = Undef16;
				1546	SplatSize = 2;
				1547	return true;
				1548	}
				1549	}
				1550	} else {
				1551	SplatBits = Bits32;
				1552	SplatUndef = Undef32;
				1553	SplatSize = 4;
				1554	return true;
				1555	}
				1556	}
				1557	} else {
				1558	SplatBits = Bits128[0];
				1559	SplatUndef = Undef128[0];
				1560	SplatSize = 8;
				1561	return true;
				1562	}
				1563	}
				1564
				1565	return false; // Can't be a splat if two pieces don't match.
				1566	}
				1567
				1568	// If this is a case we can't handle, return null and let the default
				1569	// expansion code take care of it. If we CAN select this case, and if it
				1570	// selects to a single instruction, return Op. Otherwise, if we can codegen
				1571	// this case more efficiently than a constant pool load, lower it to the
				1572	// sequence of ops that should be used.
				1573	static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				1574	MVT::ValueType VT = Op.getValueType();
				1575	// If this is a vector of constants or undefs, get the bits. A bit in
				1576	// UndefBits is set if the corresponding element of the vector is an
				1577	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				1578	// zero.
				1579	uint64_t VectorBits[2];
				1580	uint64_t UndefBits[2];
				1581	uint64_t SplatBits, SplatUndef;
				1582	int SplatSize;
				1583	if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
				1584	\|\| !isConstantSplat(VectorBits, UndefBits,
				1585	MVT::getSizeInBits(MVT::getVectorElementType(VT)),
				1586	SplatBits, SplatUndef, SplatSize))
				1587	return SDOperand(); // Not a constant vector, not a splat.
				1588
				1589	switch (VT) {
				1590	default:
				1591	case MVT::v4f32: {
				1592	uint32_t Value32 = SplatBits;
				1593	assert(SplatSize == 4
				1594	&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
				1595	// NOTE: pretend the constant is an integer. LLVM won't load FP constants
				1596	SDOperand T = DAG.getConstant(Value32, MVT::i32);
				1597	return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
				1598	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
				1599	break;
				1600	}
				1601	case MVT::v2f64: {
				1602	uint64_t f64val = SplatBits;
				1603	assert(SplatSize == 8
				1604	&& "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
				1605	// NOTE: pretend the constant is an integer. LLVM won't load FP constants
				1606	SDOperand T = DAG.getConstant(f64val, MVT::i64);
				1607	return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
				1608	DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
				1609	break;
				1610	}
				1611	case MVT::v16i8: {
				1612	// 8-bit constants have to be expanded to 16-bits
				1613	unsigned short Value16 = SplatBits \| (SplatBits << 8);
				1614	SDOperand Ops[8];
				1615	for (int i = 0; i < 8; ++i)
				1616	Ops[i] = DAG.getConstant(Value16, MVT::i16);
				1617	return DAG.getNode(ISD::BIT_CONVERT, VT,
				1618	DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
				1619	}
				1620	case MVT::v8i16: {
				1621	unsigned short Value16;
				1622	if (SplatSize == 2)
				1623	Value16 = (unsigned short) (SplatBits & 0xffff);
				1624	else
				1625	Value16 = (unsigned short) (SplatBits \| (SplatBits << 8));
				1626	SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
				1627	SDOperand Ops[8];
				1628	for (int i = 0; i < 8; ++i) Ops[i] = T;
				1629	return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
				1630	}
				1631	case MVT::v4i32: {
				1632	unsigned int Value = SplatBits;
				1633	SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
				1634	return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
				1635	}
				1636	case MVT::v2i64: {
				1637	uint64_t val = SplatBits;
				1638	uint32_t upper = uint32_t(val >> 32);
				1639	uint32_t lower = uint32_t(val);
				1640
				1641	if (val != 0) {
				1642	SDOperand LO32;
				1643	SDOperand HI32;
				1644	SmallVector<SDOperand, 16> ShufBytes;
				1645	SDOperand Result;
				1646	bool upper_special, lower_special;
				1647
				1648	// NOTE: This code creates common-case shuffle masks that can be easily
				1649	// detected as common expressions. It is not attempting to create highly
				1650	// specialized masks to replace any and all 0's, 0xff's and 0x80's.
				1651
				1652	// Detect if the upper or lower half is a special shuffle mask pattern:
				1653	upper_special = (upper == 0 \|\| upper == 0xffffffff \|\| upper == 0x80000000);
				1654	lower_special = (lower == 0 \|\| lower == 0xffffffff \|\| lower == 0x80000000);
				1655
				1656	// Create lower vector if not a special pattern
				1657	if (!lower_special) {
				1658	SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
				1659	LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
				1660	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1661	LO32C, LO32C, LO32C, LO32C));
				1662	}
				1663
				1664	// Create upper vector if not a special pattern
				1665	if (!upper_special) {
				1666	SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
				1667	HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
				1668	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1669	HI32C, HI32C, HI32C, HI32C));
				1670	}
				1671
				1672	// If either upper or lower are special, then the two input operands are
				1673	// the same (basically, one of them is a "don't care")
				1674	if (lower_special)
				1675	LO32 = HI32;
				1676	if (upper_special)
				1677	HI32 = LO32;
				1678	if (lower_special && upper_special) {
				1679	// Unhappy situation... both upper and lower are special, so punt with
				1680	// a target constant:
				1681	SDOperand Zero = DAG.getConstant(0, MVT::i32);
				1682	HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
				1683	Zero, Zero);
				1684	}
				1685
				1686	for (int i = 0; i < 4; ++i) {
				1687	for (int j = 0; j < 4; ++j) {
				1688	SDOperand V;
				1689	bool process_upper, process_lower;
				1690	uint64_t val;
				1691
				1692	process_upper = (upper_special && (i & 1) == 0);
				1693	process_lower = (lower_special && (i & 1) == 1);
				1694
				1695	if (process_upper \|\| process_lower) {
				1696	if ((process_upper && upper == 0)
				1697	\|\| (process_lower && lower == 0))
				1698	val = 0x80;
				1699	else if ((process_upper && upper == 0xffffffff)
				1700	\|\| (process_lower && lower == 0xffffffff))
				1701	val = 0xc0;
				1702	else if ((process_upper && upper == 0x80000000)
				1703	\|\| (process_lower && lower == 0x80000000))
				1704	val = (j == 0 ? 0xe0 : 0x80);
				1705	} else
				1706	val = i * 4 + j + ((i & 1) * 16);
				1707
				1708	ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
				1709	}
				1710	}
				1711
				1712	return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
				1713	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				1714	&ShufBytes[0], ShufBytes.size()));
				1715	} else {
				1716	// For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
				1717	SDOperand Zero = DAG.getConstant(0, MVT::i32);
				1718	return DAG.getNode(ISD::BIT_CONVERT, VT,
				1719	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1720	Zero, Zero, Zero, Zero));
				1721	}
				1722	}
				1723	}
				1724
				1725	return SDOperand();
				1726	}
				1727
				1728	/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
				1729	/// which the Cell can operate. The code inspects V3 to ascertain whether the
				1730	/// permutation vector, V3, is monotonically increasing with one "exception"
				1731	/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
				1732	/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
				1733	/// In either case, the net result is going to eventually invoke SHUFB to
				1734	/// permute/shuffle the bytes from V1 and V2.
				1735	/// \note
				1736	/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
				1737	/// control word for byte/halfword/word insertion. This takes care of a single
				1738	/// element move from V2 into V1.
				1739	/// \note
				1740	/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
				1741	static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				1742	SDOperand V1 = Op.getOperand(0);
				1743	SDOperand V2 = Op.getOperand(1);
				1744	SDOperand PermMask = Op.getOperand(2);
				1745
				1746	if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
				1747
				1748	// If we have a single element being moved from V1 to V2, this can be handled
				1749	// using the C*[DX] compute mask instructions, but the vector elements have
				1750	// to be monotonically increasing with one exception element.
				1751	MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
				1752	unsigned EltsFromV2 = 0;
				1753	unsigned V2Elt = 0;
				1754	unsigned V2EltIdx0 = 0;
				1755	unsigned CurrElt = 0;
				1756	bool monotonic = true;
				1757	if (EltVT == MVT::i8)
				1758	V2EltIdx0 = 16;
				1759	else if (EltVT == MVT::i16)
				1760	V2EltIdx0 = 8;
				1761	else if (EltVT == MVT::i32)
				1762	V2EltIdx0 = 4;
				1763	else
				1764	assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
				1765
				1766	for (unsigned i = 0, e = PermMask.getNumOperands();
				1767	EltsFromV2 <= 1 && monotonic && i != e;
				1768	++i) {
				1769	unsigned SrcElt;
				1770	if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
				1771	SrcElt = 0;
				1772	else
				1773	SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
				1774
				1775	if (SrcElt >= V2EltIdx0) {
				1776	++EltsFromV2;
				1777	V2Elt = (V2EltIdx0 - SrcElt) << 2;
				1778	} else if (CurrElt != SrcElt) {
				1779	monotonic = false;
				1780	}
				1781
				1782	++CurrElt;
				1783	}
				1784
				1785	if (EltsFromV2 == 1 && monotonic) {
				1786	// Compute mask and shuffle
				1787	MachineFunction &MF = DAG.getMachineFunction();
				1788	SSARegMap *RegMap = MF.getSSARegMap();
				1789	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				1790	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1791	// Initialize temporary register to 0
				1792	SDOperand InitTempReg =
				1793	DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
				1794	// Copy register's contents as index in INSERT_MASK:
				1795	SDOperand ShufMaskOp =
				1796	DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
				1797	DAG.getTargetConstant(V2Elt, MVT::i32),
				1798	DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
				1799	// Use shuffle mask in SHUFB synthetic instruction:
				1800	return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
				1801	} else {
				1802	// Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
				1803	unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
				1804
				1805	SmallVector<SDOperand, 16> ResultMask;
				1806	for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
				1807	unsigned SrcElt;
				1808	if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
				1809	SrcElt = 0;
				1810	else
				1811	SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
				1812
				1813	for (unsigned j = 0; j != BytesPerElement; ++j) {
				1814	ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
				1815	MVT::i8));
				1816	}
				1817	}
				1818
				1819	SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				1820	&ResultMask[0], ResultMask.size());
				1821	return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
				1822	}
				1823	}
				1824
				1825	static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				1826	SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
				1827
				1828	if (Op0.Val->getOpcode() == ISD::Constant) {
				1829	// For a constant, build the appropriate constant vector, which will
				1830	// eventually simplify to a vector register load.
				1831
				1832	ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
				1833	SmallVector<SDOperand, 16> ConstVecValues;
				1834	MVT::ValueType VT;
				1835	size_t n_copies;
				1836
				1837	// Create a constant vector:
				1838	switch (Op.getValueType()) {
				1839	default: assert(0 && "Unexpected constant value type in "
				1840	"LowerSCALAR_TO_VECTOR");
				1841	case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
				1842	case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
				1843	case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
				1844	case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
				1845	case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
				1846	case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
				1847	}
				1848
				1849	SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
				1850	for (size_t j = 0; j < n_copies; ++j)
				1851	ConstVecValues.push_back(CValue);
				1852
				1853	return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
				1854	&ConstVecValues[0], ConstVecValues.size());
				1855	} else {
				1856	// Otherwise, copy the value from one register to another:
				1857	switch (Op0.getValueType()) {
				1858	default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
				1859	case MVT::i8:
				1860	case MVT::i16:
				1861	case MVT::i32:
				1862	case MVT::i64:
				1863	case MVT::f32:
				1864	case MVT::f64:
				1865	return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
				1866	}
				1867	}
				1868
				1869	return SDOperand();
				1870	}
				1871
				1872	static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
				1873	switch (Op.getValueType()) {
				1874	case MVT::v4i32: {
				1875	SDOperand rA = Op.getOperand(0);
				1876	SDOperand rB = Op.getOperand(1);
				1877	SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
				1878	SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
				1879	SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
				1880	SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
				1881
				1882	return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
				1883	break;
				1884	}
				1885
				1886	// Multiply two v8i16 vectors (pipeline friendly version):
				1887	// a) multiply lower halves, mask off upper 16-bit of 32-bit product
				1888	// b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
				1889	// c) Use SELB to select upper and lower halves from the intermediate results
				1890	//
				1891	// NOTE: We really want to move the FSMBI to earlier to actually get the
				1892	// dual-issue. This code does manage to do this, even if it's a little on
				1893	// the wacky side
				1894	case MVT::v8i16: {
				1895	MachineFunction &MF = DAG.getMachineFunction();
				1896	SSARegMap *RegMap = MF.getSSARegMap();
				1897	SDOperand Chain = Op.getOperand(0);
				1898	SDOperand rA = Op.getOperand(0);
				1899	SDOperand rB = Op.getOperand(1);
				1900	unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1901	unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1902
				1903	SDOperand FSMBOp =
				1904	DAG.getCopyToReg(Chain, FSMBIreg,
				1905	DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
				1906	DAG.getConstant(0xcccc, MVT::i32)));
				1907
				1908	SDOperand HHProd =
				1909	DAG.getCopyToReg(FSMBOp, HiProdReg,
				1910	DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
				1911
				1912	SDOperand HHProd_v4i32 =
				1913	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
				1914	DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
				1915
				1916	return DAG.getNode(SPUISD::SELB, MVT::v8i16,
				1917	DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
				1918	DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
				1919	DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
				1920	HHProd_v4i32,
				1921	DAG.getConstant(16, MVT::i16))),
				1922	DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
				1923	}
				1924
				1925	// This M00sE is N@stI! (apologies to Monty Python)
				1926	//
				1927	// SPU doesn't know how to do any 8-bit multiplication, so the solution
				1928	// is to break it all apart, sign extend, and reassemble the various
				1929	// intermediate products.
				1930	case MVT::v16i8: {
				1931	MachineFunction &MF = DAG.getMachineFunction();
				1932	SSARegMap *RegMap = MF.getSSARegMap();
				1933	SDOperand Chain = Op.getOperand(0);
				1934	SDOperand rA = Op.getOperand(0);
				1935	SDOperand rB = Op.getOperand(1);
				1936	SDOperand c8 = DAG.getConstant(8, MVT::i8);
				1937	SDOperand c16 = DAG.getConstant(16, MVT::i8);
				1938
				1939	unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1940	unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1941	unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1942
				1943	SDOperand LLProd =
				1944	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1945	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
				1946	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
				1947
				1948	SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
				1949
				1950	SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
				1951
				1952	SDOperand LHProd =
				1953	DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
				1954	DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
				1955
				1956	SDOperand FSMBdef_2222 =
				1957	DAG.getCopyToReg(Chain, FSMBreg_2222,
				1958	DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
				1959	DAG.getConstant(0x2222, MVT::i32)));
				1960
				1961	SDOperand FSMBuse_2222 =
				1962	DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
				1963
				1964	SDOperand LoProd_1 =
				1965	DAG.getCopyToReg(Chain, LoProd_reg,
				1966	DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
				1967	FSMBuse_2222));
				1968
				1969	SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
				1970
				1971	SDOperand LoProd =
				1972	DAG.getNode(ISD::AND, MVT::v4i32,
				1973	DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
				1974	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1975	LoProdMask, LoProdMask,
				1976	LoProdMask, LoProdMask));
				1977
				1978	SDOperand rAH =
				1979	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
				1980	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
				1981
				1982	SDOperand rBH =
				1983	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
				1984	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
				1985
				1986	SDOperand HLProd =
				1987	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1988	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
				1989	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
				1990
				1991	SDOperand HHProd_1 =
				1992	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1993	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
				1994	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
				1995	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
				1996	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
				1997
				1998	SDOperand HHProd =
				1999	DAG.getCopyToReg(Chain, HiProd_reg,
				2000	DAG.getNode(SPUISD::SELB, MVT::v8i16,
				2001	HLProd,
				2002	DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
				2003	FSMBuse_2222));
				2004
				2005	SDOperand HiProd =
				2006	DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
				2007	DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
				2008
				2009	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
				2010	DAG.getNode(ISD::OR, MVT::v4i32,
				2011	LoProd, HiProd));
				2012	}
				2013
				2014	default:
				2015	cerr << "CellSPU: Unknown vector multiplication, got "
				2016	<< MVT::getValueTypeString(Op.getValueType())
				2017	<< "\n";
				2018	abort();
				2019	/NOTREACHED/
				2020	}
				2021
				2022	return SDOperand();
				2023	}
				2024
				2025	static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
				2026	MachineFunction &MF = DAG.getMachineFunction();
				2027	SSARegMap *RegMap = MF.getSSARegMap();
				2028
				2029	SDOperand A = Op.getOperand(0);
				2030	SDOperand B = Op.getOperand(1);
				2031	unsigned VT = Op.getValueType();
				2032
				2033	unsigned VRegBR, VRegC;
				2034
				2035	if (VT == MVT::f32) {
				2036	VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				2037	VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				2038	} else {
				2039	VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				2040	VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				2041	}
				2042	// TODO: make sure we're feeding FPInterp the right arguments
				2043	// Right now: fi B, frest(B)
				2044
				2045	// Computes BRcpl =
				2046	// (Floating Interpolate (FP Reciprocal Estimate B))
				2047	SDOperand BRcpl =
				2048	DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
				2049	DAG.getNode(SPUISD::FPInterp, VT, B,
				2050	DAG.getNode(SPUISD::FPRecipEst, VT, B)));
				2051
				2052	// Computes A * BRcpl and stores in a temporary register
				2053	SDOperand AxBRcpl =
				2054	DAG.getCopyToReg(BRcpl, VRegC,
				2055	DAG.getNode(ISD::FMUL, VT, A,
				2056	DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
				2057	// What's the Chain variable do? It's magic!
				2058	// TODO: set Chain = Op(0).getEntryNode()
				2059
				2060	return DAG.getNode(ISD::FADD, VT,
				2061	DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
				2062	DAG.getNode(ISD::FMUL, VT,
				2063	DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
				2064	DAG.getNode(ISD::FSUB, VT, A,
				2065	DAG.getNode(ISD::FMUL, VT, B,
				2066	DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
				2067	}
				2068
				2069	// Expands double-precision FDIV
				2070	// Expects two doubles as inputs X and Y, does a floating point
				2071	// reciprocal estimate, and three iterations of Newton-Raphson
				2072	// to increase accuracy.
				2073	//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
				2074	// MachineFunction &MF = DAG.getMachineFunction();
				2075	// SSARegMap *RegMap = MF.getSSARegMap();
				2076	//
				2077	// SDOperand X = Op.getOperand(0);
				2078	// SDOperand Y = Op.getOperand(1);
				2079	//}
				2080
				2081	static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2082	unsigned VT = Op.getValueType();
				2083	SDOperand N = Op.getOperand(0);
				2084	SDOperand Elt = Op.getOperand(1);
				2085	SDOperand ShufMask[16];
				2086	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
				2087
				2088	assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
				2089
				2090	int EltNo = (int) C->getValue();
				2091
				2092	// sanity checks:
				2093	if (VT == MVT::i8 && EltNo >= 16)
				2094	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
				2095	else if (VT == MVT::i16 && EltNo >= 8)
				2096	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
				2097	else if (VT == MVT::i32 && EltNo >= 4)
				2098	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
				2099	else if (VT == MVT::i64 && EltNo >= 2)
				2100	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
				2101
				2102	if (EltNo == 0 && (VT == MVT::i32 \|\| VT == MVT::i64)) {
				2103	// i32 and i64: Element 0 is the preferred slot
				2104	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
				2105	}
				2106
				2107	// Need to generate shuffle mask and extract:
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame]	2108	int prefslot_begin = -1, prefslot_end = -1;
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2109	int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
				2110
				2111	switch (VT) {
				2112	case MVT::i8: {
				2113	prefslot_begin = prefslot_end = 3;
				2114	break;
				2115	}
				2116	case MVT::i16: {
				2117	prefslot_begin = 2; prefslot_end = 3;
				2118	break;
				2119	}
				2120	case MVT::i32: {
				2121	prefslot_begin = 0; prefslot_end = 3;
				2122	break;
				2123	}
				2124	case MVT::i64: {
				2125	prefslot_begin = 0; prefslot_end = 7;
				2126	break;
				2127	}
				2128	}
				2129
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame]	2130	assert(prefslot_begin != -1 && prefslot_end != -1 &&
				2131	"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
				2132
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2133	for (int i = 0; i < 16; ++i) {
				2134	// zero fill uppper part of preferred slot, don't care about the
				2135	// other slots:
				2136	unsigned int mask_val;
				2137
				2138	if (i <= prefslot_end) {
				2139	mask_val =
				2140	((i < prefslot_begin)
				2141	? 0x80
				2142	: elt_byte + (i - prefslot_begin));
				2143
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame]	2144	ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2145	} else
				2146	ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
				2147	}
				2148
				2149	SDOperand ShufMaskVec =
				2150	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				2151	&ShufMask[0],
				2152	sizeof(ShufMask) / sizeof(ShufMask[0]));
				2153
				2154	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
				2155	DAG.getNode(SPUISD::SHUFB, N.getValueType(),
				2156	N, N, ShufMaskVec));
				2157
				2158	}
				2159
				2160	static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2161	SDOperand VecOp = Op.getOperand(0);
				2162	SDOperand ValOp = Op.getOperand(1);
				2163	SDOperand IdxOp = Op.getOperand(2);
				2164	MVT::ValueType VT = Op.getValueType();
				2165
				2166	ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
				2167	assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
				2168
				2169	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				2170	// Use $2 because it's always 16-byte aligned and it's available:
				2171	SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
				2172
				2173	SDOperand result =
				2174	DAG.getNode(SPUISD::SHUFB, VT,
				2175	DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
				2176	VecOp,
				2177	DAG.getNode(SPUISD::INSERT_MASK, VT,
				2178	DAG.getNode(ISD::ADD, PtrVT,
				2179	PtrBase,
				2180	DAG.getConstant(CN->getValue(),
				2181	PtrVT))));
				2182
				2183	return result;
				2184	}
				2185
				2186	static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
				2187	SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
				2188
				2189	assert(Op.getValueType() == MVT::i8);
				2190	switch (Opc) {
				2191	default:
				2192	assert(0 && "Unhandled i8 math operator");
				2193	/NOTREACHED/
				2194	break;
				2195	case ISD::SUB: {
				2196	// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
				2197	// the result:
				2198	SDOperand N1 = Op.getOperand(1);
				2199	N0 = (N0.getOpcode() != ISD::Constant
				2200	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2201	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2202	N1 = (N1.getOpcode() != ISD::Constant
				2203	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
				2204	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2205	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2206	DAG.getNode(Opc, MVT::i16, N0, N1));
				2207	}
				2208	case ISD::ROTR:
				2209	case ISD::ROTL: {
				2210	SDOperand N1 = Op.getOperand(1);
				2211	unsigned N1Opc;
				2212	N0 = (N0.getOpcode() != ISD::Constant
				2213	? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
				2214	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2215	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
				2216	N1 = (N1.getOpcode() != ISD::Constant
				2217	? DAG.getNode(N1Opc, MVT::i16, N1)
				2218	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2219	SDOperand ExpandArg =
				2220	DAG.getNode(ISD::OR, MVT::i16, N0,
				2221	DAG.getNode(ISD::SHL, MVT::i16,
				2222	N0, DAG.getConstant(8, MVT::i16)));
				2223	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2224	DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
				2225	}
				2226	case ISD::SRL:
				2227	case ISD::SHL: {
				2228	SDOperand N1 = Op.getOperand(1);
				2229	unsigned N1Opc;
				2230	N0 = (N0.getOpcode() != ISD::Constant
				2231	? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
				2232	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2233	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
				2234	N1 = (N1.getOpcode() != ISD::Constant
				2235	? DAG.getNode(N1Opc, MVT::i16, N1)
				2236	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2237	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2238	DAG.getNode(Opc, MVT::i16, N0, N1));
				2239	}
				2240	case ISD::SRA: {
				2241	SDOperand N1 = Op.getOperand(1);
				2242	unsigned N1Opc;
				2243	N0 = (N0.getOpcode() != ISD::Constant
				2244	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2245	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2246	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
				2247	N1 = (N1.getOpcode() != ISD::Constant
				2248	? DAG.getNode(N1Opc, MVT::i16, N1)
				2249	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2250	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2251	DAG.getNode(Opc, MVT::i16, N0, N1));
				2252	}
				2253	case ISD::MUL: {
				2254	SDOperand N1 = Op.getOperand(1);
				2255	unsigned N1Opc;
				2256	N0 = (N0.getOpcode() != ISD::Constant
				2257	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2258	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2259	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
				2260	N1 = (N1.getOpcode() != ISD::Constant
				2261	? DAG.getNode(N1Opc, MVT::i16, N1)
				2262	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2263	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2264	DAG.getNode(Opc, MVT::i16, N0, N1));
				2265	break;
				2266	}
				2267	}
				2268
				2269	return SDOperand();
				2270	}
				2271
				2272	//! Lower byte immediate operations for v16i8 vectors:
				2273	static SDOperand
				2274	LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
				2275	SDOperand ConstVec;
				2276	SDOperand Arg;
				2277	MVT::ValueType VT = Op.getValueType();
				2278
				2279	ConstVec = Op.getOperand(0);
				2280	Arg = Op.getOperand(1);
				2281	if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
				2282	if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
				2283	ConstVec = ConstVec.getOperand(0);
				2284	} else {
				2285	ConstVec = Op.getOperand(1);
				2286	Arg = Op.getOperand(0);
				2287	if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
				2288	ConstVec = ConstVec.getOperand(0);
				2289	}
				2290	}
				2291	}
				2292
				2293	if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
				2294	uint64_t VectorBits[2];
				2295	uint64_t UndefBits[2];
				2296	uint64_t SplatBits, SplatUndef;
				2297	int SplatSize;
				2298
				2299	if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
				2300	&& isConstantSplat(VectorBits, UndefBits,
				2301	MVT::getSizeInBits(MVT::getVectorElementType(VT)),
				2302	SplatBits, SplatUndef, SplatSize)) {
				2303	SDOperand tcVec[16];
				2304	SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
				2305	const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
				2306
				2307	// Turn the BUILD_VECTOR into a set of target constants:
				2308	for (size_t i = 0; i < tcVecSize; ++i)
				2309	tcVec[i] = tc;
				2310
				2311	return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
				2312	DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
				2313	}
				2314	}
				2315
				2316	return SDOperand();
				2317	}
				2318
				2319	//! Lower i32 multiplication
				2320	static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
				2321	unsigned Opc) {
				2322	switch (VT) {
				2323	default:
				2324	cerr << "CellSPU: Unknown LowerMUL value type, got "
				2325	<< MVT::getValueTypeString(Op.getValueType())
				2326	<< "\n";
				2327	abort();
				2328	/NOTREACHED/
				2329
				2330	case MVT::i32: {
				2331	SDOperand rA = Op.getOperand(0);
				2332	SDOperand rB = Op.getOperand(1);
				2333
				2334	return DAG.getNode(ISD::ADD, MVT::i32,
				2335	DAG.getNode(ISD::ADD, MVT::i32,
				2336	DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
				2337	DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
				2338	DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
				2339	}
				2340	}
				2341
				2342	return SDOperand();
				2343	}
				2344
				2345	//! Custom lowering for CTPOP (count population)
				2346	/*!
				2347	Custom lowering code that counts the number ones in the input
				2348	operand. SPU has such an instruction, but it counts the number of
				2349	ones per byte, which then have to be accumulated.
				2350	*/
				2351	static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
				2352	unsigned VT = Op.getValueType();
				2353	unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
				2354
				2355	switch (VT) {
				2356	case MVT::i8: {
				2357	SDOperand N = Op.getOperand(0);
				2358	SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
				2359
				2360	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2361	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2362
				2363	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
				2364	}
				2365
				2366	case MVT::i16: {
				2367	MachineFunction &MF = DAG.getMachineFunction();
				2368	SSARegMap *RegMap = MF.getSSARegMap();
				2369
				2370	unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
				2371
				2372	SDOperand N = Op.getOperand(0);
				2373	SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
				2374	SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
				2375	SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
				2376
				2377	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2378	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2379
				2380	// CNTB_result becomes the chain to which all of the virtual registers
				2381	// CNTB_reg, SUM1_reg become associated:
				2382	SDOperand CNTB_result =
				2383	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
				2384
				2385	SDOperand CNTB_rescopy =
				2386	DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
				2387
				2388	SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
				2389
				2390	return DAG.getNode(ISD::AND, MVT::i16,
				2391	DAG.getNode(ISD::ADD, MVT::i16,
				2392	DAG.getNode(ISD::SRL, MVT::i16,
				2393	Tmp1, Shift1),
				2394	Tmp1),
				2395	Mask0);
				2396	}
				2397
				2398	case MVT::i32: {
				2399	MachineFunction &MF = DAG.getMachineFunction();
				2400	SSARegMap *RegMap = MF.getSSARegMap();
				2401
				2402	unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				2403	unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				2404
				2405	SDOperand N = Op.getOperand(0);
				2406	SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
				2407	SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
				2408	SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
				2409	SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
				2410
				2411	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2412	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2413
				2414	// CNTB_result becomes the chain to which all of the virtual registers
				2415	// CNTB_reg, SUM1_reg become associated:
				2416	SDOperand CNTB_result =
				2417	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
				2418
				2419	SDOperand CNTB_rescopy =
				2420	DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
				2421
				2422	SDOperand Comp1 =
				2423	DAG.getNode(ISD::SRL, MVT::i32,
				2424	DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
				2425
				2426	SDOperand Sum1 =
				2427	DAG.getNode(ISD::ADD, MVT::i32,
				2428	Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
				2429
				2430	SDOperand Sum1_rescopy =
				2431	DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
				2432
				2433	SDOperand Comp2 =
				2434	DAG.getNode(ISD::SRL, MVT::i32,
				2435	DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
				2436	Shift2);
				2437	SDOperand Sum2 =
				2438	DAG.getNode(ISD::ADD, MVT::i32, Comp2,
				2439	DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
				2440
				2441	return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
				2442	}
				2443
				2444	case MVT::i64:
				2445	break;
				2446	}
				2447
				2448	return SDOperand();
				2449	}
				2450
				2451	/// LowerOperation - Provide custom lowering hooks for some operations.
				2452	///
				2453	SDOperand
				2454	SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
				2455	{
				2456	switch (Op.getOpcode()) {
				2457	default: {
				2458	cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
				2459	cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
				2460	cerr << "*Op.Val:\n";
				2461	Op.Val->dump();
				2462	abort();
				2463	}
				2464	case ISD::LOAD:
				2465	case ISD::SEXTLOAD:
				2466	case ISD::ZEXTLOAD:
				2467	return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
				2468	case ISD::STORE:
				2469	return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
				2470	case ISD::ConstantPool:
				2471	return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
				2472	case ISD::GlobalAddress:
				2473	return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
				2474	case ISD::JumpTable:
				2475	return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
				2476	case ISD::Constant:
				2477	return LowerConstant(Op, DAG);
				2478	case ISD::ConstantFP:
				2479	return LowerConstantFP(Op, DAG);
				2480	case ISD::FORMAL_ARGUMENTS:
				2481	return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
				2482	case ISD::CALL:
				2483	return LowerCALL(Op, DAG);
				2484	case ISD::RET:
				2485	return LowerRET(Op, DAG, getTargetMachine());
				2486
				2487	// i8 math ops:
				2488	case ISD::SUB:
				2489	case ISD::ROTR:
				2490	case ISD::ROTL:
				2491	case ISD::SRL:
				2492	case ISD::SHL:
				2493	case ISD::SRA:
				2494	return LowerI8Math(Op, DAG, Op.getOpcode());
				2495
				2496	// Vector-related lowering.
				2497	case ISD::BUILD_VECTOR:
				2498	return LowerBUILD_VECTOR(Op, DAG);
				2499	case ISD::SCALAR_TO_VECTOR:
				2500	return LowerSCALAR_TO_VECTOR(Op, DAG);
				2501	case ISD::VECTOR_SHUFFLE:
				2502	return LowerVECTOR_SHUFFLE(Op, DAG);
				2503	case ISD::EXTRACT_VECTOR_ELT:
				2504	return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				2505	case ISD::INSERT_VECTOR_ELT:
				2506	return LowerINSERT_VECTOR_ELT(Op, DAG);
				2507
				2508	// Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
				2509	case ISD::AND:
				2510	case ISD::OR:
				2511	case ISD::XOR:
				2512	return LowerByteImmed(Op, DAG);
				2513
				2514	// Vector and i8 multiply:
				2515	case ISD::MUL:
				2516	if (MVT::isVector(Op.getValueType()))
				2517	return LowerVectorMUL(Op, DAG);
				2518	else if (Op.getValueType() == MVT::i8)
				2519	return LowerI8Math(Op, DAG, Op.getOpcode());
				2520	else
				2521	return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
				2522
				2523	case ISD::FDIV:
				2524	if (Op.getValueType() == MVT::f32 \|\| Op.getValueType() == MVT::v4f32)
				2525	return LowerFDIVf32(Op, DAG);
				2526	// else if (Op.getValueType() == MVT::f64)
				2527	// return LowerFDIVf64(Op, DAG);
				2528	else
				2529	assert(0 && "Calling FDIV on unsupported MVT");
				2530
				2531	case ISD::CTPOP:
				2532	return LowerCTPOP(Op, DAG);
				2533	}
				2534
				2535	return SDOperand();
				2536	}
				2537
				2538	//===----------------------------------------------------------------------===//
				2539	// Other Lowering Code
				2540	//===----------------------------------------------------------------------===//
				2541
				2542	MachineBasicBlock *
				2543	SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				2544	MachineBasicBlock *BB)
				2545	{
				2546	return BB;
				2547	}
				2548
				2549	//===----------------------------------------------------------------------===//
				2550	// Target Optimization Hooks
				2551	//===----------------------------------------------------------------------===//
				2552
				2553	SDOperand
				2554	SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
				2555	{
				2556	#if 0
				2557	TargetMachine &TM = getTargetMachine();
				2558	SelectionDAG &DAG = DCI.DAG;
				2559	#endif
				2560	SDOperand N0 = N->getOperand(0); // everything has at least one operand
				2561
				2562	switch (N->getOpcode()) {
				2563	default: break;
				2564
				2565	// Look for obvious optimizations for shift left:
				2566	// a) Replace 0 << V with 0
				2567	// b) Replace V << 0 with V
				2568	//
				2569	// N.B: llvm will generate an undef node if the shift amount is greater than
				2570	// 15 (e.g.: V << 16), which will naturally trigger an assert.
				2571	case SPU::SHLIr32:
				2572	case SPU::SHLHIr16:
				2573	case SPU::SHLQBIIvec:
				2574	case SPU::ROTHIr16:
				2575	case SPU::ROTHIr16_i32:
				2576	case SPU::ROTIr32:
				2577	case SPU::ROTIr32_i16:
				2578	case SPU::ROTQBYIvec:
				2579	case SPU::ROTQBYBIvec:
				2580	case SPU::ROTQBIIvec:
				2581	case SPU::ROTHMIr16:
				2582	case SPU::ROTMIr32:
				2583	case SPU::ROTQMBYIvec: {
				2584	if (N0.getOpcode() == ISD::Constant) {
				2585	if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
				2586	if (C->getValue() == 0) // 0 << V -> 0.
				2587	return N0;
				2588	}
				2589	}
				2590	SDOperand N1 = N->getOperand(1);
				2591	if (N1.getOpcode() == ISD::Constant) {
				2592	if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
				2593	if (C->getValue() == 0) // V << 0 -> V
				2594	return N1;
				2595	}
				2596	}
				2597	break;
				2598	}
				2599	}
				2600
				2601	return SDOperand();
				2602	}
				2603
				2604	//===----------------------------------------------------------------------===//
				2605	// Inline Assembly Support
				2606	//===----------------------------------------------------------------------===//
				2607
				2608	/// getConstraintType - Given a constraint letter, return the type of
				2609	/// constraint it is for this target.
				2610	SPUTargetLowering::ConstraintType
				2611	SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
				2612	if (ConstraintLetter.size() == 1) {
				2613	switch (ConstraintLetter[0]) {
				2614	default: break;
				2615	case 'b':
				2616	case 'r':
				2617	case 'f':
				2618	case 'v':
				2619	case 'y':
				2620	return C_RegisterClass;
				2621	}
				2622	}
				2623	return TargetLowering::getConstraintType(ConstraintLetter);
				2624	}
				2625
				2626	std::pair<unsigned, const TargetRegisterClass*>
				2627	SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				2628	MVT::ValueType VT) const
				2629	{
				2630	if (Constraint.size() == 1) {
				2631	// GCC RS6000 Constraint Letters
				2632	switch (Constraint[0]) {
				2633	case 'b': // R1-R31
				2634	case 'r': // R0-R31
				2635	if (VT == MVT::i64)
				2636	return std::make_pair(0U, SPU::R64CRegisterClass);
				2637	return std::make_pair(0U, SPU::R32CRegisterClass);
				2638	case 'f':
				2639	if (VT == MVT::f32)
				2640	return std::make_pair(0U, SPU::R32FPRegisterClass);
				2641	else if (VT == MVT::f64)
				2642	return std::make_pair(0U, SPU::R64FPRegisterClass);
				2643	break;
				2644	case 'v':
				2645	return std::make_pair(0U, SPU::GPRCRegisterClass);
				2646	}
				2647	}
				2648
				2649	return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				2650	}
				2651
				2652	void
				2653	SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				2654	uint64_t Mask,
				2655	uint64_t &KnownZero,
				2656	uint64_t &KnownOne,
				2657	const SelectionDAG &DAG,
				2658	unsigned Depth ) const {
				2659	KnownZero = 0;
				2660	KnownOne = 0;
				2661	}
				2662
				2663	// LowerAsmOperandForConstraint
				2664	void
				2665	SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
				2666	char ConstraintLetter,
				2667	std::vector<SDOperand> &Ops,
				2668	SelectionDAG &DAG) {
				2669	// Default, for the time being, to the base class handler
				2670	TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
				2671	}
				2672
				2673	/// isLegalAddressImmediate - Return true if the integer value can be used
				2674	/// as the offset of the target addressing mode.
				2675	bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
				2676	// SPU's addresses are 256K:
				2677	return (V > -(1 << 18) && V < (1 << 18) - 1);
				2678	}
				2679
				2680	bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
				2681	return false;
				2682	}