//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };
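
  // Background note (SPU architecture): registers are 128 bits wide, and a
  // scalar value occupies a fixed "preferred slot" within the register
  // (byte 3 for i8, bytes 2-3 for i16, bytes 0-3 for i32/f32, and so on).
  // prefslot_byte records that slot's byte offset; the load/store lowering
  // below uses it to compute byte-rotation amounts.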

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine
  // when we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
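
  // (All of these classes name the same 128-bit-wide SPU registers; they
  // differ in the value type each carries, not in physical register size.)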

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }
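
  // Loads and stores are custom lowered because the SPU's local store is
  // only quadword (16-byte) addressable: a scalar access must load or store
  // the containing 16-byte chunk and rotate or shuffle the element into or
  // out of position. See LowerLOAD and LowerSTORE below.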

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT for both f32 and f64.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE , (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up
      // in the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }
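
    // Illustrative example: an i32 at byte offset 4 within its 16-byte
    // chunk has c_rotamt = 4 - 0 = 4, so the quadword is rotated left by
    // four bytes to land the word in the preferred slot (bytes 0-3). An
    // i16 at offset 6 likewise yields c_rotamt = 6 - 2 = 4.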

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT,
                               (OpVT == VT ? vecVT : opVecVT), result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts,
                               Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            // A non-extending load has VT == OpVT, and EXTLOAD is handled in
            // the branch above, so only a zero-extending load can reach this
            // point; assert so NewOpC cannot be used uninitialized.
            assert(ExtType == ISD::ZEXTLOAD
                   && "LowerLOAD: unexpected extension type");
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT,
                                       insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer, to
    // force a register load with the address; basep is the actual D-Form
    // address, offs($reg)).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

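    // INSERT_MASK builds a shuffle-control vector from the low four bits of
    // the address (presumably selected to the SPU's cbd/chd/cwd "generate
    // controls for insertion" forms); SHUFB then merges the scalar into the
    // loaded quadword at exactly that byte position.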
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

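  // Two cases apply here: in small-memory mode the constant pool address is
  // assumed to fit in an A-form immediate and is returned directly; in
  // large-memory mode it is built as a Hi/Lo pair (presumably materialized
  // as an immediate-load-high/or-immediate-low sequence) and summed.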
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

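  // Incoming arguments are assigned to the SPU argument registers (see
  // SPURegisterInfo::getArgRegs()) in order; anything past NumArgRegs lives
  // in a fixed stack slot of StackSlotSize bytes. Note that in a vararg
  // function, every formal argument is taken from the stack instead.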
  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.

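  // In other words: the address must be word aligned and fit in 18 signed
  // bits. The returned constant is the address in words, with the two
  // implicit zero bits dropped.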
  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def; not a Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                  /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where only the upper 16 bits of each element are set (i.e., an
/// ILHU candidate), and if so, return the upper-halfword constant.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

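    // Elements are packed big-endian style: element 0 lands in the most
    // significant slot of VectorBits[0]/UndefBits[0].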
    unsigned PartNo = i >= e/2;              // In the upper 64-bit half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1516
1517 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1518 if (MinSplatBits < 64) {
1519
1520 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1521 // undefs.
1522 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1523 if (MinSplatBits < 32) {
1524
          // If the top 16 bits match the lower 16 bits (ignoring undefs),
          // the splat may be narrower still; otherwise we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8 bits match the lower 8 bits (ignoring undefs),
              // we have an 8-bit splat; otherwise an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // We have an 8-bit splat.
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
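
// Worked example (our annotation): Bits128 = { 0x0101010101010101,
// 0x0101010101010101 } with no undefs passes every narrowing check above
// and yields SplatBits = 0x01, SplatSize = 1.  With MinSplatBits = 32 the
// narrowing stops early and we instead report SplatBits = 0x01010101,
// SplatSize = 4.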

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      SDOperand Result;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be
      // easily detected as common expressions.  It is not attempting to
      // create highly specialized masks to replace any and all 0's, 0xff's
      // and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands
      // are the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt
        // with a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

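      // Control-byte semantics of shufb, per our reading of the SPU ISA
      // (hedged reference): 10xxxxxx selects the constant 0x00, 110xxxxx
      // selects 0xff, 111xxxxx selects 0x80, and anything else selects byte
      // (c & 0x1f) of the 32-byte concatenation of the two source
      // registers.  Hence 0x80 below produces a zero byte, 0xc0 produces
      // 0xff, and 0xe0 produces 0x80.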
      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate.  The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3).  If this is the case, then generate an
/// INSERT_MASK synthetic instruction.  Otherwise, spill V3 to the constant
/// pool.  In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion.  This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be
  // handled using the C*[DX] compute mask instructions, but the vector
  // elements have to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

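  // Illustrative example (our annotation, values invented): for v4i32,
  // V2EltIdx0 == 4, so the mask (0, 1, 5, 3) is monotonic with exactly one
  // element (index 5, i.e. V2's element 1) drawn from V2 and takes the
  // compute-mask path below, while (0, 2, 1, 3) is not monotonic and falls
  // through to the general SHUFB lowering.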
  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the VECTOR_SHUFFLE mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                    // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
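    // Sketch of the arithmetic (our annotation): the SPU only has
    // 16 x 16 -> 32 multiplies, and modulo 2^32,
    //   a * b = ((a_hi * b_lo + b_hi * a_lo) << 16) + a_lo * b_lo
    // MPYH(x, y) computes (x_hi * y_lo) << 16 per element and MPYU computes
    // x_lo * y_lo, so the two adds below accumulate all three partial
    // products.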
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate
  //    results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue.  This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

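    // FSMBI ("form select mask for bytes, immediate" -- our gloss) expands
    // each bit of its 16-bit immediate into a full byte of the result
    // (1 -> 0xff, 0 -> 0x00).  0xcccc == 1100110011001100b therefore yields
    // the byte pattern ff,ff,00,00 in each word: a SELB mask that keeps the
    // high halfword of every 32-bit group.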
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

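    // FSMBI again (see the v8i16 case above): 0x2222 == 0010001000100010b
    // expands to the byte pattern 00,00,ff,00 per word, i.e. a SELB mask
    // selecting the third byte of every word of the intermediate products.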
    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

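  // The math, sketched (our annotation): let r = FPInterp(B, FPRecipEst(B)),
  // an estimate of 1/B.  The code below computes q0 = A * r and returns one
  // Newton-Raphson refinement of the quotient,
  //   q1 = q0 + r * (A - B * q0)
  // which roughly squares the relative error of the reciprocal estimate.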
  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}

// Expands double-precision FDIV
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  default:
    // Guard against reading prefslot_begin/prefslot_end uninitialized:
    assert(0 && "LowerEXTRACT_VECTOR_ELT: unexpected element type");
    /*NOTREACHED*/
  }

  for (int i = 0; i < 16; ++i) {
    // Zero-fill the upper part of the preferred slot; don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

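  // Worked example (our annotation): extracting element 2 of a v4i32 gives
  // elt_byte == 8 and the mask bytes (8, 9, 10, 11) repeated four times, so
  // the SHUFB below moves word 2 into the preferred slot (bytes 0-3), where
  // EXTRACT_ELT0 can read it.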
  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  // Use dyn_cast so the assert below actually checks something:
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);     // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
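    // The trick here (our annotation): ExpandArg replicates the byte into
    // both halves of the i16, so a 16-bit rotate behaves exactly like an
    // 8-bit rotate once the result is truncated back to i8.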
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    break;
  }
  }

  return SDOperand();
}

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

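    // Same partial-product decomposition as the v4i32 case of LowerVectorMUL
    // above (our annotation):
    //   a * b = ((a_hi * b_lo + b_hi * a_lo) << 16) + a_lo * b_lo (mod 2^32)
    // realized as MPYH(a,b) + MPYH(b,a) + MPYU(a,b).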
    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand.  SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16); // popcount can be 16
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

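    // Accumulation sketch (our annotation): CNTB leaves a per-byte popcount
    // b3|b2|b1|b0 in the word.  Adding the word to itself shifted right by
    // 16 and then by 8 folds all four byte counts into the low byte (each
    // partial sum is at most 32, so no byte overflows), and the final AND
    // with 0xff discards the garbage left in the upper bytes.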
    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32),
                  Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");
    break;  // Don't fall through into the CTPOP case in NDEBUG builds.

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);     // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shift left:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B.: LLVM will generate an undef node if the shift amount is greater
  // than 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)        // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)        // V << 0 -> V: return the value, N0.
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // Constraint letters borrowed from the GCC RS6000 (PowerPC) backend:
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // The SPU's local store is 256KB, so a legal address offset must fit
  // within +/- 2^18:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}