Blame - lib/Target/CellSPU/SPUISelLowering.cpp - fp2-dev/platform/external/llvm

blob: 2ab4841c64ed42ac4fc56c3a92bef9ec2a74d2ee [file] [log] [blame]

Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	1	//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by a team from the Computer Systems Research
Scott Michel	2466c37	2007-12-05 01:40:25 +0000	[diff] [blame]	6	// Department at The Aerospace Corporation and is distributed under the
				7	// University of Illinois Open Source License. See LICENSE.TXT for details.
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	8	//
				9	//===----------------------------------------------------------------------===//
				10	//
				11	// This file implements the SPUTargetLowering class.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "SPURegisterNames.h"
				16	#include "SPUISelLowering.h"
				17	#include "SPUTargetMachine.h"
				18	#include "llvm/ADT/VectorExtras.h"
				19	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				20	#include "llvm/CodeGen/CallingConvLower.h"
				21	#include "llvm/CodeGen/MachineFrameInfo.h"
				22	#include "llvm/CodeGen/MachineFunction.h"
				23	#include "llvm/CodeGen/MachineInstrBuilder.h"
				24	#include "llvm/CodeGen/SelectionDAG.h"
				25	#include "llvm/CodeGen/SSARegMap.h"
				26	#include "llvm/Constants.h"
				27	#include "llvm/Function.h"
				28	#include "llvm/Intrinsics.h"
				29	#include "llvm/Support/Debug.h"
				30	#include "llvm/Support/MathExtras.h"
				31	#include "llvm/Target/TargetOptions.h"
				32
				33	#include <map>
				34
				35	using namespace llvm;
				36
				37	// Used in getTargetNodeName() below
				38	namespace {
				39	std::map<unsigned, const char *> node_names;
				40
				41	//! MVT::ValueType mapping to useful data for Cell SPU
				42	struct valtype_map_s {
				43	const MVT::ValueType valtype;
				44	const int prefslot_byte;
				45	};
				46
				47	const valtype_map_s valtype_map[] = {
				48	{ MVT::i1, 3 },
				49	{ MVT::i8, 3 },
				50	{ MVT::i16, 2 },
				51	{ MVT::i32, 0 },
				52	{ MVT::f32, 0 },
				53	{ MVT::i64, 0 },
				54	{ MVT::f64, 0 },
				55	{ MVT::i128, 0 }
				56	};
				57
				58	const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
				59
				60	const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
				61	const valtype_map_s *retval = 0;
				62
				63	for (size_t i = 0; i < n_valtype_map; ++i) {
				64	if (valtype_map[i].valtype == VT) {
				65	retval = valtype_map + i;
				66	break;
				67	}
				68	}
				69
				70	#ifndef NDEBUG
				71	if (retval == 0) {
				72	cerr << "getValueTypeMapEntry returns NULL for "
				73	<< MVT::getValueTypeString(VT)
				74	<< "\n";
				75	abort();
				76	}
				77	#endif
				78
				79	return retval;
				80	}
				81
				82	//! Predicate that returns true if operand is a memory target
				83	/*!
				84	\arg Op Operand to test
				85	\return true if the operand is a memory target (i.e., global
				86	address, external symbol, constant pool) or an existing D-Form
				87	address.
				88	*/
				89	bool isMemoryOperand(const SDOperand &Op)
				90	{
				91	const unsigned Opc = Op.getOpcode();
				92	return (Opc == ISD::GlobalAddress
				93	\|\| Opc == ISD::GlobalTLSAddress
				94	\|\| Opc == ISD::FrameIndex
				95	\|\| Opc == ISD::JumpTable
				96	\|\| Opc == ISD::ConstantPool
				97	\|\| Opc == ISD::ExternalSymbol
				98	\|\| Opc == ISD::TargetGlobalAddress
				99	\|\| Opc == ISD::TargetGlobalTLSAddress
				100	\|\| Opc == ISD::TargetFrameIndex
				101	\|\| Opc == ISD::TargetJumpTable
				102	\|\| Opc == ISD::TargetConstantPool
				103	\|\| Opc == ISD::TargetExternalSymbol
				104	\|\| Opc == SPUISD::DFormAddr);
				105	}
				106	}
				107
				108	SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
				109	: TargetLowering(TM),
				110	SPUTM(TM)
				111	{
				112	// Fold away setcc operations if possible.
				113	setPow2DivIsCheap();
				114
				115	// Use _setjmp/_longjmp instead of setjmp/longjmp.
				116	setUseUnderscoreSetJmp(true);
				117	setUseUnderscoreLongJmp(true);
				118
				119	// Set up the SPU's register classes:
				120	// NOTE: i8 register class is not registered because we cannot determine when
				121	// we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel	504c369	2007-12-17 22:32:34 +0000	[diff] [blame]	122	// NOTE: Ignore the previous note. For now. :-)
				123	addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
				124	addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
				125	addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
				126	addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
				127	addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
				128	addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	129	addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
				130
				131	// SPU has no sign or zero extended loads for i1, i8, i16:
				132	setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
				133	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
				134	setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
				135	setStoreXAction(MVT::i1, Custom);
				136
				137	setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
				138	setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
				139	setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
				140	setStoreXAction(MVT::i8, Custom);
				141
				142	setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
				143	setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
				144	setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
				145
				146	// SPU constant load actions are custom lowered:
				147	setOperationAction(ISD::Constant, MVT::i64, Custom);
				148	setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
				149	setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
				150
				151	// SPU's loads and stores have to be custom lowered:
				152	for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
				153	++sctype) {
				154	setOperationAction(ISD::LOAD, sctype, Custom);
				155	setOperationAction(ISD::STORE, sctype, Custom);
				156	}
				157
				158	// SPU supports BRCOND, although DAGCombine will convert BRCONDs
				159	// into BR_CCs. BR_CC instructions are custom selected in
				160	// SPUDAGToDAGISel.
				161	setOperationAction(ISD::BRCOND, MVT::Other, Legal);
				162
				163	// Expand the jumptable branches
				164	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
				165	setOperationAction(ISD::BR_CC, MVT::Other, Expand);
				166	setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
				167
				168	// SPU has no intrinsics for these particular operations:
				169	setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
				170	setOperationAction(ISD::MEMSET, MVT::Other, Expand);
				171	setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
				172
				173	// PowerPC has no SREM/UREM instructions
				174	setOperationAction(ISD::SREM, MVT::i32, Expand);
				175	setOperationAction(ISD::UREM, MVT::i32, Expand);
				176	setOperationAction(ISD::SREM, MVT::i64, Expand);
				177	setOperationAction(ISD::UREM, MVT::i64, Expand);
				178
				179	// We don't support sin/cos/sqrt/fmod
				180	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				181	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				182	setOperationAction(ISD::FREM , MVT::f64, Expand);
				183	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				184	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				185	setOperationAction(ISD::FREM , MVT::f32, Expand);
				186
				187	// If we're enabling GP optimizations, use hardware square root
				188	setOperationAction(ISD::FSQRT, MVT::f64, Expand);
				189	setOperationAction(ISD::FSQRT, MVT::f32, Expand);
				190
				191	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				192	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
				193
				194	// SPU can do rotate right and left, so legalize it... but customize for i8
				195	// because instructions don't exist.
				196	setOperationAction(ISD::ROTR, MVT::i32, Legal);
				197	setOperationAction(ISD::ROTR, MVT::i16, Legal);
				198	setOperationAction(ISD::ROTR, MVT::i8, Custom);
				199	setOperationAction(ISD::ROTL, MVT::i32, Legal);
				200	setOperationAction(ISD::ROTL, MVT::i16, Legal);
				201	setOperationAction(ISD::ROTL, MVT::i8, Custom);
				202	// SPU has no native version of shift left/right for i8
				203	setOperationAction(ISD::SHL, MVT::i8, Custom);
				204	setOperationAction(ISD::SRL, MVT::i8, Custom);
				205	setOperationAction(ISD::SRA, MVT::i8, Custom);
				206
				207	// Custom lower i32 multiplications
				208	setOperationAction(ISD::MUL, MVT::i32, Custom);
				209
				210	// Need to custom handle (some) common i8 math ops
				211	setOperationAction(ISD::SUB, MVT::i8, Custom);
				212	setOperationAction(ISD::MUL, MVT::i8, Custom);
				213
				214	// SPU does not have BSWAP. It does have i32 support CTLZ.
				215	// CTPOP has to be custom lowered.
				216	setOperationAction(ISD::BSWAP, MVT::i32, Expand);
				217	setOperationAction(ISD::BSWAP, MVT::i64, Expand);
				218
				219	setOperationAction(ISD::CTPOP, MVT::i8, Custom);
				220	setOperationAction(ISD::CTPOP, MVT::i16, Custom);
				221	setOperationAction(ISD::CTPOP, MVT::i32, Custom);
				222	setOperationAction(ISD::CTPOP, MVT::i64, Custom);
				223
				224	setOperationAction(ISD::CTTZ , MVT::i32, Expand);
				225	setOperationAction(ISD::CTTZ , MVT::i64, Expand);
				226
				227	setOperationAction(ISD::CTLZ , MVT::i32, Legal);
				228
				229	// SPU does not have select or setcc
				230	setOperationAction(ISD::SELECT, MVT::i1, Expand);
				231	setOperationAction(ISD::SELECT, MVT::i8, Expand);
				232	setOperationAction(ISD::SELECT, MVT::i16, Expand);
				233	setOperationAction(ISD::SELECT, MVT::i32, Expand);
				234	setOperationAction(ISD::SELECT, MVT::i64, Expand);
				235	setOperationAction(ISD::SELECT, MVT::f32, Expand);
				236	setOperationAction(ISD::SELECT, MVT::f64, Expand);
				237
				238	setOperationAction(ISD::SETCC, MVT::i1, Expand);
				239	setOperationAction(ISD::SETCC, MVT::i8, Expand);
				240	setOperationAction(ISD::SETCC, MVT::i16, Expand);
				241	setOperationAction(ISD::SETCC, MVT::i32, Expand);
				242	setOperationAction(ISD::SETCC, MVT::i64, Expand);
				243	setOperationAction(ISD::SETCC, MVT::f32, Expand);
				244	setOperationAction(ISD::SETCC, MVT::f64, Expand);
				245
				246	// SPU has a legal FP -> signed INT instruction
				247	setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
				248	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				249	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
				250	setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
				251
				252	// FDIV on SPU requires custom lowering
				253	setOperationAction(ISD::FDIV, MVT::f32, Custom);
				254	//setOperationAction(ISD::FDIV, MVT::f64, Custom);
				255
				256	// SPU has [U\|S]INT_TO_FP
				257	setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
				258	setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
				259	setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
				260	setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
				261	setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
				262	setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
				263	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				264	setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
				265
				266	setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
				267	setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
				268	setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
				269	setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
				270
				271	// We cannot sextinreg(i1). Expand to shifts.
				272	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
				273
				274	// Support label based line numbers.
				275	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				276	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				277
				278	// We want to legalize GlobalAddress and ConstantPool nodes into the
				279	// appropriate instructions to materialize the address.
				280	setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
				281	setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
				282	setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
				283	setOperationAction(ISD::JumpTable, MVT::i32, Custom);
				284	setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
				285	setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
				286	setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
				287	setOperationAction(ISD::JumpTable, MVT::i64, Custom);
				288
				289	// RET must be custom lowered, to meet ABI requirements
				290	setOperationAction(ISD::RET, MVT::Other, Custom);
				291
				292	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				293	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				294
				295	// Use the default implementation.
				296	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				297	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				298	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				299	setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
				300	setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
				301	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
				302	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
				303
				304	// Cell SPU has instructions for converting between i64 and fp.
				305	setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
				306	setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
				307
				308	// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
				309	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
				310
				311	// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
				312	setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
				313
				314	// First set operation action for all vector types to expand. Then we
				315	// will selectively turn on ones that can be effectively codegen'd.
				316	addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
				317	addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
				318	addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
				319	addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
				320	addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
				321	addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
				322
				323	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				324	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				325	// add/sub are legal for all supported vector VT's.
				326	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
				327	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
				328	// mul has to be custom lowered.
				329	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
				330
				331	setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
				332	setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
				333	setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
				334	setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
				335	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
				336	setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
				337
				338	// These operations need to be expanded:
				339	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				340	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				341	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				342	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				343	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
				344
				345	// Custom lower build_vector, constant pool spills, insert and
				346	// extract vector elements:
				347	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				348	setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
				349	setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
				350	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				351	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				352	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				353	}
				354
				355	setOperationAction(ISD::MUL, MVT::v16i8, Custom);
				356	setOperationAction(ISD::AND, MVT::v16i8, Custom);
				357	setOperationAction(ISD::OR, MVT::v16i8, Custom);
				358	setOperationAction(ISD::XOR, MVT::v16i8, Custom);
				359	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
				360
				361	setSetCCResultType(MVT::i32);
				362	setShiftAmountType(MVT::i32);
				363	setSetCCResultContents(ZeroOrOneSetCCResult);
				364
				365	setStackPointerRegisterToSaveRestore(SPU::R1);
				366
				367	// We have target-specific dag combine patterns for the following nodes:
				368	// e.g., setTargetDAGCombine(ISD::SUB);
				369
				370	computeRegisterProperties();
				371	}
				372
				373	const char *
				374	SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
				375	{
				376	if (node_names.empty()) {
				377	node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
				378	node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
				379	node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
				380	node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
				381	node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
				382	node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
				383	node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
				384	node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
				385	node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
				386	node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
				387	node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
				388	node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
				389	node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
				390	node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
				391	node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
				392	node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
				393	node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
				394	node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
				395	node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
				396	node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
				397	node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
				398	node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
				399	node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
				400	node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
				401	node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
				402	node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
				403	node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
				404	node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
				405	"SPUISD::ROTBYTES_RIGHT_Z";
				406	node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
				407	"SPUISD::ROTBYTES_RIGHT_S";
				408	node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
				409	node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
				410	"SPUISD::ROTBYTES_LEFT_CHAINED";
				411	node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
				412	node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
				413	node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
				414	node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
				415	node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
				416	node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
				417	}
				418
				419	std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
				420
				421	return ((i != node_names.end()) ? i->second : 0);
				422	}
				423
				424	//===----------------------------------------------------------------------===//
				425	// Calling convention code:
				426	//===----------------------------------------------------------------------===//
				427
				428	#include "SPUGenCallingConv.inc"
				429
				430	//===----------------------------------------------------------------------===//
				431	// LowerOperation implementation
				432	//===----------------------------------------------------------------------===//
				433
				434	/// Custom lower loads for CellSPU
				435	/*!
				436	All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
				437	within a 16-byte block, we have to rotate to extract the requested element.
				438	*/
				439	static SDOperand
				440	LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				441	LoadSDNode *LN = cast<LoadSDNode>(Op);
				442	SDOperand basep = LN->getBasePtr();
				443	SDOperand the_chain = LN->getChain();
				444	MVT::ValueType VT = LN->getLoadedVT();
				445	MVT::ValueType OpVT = Op.Val->getValueType(0);
				446	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				447	ISD::LoadExtType ExtType = LN->getExtensionType();
				448	unsigned alignment = LN->getAlignment();
				449	const valtype_map_s *vtm = getValueTypeMapEntry(VT);
				450	SDOperand Ops[8];
				451
				452	// For an extending load of an i1 variable, just call it i8 (or whatever we
				453	// were passed) and make it zero-extended:
				454	if (VT == MVT::i1) {
				455	VT = OpVT;
				456	ExtType = ISD::ZEXTLOAD;
				457	}
				458
				459	switch (LN->getAddressingMode()) {
				460	case ISD::UNINDEXED: {
				461	SDOperand result;
				462	SDOperand rot_op, rotamt;
				463	SDOperand ptrp;
				464	int c_offset;
				465	int c_rotamt;
				466
				467	// The vector type we really want to be when we load the 16-byte chunk
				468	MVT::ValueType vecVT, opVecVT;
				469
				470	if (VT != MVT::i1)
				471	vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
				472	else
				473	vecVT = MVT::v16i8;
				474
				475	opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
				476
				477	if (basep.getOpcode() == ISD::ADD) {
				478	const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
				479
				480	assert(CN != NULL
				481	&& "LowerLOAD: ISD::ADD operand 1 is not constant");
				482
				483	c_offset = (int) CN->getValue();
				484	c_rotamt = (int) (c_offset & 0xf);
				485
				486	// Adjust the rotation amount to ensure that the final result ends up in
				487	// the preferred slot:
				488	c_rotamt -= vtm->prefslot_byte;
				489	ptrp = basep.getOperand(0);
				490	} else {
				491	c_offset = 0;
				492	c_rotamt = -vtm->prefslot_byte;
				493	ptrp = basep;
				494	}
				495
				496	if (alignment == 16) {
				497	// 16-byte aligned load into preferred slot, no rotation
				498	if (c_rotamt == 0) {
				499	if (isMemoryOperand(ptrp))
				500	// Return unchanged
				501	return SDOperand();
				502	else {
				503	// Return modified D-Form address for pointer:
				504	ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
				505	ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
				506	if (VT == OpVT)
				507	return DAG.getLoad(VT, LN->getChain(), ptrp,
				508	LN->getSrcValue(), LN->getSrcValueOffset(),
				509	LN->isVolatile(), 16);
				510	else
				511	return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
				512	LN->getSrcValueOffset(), OpVT,
				513	LN->isVolatile(), 16);
				514	}
				515	} else {
				516	// Need to rotate...
				517	if (c_rotamt < 0)
				518	c_rotamt += 16;
				519	// Realign the base pointer, with a D-Form address
				520	if ((c_offset & ~0xf) != 0 \|\| !isMemoryOperand(ptrp))
				521	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
				522	ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
				523	else
				524	basep = ptrp;
				525
				526	// Rotate the load:
				527	rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
				528	LN->getSrcValue(), LN->getSrcValueOffset(),
				529	LN->isVolatile(), 16);
				530	the_chain = rot_op.getValue(1);
				531	rotamt = DAG.getConstant(c_rotamt, MVT::i16);
				532
				533	SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
				534	Ops[0] = the_chain;
				535	Ops[1] = rot_op;
				536	Ops[2] = rotamt;
				537
				538	result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
				539	the_chain = result.getValue(1);
				540
				541	if (VT == OpVT \|\| ExtType == ISD::EXTLOAD) {
				542	SDVTList scalarvts;
				543	Ops[0] = the_chain;
				544	Ops[1] = result;
				545	if (OpVT == VT) {
				546	scalarvts = DAG.getVTList(VT, MVT::Other);
				547	} else {
				548	scalarvts = DAG.getVTList(OpVT, MVT::Other);
				549	}
				550
				551	result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
				552	result);
				553	Ops[0] = the_chain;
				554	Ops[1] = result;
				555	result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
				556	the_chain = result.getValue(1);
				557	} else {
				558	// Handle the sign and zero-extending loads for i1 and i8:
				559	unsigned NewOpC;
				560
				561	if (ExtType == ISD::SEXTLOAD) {
				562	NewOpC = (OpVT == MVT::i1
				563	? SPUISD::EXTRACT_I1_SEXT
				564	: SPUISD::EXTRACT_I8_SEXT);
				565	} else if (ExtType == ISD::ZEXTLOAD) {
				566	NewOpC = (OpVT == MVT::i1
				567	? SPUISD::EXTRACT_I1_ZEXT
				568	: SPUISD::EXTRACT_I8_ZEXT);
				569	}
				570
				571	result = DAG.getNode(NewOpC, OpVT, result);
				572	}
				573
				574	SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
				575	SDOperand retops[2] = { result, the_chain };
				576
				577	result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
				578	return result;
				579	/UNREACHED/
				580	}
				581	} else {
				582	// Misaligned 16-byte load:
				583	if (basep.getOpcode() == ISD::LOAD) {
				584	LN = cast<LoadSDNode>(basep);
				585	if (LN->getAlignment() == 16) {
				586	// We can verify that we're really loading from a 16-byte aligned
				587	// chunk. Encapsulate basep as a D-Form address and return a new
				588	// load:
				589	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
				590	DAG.getConstant(0, PtrVT));
				591	if (OpVT == VT)
				592	return DAG.getLoad(VT, LN->getChain(), basep,
				593	LN->getSrcValue(), LN->getSrcValueOffset(),
				594	LN->isVolatile(), 16);
				595	else
				596	return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
				597	LN->getSrcValue(), LN->getSrcValueOffset(),
				598	OpVT, LN->isVolatile(), 16);
				599	}
				600	}
				601
				602	// Catch all other cases where we can't guarantee that we have a
				603	// 16-byte aligned entity, which means resorting to an X-form
				604	// address scheme:
				605
				606	SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
				607	SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
				608	SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
				609
				610	ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
				611
				612	SDOperand alignLoad =
				613	DAG.getLoad(opVecVT, LN->getChain(), ptrp,
				614	LN->getSrcValue(), LN->getSrcValueOffset(),
				615	LN->isVolatile(), 16);
				616
				617	SDOperand insertEltOp =
				618	DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
				619
				620	result = DAG.getNode(SPUISD::SHUFB, opVecVT,
				621	alignLoad,
				622	alignLoad,
				623	DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
				624
				625	result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
				626
				627	SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
				628	SDOperand retops[2] = { result, the_chain };
				629
				630	result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
				631	return result;
				632	}
				633	break;
				634	}
				635	case ISD::PRE_INC:
				636	case ISD::PRE_DEC:
				637	case ISD::POST_INC:
				638	case ISD::POST_DEC:
				639	case ISD::LAST_INDEXED_MODE:
				640	cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
				641	"UNINDEXED\n";
				642	cerr << (unsigned) LN->getAddressingMode() << "\n";
				643	abort();
				644	/NOTREACHED/
				645	}
				646
				647	return SDOperand();
				648	}
				649
				650	/// Custom lower stores for CellSPU
				651	/*!
				652	All CellSPU stores are aligned to 16-byte boundaries, so for elements
				653	within a 16-byte block, we have to generate a shuffle to insert the
				654	requested element into its place, then store the resulting block.
				655	*/
				656	static SDOperand
				657	LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				658	StoreSDNode *SN = cast<StoreSDNode>(Op);
				659	SDOperand Value = SN->getValue();
				660	MVT::ValueType VT = Value.getValueType();
				661	MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
				662	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				663	SDOperand the_chain = SN->getChain();
Chris Lattner	4d321c5	2007-12-05 18:32:18 +0000	[diff] [blame]	664	//unsigned alignment = SN->getAlignment();
				665	//const valtype_map_s *vtm = getValueTypeMapEntry(VT);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	666
				667	switch (SN->getAddressingMode()) {
				668	case ISD::UNINDEXED: {
				669	SDOperand basep = SN->getBasePtr();
				670	SDOperand ptrOp;
				671	int offset;
				672
Scott Michel	9999e68	2007-12-19 07:35:06 +0000	[diff] [blame]	673	if (basep.getOpcode() == ISD::FrameIndex) {
				674	// FrameIndex nodes are always properly aligned. Really.
				675	return SDOperand();
				676	}
				677
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	678	if (basep.getOpcode() == ISD::ADD) {
				679	const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
				680	assert(CN != NULL
				681	&& "LowerSTORE: ISD::ADD operand 1 is not constant");
				682	offset = unsigned(CN->getValue());
				683	ptrOp = basep.getOperand(0);
				684	DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
				685	<< offset
				686	<< "\n");
				687	} else {
				688	ptrOp = basep;
				689	offset = 0;
				690	}
				691
				692	// The vector type we really want to load from the 16-byte chunk, except
				693	// in the case of MVT::i1, which has to be v16i8.
				694	unsigned vecVT, stVecVT;
				695
				696	if (StVT != MVT::i1)
				697	stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
				698	else
				699	stVecVT = MVT::v16i8;
				700	vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
				701
Scott Michel	9999e68	2007-12-19 07:35:06 +0000	[diff] [blame]	702	// Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
				703	// the actual dform addr offs($reg).
				704	basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
				705	DAG.getConstant((offset & ~0xf), PtrVT));
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	706
				707	// Create the 16-byte aligned vector load
				708	SDOperand alignLoad =
				709	DAG.getLoad(vecVT, the_chain, basep,
				710	SN->getSrcValue(), SN->getSrcValueOffset(),
				711	SN->isVolatile(), 16);
				712	the_chain = alignLoad.getValue(1);
				713
				714	LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
				715	SDOperand theValue = SN->getValue();
				716	SDOperand result;
				717
				718	if (StVT != VT
				719	&& (theValue.getOpcode() == ISD::AssertZext
				720	\|\| theValue.getOpcode() == ISD::AssertSext)) {
				721	// Drill down and get the value for zero- and sign-extended
				722	// quantities
				723	theValue = theValue.getOperand(0);
				724	}
				725
				726	SDOperand insertEltOp =
				727	DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
				728	DAG.getNode(SPUISD::DFormAddr, PtrVT,
				729	ptrOp,
				730	DAG.getConstant((offset & 0xf), PtrVT)));
				731
				732	result = DAG.getNode(SPUISD::SHUFB, vecVT,
				733	DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
				734	alignLoad,
				735	DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
				736
				737	result = DAG.getStore(the_chain, result, basep,
				738	LN->getSrcValue(), LN->getSrcValueOffset(),
				739	LN->isVolatile(), LN->getAlignment());
				740
				741	return result;
				742	/UNREACHED/
				743	}
				744	case ISD::PRE_INC:
				745	case ISD::PRE_DEC:
				746	case ISD::POST_INC:
				747	case ISD::POST_DEC:
				748	case ISD::LAST_INDEXED_MODE:
				749	cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
				750	"UNINDEXED\n";
				751	cerr << (unsigned) SN->getAddressingMode() << "\n";
				752	abort();
				753	/NOTREACHED/
				754	}
				755
				756	return SDOperand();
				757	}
				758
				759	/// Generate the address of a constant pool entry.
				760	static SDOperand
				761	LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				762	MVT::ValueType PtrVT = Op.getValueType();
				763	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				764	Constant *C = CP->getConstVal();
				765	SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
				766	const TargetMachine &TM = DAG.getTarget();
				767	SDOperand Zero = DAG.getConstant(0, PtrVT);
				768
				769	if (TM.getRelocationModel() == Reloc::Static) {
				770	if (!ST->usingLargeMem()) {
				771	// Just return the SDOperand with the constant pool address in it.
				772	return CPI;
				773	} else {
				774	// Generate hi/lo address pair
				775	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
				776	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
				777
				778	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				779	}
				780	}
				781
				782	assert(0 &&
				783	"LowerConstantPool: Relocation model other than static not supported.");
				784	return SDOperand();
				785	}
				786
				787	static SDOperand
				788	LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				789	MVT::ValueType PtrVT = Op.getValueType();
				790	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				791	SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
				792	SDOperand Zero = DAG.getConstant(0, PtrVT);
				793	const TargetMachine &TM = DAG.getTarget();
				794
				795	if (TM.getRelocationModel() == Reloc::Static) {
				796	if (!ST->usingLargeMem()) {
				797	// Just return the SDOperand with the jump table address in it.
				798	return JTI;
				799	} else {
				800	// Generate hi/lo address pair
				801	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
				802	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
				803
				804	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				805	}
				806	}
				807
				808	assert(0 &&
				809	"LowerJumpTable: Relocation model other than static not supported.");
				810	return SDOperand();
				811	}
				812
				813	static SDOperand
				814	LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
				815	MVT::ValueType PtrVT = Op.getValueType();
				816	GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
				817	GlobalValue *GV = GSDN->getGlobal();
				818	SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
				819	SDOperand Zero = DAG.getConstant(0, PtrVT);
				820	const TargetMachine &TM = DAG.getTarget();
				821
				822	if (TM.getRelocationModel() == Reloc::Static) {
				823	if (!ST->usingLargeMem()) {
				824	// Generate a local store address
				825	return GA;
				826	} else {
				827	// Generate hi/lo address pair
				828	SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
				829	SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
				830
				831	return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
				832	}
				833	} else {
				834	cerr << "LowerGlobalAddress: Relocation model other than static not "
				835	<< "supported.\n";
				836	abort();
				837	/NOTREACHED/
				838	}
				839
				840	return SDOperand();
				841	}
				842
				843	//! Custom lower i64 integer constants
				844	/*!
				845	This code inserts all of the necessary juggling that needs to occur to load
				846	a 64-bit constant into a register.
				847	*/
				848	static SDOperand
				849	LowerConstant(SDOperand Op, SelectionDAG &DAG) {
				850	unsigned VT = Op.getValueType();
				851	ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
				852
				853	if (VT == MVT::i64) {
				854	SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
				855	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
				856	DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
				857
				858	} else {
				859	cerr << "LowerConstant: unhandled constant type "
				860	<< MVT::getValueTypeString(VT)
				861	<< "\n";
				862	abort();
				863	/NOTREACHED/
				864	}
				865
				866	return SDOperand();
				867	}
				868
				869	//! Custom lower single precision floating point constants
				870	/*!
				871	"float" immediates can be lowered as if they were unsigned 32-bit integers.
				872	The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
				873	target description.
				874	*/
				875	static SDOperand
				876	LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
				877	unsigned VT = Op.getValueType();
				878	ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
				879
				880	assert((FP != 0) &&
				881	"LowerConstantFP: Node is not ConstantFPSDNode");
				882
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	883	if (VT == MVT::f32) {
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	884	float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	885	return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	886	DAG.getTargetConstantFP(targetConst, VT));
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	887	} else if (VT == MVT::f64) {
Scott Michel	170783a	2007-12-19 20:15:47 +0000	[diff] [blame]	888	uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	889	return DAG.getNode(ISD::BIT_CONVERT, VT,
				890	LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
				891	}
				892
				893	return SDOperand();
				894	}
				895
				896	static SDOperand
				897	LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
				898	{
				899	MachineFunction &MF = DAG.getMachineFunction();
				900	MachineFrameInfo *MFI = MF.getFrameInfo();
				901	SSARegMap *RegMap = MF.getSSARegMap();
				902	SmallVector<SDOperand, 8> ArgValues;
				903	SDOperand Root = Op.getOperand(0);
				904	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				905
				906	const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
				907	const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
				908
				909	unsigned ArgOffset = SPUFrameInfo::minStackSize();
				910	unsigned ArgRegIdx = 0;
				911	unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
				912
				913	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				914
				915	// Add DAG nodes to load the arguments or copy them out of registers.
				916	for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
				917	SDOperand ArgVal;
				918	bool needsLoad = false;
				919	MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
				920	unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
				921
				922	switch (ObjectVT) {
				923	default: {
				924	cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
				925	<< MVT::getValueTypeString(ObjectVT)
				926	<< "\n";
				927	abort();
				928	}
				929	case MVT::i8:
				930	if (!isVarArg && ArgRegIdx < NumArgRegs) {
Scott Michel	504c369	2007-12-17 22:32:34 +0000	[diff] [blame]	931	unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	932	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				933	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
				934	++ArgRegIdx;
				935	} else {
				936	needsLoad = true;
				937	}
				938	break;
				939	case MVT::i16:
				940	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				941	unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
				942	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				943	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
				944	++ArgRegIdx;
				945	} else {
				946	needsLoad = true;
				947	}
				948	break;
				949	case MVT::i32:
				950	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				951	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				952	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				953	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
				954	++ArgRegIdx;
				955	} else {
				956	needsLoad = true;
				957	}
				958	break;
				959	case MVT::i64:
				960	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				961	unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
				962	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				963	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				964	++ArgRegIdx;
				965	} else {
				966	needsLoad = true;
				967	}
				968	break;
				969	case MVT::f32:
				970	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				971	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				972	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				973	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
				974	++ArgRegIdx;
				975	} else {
				976	needsLoad = true;
				977	}
				978	break;
				979	case MVT::f64:
				980	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				981	unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
				982	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				983	ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
				984	++ArgRegIdx;
				985	} else {
				986	needsLoad = true;
				987	}
				988	break;
				989	case MVT::v2f64:
				990	case MVT::v4f32:
				991	case MVT::v4i32:
				992	case MVT::v8i16:
				993	case MVT::v16i8:
				994	if (!isVarArg && ArgRegIdx < NumArgRegs) {
				995	unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				996	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				997	ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
				998	++ArgRegIdx;
				999	} else {
				1000	needsLoad = true;
				1001	}
				1002	break;
				1003	}
				1004
				1005	// We need to load the argument to a virtual register if we determined above
				1006	// that we ran out of physical registers of the appropriate type
				1007	if (needsLoad) {
				1008	// If the argument is actually used, emit a load from the right stack
				1009	// slot.
				1010	if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
				1011	int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
				1012	SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
				1013	ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
				1014	} else {
				1015	// Don't emit a dead load.
				1016	ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
				1017	}
				1018
				1019	ArgOffset += StackSlotSize;
				1020	}
				1021
				1022	ArgValues.push_back(ArgVal);
				1023	}
				1024
				1025	// If the function takes variable number of arguments, make a frame index for
				1026	// the start of the first vararg value... for expansion of llvm.va_start.
				1027	if (isVarArg) {
				1028	VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
				1029	ArgOffset);
				1030	SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
				1031	// If this function is vararg, store any remaining integer argument regs to
				1032	// their spots on the stack so that they may be loaded by deferencing the
				1033	// result of va_next.
				1034	SmallVector<SDOperand, 8> MemOps;
				1035	for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
				1036	unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
				1037	MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
				1038	SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
				1039	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1040	MemOps.push_back(Store);
				1041	// Increment the address by four for the next argument to store
				1042	SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
				1043	FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
				1044	}
				1045	if (!MemOps.empty())
				1046	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
				1047	}
				1048
				1049	ArgValues.push_back(Root);
				1050
				1051	// Return the new list of results.
				1052	std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
				1053	Op.Val->value_end());
				1054	return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
				1055	}
				1056
				1057	/// isLSAAddress - Return the immediate to use if the specified
				1058	/// value is representable as a LSA address.
				1059	static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
				1060	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
				1061	if (!C) return 0;
				1062
				1063	int Addr = C->getValue();
				1064	if ((Addr & 3) != 0 \|\| // Low 2 bits are implicitly zero.
				1065	(Addr << 14 >> 14) != Addr)
				1066	return 0; // Top 14 bits have to be sext of immediate.
				1067
				1068	return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
				1069	}
				1070
				1071	static
				1072	SDOperand
				1073	LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				1074	SDOperand Chain = Op.getOperand(0);
				1075	#if 0
				1076	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1077	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1078	#endif
				1079	SDOperand Callee = Op.getOperand(4);
				1080	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				1081	unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
				1082	const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
				1083	const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
				1084
				1085	// Handy pointer type
				1086	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1087
				1088	// Accumulate how many bytes are to be pushed on the stack, including the
				1089	// linkage area, and parameter passing area. According to the SPU ABI,
				1090	// we minimally need space for [LR] and [SP]
				1091	unsigned NumStackBytes = SPUFrameInfo::minStackSize();
				1092
				1093	// Set up a copy of the stack pointer for use loading and storing any
				1094	// arguments that may not fit in the registers available for argument
				1095	// passing.
				1096	SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
				1097
				1098	// Figure out which arguments are going to go in registers, and which in
				1099	// memory.
				1100	unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
				1101	unsigned ArgRegIdx = 0;
				1102
				1103	// Keep track of registers passing arguments
				1104	std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
				1105	// And the arguments passed on the stack
				1106	SmallVector<SDOperand, 8> MemOpChains;
				1107
				1108	for (unsigned i = 0; i != NumOps; ++i) {
				1109	SDOperand Arg = Op.getOperand(5+2*i);
				1110
				1111	// PtrOff will be used to store the current argument to the stack if a
				1112	// register cannot be found for it.
				1113	SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
				1114	PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
				1115
				1116	switch (Arg.getValueType()) {
				1117	default: assert(0 && "Unexpected ValueType for argument!");
				1118	case MVT::i32:
				1119	case MVT::i64:
				1120	case MVT::i128:
				1121	if (ArgRegIdx != NumArgRegs) {
				1122	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1123	} else {
				1124	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1125	ArgOffset += StackSlotSize;
				1126	}
				1127	break;
				1128	case MVT::f32:
				1129	case MVT::f64:
				1130	if (ArgRegIdx != NumArgRegs) {
				1131	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1132	} else {
				1133	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1134	ArgOffset += StackSlotSize;
				1135	}
				1136	break;
				1137	case MVT::v4f32:
				1138	case MVT::v4i32:
				1139	case MVT::v8i16:
				1140	case MVT::v16i8:
				1141	if (ArgRegIdx != NumArgRegs) {
				1142	RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
				1143	} else {
				1144	MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
				1145	ArgOffset += StackSlotSize;
				1146	}
				1147	break;
				1148	}
				1149	}
				1150
				1151	// Update number of stack bytes actually used, insert a call sequence start
				1152	NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
				1153	Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
				1154
				1155	if (!MemOpChains.empty()) {
				1156	// Adjust the stack pointer for the stack arguments.
				1157	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1158	&MemOpChains[0], MemOpChains.size());
				1159	}
				1160
				1161	// Build a sequence of copy-to-reg nodes chained together with token chain
				1162	// and flag operands which copy the outgoing args into the appropriate regs.
				1163	SDOperand InFlag;
				1164	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1165	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1166	InFlag);
				1167	InFlag = Chain.getValue(1);
				1168	}
				1169
				1170	std::vector<MVT::ValueType> NodeTys;
				1171	NodeTys.push_back(MVT::Other); // Returns a chain
				1172	NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
				1173
				1174	SmallVector<SDOperand, 8> Ops;
				1175	unsigned CallOpc = SPUISD::CALL;
				1176
				1177	// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
				1178	// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
				1179	// node so that legalize doesn't hack it.
				1180	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1181	GlobalValue *GV = G->getGlobal();
				1182	unsigned CalleeVT = Callee.getValueType();
				1183
				1184	// Turn calls to targets that are defined (i.e., have bodies) into BRSL
				1185	// style calls, otherwise, external symbols are BRASL calls.
				1186	// NOTE:
				1187	// This may be an unsafe assumption for JIT and really large compilation
				1188	// units.
				1189	if (GV->isDeclaration()) {
				1190	Callee = DAG.getGlobalAddress(GV, CalleeVT);
				1191	} else {
				1192	Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
				1193	DAG.getTargetGlobalAddress(GV, CalleeVT),
				1194	DAG.getConstant(0, PtrVT));
				1195	}
				1196	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1197	Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
				1198	else if (SDNode *Dest = isLSAAddress(Callee, DAG))
				1199	// If this is an absolute destination address that appears to be a legal
				1200	// local store address, use the munged value.
				1201	Callee = SDOperand(Dest, 0);
				1202
				1203	Ops.push_back(Chain);
				1204	Ops.push_back(Callee);
				1205
				1206	// Add argument registers to the end of the list so that they are known live
				1207	// into the call.
				1208	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1209	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1210	RegsToPass[i].second.getValueType()));
				1211
				1212	if (InFlag.Val)
				1213	Ops.push_back(InFlag);
				1214	Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
				1215	InFlag = Chain.getValue(1);
				1216
				1217	SDOperand ResultVals[3];
				1218	unsigned NumResults = 0;
				1219	NodeTys.clear();
				1220
				1221	// If the call has results, copy the values out of the ret val registers.
				1222	switch (Op.Val->getValueType(0)) {
				1223	default: assert(0 && "Unexpected ret value!");
				1224	case MVT::Other: break;
				1225	case MVT::i32:
				1226	if (Op.Val->getValueType(1) == MVT::i32) {
				1227	Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
				1228	ResultVals[0] = Chain.getValue(0);
				1229	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
				1230	Chain.getValue(2)).getValue(1);
				1231	ResultVals[1] = Chain.getValue(0);
				1232	NumResults = 2;
				1233	NodeTys.push_back(MVT::i32);
				1234	} else {
				1235	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
				1236	ResultVals[0] = Chain.getValue(0);
				1237	NumResults = 1;
				1238	}
				1239	NodeTys.push_back(MVT::i32);
				1240	break;
				1241	case MVT::i64:
				1242	Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
				1243	ResultVals[0] = Chain.getValue(0);
				1244	NumResults = 1;
				1245	NodeTys.push_back(MVT::i64);
				1246	break;
				1247	case MVT::f32:
				1248	case MVT::f64:
				1249	Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
				1250	InFlag).getValue(1);
				1251	ResultVals[0] = Chain.getValue(0);
				1252	NumResults = 1;
				1253	NodeTys.push_back(Op.Val->getValueType(0));
				1254	break;
				1255	case MVT::v2f64:
				1256	case MVT::v4f32:
				1257	case MVT::v4i32:
				1258	case MVT::v8i16:
				1259	case MVT::v16i8:
				1260	Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
				1261	InFlag).getValue(1);
				1262	ResultVals[0] = Chain.getValue(0);
				1263	NumResults = 1;
				1264	NodeTys.push_back(Op.Val->getValueType(0));
				1265	break;
				1266	}
				1267
				1268	Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
				1269	DAG.getConstant(NumStackBytes, PtrVT));
				1270	NodeTys.push_back(MVT::Other);
				1271
				1272	// If the function returns void, just return the chain.
				1273	if (NumResults == 0)
				1274	return Chain;
				1275
				1276	// Otherwise, merge everything together with a MERGE_VALUES node.
				1277	ResultVals[NumResults++] = Chain;
				1278	SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
				1279	ResultVals, NumResults);
				1280	return Res.getValue(Op.ResNo);
				1281	}
				1282
				1283	static SDOperand
				1284	LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
				1285	SmallVector<CCValAssign, 16> RVLocs;
				1286	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				1287	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				1288	CCState CCInfo(CC, isVarArg, TM, RVLocs);
				1289	CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
				1290
				1291	// If this is the first return lowered for this function, add the regs to the
				1292	// liveout set for the function.
				1293	if (DAG.getMachineFunction().liveout_empty()) {
				1294	for (unsigned i = 0; i != RVLocs.size(); ++i)
				1295	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				1296	}
				1297
				1298	SDOperand Chain = Op.getOperand(0);
				1299	SDOperand Flag;
				1300
				1301	// Copy the result values into the output registers.
				1302	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				1303	CCValAssign &VA = RVLocs[i];
				1304	assert(VA.isRegLoc() && "Can only return in registers!");
				1305	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
				1306	Flag = Chain.getValue(1);
				1307	}
				1308
				1309	if (Flag.Val)
				1310	return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
				1311	else
				1312	return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
				1313	}
				1314
				1315
				1316	//===----------------------------------------------------------------------===//
				1317	// Vector related lowering:
				1318	//===----------------------------------------------------------------------===//
				1319
				1320	static ConstantSDNode *
				1321	getVecImm(SDNode *N) {
				1322	SDOperand OpVal(0, 0);
				1323
				1324	// Check to see if this buildvec has a single non-undef value in its elements.
				1325	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1326	if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
				1327	if (OpVal.Val == 0)
				1328	OpVal = N->getOperand(i);
				1329	else if (OpVal != N->getOperand(i))
				1330	return 0;
				1331	}
				1332
				1333	if (OpVal.Val != 0) {
				1334	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				1335	return CN;
				1336	}
				1337	}
				1338
				1339	return 0; // All UNDEF: use implicit def.; not Constant node
				1340	}
				1341
				1342	/// get_vec_i18imm - Test if this vector is a vector filled with the same value
				1343	/// and the value fits into an unsigned 18-bit constant, and if so, return the
				1344	/// constant
				1345	SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
				1346	MVT::ValueType ValueType) {
				1347	if (ConstantSDNode *CN = getVecImm(N)) {
				1348	uint64_t Value = CN->getValue();
				1349	if (Value <= 0x3ffff)
				1350	return DAG.getConstant(Value, ValueType);
				1351	}
				1352
				1353	return SDOperand();
				1354	}
				1355
				1356	/// get_vec_i16imm - Test if this vector is a vector filled with the same value
				1357	/// and the value fits into a signed 16-bit constant, and if so, return the
				1358	/// constant
				1359	SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
				1360	MVT::ValueType ValueType) {
				1361	if (ConstantSDNode *CN = getVecImm(N)) {
				1362	if (ValueType == MVT::i32) {
				1363	int Value = (int) CN->getValue();
				1364	int SExtValue = ((Value & 0xffff) << 16) >> 16;
				1365
				1366	if (Value == SExtValue)
				1367	return DAG.getConstant(Value, ValueType);
				1368	} else if (ValueType == MVT::i16) {
				1369	short Value = (short) CN->getValue();
				1370	int SExtValue = ((int) Value << 16) >> 16;
				1371
				1372	if (Value == (short) SExtValue)
				1373	return DAG.getConstant(Value, ValueType);
				1374	} else if (ValueType == MVT::i64) {
				1375	int64_t Value = CN->getValue();
				1376	int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
				1377
				1378	if (Value == SExtValue)
				1379	return DAG.getConstant(Value, ValueType);
				1380	}
				1381	}
				1382
				1383	return SDOperand();
				1384	}
				1385
				1386	/// get_vec_i10imm - Test if this vector is a vector filled with the same value
				1387	/// and the value fits into a signed 10-bit constant, and if so, return the
				1388	/// constant
				1389	SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
				1390	MVT::ValueType ValueType) {
				1391	if (ConstantSDNode *CN = getVecImm(N)) {
				1392	int Value = (int) CN->getValue();
				1393	if ((ValueType == MVT::i32 && isS10Constant(Value))
				1394	\|\| (ValueType == MVT::i16 && isS10Constant((short) Value)))
				1395	return DAG.getConstant(Value, ValueType);
				1396	}
				1397
				1398	return SDOperand();
				1399	}
				1400
				1401	/// get_vec_i8imm - Test if this vector is a vector filled with the same value
				1402	/// and the value fits into a signed 8-bit constant, and if so, return the
				1403	/// constant.
				1404	///
				1405	/// @note: The incoming vector is v16i8 because that's the only way we can load
				1406	/// constant vectors. Thus, we test to see if the upper and lower bytes are the
				1407	/// same value.
				1408	SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
				1409	MVT::ValueType ValueType) {
				1410	if (ConstantSDNode *CN = getVecImm(N)) {
				1411	int Value = (int) CN->getValue();
				1412	if (ValueType == MVT::i16
				1413	&& Value <= 0xffff /* truncated from uint64_t */
				1414	&& ((short) Value >> 8) == ((short) Value & 0xff))
				1415	return DAG.getConstant(Value & 0xff, ValueType);
				1416	else if (ValueType == MVT::i8
				1417	&& (Value & 0xff) == Value)
				1418	return DAG.getConstant(Value, ValueType);
				1419	}
				1420
				1421	return SDOperand();
				1422	}
				1423
				1424	/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
				1425	/// and the value fits into a signed 16-bit constant, and if so, return the
				1426	/// constant
				1427	SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
				1428	MVT::ValueType ValueType) {
				1429	if (ConstantSDNode *CN = getVecImm(N)) {
				1430	uint64_t Value = CN->getValue();
				1431	if ((ValueType == MVT::i32
				1432	&& ((unsigned) Value & 0xffff0000) == (unsigned) Value)
				1433	\|\| (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
				1434	return DAG.getConstant(Value >> 16, ValueType);
				1435	}
				1436
				1437	return SDOperand();
				1438	}
				1439
				1440	/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
				1441	SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
				1442	if (ConstantSDNode *CN = getVecImm(N)) {
				1443	return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
				1444	}
				1445
				1446	return SDOperand();
				1447	}
				1448
				1449	/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
				1450	SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
				1451	if (ConstantSDNode *CN = getVecImm(N)) {
				1452	return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
				1453	}
				1454
				1455	return SDOperand();
				1456	}
				1457
				1458	// If this is a vector of constants or undefs, get the bits. A bit in
				1459	// UndefBits is set if the corresponding element of the vector is an
				1460	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				1461	// zero. Return true if this is not an array of constants, false if it is.
				1462	//
				1463	static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
				1464	uint64_t UndefBits[2]) {
				1465	// Start with zero'd results.
				1466	VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
				1467
				1468	unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
				1469	for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
				1470	SDOperand OpVal = BV->getOperand(i);
				1471
				1472	unsigned PartNo = i >= e/2; // In the upper 128 bits?
				1473	unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
				1474
				1475	uint64_t EltBits = 0;
				1476	if (OpVal.getOpcode() == ISD::UNDEF) {
				1477	uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
				1478	UndefBits[PartNo] \|= EltUndefBits << (SlotNo*EltBitSize);
				1479	continue;
				1480	} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
				1481	EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
				1482	} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
				1483	const APFloat &apf = CN->getValueAPF();
				1484	EltBits = (CN->getValueType(0) == MVT::f32
				1485	? FloatToBits(apf.convertToFloat())
				1486	: DoubleToBits(apf.convertToDouble()));
				1487	} else {
				1488	// Nonconstant element.
				1489	return true;
				1490	}
				1491
				1492	VectorBits[PartNo] \|= EltBits << (SlotNo*EltBitSize);
				1493	}
				1494
				1495	//printf("%llx %llx %llx %llx\n",
				1496	// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
				1497	return false;
				1498	}
				1499
				1500	/// If this is a splat (repetition) of a value across the whole vector, return
				1501	/// the smallest size that splats it. For example, "0x01010101010101..." is a
				1502	/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
				1503	/// SplatSize = 1 byte.
				1504	static bool isConstantSplat(const uint64_t Bits128[2],
				1505	const uint64_t Undef128[2],
				1506	int MinSplatBits,
				1507	uint64_t &SplatBits, uint64_t &SplatUndef,
				1508	int &SplatSize) {
				1509	// Don't let undefs prevent splats from matching. See if the top 64-bits are
				1510	// the same as the lower 64-bits, ignoring undefs.
				1511	uint64_t Bits64 = Bits128[0] \| Bits128[1];
				1512	uint64_t Undef64 = Undef128[0] & Undef128[1];
				1513	uint32_t Bits32 = uint32_t(Bits64) \| uint32_t(Bits64 >> 32);
				1514	uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
				1515	uint16_t Bits16 = uint16_t(Bits32) \| uint16_t(Bits32 >> 16);
				1516	uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
				1517
				1518	if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
				1519	if (MinSplatBits < 64) {
				1520
				1521	// Check that the top 32-bits are the same as the lower 32-bits, ignoring
				1522	// undefs.
				1523	if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
				1524	if (MinSplatBits < 32) {
				1525
				1526	// If the top 16-bits are different than the lower 16-bits, ignoring
				1527	// undefs, we have an i32 splat.
				1528	if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
				1529	if (MinSplatBits < 16) {
				1530	// If the top 8-bits are different than the lower 8-bits, ignoring
				1531	// undefs, we have an i16 splat.
				1532	if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
				1533	// Otherwise, we have an 8-bit splat.
				1534	SplatBits = uint8_t(Bits16) \| uint8_t(Bits16 >> 8);
				1535	SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
				1536	SplatSize = 1;
				1537	return true;
				1538	}
				1539	} else {
				1540	SplatBits = Bits16;
				1541	SplatUndef = Undef16;
				1542	SplatSize = 2;
				1543	return true;
				1544	}
				1545	}
				1546	} else {
				1547	SplatBits = Bits32;
				1548	SplatUndef = Undef32;
				1549	SplatSize = 4;
				1550	return true;
				1551	}
				1552	}
				1553	} else {
				1554	SplatBits = Bits128[0];
				1555	SplatUndef = Undef128[0];
				1556	SplatSize = 8;
				1557	return true;
				1558	}
				1559	}
				1560
				1561	return false; // Can't be a splat if two pieces don't match.
				1562	}
				1563
				1564	// If this is a case we can't handle, return null and let the default
				1565	// expansion code take care of it. If we CAN select this case, and if it
				1566	// selects to a single instruction, return Op. Otherwise, if we can codegen
				1567	// this case more efficiently than a constant pool load, lower it to the
				1568	// sequence of ops that should be used.
				1569	static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				1570	MVT::ValueType VT = Op.getValueType();
				1571	// If this is a vector of constants or undefs, get the bits. A bit in
				1572	// UndefBits is set if the corresponding element of the vector is an
				1573	// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
				1574	// zero.
				1575	uint64_t VectorBits[2];
				1576	uint64_t UndefBits[2];
				1577	uint64_t SplatBits, SplatUndef;
				1578	int SplatSize;
				1579	if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
				1580	\|\| !isConstantSplat(VectorBits, UndefBits,
				1581	MVT::getSizeInBits(MVT::getVectorElementType(VT)),
				1582	SplatBits, SplatUndef, SplatSize))
				1583	return SDOperand(); // Not a constant vector, not a splat.
				1584
				1585	switch (VT) {
				1586	default:
				1587	case MVT::v4f32: {
				1588	uint32_t Value32 = SplatBits;
				1589	assert(SplatSize == 4
				1590	&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
				1591	// NOTE: pretend the constant is an integer. LLVM won't load FP constants
				1592	SDOperand T = DAG.getConstant(Value32, MVT::i32);
				1593	return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
				1594	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
				1595	break;
				1596	}
				1597	case MVT::v2f64: {
				1598	uint64_t f64val = SplatBits;
				1599	assert(SplatSize == 8
				1600	&& "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
				1601	// NOTE: pretend the constant is an integer. LLVM won't load FP constants
				1602	SDOperand T = DAG.getConstant(f64val, MVT::i64);
				1603	return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
				1604	DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
				1605	break;
				1606	}
				1607	case MVT::v16i8: {
				1608	// 8-bit constants have to be expanded to 16-bits
				1609	unsigned short Value16 = SplatBits \| (SplatBits << 8);
				1610	SDOperand Ops[8];
				1611	for (int i = 0; i < 8; ++i)
				1612	Ops[i] = DAG.getConstant(Value16, MVT::i16);
				1613	return DAG.getNode(ISD::BIT_CONVERT, VT,
				1614	DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
				1615	}
				1616	case MVT::v8i16: {
				1617	unsigned short Value16;
				1618	if (SplatSize == 2)
				1619	Value16 = (unsigned short) (SplatBits & 0xffff);
				1620	else
				1621	Value16 = (unsigned short) (SplatBits \| (SplatBits << 8));
				1622	SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
				1623	SDOperand Ops[8];
				1624	for (int i = 0; i < 8; ++i) Ops[i] = T;
				1625	return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
				1626	}
				1627	case MVT::v4i32: {
				1628	unsigned int Value = SplatBits;
				1629	SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
				1630	return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
				1631	}
				1632	case MVT::v2i64: {
				1633	uint64_t val = SplatBits;
				1634	uint32_t upper = uint32_t(val >> 32);
				1635	uint32_t lower = uint32_t(val);
				1636
				1637	if (val != 0) {
				1638	SDOperand LO32;
				1639	SDOperand HI32;
				1640	SmallVector<SDOperand, 16> ShufBytes;
				1641	SDOperand Result;
				1642	bool upper_special, lower_special;
				1643
				1644	// NOTE: This code creates common-case shuffle masks that can be easily
				1645	// detected as common expressions. It is not attempting to create highly
				1646	// specialized masks to replace any and all 0's, 0xff's and 0x80's.
				1647
				1648	// Detect if the upper or lower half is a special shuffle mask pattern:
				1649	upper_special = (upper == 0 \|\| upper == 0xffffffff \|\| upper == 0x80000000);
				1650	lower_special = (lower == 0 \|\| lower == 0xffffffff \|\| lower == 0x80000000);
				1651
				1652	// Create lower vector if not a special pattern
				1653	if (!lower_special) {
				1654	SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
				1655	LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
				1656	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1657	LO32C, LO32C, LO32C, LO32C));
				1658	}
				1659
				1660	// Create upper vector if not a special pattern
				1661	if (!upper_special) {
				1662	SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
				1663	HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
				1664	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1665	HI32C, HI32C, HI32C, HI32C));
				1666	}
				1667
				1668	// If either upper or lower are special, then the two input operands are
				1669	// the same (basically, one of them is a "don't care")
				1670	if (lower_special)
				1671	LO32 = HI32;
				1672	if (upper_special)
				1673	HI32 = LO32;
				1674	if (lower_special && upper_special) {
				1675	// Unhappy situation... both upper and lower are special, so punt with
				1676	// a target constant:
				1677	SDOperand Zero = DAG.getConstant(0, MVT::i32);
				1678	HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
				1679	Zero, Zero);
				1680	}
				1681
				1682	for (int i = 0; i < 4; ++i) {
				1683	for (int j = 0; j < 4; ++j) {
				1684	SDOperand V;
				1685	bool process_upper, process_lower;
				1686	uint64_t val;
				1687
				1688	process_upper = (upper_special && (i & 1) == 0);
				1689	process_lower = (lower_special && (i & 1) == 1);
				1690
				1691	if (process_upper \|\| process_lower) {
				1692	if ((process_upper && upper == 0)
				1693	\|\| (process_lower && lower == 0))
				1694	val = 0x80;
				1695	else if ((process_upper && upper == 0xffffffff)
				1696	\|\| (process_lower && lower == 0xffffffff))
				1697	val = 0xc0;
				1698	else if ((process_upper && upper == 0x80000000)
				1699	\|\| (process_lower && lower == 0x80000000))
				1700	val = (j == 0 ? 0xe0 : 0x80);
				1701	} else
				1702	val = i * 4 + j + ((i & 1) * 16);
				1703
				1704	ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
				1705	}
				1706	}
				1707
				1708	return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
				1709	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				1710	&ShufBytes[0], ShufBytes.size()));
				1711	} else {
				1712	// For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
				1713	SDOperand Zero = DAG.getConstant(0, MVT::i32);
				1714	return DAG.getNode(ISD::BIT_CONVERT, VT,
				1715	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1716	Zero, Zero, Zero, Zero));
				1717	}
				1718	}
				1719	}
				1720
				1721	return SDOperand();
				1722	}
				1723
				1724	/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
				1725	/// which the Cell can operate. The code inspects V3 to ascertain whether the
				1726	/// permutation vector, V3, is monotonically increasing with one "exception"
				1727	/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
				1728	/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
				1729	/// In either case, the net result is going to eventually invoke SHUFB to
				1730	/// permute/shuffle the bytes from V1 and V2.
				1731	/// \note
				1732	/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
				1733	/// control word for byte/halfword/word insertion. This takes care of a single
				1734	/// element move from V2 into V1.
				1735	/// \note
				1736	/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
				1737	static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				1738	SDOperand V1 = Op.getOperand(0);
				1739	SDOperand V2 = Op.getOperand(1);
				1740	SDOperand PermMask = Op.getOperand(2);
				1741
				1742	if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
				1743
				1744	// If we have a single element being moved from V1 to V2, this can be handled
				1745	// using the C*[DX] compute mask instructions, but the vector elements have
				1746	// to be monotonically increasing with one exception element.
				1747	MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
				1748	unsigned EltsFromV2 = 0;
				1749	unsigned V2Elt = 0;
				1750	unsigned V2EltIdx0 = 0;
				1751	unsigned CurrElt = 0;
				1752	bool monotonic = true;
				1753	if (EltVT == MVT::i8)
				1754	V2EltIdx0 = 16;
				1755	else if (EltVT == MVT::i16)
				1756	V2EltIdx0 = 8;
				1757	else if (EltVT == MVT::i32)
				1758	V2EltIdx0 = 4;
				1759	else
				1760	assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
				1761
				1762	for (unsigned i = 0, e = PermMask.getNumOperands();
				1763	EltsFromV2 <= 1 && monotonic && i != e;
				1764	++i) {
				1765	unsigned SrcElt;
				1766	if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
				1767	SrcElt = 0;
				1768	else
				1769	SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
				1770
				1771	if (SrcElt >= V2EltIdx0) {
				1772	++EltsFromV2;
				1773	V2Elt = (V2EltIdx0 - SrcElt) << 2;
				1774	} else if (CurrElt != SrcElt) {
				1775	monotonic = false;
				1776	}
				1777
				1778	++CurrElt;
				1779	}
				1780
				1781	if (EltsFromV2 == 1 && monotonic) {
				1782	// Compute mask and shuffle
				1783	MachineFunction &MF = DAG.getMachineFunction();
				1784	SSARegMap *RegMap = MF.getSSARegMap();
				1785	unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				1786	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				1787	// Initialize temporary register to 0
				1788	SDOperand InitTempReg =
				1789	DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
				1790	// Copy register's contents as index in INSERT_MASK:
				1791	SDOperand ShufMaskOp =
				1792	DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
				1793	DAG.getTargetConstant(V2Elt, MVT::i32),
				1794	DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
				1795	// Use shuffle mask in SHUFB synthetic instruction:
				1796	return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
				1797	} else {
				1798	// Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
				1799	unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
				1800
				1801	SmallVector<SDOperand, 16> ResultMask;
				1802	for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
				1803	unsigned SrcElt;
				1804	if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
				1805	SrcElt = 0;
				1806	else
				1807	SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
				1808
				1809	for (unsigned j = 0; j != BytesPerElement; ++j) {
				1810	ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
				1811	MVT::i8));
				1812	}
				1813	}
				1814
				1815	SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				1816	&ResultMask[0], ResultMask.size());
				1817	return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
				1818	}
				1819	}
				1820
				1821	static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				1822	SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
				1823
				1824	if (Op0.Val->getOpcode() == ISD::Constant) {
				1825	// For a constant, build the appropriate constant vector, which will
				1826	// eventually simplify to a vector register load.
				1827
				1828	ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
				1829	SmallVector<SDOperand, 16> ConstVecValues;
				1830	MVT::ValueType VT;
				1831	size_t n_copies;
				1832
				1833	// Create a constant vector:
				1834	switch (Op.getValueType()) {
				1835	default: assert(0 && "Unexpected constant value type in "
				1836	"LowerSCALAR_TO_VECTOR");
				1837	case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
				1838	case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
				1839	case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
				1840	case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
				1841	case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
				1842	case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
				1843	}
				1844
				1845	SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
				1846	for (size_t j = 0; j < n_copies; ++j)
				1847	ConstVecValues.push_back(CValue);
				1848
				1849	return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
				1850	&ConstVecValues[0], ConstVecValues.size());
				1851	} else {
				1852	// Otherwise, copy the value from one register to another:
				1853	switch (Op0.getValueType()) {
				1854	default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
				1855	case MVT::i8:
				1856	case MVT::i16:
				1857	case MVT::i32:
				1858	case MVT::i64:
				1859	case MVT::f32:
				1860	case MVT::f64:
				1861	return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
				1862	}
				1863	}
				1864
				1865	return SDOperand();
				1866	}
				1867
				1868	static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
				1869	switch (Op.getValueType()) {
				1870	case MVT::v4i32: {
				1871	SDOperand rA = Op.getOperand(0);
				1872	SDOperand rB = Op.getOperand(1);
				1873	SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
				1874	SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
				1875	SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
				1876	SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
				1877
				1878	return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
				1879	break;
				1880	}
				1881
				1882	// Multiply two v8i16 vectors (pipeline friendly version):
				1883	// a) multiply lower halves, mask off upper 16-bit of 32-bit product
				1884	// b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
				1885	// c) Use SELB to select upper and lower halves from the intermediate results
				1886	//
				1887	// NOTE: We really want to move the FSMBI to earlier to actually get the
				1888	// dual-issue. This code does manage to do this, even if it's a little on
				1889	// the wacky side
				1890	case MVT::v8i16: {
				1891	MachineFunction &MF = DAG.getMachineFunction();
				1892	SSARegMap *RegMap = MF.getSSARegMap();
				1893	SDOperand Chain = Op.getOperand(0);
				1894	SDOperand rA = Op.getOperand(0);
				1895	SDOperand rB = Op.getOperand(1);
				1896	unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1897	unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1898
				1899	SDOperand FSMBOp =
				1900	DAG.getCopyToReg(Chain, FSMBIreg,
				1901	DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
				1902	DAG.getConstant(0xcccc, MVT::i32)));
				1903
				1904	SDOperand HHProd =
				1905	DAG.getCopyToReg(FSMBOp, HiProdReg,
				1906	DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
				1907
				1908	SDOperand HHProd_v4i32 =
				1909	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
				1910	DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
				1911
				1912	return DAG.getNode(SPUISD::SELB, MVT::v8i16,
				1913	DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
				1914	DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
				1915	DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
				1916	HHProd_v4i32,
				1917	DAG.getConstant(16, MVT::i16))),
				1918	DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
				1919	}
				1920
				1921	// This M00sE is N@stI! (apologies to Monty Python)
				1922	//
				1923	// SPU doesn't know how to do any 8-bit multiplication, so the solution
				1924	// is to break it all apart, sign extend, and reassemble the various
				1925	// intermediate products.
				1926	case MVT::v16i8: {
				1927	MachineFunction &MF = DAG.getMachineFunction();
				1928	SSARegMap *RegMap = MF.getSSARegMap();
				1929	SDOperand Chain = Op.getOperand(0);
				1930	SDOperand rA = Op.getOperand(0);
				1931	SDOperand rB = Op.getOperand(1);
				1932	SDOperand c8 = DAG.getConstant(8, MVT::i8);
				1933	SDOperand c16 = DAG.getConstant(16, MVT::i8);
				1934
				1935	unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1936	unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1937	unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				1938
				1939	SDOperand LLProd =
				1940	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1941	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
				1942	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
				1943
				1944	SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
				1945
				1946	SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
				1947
				1948	SDOperand LHProd =
				1949	DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
				1950	DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
				1951
				1952	SDOperand FSMBdef_2222 =
				1953	DAG.getCopyToReg(Chain, FSMBreg_2222,
				1954	DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
				1955	DAG.getConstant(0x2222, MVT::i32)));
				1956
				1957	SDOperand FSMBuse_2222 =
				1958	DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
				1959
				1960	SDOperand LoProd_1 =
				1961	DAG.getCopyToReg(Chain, LoProd_reg,
				1962	DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
				1963	FSMBuse_2222));
				1964
				1965	SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
				1966
				1967	SDOperand LoProd =
				1968	DAG.getNode(ISD::AND, MVT::v4i32,
				1969	DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
				1970	DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
				1971	LoProdMask, LoProdMask,
				1972	LoProdMask, LoProdMask));
				1973
				1974	SDOperand rAH =
				1975	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
				1976	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
				1977
				1978	SDOperand rBH =
				1979	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
				1980	DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
				1981
				1982	SDOperand HLProd =
				1983	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1984	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
				1985	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
				1986
				1987	SDOperand HHProd_1 =
				1988	DAG.getNode(SPUISD::MPY, MVT::v8i16,
				1989	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
				1990	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
				1991	DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
				1992	DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
				1993
				1994	SDOperand HHProd =
				1995	DAG.getCopyToReg(Chain, HiProd_reg,
				1996	DAG.getNode(SPUISD::SELB, MVT::v8i16,
				1997	HLProd,
				1998	DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
				1999	FSMBuse_2222));
				2000
				2001	SDOperand HiProd =
				2002	DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
				2003	DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
				2004
				2005	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
				2006	DAG.getNode(ISD::OR, MVT::v4i32,
				2007	LoProd, HiProd));
				2008	}
				2009
				2010	default:
				2011	cerr << "CellSPU: Unknown vector multiplication, got "
				2012	<< MVT::getValueTypeString(Op.getValueType())
				2013	<< "\n";
				2014	abort();
				2015	/NOTREACHED/
				2016	}
				2017
				2018	return SDOperand();
				2019	}
				2020
				2021	static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
				2022	MachineFunction &MF = DAG.getMachineFunction();
				2023	SSARegMap *RegMap = MF.getSSARegMap();
				2024
				2025	SDOperand A = Op.getOperand(0);
				2026	SDOperand B = Op.getOperand(1);
				2027	unsigned VT = Op.getValueType();
				2028
				2029	unsigned VRegBR, VRegC;
				2030
				2031	if (VT == MVT::f32) {
				2032	VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				2033	VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
				2034	} else {
				2035	VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				2036	VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
				2037	}
				2038	// TODO: make sure we're feeding FPInterp the right arguments
				2039	// Right now: fi B, frest(B)
				2040
				2041	// Computes BRcpl =
				2042	// (Floating Interpolate (FP Reciprocal Estimate B))
				2043	SDOperand BRcpl =
				2044	DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
				2045	DAG.getNode(SPUISD::FPInterp, VT, B,
				2046	DAG.getNode(SPUISD::FPRecipEst, VT, B)));
				2047
				2048	// Computes A * BRcpl and stores in a temporary register
				2049	SDOperand AxBRcpl =
				2050	DAG.getCopyToReg(BRcpl, VRegC,
				2051	DAG.getNode(ISD::FMUL, VT, A,
				2052	DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
				2053	// What's the Chain variable do? It's magic!
				2054	// TODO: set Chain = Op(0).getEntryNode()
				2055
				2056	return DAG.getNode(ISD::FADD, VT,
				2057	DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
				2058	DAG.getNode(ISD::FMUL, VT,
				2059	DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
				2060	DAG.getNode(ISD::FSUB, VT, A,
				2061	DAG.getNode(ISD::FMUL, VT, B,
				2062	DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
				2063	}
				2064
				2065	// Expands double-precision FDIV
				2066	// Expects two doubles as inputs X and Y, does a floating point
				2067	// reciprocal estimate, and three iterations of Newton-Raphson
				2068	// to increase accuracy.
				2069	//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
				2070	// MachineFunction &MF = DAG.getMachineFunction();
				2071	// SSARegMap *RegMap = MF.getSSARegMap();
				2072	//
				2073	// SDOperand X = Op.getOperand(0);
				2074	// SDOperand Y = Op.getOperand(1);
				2075	//}
				2076
				2077	static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2078	unsigned VT = Op.getValueType();
				2079	SDOperand N = Op.getOperand(0);
				2080	SDOperand Elt = Op.getOperand(1);
				2081	SDOperand ShufMask[16];
				2082	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
				2083
				2084	assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
				2085
				2086	int EltNo = (int) C->getValue();
				2087
				2088	// sanity checks:
				2089	if (VT == MVT::i8 && EltNo >= 16)
				2090	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
				2091	else if (VT == MVT::i16 && EltNo >= 8)
				2092	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
				2093	else if (VT == MVT::i32 && EltNo >= 4)
				2094	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
				2095	else if (VT == MVT::i64 && EltNo >= 2)
				2096	assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
				2097
				2098	if (EltNo == 0 && (VT == MVT::i32 \|\| VT == MVT::i64)) {
				2099	// i32 and i64: Element 0 is the preferred slot
				2100	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
				2101	}
				2102
				2103	// Need to generate shuffle mask and extract:
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame^]	2104	int prefslot_begin = -1, prefslot_end = -1;
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2105	int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
				2106
				2107	switch (VT) {
				2108	case MVT::i8: {
				2109	prefslot_begin = prefslot_end = 3;
				2110	break;
				2111	}
				2112	case MVT::i16: {
				2113	prefslot_begin = 2; prefslot_end = 3;
				2114	break;
				2115	}
				2116	case MVT::i32: {
				2117	prefslot_begin = 0; prefslot_end = 3;
				2118	break;
				2119	}
				2120	case MVT::i64: {
				2121	prefslot_begin = 0; prefslot_end = 7;
				2122	break;
				2123	}
				2124	}
				2125
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame^]	2126	assert(prefslot_begin != -1 && prefslot_end != -1 &&
				2127	"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
				2128
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2129	for (int i = 0; i < 16; ++i) {
				2130	// zero fill uppper part of preferred slot, don't care about the
				2131	// other slots:
				2132	unsigned int mask_val;
				2133
				2134	if (i <= prefslot_end) {
				2135	mask_val =
				2136	((i < prefslot_begin)
				2137	? 0x80
				2138	: elt_byte + (i - prefslot_begin));
				2139
Scott Michel	0e5665b	2007-12-19 21:17:42 +0000	[diff] [blame^]	2140	ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel	266bc8f	2007-12-04 22:23:35 +0000	[diff] [blame]	2141	} else
				2142	ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
				2143	}
				2144
				2145	SDOperand ShufMaskVec =
				2146	DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
				2147	&ShufMask[0],
				2148	sizeof(ShufMask) / sizeof(ShufMask[0]));
				2149
				2150	return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
				2151	DAG.getNode(SPUISD::SHUFB, N.getValueType(),
				2152	N, N, ShufMaskVec));
				2153
				2154	}
				2155
				2156	static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				2157	SDOperand VecOp = Op.getOperand(0);
				2158	SDOperand ValOp = Op.getOperand(1);
				2159	SDOperand IdxOp = Op.getOperand(2);
				2160	MVT::ValueType VT = Op.getValueType();
				2161
				2162	ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
				2163	assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
				2164
				2165	MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
				2166	// Use $2 because it's always 16-byte aligned and it's available:
				2167	SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
				2168
				2169	SDOperand result =
				2170	DAG.getNode(SPUISD::SHUFB, VT,
				2171	DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
				2172	VecOp,
				2173	DAG.getNode(SPUISD::INSERT_MASK, VT,
				2174	DAG.getNode(ISD::ADD, PtrVT,
				2175	PtrBase,
				2176	DAG.getConstant(CN->getValue(),
				2177	PtrVT))));
				2178
				2179	return result;
				2180	}
				2181
				2182	static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
				2183	SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
				2184
				2185	assert(Op.getValueType() == MVT::i8);
				2186	switch (Opc) {
				2187	default:
				2188	assert(0 && "Unhandled i8 math operator");
				2189	/NOTREACHED/
				2190	break;
				2191	case ISD::SUB: {
				2192	// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
				2193	// the result:
				2194	SDOperand N1 = Op.getOperand(1);
				2195	N0 = (N0.getOpcode() != ISD::Constant
				2196	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2197	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2198	N1 = (N1.getOpcode() != ISD::Constant
				2199	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
				2200	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2201	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2202	DAG.getNode(Opc, MVT::i16, N0, N1));
				2203	}
				2204	case ISD::ROTR:
				2205	case ISD::ROTL: {
				2206	SDOperand N1 = Op.getOperand(1);
				2207	unsigned N1Opc;
				2208	N0 = (N0.getOpcode() != ISD::Constant
				2209	? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
				2210	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2211	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
				2212	N1 = (N1.getOpcode() != ISD::Constant
				2213	? DAG.getNode(N1Opc, MVT::i16, N1)
				2214	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2215	SDOperand ExpandArg =
				2216	DAG.getNode(ISD::OR, MVT::i16, N0,
				2217	DAG.getNode(ISD::SHL, MVT::i16,
				2218	N0, DAG.getConstant(8, MVT::i16)));
				2219	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2220	DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
				2221	}
				2222	case ISD::SRL:
				2223	case ISD::SHL: {
				2224	SDOperand N1 = Op.getOperand(1);
				2225	unsigned N1Opc;
				2226	N0 = (N0.getOpcode() != ISD::Constant
				2227	? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
				2228	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2229	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
				2230	N1 = (N1.getOpcode() != ISD::Constant
				2231	? DAG.getNode(N1Opc, MVT::i16, N1)
				2232	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2233	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2234	DAG.getNode(Opc, MVT::i16, N0, N1));
				2235	}
				2236	case ISD::SRA: {
				2237	SDOperand N1 = Op.getOperand(1);
				2238	unsigned N1Opc;
				2239	N0 = (N0.getOpcode() != ISD::Constant
				2240	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2241	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2242	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
				2243	N1 = (N1.getOpcode() != ISD::Constant
				2244	? DAG.getNode(N1Opc, MVT::i16, N1)
				2245	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2246	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2247	DAG.getNode(Opc, MVT::i16, N0, N1));
				2248	}
				2249	case ISD::MUL: {
				2250	SDOperand N1 = Op.getOperand(1);
				2251	unsigned N1Opc;
				2252	N0 = (N0.getOpcode() != ISD::Constant
				2253	? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
				2254	: DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
				2255	N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
				2256	N1 = (N1.getOpcode() != ISD::Constant
				2257	? DAG.getNode(N1Opc, MVT::i16, N1)
				2258	: DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
				2259	return DAG.getNode(ISD::TRUNCATE, MVT::i8,
				2260	DAG.getNode(Opc, MVT::i16, N0, N1));
				2261	break;
				2262	}
				2263	}
				2264
				2265	return SDOperand();
				2266	}
				2267
				2268	//! Lower byte immediate operations for v16i8 vectors:
				2269	static SDOperand
				2270	LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
				2271	SDOperand ConstVec;
				2272	SDOperand Arg;
				2273	MVT::ValueType VT = Op.getValueType();
				2274
				2275	ConstVec = Op.getOperand(0);
				2276	Arg = Op.getOperand(1);
				2277	if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
				2278	if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
				2279	ConstVec = ConstVec.getOperand(0);
				2280	} else {
				2281	ConstVec = Op.getOperand(1);
				2282	Arg = Op.getOperand(0);
				2283	if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
				2284	ConstVec = ConstVec.getOperand(0);
				2285	}
				2286	}
				2287	}
				2288
				2289	if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
				2290	uint64_t VectorBits[2];
				2291	uint64_t UndefBits[2];
				2292	uint64_t SplatBits, SplatUndef;
				2293	int SplatSize;
				2294
				2295	if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
				2296	&& isConstantSplat(VectorBits, UndefBits,
				2297	MVT::getSizeInBits(MVT::getVectorElementType(VT)),
				2298	SplatBits, SplatUndef, SplatSize)) {
				2299	SDOperand tcVec[16];
				2300	SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
				2301	const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
				2302
				2303	// Turn the BUILD_VECTOR into a set of target constants:
				2304	for (size_t i = 0; i < tcVecSize; ++i)
				2305	tcVec[i] = tc;
				2306
				2307	return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
				2308	DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
				2309	}
				2310	}
				2311
				2312	return SDOperand();
				2313	}
				2314
				2315	//! Lower i32 multiplication
				2316	static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
				2317	unsigned Opc) {
				2318	switch (VT) {
				2319	default:
				2320	cerr << "CellSPU: Unknown LowerMUL value type, got "
				2321	<< MVT::getValueTypeString(Op.getValueType())
				2322	<< "\n";
				2323	abort();
				2324	/NOTREACHED/
				2325
				2326	case MVT::i32: {
				2327	SDOperand rA = Op.getOperand(0);
				2328	SDOperand rB = Op.getOperand(1);
				2329
				2330	return DAG.getNode(ISD::ADD, MVT::i32,
				2331	DAG.getNode(ISD::ADD, MVT::i32,
				2332	DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
				2333	DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
				2334	DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
				2335	}
				2336	}
				2337
				2338	return SDOperand();
				2339	}
				2340
				2341	//! Custom lowering for CTPOP (count population)
				2342	/*!
				2343	Custom lowering code that counts the number ones in the input
				2344	operand. SPU has such an instruction, but it counts the number of
				2345	ones per byte, which then have to be accumulated.
				2346	*/
				2347	static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
				2348	unsigned VT = Op.getValueType();
				2349	unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
				2350
				2351	switch (VT) {
				2352	case MVT::i8: {
				2353	SDOperand N = Op.getOperand(0);
				2354	SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
				2355
				2356	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2357	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2358
				2359	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
				2360	}
				2361
				2362	case MVT::i16: {
				2363	MachineFunction &MF = DAG.getMachineFunction();
				2364	SSARegMap *RegMap = MF.getSSARegMap();
				2365
				2366	unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
				2367
				2368	SDOperand N = Op.getOperand(0);
				2369	SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
				2370	SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
				2371	SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
				2372
				2373	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2374	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2375
				2376	// CNTB_result becomes the chain to which all of the virtual registers
				2377	// CNTB_reg, SUM1_reg become associated:
				2378	SDOperand CNTB_result =
				2379	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
				2380
				2381	SDOperand CNTB_rescopy =
				2382	DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
				2383
				2384	SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
				2385
				2386	return DAG.getNode(ISD::AND, MVT::i16,
				2387	DAG.getNode(ISD::ADD, MVT::i16,
				2388	DAG.getNode(ISD::SRL, MVT::i16,
				2389	Tmp1, Shift1),
				2390	Tmp1),
				2391	Mask0);
				2392	}
				2393
				2394	case MVT::i32: {
				2395	MachineFunction &MF = DAG.getMachineFunction();
				2396	SSARegMap *RegMap = MF.getSSARegMap();
				2397
				2398	unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				2399	unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
				2400
				2401	SDOperand N = Op.getOperand(0);
				2402	SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
				2403	SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
				2404	SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
				2405	SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
				2406
				2407	SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
				2408	SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
				2409
				2410	// CNTB_result becomes the chain to which all of the virtual registers
				2411	// CNTB_reg, SUM1_reg become associated:
				2412	SDOperand CNTB_result =
				2413	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
				2414
				2415	SDOperand CNTB_rescopy =
				2416	DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
				2417
				2418	SDOperand Comp1 =
				2419	DAG.getNode(ISD::SRL, MVT::i32,
				2420	DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
				2421
				2422	SDOperand Sum1 =
				2423	DAG.getNode(ISD::ADD, MVT::i32,
				2424	Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
				2425
				2426	SDOperand Sum1_rescopy =
				2427	DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
				2428
				2429	SDOperand Comp2 =
				2430	DAG.getNode(ISD::SRL, MVT::i32,
				2431	DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
				2432	Shift2);
				2433	SDOperand Sum2 =
				2434	DAG.getNode(ISD::ADD, MVT::i32, Comp2,
				2435	DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
				2436
				2437	return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
				2438	}
				2439
				2440	case MVT::i64:
				2441	break;
				2442	}
				2443
				2444	return SDOperand();
				2445	}
				2446
				2447	/// LowerOperation - Provide custom lowering hooks for some operations.
				2448	///
				2449	SDOperand
				2450	SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
				2451	{
				2452	switch (Op.getOpcode()) {
				2453	default: {
				2454	cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
				2455	cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
				2456	cerr << "*Op.Val:\n";
				2457	Op.Val->dump();
				2458	abort();
				2459	}
				2460	case ISD::LOAD:
				2461	case ISD::SEXTLOAD:
				2462	case ISD::ZEXTLOAD:
				2463	return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
				2464	case ISD::STORE:
				2465	return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
				2466	case ISD::ConstantPool:
				2467	return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
				2468	case ISD::GlobalAddress:
				2469	return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
				2470	case ISD::JumpTable:
				2471	return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
				2472	case ISD::Constant:
				2473	return LowerConstant(Op, DAG);
				2474	case ISD::ConstantFP:
				2475	return LowerConstantFP(Op, DAG);
				2476	case ISD::FORMAL_ARGUMENTS:
				2477	return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
				2478	case ISD::CALL:
				2479	return LowerCALL(Op, DAG);
				2480	case ISD::RET:
				2481	return LowerRET(Op, DAG, getTargetMachine());
				2482
				2483	// i8 math ops:
				2484	case ISD::SUB:
				2485	case ISD::ROTR:
				2486	case ISD::ROTL:
				2487	case ISD::SRL:
				2488	case ISD::SHL:
				2489	case ISD::SRA:
				2490	return LowerI8Math(Op, DAG, Op.getOpcode());
				2491
				2492	// Vector-related lowering.
				2493	case ISD::BUILD_VECTOR:
				2494	return LowerBUILD_VECTOR(Op, DAG);
				2495	case ISD::SCALAR_TO_VECTOR:
				2496	return LowerSCALAR_TO_VECTOR(Op, DAG);
				2497	case ISD::VECTOR_SHUFFLE:
				2498	return LowerVECTOR_SHUFFLE(Op, DAG);
				2499	case ISD::EXTRACT_VECTOR_ELT:
				2500	return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				2501	case ISD::INSERT_VECTOR_ELT:
				2502	return LowerINSERT_VECTOR_ELT(Op, DAG);
				2503
				2504	// Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
				2505	case ISD::AND:
				2506	case ISD::OR:
				2507	case ISD::XOR:
				2508	return LowerByteImmed(Op, DAG);
				2509
				2510	// Vector and i8 multiply:
				2511	case ISD::MUL:
				2512	if (MVT::isVector(Op.getValueType()))
				2513	return LowerVectorMUL(Op, DAG);
				2514	else if (Op.getValueType() == MVT::i8)
				2515	return LowerI8Math(Op, DAG, Op.getOpcode());
				2516	else
				2517	return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
				2518
				2519	case ISD::FDIV:
				2520	if (Op.getValueType() == MVT::f32 \|\| Op.getValueType() == MVT::v4f32)
				2521	return LowerFDIVf32(Op, DAG);
				2522	// else if (Op.getValueType() == MVT::f64)
				2523	// return LowerFDIVf64(Op, DAG);
				2524	else
				2525	assert(0 && "Calling FDIV on unsupported MVT");
				2526
				2527	case ISD::CTPOP:
				2528	return LowerCTPOP(Op, DAG);
				2529	}
				2530
				2531	return SDOperand();
				2532	}
				2533
				2534	//===----------------------------------------------------------------------===//
				2535	// Other Lowering Code
				2536	//===----------------------------------------------------------------------===//
				2537
				2538	MachineBasicBlock *
				2539	SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				2540	MachineBasicBlock *BB)
				2541	{
				2542	return BB;
				2543	}
				2544
				2545	//===----------------------------------------------------------------------===//
				2546	// Target Optimization Hooks
				2547	//===----------------------------------------------------------------------===//
				2548
				2549	SDOperand
				2550	SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
				2551	{
				2552	#if 0
				2553	TargetMachine &TM = getTargetMachine();
				2554	SelectionDAG &DAG = DCI.DAG;
				2555	#endif
				2556	SDOperand N0 = N->getOperand(0); // everything has at least one operand
				2557
				2558	switch (N->getOpcode()) {
				2559	default: break;
				2560
				2561	// Look for obvious optimizations for shift left:
				2562	// a) Replace 0 << V with 0
				2563	// b) Replace V << 0 with V
				2564	//
				2565	// N.B: llvm will generate an undef node if the shift amount is greater than
				2566	// 15 (e.g.: V << 16), which will naturally trigger an assert.
				2567	case SPU::SHLIr32:
				2568	case SPU::SHLHIr16:
				2569	case SPU::SHLQBIIvec:
				2570	case SPU::ROTHIr16:
				2571	case SPU::ROTHIr16_i32:
				2572	case SPU::ROTIr32:
				2573	case SPU::ROTIr32_i16:
				2574	case SPU::ROTQBYIvec:
				2575	case SPU::ROTQBYBIvec:
				2576	case SPU::ROTQBIIvec:
				2577	case SPU::ROTHMIr16:
				2578	case SPU::ROTMIr32:
				2579	case SPU::ROTQMBYIvec: {
				2580	if (N0.getOpcode() == ISD::Constant) {
				2581	if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
				2582	if (C->getValue() == 0) // 0 << V -> 0.
				2583	return N0;
				2584	}
				2585	}
				2586	SDOperand N1 = N->getOperand(1);
				2587	if (N1.getOpcode() == ISD::Constant) {
				2588	if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
				2589	if (C->getValue() == 0) // V << 0 -> V
				2590	return N1;
				2591	}
				2592	}
				2593	break;
				2594	}
				2595	}
				2596
				2597	return SDOperand();
				2598	}
				2599
				2600	//===----------------------------------------------------------------------===//
				2601	// Inline Assembly Support
				2602	//===----------------------------------------------------------------------===//
				2603
				2604	/// getConstraintType - Given a constraint letter, return the type of
				2605	/// constraint it is for this target.
				2606	SPUTargetLowering::ConstraintType
				2607	SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
				2608	if (ConstraintLetter.size() == 1) {
				2609	switch (ConstraintLetter[0]) {
				2610	default: break;
				2611	case 'b':
				2612	case 'r':
				2613	case 'f':
				2614	case 'v':
				2615	case 'y':
				2616	return C_RegisterClass;
				2617	}
				2618	}
				2619	return TargetLowering::getConstraintType(ConstraintLetter);
				2620	}
				2621
				2622	std::pair<unsigned, const TargetRegisterClass*>
				2623	SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				2624	MVT::ValueType VT) const
				2625	{
				2626	if (Constraint.size() == 1) {
				2627	// GCC RS6000 Constraint Letters
				2628	switch (Constraint[0]) {
				2629	case 'b': // R1-R31
				2630	case 'r': // R0-R31
				2631	if (VT == MVT::i64)
				2632	return std::make_pair(0U, SPU::R64CRegisterClass);
				2633	return std::make_pair(0U, SPU::R32CRegisterClass);
				2634	case 'f':
				2635	if (VT == MVT::f32)
				2636	return std::make_pair(0U, SPU::R32FPRegisterClass);
				2637	else if (VT == MVT::f64)
				2638	return std::make_pair(0U, SPU::R64FPRegisterClass);
				2639	break;
				2640	case 'v':
				2641	return std::make_pair(0U, SPU::GPRCRegisterClass);
				2642	}
				2643	}
				2644
				2645	return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				2646	}
				2647
				2648	void
				2649	SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				2650	uint64_t Mask,
				2651	uint64_t &KnownZero,
				2652	uint64_t &KnownOne,
				2653	const SelectionDAG &DAG,
				2654	unsigned Depth ) const {
				2655	KnownZero = 0;
				2656	KnownOne = 0;
				2657	}
				2658
				2659	// LowerAsmOperandForConstraint
				2660	void
				2661	SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
				2662	char ConstraintLetter,
				2663	std::vector<SDOperand> &Ops,
				2664	SelectionDAG &DAG) {
				2665	// Default, for the time being, to the base class handler
				2666	TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
				2667	}
				2668
				2669	/// isLegalAddressImmediate - Return true if the integer value can be used
				2670	/// as the offset of the target addressing mode.
				2671	bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
				2672	// SPU's addresses are 256K:
				2673	return (V > -(1 << 18) && V < (1 << 18) - 1);
				2674	}
				2675
				2676	bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
				2677	return false;
				2678	}