blob: 27119f2cc72e71b48e5177846b85dca24c6b8b33 [file] [log] [blame]
Scott Michel266bc8f2007-12-04 22:23:35 +00001//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by a team from the Computer Systems Research
Scott Michel2466c372007-12-05 01:40:25 +00006// Department at The Aerospace Corporation and is distributed under the
7// University of Illinois Open Source License. See LICENSE.TXT for details.
Scott Michel266bc8f2007-12-04 22:23:35 +00008//
9//===----------------------------------------------------------------------===//
10//
11// This file implements the SPUTargetLowering class.
12//
13//===----------------------------------------------------------------------===//
14
15#include "SPURegisterNames.h"
16#include "SPUISelLowering.h"
17#include "SPUTargetMachine.h"
18#include "llvm/ADT/VectorExtras.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SSARegMap.h"
26#include "llvm/Constants.h"
27#include "llvm/Function.h"
28#include "llvm/Intrinsics.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32
33#include <map>
34
35using namespace llvm;
36
37// Used in getTargetNodeName() below
38namespace {
39 std::map<unsigned, const char *> node_names;
40
41 //! MVT::ValueType mapping to useful data for Cell SPU
42 struct valtype_map_s {
43 const MVT::ValueType valtype;
44 const int prefslot_byte;
45 };
46
47 const valtype_map_s valtype_map[] = {
48 { MVT::i1, 3 },
49 { MVT::i8, 3 },
50 { MVT::i16, 2 },
51 { MVT::i32, 0 },
52 { MVT::f32, 0 },
53 { MVT::i64, 0 },
54 { MVT::f64, 0 },
55 { MVT::i128, 0 }
56 };
57
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59
60 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
61 const valtype_map_s *retval = 0;
62
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
66 break;
67 }
68 }
69
70#ifndef NDEBUG
71 if (retval == 0) {
72 cerr << "getValueTypeMapEntry returns NULL for "
73 << MVT::getValueTypeString(VT)
74 << "\n";
75 abort();
76 }
77#endif
78
79 return retval;
80 }
81
82 //! Predicate that returns true if operand is a memory target
83 /*!
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
87 address.
88 */
89 bool isMemoryOperand(const SDOperand &Op)
90 {
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
105 }
106}
107
108SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
109 : TargetLowering(TM),
110 SPUTM(TM)
111{
112 // Fold away setcc operations if possible.
113 setPow2DivIsCheap();
114
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
118
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
Scott Michel504c3692007-12-17 22:32:34 +0000122 // NOTE: Ignore the previous note. For now. :-)
123 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
124 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
125 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
126 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
127 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
128 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000129 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
130
131 // SPU has no sign or zero extended loads for i1, i8, i16:
132 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
133 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
134 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
135 setStoreXAction(MVT::i1, Custom);
136
137 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
140 setStoreXAction(MVT::i8, Custom);
141
142 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
144 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
145
146 // SPU constant load actions are custom lowered:
147 setOperationAction(ISD::Constant, MVT::i64, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
149 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
150
151 // SPU's loads and stores have to be custom lowered:
152 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
153 ++sctype) {
154 setOperationAction(ISD::LOAD, sctype, Custom);
155 setOperationAction(ISD::STORE, sctype, Custom);
156 }
157
158 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
159 // into BR_CCs. BR_CC instructions are custom selected in
160 // SPUDAGToDAGISel.
161 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
162
163 // Expand the jumptable branches
164 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
165 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
166 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
167
168 // SPU has no intrinsics for these particular operations:
169 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
170 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
171 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
172
173 // PowerPC has no SREM/UREM instructions
174 setOperationAction(ISD::SREM, MVT::i32, Expand);
175 setOperationAction(ISD::UREM, MVT::i32, Expand);
176 setOperationAction(ISD::SREM, MVT::i64, Expand);
177 setOperationAction(ISD::UREM, MVT::i64, Expand);
178
179 // We don't support sin/cos/sqrt/fmod
180 setOperationAction(ISD::FSIN , MVT::f64, Expand);
181 setOperationAction(ISD::FCOS , MVT::f64, Expand);
182 setOperationAction(ISD::FREM , MVT::f64, Expand);
183 setOperationAction(ISD::FSIN , MVT::f32, Expand);
184 setOperationAction(ISD::FCOS , MVT::f32, Expand);
185 setOperationAction(ISD::FREM , MVT::f32, Expand);
186
187 // If we're enabling GP optimizations, use hardware square root
188 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
189 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
190
191 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
192 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
193
194 // SPU can do rotate right and left, so legalize it... but customize for i8
195 // because instructions don't exist.
196 setOperationAction(ISD::ROTR, MVT::i32, Legal);
197 setOperationAction(ISD::ROTR, MVT::i16, Legal);
198 setOperationAction(ISD::ROTR, MVT::i8, Custom);
199 setOperationAction(ISD::ROTL, MVT::i32, Legal);
200 setOperationAction(ISD::ROTL, MVT::i16, Legal);
201 setOperationAction(ISD::ROTL, MVT::i8, Custom);
202 // SPU has no native version of shift left/right for i8
203 setOperationAction(ISD::SHL, MVT::i8, Custom);
204 setOperationAction(ISD::SRL, MVT::i8, Custom);
205 setOperationAction(ISD::SRA, MVT::i8, Custom);
206
207 // Custom lower i32 multiplications
208 setOperationAction(ISD::MUL, MVT::i32, Custom);
209
210 // Need to custom handle (some) common i8 math ops
211 setOperationAction(ISD::SUB, MVT::i8, Custom);
212 setOperationAction(ISD::MUL, MVT::i8, Custom);
213
214 // SPU does not have BSWAP. It does have i32 support CTLZ.
215 // CTPOP has to be custom lowered.
216 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
217 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
218
219 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
222 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
223
224 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
225 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
226
227 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
228
229 // SPU does not have select or setcc
230 setOperationAction(ISD::SELECT, MVT::i1, Expand);
231 setOperationAction(ISD::SELECT, MVT::i8, Expand);
232 setOperationAction(ISD::SELECT, MVT::i16, Expand);
233 setOperationAction(ISD::SELECT, MVT::i32, Expand);
234 setOperationAction(ISD::SELECT, MVT::i64, Expand);
235 setOperationAction(ISD::SELECT, MVT::f32, Expand);
236 setOperationAction(ISD::SELECT, MVT::f64, Expand);
237
238 setOperationAction(ISD::SETCC, MVT::i1, Expand);
239 setOperationAction(ISD::SETCC, MVT::i8, Expand);
240 setOperationAction(ISD::SETCC, MVT::i16, Expand);
241 setOperationAction(ISD::SETCC, MVT::i32, Expand);
242 setOperationAction(ISD::SETCC, MVT::i64, Expand);
243 setOperationAction(ISD::SETCC, MVT::f32, Expand);
244 setOperationAction(ISD::SETCC, MVT::f64, Expand);
245
246 // SPU has a legal FP -> signed INT instruction
247 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
250 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
251
252 // FDIV on SPU requires custom lowering
253 setOperationAction(ISD::FDIV, MVT::f32, Custom);
254 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
255
256 // SPU has [U|S]INT_TO_FP
257 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265
Scott Michel86c041f2007-12-20 00:44:13 +0000266 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
267 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
268 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
269 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
Scott Michel266bc8f2007-12-04 22:23:35 +0000270
271 // We cannot sextinreg(i1). Expand to shifts.
272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
273
274 // Support label based line numbers.
275 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
276 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
277
278 // We want to legalize GlobalAddress and ConstantPool nodes into the
279 // appropriate instructions to materialize the address.
280 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
283 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
284 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
287 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
288
289 // RET must be custom lowered, to meet ABI requirements
290 setOperationAction(ISD::RET, MVT::Other, Custom);
291
292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
293 setOperationAction(ISD::VASTART , MVT::Other, Custom);
294
295 // Use the default implementation.
296 setOperationAction(ISD::VAARG , MVT::Other, Expand);
297 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
298 setOperationAction(ISD::VAEND , MVT::Other, Expand);
299 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
300 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
302 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
303
304 // Cell SPU has instructions for converting between i64 and fp.
305 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
306 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307
308 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
309 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
310
311 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
313
314 // First set operation action for all vector types to expand. Then we
315 // will selectively turn on ones that can be effectively codegen'd.
316 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
322
323 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
324 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
325 // add/sub are legal for all supported vector VT's.
326 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
327 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
328 // mul has to be custom lowered.
329 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
330
331 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
336 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
337
338 // These operations need to be expanded:
339 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
343 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
344
345 // Custom lower build_vector, constant pool spills, insert and
346 // extract vector elements:
347 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
352 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
353 }
354
355 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
356 setOperationAction(ISD::AND, MVT::v16i8, Custom);
357 setOperationAction(ISD::OR, MVT::v16i8, Custom);
358 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
360
361 setSetCCResultType(MVT::i32);
362 setShiftAmountType(MVT::i32);
363 setSetCCResultContents(ZeroOrOneSetCCResult);
364
365 setStackPointerRegisterToSaveRestore(SPU::R1);
366
367 // We have target-specific dag combine patterns for the following nodes:
368 // e.g., setTargetDAGCombine(ISD::SUB);
369
370 computeRegisterProperties();
371}
372
373const char *
374SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375{
376 if (node_names.empty()) {
377 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
378 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
379 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
380 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
417 }
418
419 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420
421 return ((i != node_names.end()) ? i->second : 0);
422}
423
424//===----------------------------------------------------------------------===//
425// Calling convention code:
426//===----------------------------------------------------------------------===//
427
428#include "SPUGenCallingConv.inc"
429
430//===----------------------------------------------------------------------===//
431// LowerOperation implementation
432//===----------------------------------------------------------------------===//
433
434/// Custom lower loads for CellSPU
435/*!
436 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
437 within a 16-byte block, we have to rotate to extract the requested element.
438 */
439static SDOperand
440LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
441 LoadSDNode *LN = cast<LoadSDNode>(Op);
442 SDOperand basep = LN->getBasePtr();
443 SDOperand the_chain = LN->getChain();
Scott Michel86c041f2007-12-20 00:44:13 +0000444 MVT::ValueType BasepOpc = basep.Val->getOpcode();
Scott Michel266bc8f2007-12-04 22:23:35 +0000445 MVT::ValueType VT = LN->getLoadedVT();
446 MVT::ValueType OpVT = Op.Val->getValueType(0);
447 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
448 ISD::LoadExtType ExtType = LN->getExtensionType();
449 unsigned alignment = LN->getAlignment();
450 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
451 SDOperand Ops[8];
452
Scott Michel86c041f2007-12-20 00:44:13 +0000453 if (BasepOpc == ISD::FrameIndex) {
454 // Loading from a frame index is always properly aligned. Always.
455 return SDOperand();
456 }
457
Scott Michel266bc8f2007-12-04 22:23:35 +0000458 // For an extending load of an i1 variable, just call it i8 (or whatever we
459 // were passed) and make it zero-extended:
460 if (VT == MVT::i1) {
461 VT = OpVT;
462 ExtType = ISD::ZEXTLOAD;
463 }
464
465 switch (LN->getAddressingMode()) {
466 case ISD::UNINDEXED: {
467 SDOperand result;
468 SDOperand rot_op, rotamt;
469 SDOperand ptrp;
470 int c_offset;
471 int c_rotamt;
472
473 // The vector type we really want to be when we load the 16-byte chunk
474 MVT::ValueType vecVT, opVecVT;
475
Scott Michel86c041f2007-12-20 00:44:13 +0000476 vecVT = MVT::v16i8;
Scott Michel266bc8f2007-12-04 22:23:35 +0000477 if (VT != MVT::i1)
478 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
Scott Michel266bc8f2007-12-04 22:23:35 +0000479 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
480
481 if (basep.getOpcode() == ISD::ADD) {
482 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
483
484 assert(CN != NULL
485 && "LowerLOAD: ISD::ADD operand 1 is not constant");
486
487 c_offset = (int) CN->getValue();
488 c_rotamt = (int) (c_offset & 0xf);
489
490 // Adjust the rotation amount to ensure that the final result ends up in
491 // the preferred slot:
492 c_rotamt -= vtm->prefslot_byte;
493 ptrp = basep.getOperand(0);
494 } else {
495 c_offset = 0;
496 c_rotamt = -vtm->prefslot_byte;
497 ptrp = basep;
498 }
499
500 if (alignment == 16) {
501 // 16-byte aligned load into preferred slot, no rotation
502 if (c_rotamt == 0) {
503 if (isMemoryOperand(ptrp))
504 // Return unchanged
505 return SDOperand();
506 else {
507 // Return modified D-Form address for pointer:
508 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
509 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
510 if (VT == OpVT)
511 return DAG.getLoad(VT, LN->getChain(), ptrp,
512 LN->getSrcValue(), LN->getSrcValueOffset(),
513 LN->isVolatile(), 16);
514 else
515 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
516 LN->getSrcValueOffset(), OpVT,
517 LN->isVolatile(), 16);
518 }
519 } else {
520 // Need to rotate...
521 if (c_rotamt < 0)
522 c_rotamt += 16;
523 // Realign the base pointer, with a D-Form address
524 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
525 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
526 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
527 else
528 basep = ptrp;
529
530 // Rotate the load:
531 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
532 LN->getSrcValue(), LN->getSrcValueOffset(),
533 LN->isVolatile(), 16);
534 the_chain = rot_op.getValue(1);
535 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
536
537 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
538 Ops[0] = the_chain;
539 Ops[1] = rot_op;
540 Ops[2] = rotamt;
541
542 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
543 the_chain = result.getValue(1);
544
545 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
546 SDVTList scalarvts;
547 Ops[0] = the_chain;
548 Ops[1] = result;
549 if (OpVT == VT) {
550 scalarvts = DAG.getVTList(VT, MVT::Other);
551 } else {
552 scalarvts = DAG.getVTList(OpVT, MVT::Other);
553 }
554
555 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
556 result);
557 Ops[0] = the_chain;
558 Ops[1] = result;
559 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
560 the_chain = result.getValue(1);
561 } else {
562 // Handle the sign and zero-extending loads for i1 and i8:
563 unsigned NewOpC;
564
565 if (ExtType == ISD::SEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_SEXT
568 : SPUISD::EXTRACT_I8_SEXT);
Chris Lattner52ec3752007-12-22 22:47:03 +0000569 } else {
570 assert(ExtType == ISD::ZEXTLOAD);
Scott Michel266bc8f2007-12-04 22:23:35 +0000571 NewOpC = (OpVT == MVT::i1
572 ? SPUISD::EXTRACT_I1_ZEXT
573 : SPUISD::EXTRACT_I8_ZEXT);
574 }
575
576 result = DAG.getNode(NewOpC, OpVT, result);
577 }
578
579 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
580 SDOperand retops[2] = { result, the_chain };
581
582 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
583 return result;
584 /*UNREACHED*/
585 }
586 } else {
587 // Misaligned 16-byte load:
588 if (basep.getOpcode() == ISD::LOAD) {
589 LN = cast<LoadSDNode>(basep);
590 if (LN->getAlignment() == 16) {
591 // We can verify that we're really loading from a 16-byte aligned
592 // chunk. Encapsulate basep as a D-Form address and return a new
593 // load:
594 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
595 DAG.getConstant(0, PtrVT));
596 if (OpVT == VT)
597 return DAG.getLoad(VT, LN->getChain(), basep,
598 LN->getSrcValue(), LN->getSrcValueOffset(),
599 LN->isVolatile(), 16);
600 else
601 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
602 LN->getSrcValue(), LN->getSrcValueOffset(),
603 OpVT, LN->isVolatile(), 16);
604 }
605 }
606
607 // Catch all other cases where we can't guarantee that we have a
608 // 16-byte aligned entity, which means resorting to an X-form
609 // address scheme:
610
611 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
Scott Michel86c041f2007-12-20 00:44:13 +0000612 SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
613 SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
Scott Michel266bc8f2007-12-04 22:23:35 +0000614
615 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
616
617 SDOperand alignLoad =
618 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
619 LN->getSrcValue(), LN->getSrcValueOffset(),
620 LN->isVolatile(), 16);
621
622 SDOperand insertEltOp =
623 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
624
625 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
626 alignLoad,
627 alignLoad,
628 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
629
630 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
631
632 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
633 SDOperand retops[2] = { result, the_chain };
634
635 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
636 return result;
637 }
638 break;
639 }
640 case ISD::PRE_INC:
641 case ISD::PRE_DEC:
642 case ISD::POST_INC:
643 case ISD::POST_DEC:
644 case ISD::LAST_INDEXED_MODE:
645 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646 "UNINDEXED\n";
647 cerr << (unsigned) LN->getAddressingMode() << "\n";
648 abort();
649 /*NOTREACHED*/
650 }
651
652 return SDOperand();
653}
654
655/// Custom lower stores for CellSPU
656/*!
657 All CellSPU stores are aligned to 16-byte boundaries, so for elements
658 within a 16-byte block, we have to generate a shuffle to insert the
659 requested element into its place, then store the resulting block.
660 */
661static SDOperand
662LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
663 StoreSDNode *SN = cast<StoreSDNode>(Op);
664 SDOperand Value = SN->getValue();
665 MVT::ValueType VT = Value.getValueType();
666 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
667 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
668 SDOperand the_chain = SN->getChain();
Chris Lattner4d321c52007-12-05 18:32:18 +0000669 //unsigned alignment = SN->getAlignment();
670 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
Scott Michel266bc8f2007-12-04 22:23:35 +0000671
672 switch (SN->getAddressingMode()) {
673 case ISD::UNINDEXED: {
674 SDOperand basep = SN->getBasePtr();
675 SDOperand ptrOp;
676 int offset;
677
Scott Michel9999e682007-12-19 07:35:06 +0000678 if (basep.getOpcode() == ISD::FrameIndex) {
679 // FrameIndex nodes are always properly aligned. Really.
680 return SDOperand();
681 }
682
Scott Michel266bc8f2007-12-04 22:23:35 +0000683 if (basep.getOpcode() == ISD::ADD) {
684 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
685 assert(CN != NULL
686 && "LowerSTORE: ISD::ADD operand 1 is not constant");
687 offset = unsigned(CN->getValue());
688 ptrOp = basep.getOperand(0);
689 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
690 << offset
691 << "\n");
692 } else {
693 ptrOp = basep;
694 offset = 0;
695 }
696
697 // The vector type we really want to load from the 16-byte chunk, except
698 // in the case of MVT::i1, which has to be v16i8.
699 unsigned vecVT, stVecVT;
700
701 if (StVT != MVT::i1)
702 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
703 else
704 stVecVT = MVT::v16i8;
705 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
706
Scott Michel9999e682007-12-19 07:35:06 +0000707 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
708 // the actual dform addr offs($reg).
709 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
710 DAG.getConstant((offset & ~0xf), PtrVT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000711
712 // Create the 16-byte aligned vector load
713 SDOperand alignLoad =
714 DAG.getLoad(vecVT, the_chain, basep,
715 SN->getSrcValue(), SN->getSrcValueOffset(),
716 SN->isVolatile(), 16);
717 the_chain = alignLoad.getValue(1);
718
719 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
720 SDOperand theValue = SN->getValue();
721 SDOperand result;
722
723 if (StVT != VT
724 && (theValue.getOpcode() == ISD::AssertZext
725 || theValue.getOpcode() == ISD::AssertSext)) {
726 // Drill down and get the value for zero- and sign-extended
727 // quantities
728 theValue = theValue.getOperand(0);
729 }
730
731 SDOperand insertEltOp =
732 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
733 DAG.getNode(SPUISD::DFormAddr, PtrVT,
734 ptrOp,
735 DAG.getConstant((offset & 0xf), PtrVT)));
736
737 result = DAG.getNode(SPUISD::SHUFB, vecVT,
738 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
739 alignLoad,
740 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
741
742 result = DAG.getStore(the_chain, result, basep,
743 LN->getSrcValue(), LN->getSrcValueOffset(),
744 LN->isVolatile(), LN->getAlignment());
745
746 return result;
747 /*UNREACHED*/
748 }
749 case ISD::PRE_INC:
750 case ISD::PRE_DEC:
751 case ISD::POST_INC:
752 case ISD::POST_DEC:
753 case ISD::LAST_INDEXED_MODE:
754 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
755 "UNINDEXED\n";
756 cerr << (unsigned) SN->getAddressingMode() << "\n";
757 abort();
758 /*NOTREACHED*/
759 }
760
761 return SDOperand();
762}
763
764/// Generate the address of a constant pool entry.
765static SDOperand
766LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
767 MVT::ValueType PtrVT = Op.getValueType();
768 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
769 Constant *C = CP->getConstVal();
770 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
771 const TargetMachine &TM = DAG.getTarget();
772 SDOperand Zero = DAG.getConstant(0, PtrVT);
773
774 if (TM.getRelocationModel() == Reloc::Static) {
775 if (!ST->usingLargeMem()) {
776 // Just return the SDOperand with the constant pool address in it.
777 return CPI;
778 } else {
779 // Generate hi/lo address pair
780 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
781 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
782
783 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
784 }
785 }
786
787 assert(0 &&
788 "LowerConstantPool: Relocation model other than static not supported.");
789 return SDOperand();
790}
791
792static SDOperand
793LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
794 MVT::ValueType PtrVT = Op.getValueType();
795 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
796 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
797 SDOperand Zero = DAG.getConstant(0, PtrVT);
798 const TargetMachine &TM = DAG.getTarget();
799
800 if (TM.getRelocationModel() == Reloc::Static) {
801 if (!ST->usingLargeMem()) {
802 // Just return the SDOperand with the jump table address in it.
803 return JTI;
804 } else {
805 // Generate hi/lo address pair
806 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
807 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
808
809 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
810 }
811 }
812
813 assert(0 &&
814 "LowerJumpTable: Relocation model other than static not supported.");
815 return SDOperand();
816}
817
818static SDOperand
819LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
820 MVT::ValueType PtrVT = Op.getValueType();
821 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
822 GlobalValue *GV = GSDN->getGlobal();
823 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
824 SDOperand Zero = DAG.getConstant(0, PtrVT);
825 const TargetMachine &TM = DAG.getTarget();
826
827 if (TM.getRelocationModel() == Reloc::Static) {
828 if (!ST->usingLargeMem()) {
829 // Generate a local store address
830 return GA;
831 } else {
832 // Generate hi/lo address pair
833 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
834 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
835
836 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
837 }
838 } else {
839 cerr << "LowerGlobalAddress: Relocation model other than static not "
840 << "supported.\n";
841 abort();
842 /*NOTREACHED*/
843 }
844
845 return SDOperand();
846}
847
848//! Custom lower i64 integer constants
849/*!
850 This code inserts all of the necessary juggling that needs to occur to load
851 a 64-bit constant into a register.
852 */
853static SDOperand
854LowerConstant(SDOperand Op, SelectionDAG &DAG) {
855 unsigned VT = Op.getValueType();
856 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
857
858 if (VT == MVT::i64) {
859 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
860 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
861 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
862
863 } else {
864 cerr << "LowerConstant: unhandled constant type "
865 << MVT::getValueTypeString(VT)
866 << "\n";
867 abort();
868 /*NOTREACHED*/
869 }
870
871 return SDOperand();
872}
873
874//! Custom lower single precision floating point constants
875/*!
876 "float" immediates can be lowered as if they were unsigned 32-bit integers.
877 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
878 target description.
879 */
880static SDOperand
881LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
882 unsigned VT = Op.getValueType();
883 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
884
885 assert((FP != 0) &&
886 "LowerConstantFP: Node is not ConstantFPSDNode");
887
Scott Michel266bc8f2007-12-04 22:23:35 +0000888 if (VT == MVT::f32) {
Scott Michel170783a2007-12-19 20:15:47 +0000889 float targetConst = FP->getValueAPF().convertToFloat();
Scott Michel266bc8f2007-12-04 22:23:35 +0000890 return DAG.getNode(SPUISD::SFPConstant, VT,
Scott Michel170783a2007-12-19 20:15:47 +0000891 DAG.getTargetConstantFP(targetConst, VT));
Scott Michel266bc8f2007-12-04 22:23:35 +0000892 } else if (VT == MVT::f64) {
Scott Michel170783a2007-12-19 20:15:47 +0000893 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
Scott Michel266bc8f2007-12-04 22:23:35 +0000894 return DAG.getNode(ISD::BIT_CONVERT, VT,
895 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
896 }
897
898 return SDOperand();
899}
900
901static SDOperand
902LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
903{
904 MachineFunction &MF = DAG.getMachineFunction();
905 MachineFrameInfo *MFI = MF.getFrameInfo();
906 SSARegMap *RegMap = MF.getSSARegMap();
907 SmallVector<SDOperand, 8> ArgValues;
908 SDOperand Root = Op.getOperand(0);
909 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
910
911 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
912 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
913
914 unsigned ArgOffset = SPUFrameInfo::minStackSize();
915 unsigned ArgRegIdx = 0;
916 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
917
918 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
919
920 // Add DAG nodes to load the arguments or copy them out of registers.
921 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
922 SDOperand ArgVal;
923 bool needsLoad = false;
924 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
926
927 switch (ObjectVT) {
928 default: {
929 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
930 << MVT::getValueTypeString(ObjectVT)
931 << "\n";
932 abort();
933 }
934 case MVT::i8:
935 if (!isVarArg && ArgRegIdx < NumArgRegs) {
Scott Michel504c3692007-12-17 22:32:34 +0000936 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
Scott Michel266bc8f2007-12-04 22:23:35 +0000937 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
938 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
939 ++ArgRegIdx;
940 } else {
941 needsLoad = true;
942 }
943 break;
944 case MVT::i16:
945 if (!isVarArg && ArgRegIdx < NumArgRegs) {
946 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
947 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
948 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
949 ++ArgRegIdx;
950 } else {
951 needsLoad = true;
952 }
953 break;
954 case MVT::i32:
955 if (!isVarArg && ArgRegIdx < NumArgRegs) {
956 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
957 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
958 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
959 ++ArgRegIdx;
960 } else {
961 needsLoad = true;
962 }
963 break;
964 case MVT::i64:
965 if (!isVarArg && ArgRegIdx < NumArgRegs) {
966 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
967 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
969 ++ArgRegIdx;
970 } else {
971 needsLoad = true;
972 }
973 break;
974 case MVT::f32:
975 if (!isVarArg && ArgRegIdx < NumArgRegs) {
976 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
977 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
978 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
979 ++ArgRegIdx;
980 } else {
981 needsLoad = true;
982 }
983 break;
984 case MVT::f64:
985 if (!isVarArg && ArgRegIdx < NumArgRegs) {
986 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
987 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
989 ++ArgRegIdx;
990 } else {
991 needsLoad = true;
992 }
993 break;
994 case MVT::v2f64:
995 case MVT::v4f32:
996 case MVT::v4i32:
997 case MVT::v8i16:
998 case MVT::v16i8:
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1001 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1003 ++ArgRegIdx;
1004 } else {
1005 needsLoad = true;
1006 }
1007 break;
1008 }
1009
1010 // We need to load the argument to a virtual register if we determined above
1011 // that we ran out of physical registers of the appropriate type
1012 if (needsLoad) {
1013 // If the argument is actually used, emit a load from the right stack
1014 // slot.
1015 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1016 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1017 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1018 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1019 } else {
1020 // Don't emit a dead load.
1021 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1022 }
1023
1024 ArgOffset += StackSlotSize;
1025 }
1026
1027 ArgValues.push_back(ArgVal);
1028 }
1029
1030 // If the function takes variable number of arguments, make a frame index for
1031 // the start of the first vararg value... for expansion of llvm.va_start.
1032 if (isVarArg) {
1033 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1034 ArgOffset);
1035 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1036 // If this function is vararg, store any remaining integer argument regs to
1037 // their spots on the stack so that they may be loaded by deferencing the
1038 // result of va_next.
1039 SmallVector<SDOperand, 8> MemOps;
1040 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1041 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1042 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1043 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1044 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1045 MemOps.push_back(Store);
1046 // Increment the address by four for the next argument to store
1047 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1048 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1049 }
1050 if (!MemOps.empty())
1051 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1052 }
1053
1054 ArgValues.push_back(Root);
1055
1056 // Return the new list of results.
1057 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1058 Op.Val->value_end());
1059 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1060}
1061
1062/// isLSAAddress - Return the immediate to use if the specified
1063/// value is representable as a LSA address.
1064static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1065 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1066 if (!C) return 0;
1067
1068 int Addr = C->getValue();
1069 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1070 (Addr << 14 >> 14) != Addr)
1071 return 0; // Top 14 bits have to be sext of immediate.
1072
1073 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1074}
1075
1076static
1077SDOperand
1078LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1079 SDOperand Chain = Op.getOperand(0);
1080#if 0
1081 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1082 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1083#endif
1084 SDOperand Callee = Op.getOperand(4);
1085 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1086 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1087 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1088 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1089
1090 // Handy pointer type
1091 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1092
1093 // Accumulate how many bytes are to be pushed on the stack, including the
1094 // linkage area, and parameter passing area. According to the SPU ABI,
1095 // we minimally need space for [LR] and [SP]
1096 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1097
1098 // Set up a copy of the stack pointer for use loading and storing any
1099 // arguments that may not fit in the registers available for argument
1100 // passing.
1101 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1102
1103 // Figure out which arguments are going to go in registers, and which in
1104 // memory.
1105 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1106 unsigned ArgRegIdx = 0;
1107
1108 // Keep track of registers passing arguments
1109 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1110 // And the arguments passed on the stack
1111 SmallVector<SDOperand, 8> MemOpChains;
1112
1113 for (unsigned i = 0; i != NumOps; ++i) {
1114 SDOperand Arg = Op.getOperand(5+2*i);
1115
1116 // PtrOff will be used to store the current argument to the stack if a
1117 // register cannot be found for it.
1118 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1119 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1120
1121 switch (Arg.getValueType()) {
1122 default: assert(0 && "Unexpected ValueType for argument!");
1123 case MVT::i32:
1124 case MVT::i64:
1125 case MVT::i128:
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128 } else {
1129 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1131 }
1132 break;
1133 case MVT::f32:
1134 case MVT::f64:
1135 if (ArgRegIdx != NumArgRegs) {
1136 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 } else {
1138 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139 ArgOffset += StackSlotSize;
1140 }
1141 break;
1142 case MVT::v4f32:
1143 case MVT::v4i32:
1144 case MVT::v8i16:
1145 case MVT::v16i8:
1146 if (ArgRegIdx != NumArgRegs) {
1147 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1148 } else {
1149 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1150 ArgOffset += StackSlotSize;
1151 }
1152 break;
1153 }
1154 }
1155
1156 // Update number of stack bytes actually used, insert a call sequence start
1157 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1158 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1159
1160 if (!MemOpChains.empty()) {
1161 // Adjust the stack pointer for the stack arguments.
1162 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1163 &MemOpChains[0], MemOpChains.size());
1164 }
1165
1166 // Build a sequence of copy-to-reg nodes chained together with token chain
1167 // and flag operands which copy the outgoing args into the appropriate regs.
1168 SDOperand InFlag;
1169 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1170 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1171 InFlag);
1172 InFlag = Chain.getValue(1);
1173 }
1174
1175 std::vector<MVT::ValueType> NodeTys;
1176 NodeTys.push_back(MVT::Other); // Returns a chain
1177 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1178
1179 SmallVector<SDOperand, 8> Ops;
1180 unsigned CallOpc = SPUISD::CALL;
1181
1182 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1183 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1184 // node so that legalize doesn't hack it.
1185 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1186 GlobalValue *GV = G->getGlobal();
1187 unsigned CalleeVT = Callee.getValueType();
1188
1189 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1190 // style calls, otherwise, external symbols are BRASL calls.
1191 // NOTE:
1192 // This may be an unsafe assumption for JIT and really large compilation
1193 // units.
1194 if (GV->isDeclaration()) {
1195 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1196 } else {
1197 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1198 DAG.getTargetGlobalAddress(GV, CalleeVT),
1199 DAG.getConstant(0, PtrVT));
1200 }
1201 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1202 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1203 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1204 // If this is an absolute destination address that appears to be a legal
1205 // local store address, use the munged value.
1206 Callee = SDOperand(Dest, 0);
1207
1208 Ops.push_back(Chain);
1209 Ops.push_back(Callee);
1210
1211 // Add argument registers to the end of the list so that they are known live
1212 // into the call.
1213 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1214 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1215 RegsToPass[i].second.getValueType()));
1216
1217 if (InFlag.Val)
1218 Ops.push_back(InFlag);
1219 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1220 InFlag = Chain.getValue(1);
1221
1222 SDOperand ResultVals[3];
1223 unsigned NumResults = 0;
1224 NodeTys.clear();
1225
1226 // If the call has results, copy the values out of the ret val registers.
1227 switch (Op.Val->getValueType(0)) {
1228 default: assert(0 && "Unexpected ret value!");
1229 case MVT::Other: break;
1230 case MVT::i32:
1231 if (Op.Val->getValueType(1) == MVT::i32) {
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1235 Chain.getValue(2)).getValue(1);
1236 ResultVals[1] = Chain.getValue(0);
1237 NumResults = 2;
1238 NodeTys.push_back(MVT::i32);
1239 } else {
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1242 NumResults = 1;
1243 }
1244 NodeTys.push_back(MVT::i32);
1245 break;
1246 case MVT::i64:
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1249 NumResults = 1;
1250 NodeTys.push_back(MVT::i64);
1251 break;
1252 case MVT::f32:
1253 case MVT::f64:
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1255 InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1257 NumResults = 1;
1258 NodeTys.push_back(Op.Val->getValueType(0));
1259 break;
1260 case MVT::v2f64:
1261 case MVT::v4f32:
1262 case MVT::v4i32:
1263 case MVT::v8i16:
1264 case MVT::v16i8:
1265 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1266 InFlag).getValue(1);
1267 ResultVals[0] = Chain.getValue(0);
1268 NumResults = 1;
1269 NodeTys.push_back(Op.Val->getValueType(0));
1270 break;
1271 }
1272
1273 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1274 DAG.getConstant(NumStackBytes, PtrVT));
1275 NodeTys.push_back(MVT::Other);
1276
1277 // If the function returns void, just return the chain.
1278 if (NumResults == 0)
1279 return Chain;
1280
1281 // Otherwise, merge everything together with a MERGE_VALUES node.
1282 ResultVals[NumResults++] = Chain;
1283 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1284 ResultVals, NumResults);
1285 return Res.getValue(Op.ResNo);
1286}
1287
1288static SDOperand
1289LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1290 SmallVector<CCValAssign, 16> RVLocs;
1291 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1292 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1293 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1294 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1295
1296 // If this is the first return lowered for this function, add the regs to the
1297 // liveout set for the function.
1298 if (DAG.getMachineFunction().liveout_empty()) {
1299 for (unsigned i = 0; i != RVLocs.size(); ++i)
1300 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1301 }
1302
1303 SDOperand Chain = Op.getOperand(0);
1304 SDOperand Flag;
1305
1306 // Copy the result values into the output registers.
1307 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1308 CCValAssign &VA = RVLocs[i];
1309 assert(VA.isRegLoc() && "Can only return in registers!");
1310 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1311 Flag = Chain.getValue(1);
1312 }
1313
1314 if (Flag.Val)
1315 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1316 else
1317 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1318}
1319
1320
1321//===----------------------------------------------------------------------===//
1322// Vector related lowering:
1323//===----------------------------------------------------------------------===//
1324
1325static ConstantSDNode *
1326getVecImm(SDNode *N) {
1327 SDOperand OpVal(0, 0);
1328
1329 // Check to see if this buildvec has a single non-undef value in its elements.
1330 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1331 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1332 if (OpVal.Val == 0)
1333 OpVal = N->getOperand(i);
1334 else if (OpVal != N->getOperand(i))
1335 return 0;
1336 }
1337
1338 if (OpVal.Val != 0) {
1339 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1340 return CN;
1341 }
1342 }
1343
1344 return 0; // All UNDEF: use implicit def.; not Constant node
1345}
1346
1347/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1348/// and the value fits into an unsigned 18-bit constant, and if so, return the
1349/// constant
1350SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1351 MVT::ValueType ValueType) {
1352 if (ConstantSDNode *CN = getVecImm(N)) {
1353 uint64_t Value = CN->getValue();
1354 if (Value <= 0x3ffff)
1355 return DAG.getConstant(Value, ValueType);
1356 }
1357
1358 return SDOperand();
1359}
1360
1361/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1362/// and the value fits into a signed 16-bit constant, and if so, return the
1363/// constant
1364SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 MVT::ValueType ValueType) {
1366 if (ConstantSDNode *CN = getVecImm(N)) {
1367 if (ValueType == MVT::i32) {
1368 int Value = (int) CN->getValue();
1369 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1370
1371 if (Value == SExtValue)
1372 return DAG.getConstant(Value, ValueType);
1373 } else if (ValueType == MVT::i16) {
1374 short Value = (short) CN->getValue();
1375 int SExtValue = ((int) Value << 16) >> 16;
1376
1377 if (Value == (short) SExtValue)
1378 return DAG.getConstant(Value, ValueType);
1379 } else if (ValueType == MVT::i64) {
1380 int64_t Value = CN->getValue();
1381 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1382
1383 if (Value == SExtValue)
1384 return DAG.getConstant(Value, ValueType);
1385 }
1386 }
1387
1388 return SDOperand();
1389}
1390
1391/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1392/// and the value fits into a signed 10-bit constant, and if so, return the
1393/// constant
1394SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1395 MVT::ValueType ValueType) {
1396 if (ConstantSDNode *CN = getVecImm(N)) {
1397 int Value = (int) CN->getValue();
1398 if ((ValueType == MVT::i32 && isS10Constant(Value))
1399 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1400 return DAG.getConstant(Value, ValueType);
1401 }
1402
1403 return SDOperand();
1404}
1405
1406/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1407/// and the value fits into a signed 8-bit constant, and if so, return the
1408/// constant.
1409///
1410/// @note: The incoming vector is v16i8 because that's the only way we can load
1411/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412/// same value.
1413SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 MVT::ValueType ValueType) {
1415 if (ConstantSDNode *CN = getVecImm(N)) {
1416 int Value = (int) CN->getValue();
1417 if (ValueType == MVT::i16
1418 && Value <= 0xffff /* truncated from uint64_t */
1419 && ((short) Value >> 8) == ((short) Value & 0xff))
1420 return DAG.getConstant(Value & 0xff, ValueType);
1421 else if (ValueType == MVT::i8
1422 && (Value & 0xff) == Value)
1423 return DAG.getConstant(Value, ValueType);
1424 }
1425
1426 return SDOperand();
1427}
1428
1429/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1430/// and the value fits into a signed 16-bit constant, and if so, return the
1431/// constant
1432SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 MVT::ValueType ValueType) {
1434 if (ConstantSDNode *CN = getVecImm(N)) {
1435 uint64_t Value = CN->getValue();
1436 if ((ValueType == MVT::i32
1437 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1438 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1439 return DAG.getConstant(Value >> 16, ValueType);
1440 }
1441
1442 return SDOperand();
1443}
1444
1445/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1446SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1447 if (ConstantSDNode *CN = getVecImm(N)) {
1448 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1449 }
1450
1451 return SDOperand();
1452}
1453
1454/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1455SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1456 if (ConstantSDNode *CN = getVecImm(N)) {
1457 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1458 }
1459
1460 return SDOperand();
1461}
1462
1463// If this is a vector of constants or undefs, get the bits. A bit in
1464// UndefBits is set if the corresponding element of the vector is an
1465// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1466// zero. Return true if this is not an array of constants, false if it is.
1467//
1468static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1469 uint64_t UndefBits[2]) {
1470 // Start with zero'd results.
1471 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472
1473 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1474 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1475 SDOperand OpVal = BV->getOperand(i);
1476
1477 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1478 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479
1480 uint64_t EltBits = 0;
1481 if (OpVal.getOpcode() == ISD::UNDEF) {
1482 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1483 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 continue;
1485 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1486 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1487 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1488 const APFloat &apf = CN->getValueAPF();
1489 EltBits = (CN->getValueType(0) == MVT::f32
1490 ? FloatToBits(apf.convertToFloat())
1491 : DoubleToBits(apf.convertToDouble()));
1492 } else {
1493 // Nonconstant element.
1494 return true;
1495 }
1496
1497 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1498 }
1499
1500 //printf("%llx %llx %llx %llx\n",
1501 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1502 return false;
1503}
1504
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Fold the 128-bit value down to 64-, 32- and 16-bit candidates up front.
  // Undefs never prevent a splat from matching: their bits read as zero.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit halves must agree (ignoring undefs) for any splat at all.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false; // Can't be a splat if two pieces don't match.

  if (MinSplatBits < 64) {
    // Do the two 32-bit halves agree, ignoring undefs?
    if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
      if (MinSplatBits < 32) {
        // Do the two 16-bit halves agree, ignoring undefs?
        if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
          if (MinSplatBits < 16) {
            // Do the two bytes agree, ignoring undefs?
            if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
              // 8-bit splat.
              SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
              SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
              SplatSize = 1;
              return true;
            }
            // NOTE(review): when MinSplatBits < 16 and the two bytes differ,
            // control falls through and no 16-bit splat is reported even
            // though one exists at this point — confirm this is intended.
          } else {
            // 16-bit splat.
            SplatBits = Bits16;
            SplatUndef = Undef16;
            SplatSize = 2;
            return true;
          }
        }
      } else {
        // 32-bit splat.
        SplatBits = Bits32;
        SplatUndef = Undef32;
        SplatSize = 4;
        return true;
      }
    }
  } else {
    // 64-bit splat.
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize = 8;
    return true;
  }

  return false;
}
1568
1569// If this is a case we can't handle, return null and let the default
1570// expansion code take care of it. If we CAN select this case, and if it
1571// selects to a single instruction, return Op. Otherwise, if we can codegen
1572// this case more efficiently than a constant pool load, lower it to the
1573// sequence of ops that should be used.
1574static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1575 MVT::ValueType VT = Op.getValueType();
1576 // If this is a vector of constants or undefs, get the bits. A bit in
1577 // UndefBits is set if the corresponding element of the vector is an
1578 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1579 // zero.
1580 uint64_t VectorBits[2];
1581 uint64_t UndefBits[2];
1582 uint64_t SplatBits, SplatUndef;
1583 int SplatSize;
1584 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1585 || !isConstantSplat(VectorBits, UndefBits,
1586 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1587 SplatBits, SplatUndef, SplatSize))
1588 return SDOperand(); // Not a constant vector, not a splat.
1589
1590 switch (VT) {
1591 default:
1592 case MVT::v4f32: {
1593 uint32_t Value32 = SplatBits;
1594 assert(SplatSize == 4
1595 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1598 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1599 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1600 break;
1601 }
1602 case MVT::v2f64: {
1603 uint64_t f64val = SplatBits;
1604 assert(SplatSize == 8
1605 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1606 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1608 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1609 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1610 break;
1611 }
1612 case MVT::v16i8: {
1613 // 8-bit constants have to be expanded to 16-bits
1614 unsigned short Value16 = SplatBits | (SplatBits << 8);
1615 SDOperand Ops[8];
1616 for (int i = 0; i < 8; ++i)
1617 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1618 return DAG.getNode(ISD::BIT_CONVERT, VT,
1619 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1620 }
1621 case MVT::v8i16: {
1622 unsigned short Value16;
1623 if (SplatSize == 2)
1624 Value16 = (unsigned short) (SplatBits & 0xffff);
1625 else
1626 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1627 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1628 SDOperand Ops[8];
1629 for (int i = 0; i < 8; ++i) Ops[i] = T;
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1631 }
1632 case MVT::v4i32: {
1633 unsigned int Value = SplatBits;
1634 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1635 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1636 }
1637 case MVT::v2i64: {
1638 uint64_t val = SplatBits;
1639 uint32_t upper = uint32_t(val >> 32);
1640 uint32_t lower = uint32_t(val);
1641
1642 if (val != 0) {
1643 SDOperand LO32;
1644 SDOperand HI32;
1645 SmallVector<SDOperand, 16> ShufBytes;
1646 SDOperand Result;
1647 bool upper_special, lower_special;
1648
1649 // NOTE: This code creates common-case shuffle masks that can be easily
1650 // detected as common expressions. It is not attempting to create highly
1651 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1652
1653 // Detect if the upper or lower half is a special shuffle mask pattern:
1654 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1655 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1656
1657 // Create lower vector if not a special pattern
1658 if (!lower_special) {
1659 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1660 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 LO32C, LO32C, LO32C, LO32C));
1663 }
1664
1665 // Create upper vector if not a special pattern
1666 if (!upper_special) {
1667 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1668 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1669 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1670 HI32C, HI32C, HI32C, HI32C));
1671 }
1672
1673 // If either upper or lower are special, then the two input operands are
1674 // the same (basically, one of them is a "don't care")
1675 if (lower_special)
1676 LO32 = HI32;
1677 if (upper_special)
1678 HI32 = LO32;
1679 if (lower_special && upper_special) {
1680 // Unhappy situation... both upper and lower are special, so punt with
1681 // a target constant:
1682 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1683 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1684 Zero, Zero);
1685 }
1686
1687 for (int i = 0; i < 4; ++i) {
1688 for (int j = 0; j < 4; ++j) {
1689 SDOperand V;
1690 bool process_upper, process_lower;
Chris Lattner52ec3752007-12-22 22:47:03 +00001691 uint64_t val = 0;
Scott Michel266bc8f2007-12-04 22:23:35 +00001692
1693 process_upper = (upper_special && (i & 1) == 0);
1694 process_lower = (lower_special && (i & 1) == 1);
1695
1696 if (process_upper || process_lower) {
1697 if ((process_upper && upper == 0)
1698 || (process_lower && lower == 0))
1699 val = 0x80;
1700 else if ((process_upper && upper == 0xffffffff)
1701 || (process_lower && lower == 0xffffffff))
1702 val = 0xc0;
1703 else if ((process_upper && upper == 0x80000000)
1704 || (process_lower && lower == 0x80000000))
1705 val = (j == 0 ? 0xe0 : 0x80);
1706 } else
1707 val = i * 4 + j + ((i & 1) * 16);
1708
1709 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1710 }
1711 }
1712
1713 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1714 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1715 &ShufBytes[0], ShufBytes.size()));
1716 } else {
1717 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1718 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1719 return DAG.getNode(ISD::BIT_CONVERT, VT,
1720 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1721 Zero, Zero, Zero, Zero));
1722 }
1723 }
1724 }
1725
1726 return SDOperand();
1727}
1728
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // An undef second operand shuffles like V1 with itself.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;     // # of mask entries that index into V2
  unsigned V2Elt = 0;          // byte offset handed to INSERT_MASK below
  unsigned V2EltIdx0 = 0;      // first mask index that refers to V2
  unsigned CurrElt = 0;        // expected next index if the mask is monotonic
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Scan the permutation mask; stop early once more than one element comes
  // from V2 or the V1 indices stop being consecutive.
  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      // NOTE(review): SrcElt >= V2EltIdx0 here, so (V2EltIdx0 - SrcElt)
      // wraps around as an unsigned value before the shift; confirm whether
      // (SrcElt - V2EltIdx0) << 2 was intended before relying on V2Elt.
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      // Expand each element index into its constituent byte indices:
      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1825
1826static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1827 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1828
1829 if (Op0.Val->getOpcode() == ISD::Constant) {
1830 // For a constant, build the appropriate constant vector, which will
1831 // eventually simplify to a vector register load.
1832
1833 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1834 SmallVector<SDOperand, 16> ConstVecValues;
1835 MVT::ValueType VT;
1836 size_t n_copies;
1837
1838 // Create a constant vector:
1839 switch (Op.getValueType()) {
1840 default: assert(0 && "Unexpected constant value type in "
1841 "LowerSCALAR_TO_VECTOR");
1842 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1848 }
1849
1850 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1851 for (size_t j = 0; j < n_copies; ++j)
1852 ConstVecValues.push_back(CValue);
1853
1854 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855 &ConstVecValues[0], ConstVecValues.size());
1856 } else {
1857 // Otherwise, copy the value from one register to another:
1858 switch (Op0.getValueType()) {
1859 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1860 case MVT::i8:
1861 case MVT::i16:
1862 case MVT::i32:
1863 case MVT::i64:
1864 case MVT::f32:
1865 case MVT::f64:
1866 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1867 }
1868 }
1869
1870 return SDOperand();
1871}
1872
//! Custom-lower vector multiplications, which SPU has no single
//! instruction for; each vector type is decomposed into the partial
//! products the hardware can form (MPYH/MPYU/MPYHH/MPY) and recombined.
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  // 32-bit lanes: mul = mpyh(a,b) + mpyh(b,a) + mpyu(a,b), combining the
  // 16x16 partial products available on the hardware.
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
    break;
  }

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    // NOTE(review): Chain and rA both read operand 0 -- MUL carries no chain
    // operand, so the multiplicand doubles as the chain anchor for the
    // CopyToReg nodes below; confirm this is intentional.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc expands to a byte mask selecting the upper halfword of
    // each 32-bit lane; it feeds the SELB below.
    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    // Product of the upper halfwords, parked in a virtual register.
    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // Select, per byte, between the low product and the high product
    // shifted into position (step c above).
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This is messy, but unavoidable:
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    // NOTE(review): as in the v8i16 case, operand 0 serves both as the
    // multiplicand and as the chain anchor for the CopyToReg nodes.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    // Products of the low bytes of each halfword lane:
    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic shifts sign-extend the high byte of each halfword down:
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // Products of the high bytes, shifted back into the high byte lane:
    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222 yields the byte mask used to interleave low/high byte
    // products in the two SELBs below.
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low halfword of each 32-bit lane of the low products:
    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Now the same dance for the upper halfwords of each 32-bit lane:
    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Merge the low and high halves back into one v16i8 result:
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
2025
//! Expand single-precision FDIV (scalar f32 and v4f32).
/*!
  Uses SPU's reciprocal estimate plus interpolation, then one refinement
  step of the quotient:

    BRcpl   = fi(B, frest(B))                 ~ 1/B
    AxBRcpl = A * BRcpl                       first-cut quotient
    result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)
 */
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  // Scalar f32 lives in the 32-bit FP register class; v4f32 needs a full
  // vector register.
  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // The CopyToReg results above serve as the chains threading the register
  // writes to the CopyFromReg reads below.
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2069
2070// Expands double-precision FDIV
2071// Expects two doubles as inputs X and Y, does a floating point
2072// reciprocal estimate, and three iterations of Newton-Raphson
2073// to increase accuracy.
2074//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2075// MachineFunction &MF = DAG.getMachineFunction();
2076// SSARegMap *RegMap = MF.getSSARegMap();
2077//
2078// SDOperand X = Op.getOperand(0);
2079// SDOperand Y = Op.getOperand(1);
2080//}
2081
2082static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2083 unsigned VT = Op.getValueType();
2084 SDOperand N = Op.getOperand(0);
2085 SDOperand Elt = Op.getOperand(1);
2086 SDOperand ShufMask[16];
2087 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2088
2089 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2090
2091 int EltNo = (int) C->getValue();
2092
2093 // sanity checks:
2094 if (VT == MVT::i8 && EltNo >= 16)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2096 else if (VT == MVT::i16 && EltNo >= 8)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2098 else if (VT == MVT::i32 && EltNo >= 4)
2099 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2100 else if (VT == MVT::i64 && EltNo >= 2)
2101 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2102
2103 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2104 // i32 and i64: Element 0 is the preferred slot
2105 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2106 }
2107
2108 // Need to generate shuffle mask and extract:
Scott Michel0e5665b2007-12-19 21:17:42 +00002109 int prefslot_begin = -1, prefslot_end = -1;
Scott Michel266bc8f2007-12-04 22:23:35 +00002110 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2111
2112 switch (VT) {
2113 case MVT::i8: {
2114 prefslot_begin = prefslot_end = 3;
2115 break;
2116 }
2117 case MVT::i16: {
2118 prefslot_begin = 2; prefslot_end = 3;
2119 break;
2120 }
2121 case MVT::i32: {
2122 prefslot_begin = 0; prefslot_end = 3;
2123 break;
2124 }
2125 case MVT::i64: {
2126 prefslot_begin = 0; prefslot_end = 7;
2127 break;
2128 }
2129 }
2130
Scott Michel0e5665b2007-12-19 21:17:42 +00002131 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2132 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2133
Scott Michel266bc8f2007-12-04 22:23:35 +00002134 for (int i = 0; i < 16; ++i) {
2135 // zero fill uppper part of preferred slot, don't care about the
2136 // other slots:
2137 unsigned int mask_val;
2138
2139 if (i <= prefslot_end) {
2140 mask_val =
2141 ((i < prefslot_begin)
2142 ? 0x80
2143 : elt_byte + (i - prefslot_begin));
2144
Scott Michel0e5665b2007-12-19 21:17:42 +00002145 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
Scott Michel266bc8f2007-12-04 22:23:35 +00002146 } else
2147 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2148 }
2149
2150 SDOperand ShufMaskVec =
2151 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2152 &ShufMask[0],
2153 sizeof(ShufMask) / sizeof(ShufMask[0]));
2154
2155 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2156 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2157 N, N, ShufMaskVec));
2158
2159}
2160
2161static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2162 SDOperand VecOp = Op.getOperand(0);
2163 SDOperand ValOp = Op.getOperand(1);
2164 SDOperand IdxOp = Op.getOperand(2);
2165 MVT::ValueType VT = Op.getValueType();
2166
2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2169
2170 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2171 // Use $2 because it's always 16-byte aligned and it's available:
2172 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2173
2174 SDOperand result =
2175 DAG.getNode(SPUISD::SHUFB, VT,
2176 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2177 VecOp,
2178 DAG.getNode(SPUISD::INSERT_MASK, VT,
2179 DAG.getNode(ISD::ADD, PtrVT,
2180 PtrBase,
2181 DAG.getConstant(CN->getValue(),
2182 PtrVT))));
2183
2184 return result;
2185}
2186
2187static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2188 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2189
2190 assert(Op.getValueType() == MVT::i8);
2191 switch (Opc) {
2192 default:
2193 assert(0 && "Unhandled i8 math operator");
2194 /*NOTREACHED*/
2195 break;
2196 case ISD::SUB: {
2197 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2198 // the result:
2199 SDOperand N1 = Op.getOperand(1);
2200 N0 = (N0.getOpcode() != ISD::Constant
2201 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2202 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2203 N1 = (N1.getOpcode() != ISD::Constant
2204 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2205 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2206 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2207 DAG.getNode(Opc, MVT::i16, N0, N1));
2208 }
2209 case ISD::ROTR:
2210 case ISD::ROTL: {
2211 SDOperand N1 = Op.getOperand(1);
2212 unsigned N1Opc;
2213 N0 = (N0.getOpcode() != ISD::Constant
2214 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2215 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2216 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2217 N1 = (N1.getOpcode() != ISD::Constant
2218 ? DAG.getNode(N1Opc, MVT::i16, N1)
2219 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2220 SDOperand ExpandArg =
2221 DAG.getNode(ISD::OR, MVT::i16, N0,
2222 DAG.getNode(ISD::SHL, MVT::i16,
2223 N0, DAG.getConstant(8, MVT::i16)));
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2226 }
2227 case ISD::SRL:
2228 case ISD::SHL: {
2229 SDOperand N1 = Op.getOperand(1);
2230 unsigned N1Opc;
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2234 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2235 N1 = (N1.getOpcode() != ISD::Constant
2236 ? DAG.getNode(N1Opc, MVT::i16, N1)
2237 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2238 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2239 DAG.getNode(Opc, MVT::i16, N0, N1));
2240 }
2241 case ISD::SRA: {
2242 SDOperand N1 = Op.getOperand(1);
2243 unsigned N1Opc;
2244 N0 = (N0.getOpcode() != ISD::Constant
2245 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2246 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2247 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2248 N1 = (N1.getOpcode() != ISD::Constant
2249 ? DAG.getNode(N1Opc, MVT::i16, N1)
2250 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2251 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2252 DAG.getNode(Opc, MVT::i16, N0, N1));
2253 }
2254 case ISD::MUL: {
2255 SDOperand N1 = Op.getOperand(1);
2256 unsigned N1Opc;
2257 N0 = (N0.getOpcode() != ISD::Constant
2258 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2259 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2260 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2261 N1 = (N1.getOpcode() != ISD::Constant
2262 ? DAG.getNode(N1Opc, MVT::i16, N1)
2263 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2266 break;
2267 }
2268 }
2269
2270 return SDOperand();
2271}
2272
2273//! Lower byte immediate operations for v16i8 vectors:
2274static SDOperand
2275LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2276 SDOperand ConstVec;
2277 SDOperand Arg;
2278 MVT::ValueType VT = Op.getValueType();
2279
2280 ConstVec = Op.getOperand(0);
2281 Arg = Op.getOperand(1);
2282 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2283 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284 ConstVec = ConstVec.getOperand(0);
2285 } else {
2286 ConstVec = Op.getOperand(1);
2287 Arg = Op.getOperand(0);
2288 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2289 ConstVec = ConstVec.getOperand(0);
2290 }
2291 }
2292 }
2293
2294 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2295 uint64_t VectorBits[2];
2296 uint64_t UndefBits[2];
2297 uint64_t SplatBits, SplatUndef;
2298 int SplatSize;
2299
2300 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2301 && isConstantSplat(VectorBits, UndefBits,
2302 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2303 SplatBits, SplatUndef, SplatSize)) {
2304 SDOperand tcVec[16];
2305 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2306 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2307
2308 // Turn the BUILD_VECTOR into a set of target constants:
2309 for (size_t i = 0; i < tcVecSize; ++i)
2310 tcVec[i] = tc;
2311
2312 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2313 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2314 }
2315 }
2316
2317 return SDOperand();
2318}
2319
2320//! Lower i32 multiplication
2321static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2322 unsigned Opc) {
2323 switch (VT) {
2324 default:
2325 cerr << "CellSPU: Unknown LowerMUL value type, got "
2326 << MVT::getValueTypeString(Op.getValueType())
2327 << "\n";
2328 abort();
2329 /*NOTREACHED*/
2330
2331 case MVT::i32: {
2332 SDOperand rA = Op.getOperand(0);
2333 SDOperand rB = Op.getOperand(1);
2334
2335 return DAG.getNode(ISD::ADD, MVT::i32,
2336 DAG.getNode(ISD::ADD, MVT::i32,
2337 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2338 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2339 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2340 }
2341 }
2342
2343 return SDOperand();
2344}
2345
2346//! Custom lowering for CTPOP (count population)
2347/*!
2348 Custom lowering code that counts the number ones in the input
2349 operand. SPU has such an instruction, but it counts the number of
2350 ones per byte, which then have to be accumulated.
2351*/
2352static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2353 unsigned VT = Op.getValueType();
2354 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2355
2356 switch (VT) {
2357 case MVT::i8: {
2358 SDOperand N = Op.getOperand(0);
2359 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2360
2361 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2362 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2363
2364 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2365 }
2366
2367 case MVT::i16: {
2368 MachineFunction &MF = DAG.getMachineFunction();
2369 SSARegMap *RegMap = MF.getSSARegMap();
2370
2371 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2372
2373 SDOperand N = Op.getOperand(0);
2374 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2375 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2376 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2377
2378 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2379 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2380
2381 // CNTB_result becomes the chain to which all of the virtual registers
2382 // CNTB_reg, SUM1_reg become associated:
2383 SDOperand CNTB_result =
2384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2385
2386 SDOperand CNTB_rescopy =
2387 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2388
2389 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2390
2391 return DAG.getNode(ISD::AND, MVT::i16,
2392 DAG.getNode(ISD::ADD, MVT::i16,
2393 DAG.getNode(ISD::SRL, MVT::i16,
2394 Tmp1, Shift1),
2395 Tmp1),
2396 Mask0);
2397 }
2398
2399 case MVT::i32: {
2400 MachineFunction &MF = DAG.getMachineFunction();
2401 SSARegMap *RegMap = MF.getSSARegMap();
2402
2403 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2404 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2405
2406 SDOperand N = Op.getOperand(0);
2407 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2408 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2409 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2410 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2411
2412 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2413 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2414
2415 // CNTB_result becomes the chain to which all of the virtual registers
2416 // CNTB_reg, SUM1_reg become associated:
2417 SDOperand CNTB_result =
2418 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2419
2420 SDOperand CNTB_rescopy =
2421 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2422
2423 SDOperand Comp1 =
2424 DAG.getNode(ISD::SRL, MVT::i32,
2425 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2426
2427 SDOperand Sum1 =
2428 DAG.getNode(ISD::ADD, MVT::i32,
2429 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2430
2431 SDOperand Sum1_rescopy =
2432 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2433
2434 SDOperand Comp2 =
2435 DAG.getNode(ISD::SRL, MVT::i32,
2436 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2437 Shift2);
2438 SDOperand Sum2 =
2439 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2440 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2441
2442 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2443 }
2444
2445 case MVT::i64:
2446 break;
2447 }
2448
2449 return SDOperand();
2450}
2451
2452/// LowerOperation - Provide custom lowering hooks for some operations.
2453///
2454SDOperand
2455SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2456{
2457 switch (Op.getOpcode()) {
2458 default: {
2459 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2460 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2461 cerr << "*Op.Val:\n";
2462 Op.Val->dump();
2463 abort();
2464 }
2465 case ISD::LOAD:
2466 case ISD::SEXTLOAD:
2467 case ISD::ZEXTLOAD:
2468 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2469 case ISD::STORE:
2470 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2471 case ISD::ConstantPool:
2472 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2473 case ISD::GlobalAddress:
2474 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2475 case ISD::JumpTable:
2476 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2477 case ISD::Constant:
2478 return LowerConstant(Op, DAG);
2479 case ISD::ConstantFP:
2480 return LowerConstantFP(Op, DAG);
2481 case ISD::FORMAL_ARGUMENTS:
2482 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2483 case ISD::CALL:
2484 return LowerCALL(Op, DAG);
2485 case ISD::RET:
2486 return LowerRET(Op, DAG, getTargetMachine());
2487
2488 // i8 math ops:
2489 case ISD::SUB:
2490 case ISD::ROTR:
2491 case ISD::ROTL:
2492 case ISD::SRL:
2493 case ISD::SHL:
2494 case ISD::SRA:
2495 return LowerI8Math(Op, DAG, Op.getOpcode());
2496
2497 // Vector-related lowering.
2498 case ISD::BUILD_VECTOR:
2499 return LowerBUILD_VECTOR(Op, DAG);
2500 case ISD::SCALAR_TO_VECTOR:
2501 return LowerSCALAR_TO_VECTOR(Op, DAG);
2502 case ISD::VECTOR_SHUFFLE:
2503 return LowerVECTOR_SHUFFLE(Op, DAG);
2504 case ISD::EXTRACT_VECTOR_ELT:
2505 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2506 case ISD::INSERT_VECTOR_ELT:
2507 return LowerINSERT_VECTOR_ELT(Op, DAG);
2508
2509 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2510 case ISD::AND:
2511 case ISD::OR:
2512 case ISD::XOR:
2513 return LowerByteImmed(Op, DAG);
2514
2515 // Vector and i8 multiply:
2516 case ISD::MUL:
2517 if (MVT::isVector(Op.getValueType()))
2518 return LowerVectorMUL(Op, DAG);
2519 else if (Op.getValueType() == MVT::i8)
2520 return LowerI8Math(Op, DAG, Op.getOpcode());
2521 else
2522 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2523
2524 case ISD::FDIV:
2525 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2526 return LowerFDIVf32(Op, DAG);
2527// else if (Op.getValueType() == MVT::f64)
2528// return LowerFDIVf64(Op, DAG);
2529 else
2530 assert(0 && "Calling FDIV on unsupported MVT");
2531
2532 case ISD::CTPOP:
2533 return LowerCTPOP(Op, DAG);
2534 }
2535
2536 return SDOperand();
2537}
2538
2539//===----------------------------------------------------------------------===//
2540// Other Lowering Code
2541//===----------------------------------------------------------------------===//
2542
/// InsertAtEndOfBasicBlock - No SPU pseudo-instructions currently require
/// custom MachineBasicBlock expansion, so the block is returned unmodified.
MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}
2549
2550//===----------------------------------------------------------------------===//
2551// Target Optimization Hooks
2552//===----------------------------------------------------------------------===//
2553
2554SDOperand
2555SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2556{
2557#if 0
2558 TargetMachine &TM = getTargetMachine();
2559 SelectionDAG &DAG = DCI.DAG;
2560#endif
2561 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2562
2563 switch (N->getOpcode()) {
2564 default: break;
2565
2566 // Look for obvious optimizations for shift left:
2567 // a) Replace 0 << V with 0
2568 // b) Replace V << 0 with V
2569 //
2570 // N.B: llvm will generate an undef node if the shift amount is greater than
2571 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2572 case SPU::SHLIr32:
2573 case SPU::SHLHIr16:
2574 case SPU::SHLQBIIvec:
2575 case SPU::ROTHIr16:
2576 case SPU::ROTHIr16_i32:
2577 case SPU::ROTIr32:
2578 case SPU::ROTIr32_i16:
2579 case SPU::ROTQBYIvec:
2580 case SPU::ROTQBYBIvec:
2581 case SPU::ROTQBIIvec:
2582 case SPU::ROTHMIr16:
2583 case SPU::ROTMIr32:
2584 case SPU::ROTQMBYIvec: {
2585 if (N0.getOpcode() == ISD::Constant) {
2586 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2587 if (C->getValue() == 0) // 0 << V -> 0.
2588 return N0;
2589 }
2590 }
2591 SDOperand N1 = N->getOperand(1);
2592 if (N1.getOpcode() == ISD::Constant) {
2593 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2594 if (C->getValue() == 0) // V << 0 -> V
2595 return N1;
2596 }
2597 }
2598 break;
2599 }
2600 }
2601
2602 return SDOperand();
2603}
2604
2605//===----------------------------------------------------------------------===//
2606// Inline Assembly Support
2607//===----------------------------------------------------------------------===//
2608
2609/// getConstraintType - Given a constraint letter, return the type of
2610/// constraint it is for this target.
2611SPUTargetLowering::ConstraintType
2612SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2613 if (ConstraintLetter.size() == 1) {
2614 switch (ConstraintLetter[0]) {
2615 default: break;
2616 case 'b':
2617 case 'r':
2618 case 'f':
2619 case 'v':
2620 case 'y':
2621 return C_RegisterClass;
2622 }
2623 }
2624 return TargetLowering::getConstraintType(ConstraintLetter);
2625}
2626
2627std::pair<unsigned, const TargetRegisterClass*>
2628SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2629 MVT::ValueType VT) const
2630{
2631 if (Constraint.size() == 1) {
2632 // GCC RS6000 Constraint Letters
2633 switch (Constraint[0]) {
2634 case 'b': // R1-R31
2635 case 'r': // R0-R31
2636 if (VT == MVT::i64)
2637 return std::make_pair(0U, SPU::R64CRegisterClass);
2638 return std::make_pair(0U, SPU::R32CRegisterClass);
2639 case 'f':
2640 if (VT == MVT::f32)
2641 return std::make_pair(0U, SPU::R32FPRegisterClass);
2642 else if (VT == MVT::f64)
2643 return std::make_pair(0U, SPU::R64FPRegisterClass);
2644 break;
2645 case 'v':
2646 return std::make_pair(0U, SPU::GPRCRegisterClass);
2647 }
2648 }
2649
2650 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2651}
2652
2653void
2654SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2655 uint64_t Mask,
2656 uint64_t &KnownZero,
2657 uint64_t &KnownOne,
2658 const SelectionDAG &DAG,
2659 unsigned Depth ) const {
2660 KnownZero = 0;
2661 KnownOne = 0;
2662}
2663
// LowerAsmOperandForConstraint - Lower an inline-asm operand for the
// given single-letter constraint. The SPU defines no target-specific
// constraint lowering yet, so everything is delegated to the base class.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
2673
2674/// isLegalAddressImmediate - Return true if the integer value can be used
2675/// as the offset of the target addressing mode.
2676bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2677 // SPU's addresses are 256K:
2678 return (V > -(1 << 18) && V < (1 << 18) - 1);
2679}
2680
// A global value is never directly usable as an address-mode immediate
// on the SPU; require it to be materialized into a register first.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}