//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
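
  // Editorial note (not in the original source): the "preferred slot" is
  // where SPU scalar instructions expect a scalar to live within a 128-bit
  // register. Per the table above, sub-word scalars sit right-justified in
  // the first 32-bit word of the register, so an i8 occupies byte 3, an i16
  // bytes 2..3, and an i32 bytes 0..3; 64-bit and wider types start at
  // byte 0. The prefslot_byte field is the rotation target LowerLOAD uses
  // below when extracting a scalar from a 16-byte line.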

  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an existing D-Form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::FrameIndex
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetFrameIndex
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::DFormAddr);
  }
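
  // For example (an editorial note, not original source text): a
  // (TargetGlobalAddress ...) node counts as a memory target here, while a
  // pointer produced by a plain (add reg, reg) does not, which is why the
  // lowering code below wraps such pointers in a SPUISD::DFormAddr node
  // before using them as load/store addresses.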
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT into a library call.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8,  Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1,  Expand);
  setOperationAction(ISD::SELECT, MVT::i8,  Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Expand);
  setOperationAction(ISD::SETCC, MVT::i8,  Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION,  MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
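  // For instance (an editorial sketch, not original source text), the
  // legalizer rewrites (build_pair lo:i32, hi:i32):i64 roughly as
  //   (or (shl (anyext hi), 32), (zext lo))
  // using only operations that are already legal or expandable here.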

  // First, set operation actions for all supported vector types; then
  // selectively turn on the ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR,     (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
}
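
// A usage sketch (editorial, not original source text): after construction,
// the SelectionDAG legalizer consults the tables set up above, conceptually
//
//   SPUTargetLowering TLI(TM);
//   if (TLI.getOperationAction(ISD::MUL, MVT::i32) == TargetLowering::Custom) {
//     // The legalizer calls back into SPUTargetLowering::LowerOperation(),
//     // which dispatches to the LowerXXX() routines defined below.
//   }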

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for
  elements within a 16-byte block, we have to rotate to extract the requested
  element.
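
  Worked example (editorial note, not in the original comment): an i32 load
  from byte offset 0x7 within its 16-byte line loads the entire line as
  v16i8, then rotates it left by (0x7 & 0xf) - 0 == 7 bytes (0 being the
  i32 preferred-slot byte), so the requested word moves from bytes 7..10
  into bytes 0..3, where EXTRACT_ELT0 can pick it out.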
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand Ops[8];

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    if (VT != MVT::i1)
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    else
      vecVT = MVT::v16i8;

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));

      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (c_rotamt == 0) {
        if (isMemoryOperand(ptrp))
          // Return unchanged
          return SDOperand();
        else {
          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          if (VT == OpVT)
            return DAG.getLoad(VT, LN->getChain(), ptrp,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      } else {
        // Need to rotate...
        if (c_rotamt < 0)
          c_rotamt += 16;
        // Realign the base pointer, with a D-Form address
        if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
        else
          basep = ptrp;

        // Rotate the load:
        rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
        the_chain = rot_op.getValue(1);
        rotamt = DAG.getConstant(c_rotamt, MVT::i16);

        SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        Ops[0] = the_chain;
        Ops[1] = rot_op;
        Ops[2] = rotamt;

        result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
        the_chain = result.getValue(1);

        if (VT == OpVT || ExtType == ISD::EXTLOAD) {
          SDVTList scalarvts;
          Ops[0] = the_chain;
          Ops[1] = result;
          if (OpVT == VT) {
            scalarvts = DAG.getVTList(VT, MVT::Other);
          } else {
            scalarvts = DAG.getVTList(OpVT, MVT::Other);
          }

          result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
                               result);
          Ops[0] = the_chain;
          Ops[1] = result;
          result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
          the_chain = result.getValue(1);
        } else {
          // Handle the sign and zero-extending loads for i1 and i8:
          unsigned NewOpC;

          // Only sign- and zero-extending loads reach this branch; assert so
          // that NewOpC cannot be used uninitialized.
          assert((ExtType == ISD::SEXTLOAD || ExtType == ISD::ZEXTLOAD)
                 && "LowerLOAD: unexpected load extension type");

          if (ExtType == ISD::SEXTLOAD) {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_SEXT
                      : SPUISD::EXTRACT_I8_SEXT);
          } else {
            NewOpC = (OpVT == MVT::i1
                      ? SPUISD::EXTRACT_I1_ZEXT
                      : SPUISD::EXTRACT_I8_ZEXT);
          }

          result = DAG.getNode(NewOpC, OpVT, result);
        }

        SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
        SDOperand retops[2] = { result, the_chain };

        result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
        return result;
        /*UNREACHED*/
      }
    } else {
      // Misaligned 16-byte load:
      if (basep.getOpcode() == ISD::LOAD) {
        LN = cast<LoadSDNode>(basep);
        if (LN->getAlignment() == 16) {
          // We can verify that we're really loading from a 16-byte aligned
          // chunk. Encapsulate basep as a D-Form address and return a new
          // load:
          basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                              DAG.getConstant(0, PtrVT));
          if (OpVT == VT)
            return DAG.getLoad(VT, LN->getChain(), basep,
                               LN->getSrcValue(), LN->getSrcValueOffset(),
                               LN->isVolatile(), 16);
          else
            return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                  LN->getSrcValue(), LN->getSrcValueOffset(),
                                  OpVT, LN->isVolatile(), 16);
        }
      }

      // Catch all other cases where we can't guarantee that we have a
      // 16-byte aligned entity, which means resorting to an X-form
      // address scheme:

      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
      SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
      SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

      SDOperand alignLoad =
        DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                    LN->getSrcValue(), LN->getSrcValueOffset(),
                    LN->isVolatile(), 16);

      SDOperand insertEltOp =
        DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                           alignLoad,
                           alignLoad,
                           DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
      SDOperand retops[2] = { result, the_chain };

      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
      return result;
    }
    break;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
  All CellSPU stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to generate a shuffle to insert the
  requested element into its place, then store the resulting block.
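
  Worked example (editorial note, not in the original comment): storing an
  i32 at byte offset 4 of its 16-byte line becomes a read-modify-write
  sequence: load the whole line as v4i32, build a shuffle control with
  INSERT_MASK for byte offset 4, SHUFB the scalar into element 1 of the
  loaded line, and store the rebuilt 16 bytes back.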
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    int offset;

    if (basep.getOpcode() == ISD::FrameIndex) {
      // FrameIndex nodes are always properly aligned. Really.
      return SDOperand();
    }

    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    } else {
      ptrOp = basep;
      offset = 0;
    }

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT::ValueType vecVT, stVecVT;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
    // the actual d-form address offs($reg)).
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              ptrOp,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoad,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
  This code performs the juggling needed to materialize a 64-bit constant in
  a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  target description.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  const APFloat &apf = FP->getValueAPF();

  if (VT == MVT::f32) {
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(apf.convertToFloat(), VT));
  } else if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(apf.convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                   // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
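
// Worked example (editorial sketch, not original source text): the checks
// accept any word-aligned address that fits in an 18-bit signed immediate.
// Addr = 0x1000 passes both tests and yields the word address 0x400
// (0x1000 >> 2); Addr = 0x1002 fails the alignment test, and Addr = 0x40000
// fails the sign-extension test, so both return 0.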

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Merge the stores for the stack arguments into one token factor.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();

    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
    // style calls; otherwise, external symbols become BRASL calls.
    // NOTE:
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
    if (GV->isDeclaration()) {
      Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumStackBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    if (ValueType == MVT::i32) {
      int Value = (int) CN->getValue();
      int SExtValue = ((Value & 0xffff) << 16) >> 16;

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i16) {
      short Value = (short) CN->getValue();
      int SExtValue = ((int) Value << 16) >> 16;

      if (Value == (short) SExtValue)
        return DAG.getConstant(Value, ValueType);
    } else if (ValueType == MVT::i64) {
      int64_t Value = CN->getValue();
      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);

      if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if ((ValueType == MVT::i32 && isS10Constant(Value))
        || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where only the upper 16 bits of that value are set (the low 16 bits
/// are zero), and if so, return the upper halfword as a constant suitable for
/// ILHU.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Return the full 64-bit value; casting to unsigned here would silently
    // truncate the upper 32 bits.
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
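
// Worked example (editorial sketch, not original source text): for a v4i32
// build_vector <1, 2, undef, 4>, EltBitSize == 32 and e == 4, so elements
// 0-1 pack into VectorBits[0] and elements 2-3 into VectorBits[1], higher
// slot first:
//
//   VectorBits[0] == 0x0000000100000002
//   VectorBits[1] == 0x0000000000000004
//   UndefBits[1]  == 0xffffffff00000000   (element 2 was undef)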

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits,
      // ignoring undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the upper 16 bits equal the lower 16 bits (ignoring undefs),
          // the splat may be narrower still; otherwise it is an i32 splat,
          // handled in the else branch below.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the upper 8 bits equal the lower 8 bits (ignoring undefs),
              // we have an 8-bit splat; otherwise it is an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
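
// Illustrative example: the 128-bit pattern of sixteen 0xAB bytes halves to
// the same 64-bit value on both sides, then to 0xABABABAB at 32 bits,
// 0xABAB at 16 bits, and finally 0xAB at 8 bits, so isConstantSplat reports
// SplatBits = 0xAB and SplatSize = 1. Undef elements never block a match:
// their bits are masked out of every comparison and are reported back
// through SplatUndef instead.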

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDOperand Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }
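
      // Background for the mask byte values chosen below (a summary of the
      // shufb control byte encoding from the SPU ISA, included for
      // reference): a control byte of the form 10xxxxxx produces 0x00 in the
      // result byte, 110xxxxx produces 0xff, and 111xxxxx produces 0x80.
      // Hence 0x80 synthesizes a zero byte, 0xc0 an all-ones byte, and 0xe0
      // the sign-bit byte 0x80, which is why 0, 0xffffffff and 0x80000000
      // are the three "special" 32-bit patterns handled here; any other
      // control byte selects that numbered byte from the 32 concatenated
      // bytes of the two input registers.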

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          SDOperand V;
          bool process_upper, process_lower;
          uint64_t val;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val = (j == 0 ? 0xe0 : 0x80);
          } else
            val = i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                     &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
      SDOperand Zero = DAG.getConstant(0, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V2 into V1, this can be
  // handled using the C*[DX] compute mask instructions, but the vector
  // elements have to be monotonically increasing with one exception element.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDOperand InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDOperand ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the actual
    // bytes.
    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

    SmallVector<SDOperand, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

      for (unsigned j = 0; j != BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
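
// Illustrative example (not from the original source): for a v4i32 shuffle
// with permutation mask (0, 1, 6, 3), elements 0, 1 and 3 come from V1 in
// increasing order and exactly one element (index 6, i.e., element 2 of V2)
// comes from V2, so the "monotonic with one exception" path fires and a
// single C*D-generated insertion mask plus one shufb performs the move. A
// mask such as (3, 2, 1, 0), by contrast, falls through to the general path,
// which expands every element index into its component byte indices and
// feeds the resulting v16i8 constant to shufb directly.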

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);                   // Op0 = the scalar

  if (Op0.Val->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
    SmallVector<SDOperand, 16> ConstVecValues;
    MVT::ValueType VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDOperand();
}
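
// For example (illustrative): SCALAR_TO_VECTOR of the i32 constant 42 with a
// v4i32 result becomes BUILD_VECTOR <42, 42, 42, 42>, which later folds into
// a single splatted-constant register load. Element 0 is all that
// SCALAR_TO_VECTOR actually guarantees; the extra copies are harmless. A
// non-constant scalar instead becomes one SPUISD::PROMOTE_SCALAR node, which
// moves the value into the preferred slot of a vector register.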

static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  case MVT::v4i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }
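
  // Why the three multiplies above suffice (a sketch, using the usual
  // 16-bit decomposition): write a = (a_hi << 16) + a_lo and
  // b = (b_hi << 16) + b_lo. Then, modulo 2^32,
  //
  //   a * b = ((a_hi * b_lo) << 16) + ((a_lo * b_hi) << 16) + a_lo * b_lo
  //
  // since the (a_hi * b_hi) << 32 term vanishes. SPU's mpyh computes a
  // halfword product shifted into the upper half and mpyu the unsigned
  // lower-halfword product, so mpyh(a,b) + mpyh(b,a) + mpyu(a,b) yields the
  // full 32-bit product in each element.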

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

    SDOperand HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }
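
  // A note on the 0xcccc mask above (explanatory, summarizing the SPU ISA):
  // fsmbi expands each bit of its 16-bit immediate into one byte of the
  // result (1 -> 0xff, 0 -> 0x00). 0xcccc is 1100110011001100 in binary, so
  // the mask covers bytes 0-1, 4-5, 8-9 and 12-13: the even halfword of
  // every 32-bit group. selb then takes those bytes from the shifted
  // high-half products and the remaining bytes from the low-half products,
  // interleaving all eight 16-bit results in a single select.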

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);

    SDOperand LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDOperand LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
                                   FSMBuse_2222));

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDOperand LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDOperand rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDOperand rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDOperand HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

    SDOperand HHProd =
      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,
                                   HLProd,
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                                               HHProd_1, c8),
                                   FSMBuse_2222));

    SDOperand HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }

  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  SSARegMap *RegMap = MF.getSSARegMap();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  //   (Floating Interpolate (FP Reciprocal Estimate B))
  SDOperand BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDOperand AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
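
// The arithmetic above is one Newton-Raphson refinement step, sketched here
// for reference: with r = fi(B, frest(B)) ~= 1/B and the initial quotient
// q0 = A * r, the value returned is
//
//   q1 = q0 + r * (A - B * q0)
//
// The residual A - B*q0 measures how far q0 is from the true quotient, and
// scaling it by the reciprocal estimate roughly doubles the number of
// correct mantissa bits per step.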

// Expands double-precision FDIV.
// Expects two doubles as inputs X and Y, does a floating point
// reciprocal estimate, and three iterations of Newton-Raphson
// to increase accuracy.
//static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
//  MachineFunction &MF = DAG.getMachineFunction();
//  SSARegMap *RegMap = MF.getSSARegMap();
//
//  SDOperand X = Op.getOperand(0);
//  SDOperand Y = Op.getOperand(1);
//}

static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin, prefslot_end;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  switch (VT) {
  default:
    assert(0 && "Unhandled value type in LowerEXTRACT_VECTOR_ELT");
    // fall through so prefslot_begin/prefslot_end stay initialized in
    // release builds:
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // other slots:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}
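
// Worked example (illustrative): extracting element 1 of a v4i32 vector.
// elt_byte = 1 * 32 / 8 = 4, and the i32 preferred slot spans bytes 0-3, so
// the mask's first four bytes become 0x04 0x05 0x06 0x07; the same pattern
// is then repeated across the remaining slots, whose contents are
// don't-cares. The shufb therefore rotates element 1 into the preferred
// slot, where EXTRACT_ELT0 can read it out as a scalar.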

static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDOperand result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getValue(),
                                                        PtrVT))));

  return result;
}
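
// Background (a summary of the SPU ISA, for reference): the c[bhw]d
// instructions that INSERT_MASK selects down to take a register-plus-offset
// address and produce a shuffle control word that steers one byte, halfword
// or word into the slot that address would occupy within a quadword. Adding
// the element index to the 16-byte-aligned $2 supplies those low address
// bits; the resulting mask routes ValOp's element into place while the
// remaining bytes pass VecOp through unchanged.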

static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDOperand N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDOperand N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDOperand();
}
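
// Why ROTR/ROTL duplicate the byte before rotating (explanatory note): after
// zero-extending the i8 operand, ExpandArg ORs it with itself shifted left
// by 8, so the 16-bit register holds the byte twice (e.g., 0xAB becomes
// 0xABAB). Because that doubled pattern is periodic with period 8, any
// 16-bit rotate of it leaves the low byte equal to the correspondingly
// rotated 8-bit value, and the final TRUNCATE extracts exactly that byte.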

//! Lower byte immediate operations for v16i8 vectors:
static SDOperand
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  SDOperand ConstVec;
  SDOperand Arg;
  MVT::ValueType VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  return SDOperand();
}

//! Lower i32 multiplication
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
                          unsigned Opc) {
  switch (VT) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    // Same mpyh/mpyh/mpyu decomposition as the v4i32 case in LowerVectorMUL,
    // applied to a single 32-bit product:
    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDOperand();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  switch (VT) {
  case MVT::i8: {
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    SSARegMap *RegMap = MF.getSSARegMap();

    unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDOperand Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDOperand Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDOperand Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDOperand();
}
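
/*!
  The i32 accumulation above, written out (for reference): cntb leaves one
  per-byte popcount in each byte; counting 0xF00F0FF0, for instance, gives
  the byte counts 0x04040404. The two shift/add rounds then fold the lanes
  together so the low byte accumulates all four counts (16 here), and the
  final AND with 0xff discards the partial sums left in the upper bytes.
  A standalone scalar model of the same sequence (illustrative only; the
  helper name and the use of GCC's __builtin_popcount are ours, not part of
  this backend):

  \code
  unsigned spu_ctpop32_model(unsigned x) {
    unsigned counts = 0;
    for (int i = 0; i < 4; ++i)        // models cntb: popcount of each byte
      counts |= __builtin_popcount((x >> (8 * i)) & 0xff) << (8 * i);
    counts += counts >> 16;            // fold the upper halfword's counts in
    counts += counts >> 8;             // fold the remaining byte's count in
    return counts & 0xff;              // the low byte now holds the total
  }
  \endcode
*/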

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";
    Op.Val->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG);
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8 math ops:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());
    else
      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

  case ISD::FDIV:
    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//  else if (Op.getValueType() == MVT::f64)
//    return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");
    // Keep an unsupported FDIV from falling through into the CTPOP case
    // when asserts are compiled out:
    return SDOperand();

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
{
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
#endif
  SDOperand N0 = N->getOperand(0);      // everything has at least one operand

  switch (N->getOpcode()) {
  default: break;

  // Look for obvious optimizations for shifts and rotates:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.
  case SPU::SHLIr32:
  case SPU::SHLHIr16:
  case SPU::SHLQBIIvec:
  case SPU::ROTHIr16:
  case SPU::ROTHIr16_i32:
  case SPU::ROTIr32:
  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:
  case SPU::ROTMIr32:
  case SPU::ROTQMBYIvec: {
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0)         // 0 << V -> 0.
          return N0;
      }
    }
    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0)         // V << 0 -> V
          return N0;
      }
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters (carried over from the PowerPC backend)
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t Mask,
                                                  uint64_t &KnownZero,
                                                  uint64_t &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's local store is 256K, so a valid offset is an 18-bit signed
  // immediate:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}