Blame - lib/Target/X86/X86ISelLowering.cpp - fp2-dev/platform/external/llvm

blob: e52b6531faf47c47c4a179b09dc8580a9cd6082c [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the interfaces that X86 uses to lower LLVM code into a
				11	// selection DAG.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86.h"
				16	#include "X86InstrBuilder.h"
				17	#include "X86ISelLowering.h"
				18	#include "X86MachineFunctionInfo.h"
				19	#include "X86TargetMachine.h"
				20	#include "llvm/CallingConv.h"
				21	#include "llvm/Constants.h"
				22	#include "llvm/DerivedTypes.h"
				23	#include "llvm/GlobalVariable.h"
				24	#include "llvm/Function.h"
				25	#include "llvm/Intrinsics.h"
				26	#include "llvm/ADT/VectorExtras.h"
				27	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				28	#include "llvm/CodeGen/CallingConvLower.h"
				29	#include "llvm/CodeGen/MachineFrameInfo.h"
				30	#include "llvm/CodeGen/MachineFunction.h"
				31	#include "llvm/CodeGen/MachineInstrBuilder.h"
				32	#include "llvm/CodeGen/SelectionDAG.h"
				33	#include "llvm/CodeGen/SSARegMap.h"
				34	#include "llvm/Support/MathExtras.h"
				35	#include "llvm/Target/TargetOptions.h"
				36	#include "llvm/ADT/StringExtras.h"
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	37	#include "llvm/ParameterAttributes.h"
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	38	using namespace llvm;
				39
				/// X86TargetLowering constructor - Describes the X86 target to the SelectionDAG
				/// legalizer. It caches subtarget-derived settings (scalar SSE availability,
				/// stack pointer register), registers a register class for each supported
				/// value type, records how each (operation, value type) pair is to be handled
				/// (Legal / Promote / Expand / Custom), then calls computeRegisterProperties()
				/// and sets the memset/memcpy/memmove store limits.
				///
				/// Note that the order of the calls below matters: several sections first set
				/// a blanket action (e.g. Expand for every vector type) and later override it
				/// for the specific types that the available instruction sets can handle.
				40	X86TargetLowering::X86TargetLowering(TargetMachine &TM)
				41	: TargetLowering(TM) {
				42	Subtarget = &TM.getSubtarget<X86Subtarget>();
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	43	X86ScalarSSEf64 = Subtarget->hasSSE2();
				44	X86ScalarSSEf32 = Subtarget->hasSSE1();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	45	X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
				46
				47	RegInfo = TM.getRegisterInfo();
				48
				49	// Set up the TargetLowering object.
				50
				51	// X86 is weird, it always uses i8 for shift amounts and setcc results.
				52	setShiftAmountType(MVT::i8);
				53	setSetCCResultType(MVT::i8);
				54	setSetCCResultContents(ZeroOrOneSetCCResult);
				55	setSchedulingPreference(SchedulingForRegPressure);
				56	setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
				57	setStackPointerRegisterToSaveRestore(X86StackPtr);
				58
				59	if (Subtarget->isTargetDarwin()) {
				60	// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
					// NOTE(review): both flags are passed as false here, which reads as the
					// opposite of the sentence above -- confirm against the semantics of
					// setUseUnderscoreSetJmp/setUseUnderscoreLongJmp before relying on it.
				61	setUseUnderscoreSetJmp(false);
				62	setUseUnderscoreLongJmp(false);
				63	} else if (Subtarget->isTargetMingw()) {
				64	// MS runtime is weird: it exports _setjmp, but longjmp!
				65	setUseUnderscoreSetJmp(true);
				66	setUseUnderscoreLongJmp(false);
				67	} else {
				68	setUseUnderscoreSetJmp(true);
				69	setUseUnderscoreLongJmp(true);
				70	}
				71
				72	// Set up the register classes.
				73	addRegisterClass(MVT::i8, X86::GR8RegisterClass);
				74	addRegisterClass(MVT::i16, X86::GR16RegisterClass);
				75	addRegisterClass(MVT::i32, X86::GR32RegisterClass);
				76	if (Subtarget->is64Bit())
				77	addRegisterClass(MVT::i64, X86::GR64RegisterClass);
				78
				79	setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
				80
				81	// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
				82	// operation.
				83	setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
				84	setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
				85	setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
				86
				87	if (Subtarget->is64Bit()) {
				88	setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
				89	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				90	} else {
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	91	if (X86ScalarSSEf64)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	92	// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
				93	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
				94	else
				95	setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
				96	}
				97
				98	// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
				99	// this operation.
				100	setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
				101	setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
				102	// SSE has no i16 to fp conversion, only i32
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	103	if (X86ScalarSSEf32) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	104	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	105	// f32 and f64 cases are Legal, f80 case is not
				106	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				107	} else {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	108	setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
				109	setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
				110	}
				111
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	112	// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
				113	// are Legal, f80 is custom lowered.
				114	setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
				115	setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	116
				117	// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
				118	// this operation.
				119	setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
				120	setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
				121
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	122	if (X86ScalarSSEf32) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	123	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	124	// f32 and f64 cases are Legal, f80 case is not
				125	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	126	} else {
				127	setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
				128	setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
				129	}
				130
				131	// Handle FP_TO_UINT by promoting the destination to a larger signed
				132	// conversion.
				133	setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
				134	setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
				135	setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
				136
				137	if (Subtarget->is64Bit()) {
				138	setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
				139	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				140	} else {
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	141	if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	142	// Expand FP_TO_UINT into a select.
				143	// FIXME: We would like to use a Custom expander here eventually to do
				144	// the optimal thing for SSE vs. the default expansion in the legalizer.
				145	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
				146	else
				147	// With SSE3 we can use fisttpll to convert to a signed i64.
					// (This branch is also taken when scalar SSE is not in use at all.)
				148	setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
				149	}
				150
				151	// TODO: when we have SSE, these could be more efficient, by using movd/movq.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	152	if (!X86ScalarSSEf64) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	153	setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
				154	setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
				155	}
				156
				157	setOperationAction(ISD::BR_JT , MVT::Other, Expand);
				158	setOperationAction(ISD::BRCOND , MVT::Other, Custom);
				159	setOperationAction(ISD::BR_CC , MVT::Other, Expand);
				160	setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
				161	setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
				162	if (Subtarget->is64Bit())
Christopher Lamb	0a7c866	2007-08-10 21:48:46 +0000	[diff] [blame]	163	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
				164	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
				165	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	166	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
				167	setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
				168	setOperationAction(ISD::FREM , MVT::f64 , Expand);
				169
				170	setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
				171	setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
				172	setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
				173	setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
				174	setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
				175	setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
				176	setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
				177	setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
				178	setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
				179	if (Subtarget->is64Bit()) {
				180	setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
				181	setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
				182	setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
				183	}
				184
				185	setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
				186	setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
				187
				188	// These should be promoted to a larger select which is supported.
				189	setOperationAction(ISD::SELECT , MVT::i1 , Promote);
				190	setOperationAction(ISD::SELECT , MVT::i8 , Promote);
				191	// X86 wants to expand cmov itself.
				192	setOperationAction(ISD::SELECT , MVT::i16 , Custom);
				193	setOperationAction(ISD::SELECT , MVT::i32 , Custom);
				194	setOperationAction(ISD::SELECT , MVT::f32 , Custom);
				195	setOperationAction(ISD::SELECT , MVT::f64 , Custom);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	196	setOperationAction(ISD::SELECT , MVT::f80 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	197	setOperationAction(ISD::SETCC , MVT::i8 , Custom);
				198	setOperationAction(ISD::SETCC , MVT::i16 , Custom);
				199	setOperationAction(ISD::SETCC , MVT::i32 , Custom);
				200	setOperationAction(ISD::SETCC , MVT::f32 , Custom);
				201	setOperationAction(ISD::SETCC , MVT::f64 , Custom);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	202	setOperationAction(ISD::SETCC , MVT::f80 , Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	203	if (Subtarget->is64Bit()) {
				204	setOperationAction(ISD::SELECT , MVT::i64 , Custom);
				205	setOperationAction(ISD::SETCC , MVT::i64 , Custom);
				206	}
				207	// X86 ret instruction may pop stack.
				208	setOperationAction(ISD::RET , MVT::Other, Custom);
				209	if (!Subtarget->is64Bit())
				210	setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
				211
				212	// Darwin ABI issue.
				213	setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
				214	setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
				215	setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
				216	setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
				217	setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
				218	if (Subtarget->is64Bit()) {
				219	setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
				220	setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
				221	setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
				222	setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
				223	}
				224	// 64-bit add, sub, shl, sra, srl (iff 32-bit x86). Only the shift *_PARTS
					// nodes are marked Custom here.
				225	setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
				226	setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
				227	setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
				228	// X86 wants to expand memset / memcpy itself.
				229	setOperationAction(ISD::MEMSET , MVT::Other, Custom);
				230	setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
				231
				232	// We don't have line number support yet.
				233	setOperationAction(ISD::LOCATION, MVT::Other, Expand);
				234	setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
				235	// FIXME - use subtarget debug flags
				236	if (!Subtarget->isTargetDarwin() &&
				237	!Subtarget->isTargetELF() &&
				238	!Subtarget->isTargetCygMing())
				239	setOperationAction(ISD::LABEL, MVT::Other, Expand);
				240
				241	setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
				242	setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
				243	setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
				244	setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
				245	if (Subtarget->is64Bit()) {
				246	// FIXME: Verify
				247	setExceptionPointerRegister(X86::RAX);
				248	setExceptionSelectorRegister(X86::RDX);
				249	} else {
				250	setExceptionPointerRegister(X86::EAX);
				251	setExceptionSelectorRegister(X86::EDX);
				252	}
Anton Korobeynikov	23ca9c5	2007-09-03 00:36:06 +0000	[diff] [blame]	253	setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	254
Duncan Sands	7407a9f	2007-09-11 14:10:23 +0000	[diff] [blame]	255	setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	256
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	257	// VASTART needs to be custom lowered to use the VarArgsFrameIndex
				258	setOperationAction(ISD::VASTART , MVT::Other, Custom);
				259	setOperationAction(ISD::VAARG , MVT::Other, Expand);
				260	setOperationAction(ISD::VAEND , MVT::Other, Expand);
				261	if (Subtarget->is64Bit())
				262	setOperationAction(ISD::VACOPY , MVT::Other, Custom);
				263	else
				264	setOperationAction(ISD::VACOPY , MVT::Other, Expand);
				265
				266	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
				267	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
				268	if (Subtarget->is64Bit())
				269	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
				270	if (Subtarget->isTargetCygMing())
				271	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
				272	else
				273	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
				274
					// Scalar floating point setup. Exactly one of the three branches below runs,
					// chosen by which scalar SSE levels were cached at the top of the constructor.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	275	if (X86ScalarSSEf64) {
				276	// f32 and f64 use SSE.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	277	// Set up the FP register classes.
				278	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				279	addRegisterClass(MVT::f64, X86::FR64RegisterClass);
				280
				281	// Use ANDPD to simulate FABS.
				282	setOperationAction(ISD::FABS , MVT::f64, Custom);
				283	setOperationAction(ISD::FABS , MVT::f32, Custom);
				284
				285	// Use XORP to simulate FNEG.
				286	setOperationAction(ISD::FNEG , MVT::f64, Custom);
				287	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				288
				289	// Use ANDPD and ORPD to simulate FCOPYSIGN.
				290	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
				291	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				292
				293	// We don't support sin/cos/fmod
				294	setOperationAction(ISD::FSIN , MVT::f64, Expand);
				295	setOperationAction(ISD::FCOS , MVT::f64, Expand);
				296	setOperationAction(ISD::FREM , MVT::f64, Expand);
				297	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				298	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				299	setOperationAction(ISD::FREM , MVT::f32, Expand);
				300
				301	// Expand FP immediates into loads from the stack, except for the special
				302	// cases we handle.
				303	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				304	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	305	addLegalFPImmediate(APFloat(+0.0)); // xorpd
				306	addLegalFPImmediate(APFloat(+0.0f)); // xorps
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	307
				308	// Conversions to long double (in X87) go through memory.
				309	setConvertAction(MVT::f32, MVT::f80, Expand);
				310	setConvertAction(MVT::f64, MVT::f80, Expand);
				311
				312	// Conversions from long double (in X87) go through memory.
				313	setConvertAction(MVT::f80, MVT::f32, Expand);
				314	setConvertAction(MVT::f80, MVT::f64, Expand);
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	315	} else if (X86ScalarSSEf32) {
				316	// Use SSE for f32, x87 for f64.
				317	// Set up the FP register classes.
				318	addRegisterClass(MVT::f32, X86::FR32RegisterClass);
				319	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				320
				321	// Use ANDPS to simulate FABS.
				322	setOperationAction(ISD::FABS , MVT::f32, Custom);
				323
				324	// Use XORP to simulate FNEG.
				325	setOperationAction(ISD::FNEG , MVT::f32, Custom);
				326
				327	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				328
				329	// Use ANDPS and ORPS to simulate FCOPYSIGN.
				330	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				331	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
				332
				333	// We don't support sin/cos/fmod
				334	setOperationAction(ISD::FSIN , MVT::f32, Expand);
				335	setOperationAction(ISD::FCOS , MVT::f32, Expand);
				336	setOperationAction(ISD::FREM , MVT::f32, Expand);
				337
				338	// Expand FP immediates into loads from the stack, except for the special
				339	// cases we handle.
				340	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				341	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
				342	addLegalFPImmediate(APFloat(+0.0f)); // xorps
				343	addLegalFPImmediate(APFloat(+0.0)); // FLD0
				344	addLegalFPImmediate(APFloat(+1.0)); // FLD1
				345	addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
				346	addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
				347
				348	// SSE->x87 conversions go through memory.
				349	setConvertAction(MVT::f32, MVT::f64, Expand);
				350	setConvertAction(MVT::f32, MVT::f80, Expand);
				351
				352	// x87->SSE truncations need to go through memory.
				353	setConvertAction(MVT::f80, MVT::f32, Expand);
				354	setConvertAction(MVT::f64, MVT::f32, Expand);
				355	// And x87->x87 truncations also.
				356	setConvertAction(MVT::f80, MVT::f64, Expand);
				357
					// f64 sin/cos are only expanded when unsafe FP math is disabled.
				358	if (!UnsafeFPMath) {
				359	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				360	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				361	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	362	} else {
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	363	// f32 and f64 in x87.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	364	// Set up the FP register classes.
				365	addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
				366	addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
				367
				368	setOperationAction(ISD::UNDEF, MVT::f64, Expand);
				369	setOperationAction(ISD::UNDEF, MVT::f32, Expand);
				370	setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
				371	setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
Dale Johannesen	8f83a6b	2007-08-09 01:04:01 +0000	[diff] [blame]	372
				373	// Floating truncations need to go through memory.
				374	setConvertAction(MVT::f80, MVT::f32, Expand);
				375	setConvertAction(MVT::f64, MVT::f32, Expand);
				376	setConvertAction(MVT::f80, MVT::f64, Expand);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	377
					// f64 sin/cos are only expanded when unsafe FP math is disabled.
				378	if (!UnsafeFPMath) {
				379	setOperationAction(ISD::FSIN , MVT::f64 , Expand);
				380	setOperationAction(ISD::FCOS , MVT::f64 , Expand);
				381	}
				382
				383	setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
				384	setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
Dale Johannesen	bbe2b70	2007-08-30 00:23:21 +0000	[diff] [blame]	385	addLegalFPImmediate(APFloat(+0.0)); // FLD0
				386	addLegalFPImmediate(APFloat(+1.0)); // FLD1
				387	addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
				388	addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	389	addLegalFPImmediate(APFloat(+0.0f)); // FLD0
				390	addLegalFPImmediate(APFloat(+1.0f)); // FLD1
				391	addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
				392	addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	393	}
				394
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	395	// Long double always uses X87.
				396	addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	397	setOperationAction(ISD::UNDEF, MVT::f80, Expand);
				398	setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
				399	setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
Dale Johannesen	4ab00bd	2007-08-05 18:49:15 +0000	[diff] [blame]	400
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	401	// First set operation action for all vector types to expand. Then we
				402	// will selectively turn on ones that can be effectively codegen'd.
				403	for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
				404	VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
				405	setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
				406	setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
				407	setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
				408	setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
				409	setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
				410	setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
				411	setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
				412	setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
				413	setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
				414	setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
				415	setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
				416	setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
				417	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
				418	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
				419	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				420	setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
				421	setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
				422	setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
				423	setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
				424	setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
				425	setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
				426	setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
				427	setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
				428	}
				429
					// MMX: the four 64-bit vector types all share the VR64 register class.
				430	if (Subtarget->hasMMX()) {
				431	addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
				432	addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
				433	addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
				434	addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
				435
				436	// FIXME: add MMX packed arithmetics
				437
				438	setOperationAction(ISD::ADD, MVT::v8i8, Legal);
				439	setOperationAction(ISD::ADD, MVT::v4i16, Legal);
				440	setOperationAction(ISD::ADD, MVT::v2i32, Legal);
				441	setOperationAction(ISD::ADD, MVT::v1i64, Legal);
				442
				443	setOperationAction(ISD::SUB, MVT::v8i8, Legal);
				444	setOperationAction(ISD::SUB, MVT::v4i16, Legal);
				445	setOperationAction(ISD::SUB, MVT::v2i32, Legal);
				446
				447	setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
				448	setOperationAction(ISD::MUL, MVT::v4i16, Legal);
				449
					// Bitwise ops and loads on the narrower-element MMX types are promoted to
					// v1i64, where they are Legal.
				450	setOperationAction(ISD::AND, MVT::v8i8, Promote);
				451	AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
				452	setOperationAction(ISD::AND, MVT::v4i16, Promote);
				453	AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
				454	setOperationAction(ISD::AND, MVT::v2i32, Promote);
				455	AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
				456	setOperationAction(ISD::AND, MVT::v1i64, Legal);
				457
				458	setOperationAction(ISD::OR, MVT::v8i8, Promote);
				459	AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
				460	setOperationAction(ISD::OR, MVT::v4i16, Promote);
				461	AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
				462	setOperationAction(ISD::OR, MVT::v2i32, Promote);
				463	AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
				464	setOperationAction(ISD::OR, MVT::v1i64, Legal);
				465
				466	setOperationAction(ISD::XOR, MVT::v8i8, Promote);
				467	AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
				468	setOperationAction(ISD::XOR, MVT::v4i16, Promote);
				469	AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
				470	setOperationAction(ISD::XOR, MVT::v2i32, Promote);
				471	AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
				472	setOperationAction(ISD::XOR, MVT::v1i64, Legal);
				473
				474	setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
				475	AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
				476	setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
				477	AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
				478	setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
				479	AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
				480	setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
				481
				482	setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
				483	setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
				484	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
				485	setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
				486
				487	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
				488	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
				489	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
				490	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
				491
				492	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
				493	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
				494	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
				495	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
				496	}
				497
					// SSE1: v4f32 lives in the VR128 register class.
				498	if (Subtarget->hasSSE1()) {
				499	addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
				500
				501	setOperationAction(ISD::FADD, MVT::v4f32, Legal);
				502	setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
				503	setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
				504	setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
				505	setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
				506	setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	507	setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
				508	setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
				509	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
				510	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
				511	setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
				512	}
				513
					// SSE2: v2f64 and the 128-bit integer vector types also use VR128.
				514	if (Subtarget->hasSSE2()) {
				515	addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
				516	addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
				517	addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
				518	addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
				519	addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
				520
				521	setOperationAction(ISD::ADD, MVT::v16i8, Legal);
				522	setOperationAction(ISD::ADD, MVT::v8i16, Legal);
				523	setOperationAction(ISD::ADD, MVT::v4i32, Legal);
				524	setOperationAction(ISD::ADD, MVT::v2i64, Legal);
				525	setOperationAction(ISD::SUB, MVT::v16i8, Legal);
				526	setOperationAction(ISD::SUB, MVT::v8i16, Legal);
				527	setOperationAction(ISD::SUB, MVT::v4i32, Legal);
				528	setOperationAction(ISD::SUB, MVT::v2i64, Legal);
				529	setOperationAction(ISD::MUL, MVT::v8i16, Legal);
				530	setOperationAction(ISD::FADD, MVT::v2f64, Legal);
				531	setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
				532	setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
				533	setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
				534	setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
				535	setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	536
				537	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
				538	setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
				539	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
				540	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
				541	// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
				542	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
				543
				544	// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
					// The loop covers the value types from v16i8 up to, but not including,
					// v2i64; v2f64 and v2i64 are set explicitly right after it.
				545	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				546	setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
				547	setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
				548	setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
				549	}
				550	setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
				551	setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
				552	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
				553	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
				554	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
				555	setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
				556
				557	// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
					// As above, the loop stops before v2i64 itself.
				558	for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
				559	setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
				560	AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
				561	setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
				562	AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
				563	setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
				564	AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
				565	setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
				566	AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
				567	setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
				568	AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
				569	}
				570
				571	// v2f64 and v2i64 loads are Legal; their selects are custom lowered.
				572	setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
				573	setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
				574	setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
				575	setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
				576	}
				577
				578	// We want to custom lower some of our intrinsics.
				579	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				580
				581	// We have target-specific dag combine patterns for the following nodes:
				582	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
				583	setTargetDAGCombine(ISD::SELECT);
				584
				585	computeRegisterProperties();
				586
				587	// FIXME: These should be based on subtarget info. Plus, the values should
				588	// be smaller when we are in optimizing for size mode.
				589	maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
				590	maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
				591	maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
				592	allowUnalignedMemoryAccesses = true; // x86 supports it!
				593	}
				594
				595
				596	//===----------------------------------------------------------------------===//
				597	// Return Value Calling Convention Implementation
				598	//===----------------------------------------------------------------------===//
				599
				600	#include "X86GenCallingConv.inc"
				601
				602	/// LowerRET - Lower an ISD::RET node.
				603	SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
				604	assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
				605
				606	SmallVector<CCValAssign, 16> RVLocs;
				607	unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
				608	bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
				609	CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
				610	CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
				611
				612
				613	// If this is the first return lowered for this function, add the regs to the
				614	// liveout set for the function.
				615	if (DAG.getMachineFunction().liveout_empty()) {
				616	for (unsigned i = 0; i != RVLocs.size(); ++i)
				617	if (RVLocs[i].isRegLoc())
				618	DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
				619	}
				620
				621	SDOperand Chain = Op.getOperand(0);
				622	SDOperand Flag;
				623
				624	// Copy the result values into the output registers.
				625	if (RVLocs.size() != 1 \|\| !RVLocs[0].isRegLoc() \|\|
				626	RVLocs[0].getLocReg() != X86::ST0) {
				627	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				628	CCValAssign &VA = RVLocs[i];
				629	assert(VA.isRegLoc() && "Can only return in registers!");
				630	Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
				631	Flag);
				632	Flag = Chain.getValue(1);
				633	}
				634	} else {
				635	// We need to handle a destination of ST0 specially, because it isn't really
				636	// a register.
				637	SDOperand Value = Op.getOperand(1);
				638
				639	// If this is an FP return with ScalarSSE, we need to move the value from
				640	// an XMM register onto the fp-stack.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	641	if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) \|\|
				642	(X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	643	SDOperand MemLoc;
				644
				645	// If this is a load into a scalarsse value, don't store the loaded value
				646	// back to the stack, only to reload it: just replace the scalar-sse load.
				647	if (ISD::isNON_EXTLoad(Value.Val) &&
				648	(Chain == Value.getValue(1) \|\| Chain == Value.getOperand(0))) {
				649	Chain = Value.getOperand(0);
				650	MemLoc = Value.getOperand(1);
				651	} else {
				652	// Spill the value to memory and reload it into top of stack.
				653	unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
				654	MachineFunction &MF = DAG.getMachineFunction();
				655	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				656	MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
				657	Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
				658	}
				659	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
				660	SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
				661	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				662	Chain = Value.getValue(1);
				663	}
				664
				665	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				666	SDOperand Ops[] = { Chain, Value };
				667	Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
				668	Flag = Chain.getValue(1);
				669	}
				670
				671	SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
				672	if (Flag.Val)
				673	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
				674	else
				675	return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
				676	}
				677
				678
				679	/// LowerCallResult - Lower the result values of an ISD::CALL into the
				680	/// appropriate copies out of appropriate physical registers. This assumes that
				681	/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
				682	/// being lowered. This returns an SDNode with the same number of values as the
				683	/// ISD::CALL.
				684	SDNode *X86TargetLowering::
				685	LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
				686	unsigned CallingConv, SelectionDAG &DAG) {
				687
				688	// Assign locations to each value returned by this call.
				689	SmallVector<CCValAssign, 16> RVLocs;
				690	bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
				691	CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
				692	CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
				693
				694
				695	SmallVector<SDOperand, 8> ResultVals;
				696
				697	// Copy all of the result registers out of their specified physreg.
				698	if (RVLocs.size() != 1 \|\| RVLocs[0].getLocReg() != X86::ST0) {
				699	for (unsigned i = 0; i != RVLocs.size(); ++i) {
				700	Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
				701	RVLocs[i].getValVT(), InFlag).getValue(1);
				702	InFlag = Chain.getValue(2);
				703	ResultVals.push_back(Chain.getValue(0));
				704	}
				705	} else {
				706	// Copies from the FP stack are special, as ST0 isn't a valid register
				707	// before the fp stackifier runs.
				708
				709	// Copy ST0 into an RFP register with FP_GET_RESULT.
				710	SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
				711	SDOperand GROps[] = { Chain, InFlag };
				712	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
				713	Chain = RetVal.getValue(1);
				714	InFlag = RetVal.getValue(2);
				715
				716	// If we are using ScalarSSE, store ST(0) to the stack and reload it into
				717	// an XMM register.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	718	if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) \|\|
				719	(X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	720	// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
				721	// shouldn't be necessary except that RFP cannot be live across
				722	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				723	MachineFunction &MF = DAG.getMachineFunction();
				724	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				725	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				726	SDOperand Ops[] = {
				727	Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
				728	};
				729	Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
				730	RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
				731	Chain = RetVal.getValue(1);
				732	}
				733	ResultVals.push_back(RetVal);
				734	}
				735
				736	// Merge everything together with a MERGE_VALUES node.
				737	ResultVals.push_back(Chain);
				738	return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
				739	&ResultVals[0], ResultVals.size()).Val;
				740	}
				741
				742
				743	//===----------------------------------------------------------------------===//
				744	// C & StdCall Calling Convention implementation
				745	//===----------------------------------------------------------------------===//
				746	// The StdCall calling convention is the standard for many Windows API
				747	// routines. It differs from the C calling convention only slightly: the
				748	// callee cleans up the stack, not the caller. Symbols must also be
				749	// decorated in a special way. It doesn't support any vector arguments.
				750
				751	/// AddLiveIn - This helper function adds the specified physical register to the
				752	/// MachineFunction as a live in value. It also creates a corresponding virtual
				753	/// register for it.
				754	static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
				755	const TargetRegisterClass *RC) {
				756	assert(RC->contains(PReg) && "Not the correct regclass!");
				757	unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
				758	MF.addLiveIn(PReg, VReg);
				759	return VReg;
				760	}
				761
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	762	SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
				763	const CCValAssign &VA,
				764	MachineFrameInfo *MFI,
				765	SDOperand Root, unsigned i) {
				766	// Create the nodes corresponding to a load from this parameter slot.
				767	int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
				768	VA.getLocMemOffset());
				769	SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
				770
				771	unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
				772
				773	if (Flags & ISD::ParamFlags::ByVal)
				774	return FIN;
				775	else
				776	return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
				777	}
				778
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	779	SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
				780	bool isStdCall) {
				781	unsigned NumArgs = Op.Val->getNumValues() - 1;
				782	MachineFunction &MF = DAG.getMachineFunction();
				783	MachineFrameInfo *MFI = MF.getFrameInfo();
				784	SDOperand Root = Op.getOperand(0);
				785	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				786
				787	// Assign locations to all of the incoming arguments.
				788	SmallVector<CCValAssign, 16> ArgLocs;
				789	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				790	getTargetMachine(), ArgLocs);
				791	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
				792
				793	SmallVector<SDOperand, 8> ArgValues;
				794	unsigned LastVal = ~0U;
				795	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				796	CCValAssign &VA = ArgLocs[i];
				797	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				798	// places.
				799	assert(VA.getValNo() != LastVal &&
				800	"Don't support value assigned to multiple locs yet");
				801	LastVal = VA.getValNo();
				802
				803	if (VA.isRegLoc()) {
				804	MVT::ValueType RegVT = VA.getLocVT();
				805	TargetRegisterClass *RC;
				806	if (RegVT == MVT::i32)
				807	RC = X86::GR32RegisterClass;
				808	else {
				809	assert(MVT::isVector(RegVT));
				810	RC = X86::VR128RegisterClass;
				811	}
				812
				813	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				814	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				815
				816	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				817	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				818	// right size.
				819	if (VA.getLocInfo() == CCValAssign::SExt)
				820	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				821	DAG.getValueType(VA.getValVT()));
				822	else if (VA.getLocInfo() == CCValAssign::ZExt)
				823	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				824	DAG.getValueType(VA.getValVT()));
				825
				826	if (VA.getLocInfo() != CCValAssign::Full)
				827	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				828
				829	ArgValues.push_back(ArgValue);
				830	} else {
				831	assert(VA.isMemLoc());
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	832	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	833	}
				834	}
				835
				836	unsigned StackSize = CCInfo.getNextStackOffset();
				837
				838	ArgValues.push_back(Root);
				839
				840	// If the function takes variable number of arguments, make a frame index for
				841	// the start of the first vararg value... for expansion of llvm.va_start.
				842	if (isVarArg)
				843	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				844
				845	if (isStdCall && !isVarArg) {
				846	BytesToPopOnReturn = StackSize; // Callee pops everything..
				847	BytesCallerReserves = 0;
				848	} else {
				849	BytesToPopOnReturn = 0; // Callee pops nothing.
				850
				851	// If this is an sret function, the return should pop the hidden pointer.
				852	if (NumArgs &&
				853	(cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
				854	ISD::ParamFlags::StructReturn))
				855	BytesToPopOnReturn = 4;
				856
				857	BytesCallerReserves = StackSize;
				858	}
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	859
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	860	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	861
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	862	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				863	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	864
				865	// Return the new list of results.
				866	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				867	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				868	}
				869
				870	SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
				871	unsigned CC) {
				872	SDOperand Chain = Op.getOperand(0);
				873	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				874	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				875	SDOperand Callee = Op.getOperand(4);
				876	unsigned NumOps = (Op.getNumOperands() - 5) / 2;
				877
				878	// Analyze operands of the call, assigning locations to each operand.
				879	SmallVector<CCValAssign, 16> ArgLocs;
				880	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				881	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
				882
				883	// Get a count of how many bytes are to be pushed on the stack.
				884	unsigned NumBytes = CCInfo.getNextStackOffset();
				885
				886	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				887
				888	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				889	SmallVector<SDOperand, 8> MemOpChains;
				890
				891	SDOperand StackPtr;
				892
				893	// Walk the register/memloc assignments, inserting copies/loads.
				894	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				895	CCValAssign &VA = ArgLocs[i];
				896	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				897
				898	// Promote the value if needed.
				899	switch (VA.getLocInfo()) {
				900	default: assert(0 && "Unknown loc info!");
				901	case CCValAssign::Full: break;
				902	case CCValAssign::SExt:
				903	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				904	break;
				905	case CCValAssign::ZExt:
				906	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				907	break;
				908	case CCValAssign::AExt:
				909	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				910	break;
				911	}
				912
				913	if (VA.isRegLoc()) {
				914	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				915	} else {
				916	assert(VA.isMemLoc());
				917	if (StackPtr.Val == 0)
				918	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	007b714	2007-09-21 15:50:22 +0000	[diff] [blame]	919
				920	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				921	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	922	}
				923	}
				924
				925	// If the first argument is an sret pointer, remember it.
				926	bool isSRet = NumOps &&
				927	(cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
				928	ISD::ParamFlags::StructReturn);
				929
				930	if (!MemOpChains.empty())
				931	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				932	&MemOpChains[0], MemOpChains.size());
				933
				934	// Build a sequence of copy-to-reg nodes chained together with token chain
				935	// and flag operands which copy the outgoing args into registers.
				936	SDOperand InFlag;
				937	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				938	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				939	InFlag);
				940	InFlag = Chain.getValue(1);
				941	}
				942
				943	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				944	// GOT pointer.
				945	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				946	Subtarget->isPICStyleGOT()) {
				947	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				948	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				949	InFlag);
				950	InFlag = Chain.getValue(1);
				951	}
				952
				953	// If the callee is a GlobalAddress node (quite common, every direct call is)
				954	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				955	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				956	// We should use extra load for direct calls to dllimported functions in
				957	// non-JIT mode.
				958	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				959	getTargetMachine(), true))
				960	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				961	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				962	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				963
				964	// Returns a chain & a flag for retval copy to use.
				965	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				966	SmallVector<SDOperand, 8> Ops;
				967	Ops.push_back(Chain);
				968	Ops.push_back(Callee);
				969
				970	// Add argument registers to the end of the list so that they are known live
				971	// into the call.
				972	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				973	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				974	RegsToPass[i].second.getValueType()));
				975
				976	// Add an implicit use GOT pointer in EBX.
				977	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				978	Subtarget->isPICStyleGOT())
				979	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				980
				981	if (InFlag.Val)
				982	Ops.push_back(InFlag);
				983
				984	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				985	NodeTys, &Ops[0], Ops.size());
				986	InFlag = Chain.getValue(1);
				987
				988	// Create the CALLSEQ_END node.
				989	unsigned NumBytesForCalleeToPush = 0;
				990
				991	if (CC == CallingConv::X86_StdCall) {
				992	if (isVarArg)
				993	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				994	else
				995	NumBytesForCalleeToPush = NumBytes;
				996	} else {
				997	// If this is is a call to a struct-return function, the callee
				998	// pops the hidden struct pointer, so we have to push it back.
				999	// This is common for Darwin/X86, Linux & Mingw32 targets.
				1000	NumBytesForCalleeToPush = isSRet ? 4 : 0;
				1001	}
				1002
				1003	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1004	Ops.clear();
				1005	Ops.push_back(Chain);
				1006	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1007	Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
				1008	Ops.push_back(InFlag);
				1009	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1010	InFlag = Chain.getValue(1);
				1011
				1012	// Handle result values, copying them out of physregs into vregs that we
				1013	// return.
				1014	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1015	}
				1016
				1017
				1018	//===----------------------------------------------------------------------===//
				1019	// FastCall Calling Convention implementation
				1020	//===----------------------------------------------------------------------===//
				1021	//
				1022	// The X86 'fastcall' calling convention passes up to two integer arguments in
				1023	// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
				1024	// and requires that the callee pop its arguments off the stack (allowing proper
				1025	// tail calls), and has the same return value conventions as C calling convs.
				1026	//
				1027	// This calling convention always arranges for the callee pop value to be 8n+4
				1028	// bytes, which is needed for tail recursion elimination and stack alignment
				1029	// reasons.
				1030	SDOperand
				1031	X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1032	MachineFunction &MF = DAG.getMachineFunction();
				1033	MachineFrameInfo *MFI = MF.getFrameInfo();
				1034	SDOperand Root = Op.getOperand(0);
				1035	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1036
				1037	// Assign locations to all of the incoming arguments.
				1038	SmallVector<CCValAssign, 16> ArgLocs;
				1039	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1040	getTargetMachine(), ArgLocs);
				1041	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
				1042
				1043	SmallVector<SDOperand, 8> ArgValues;
				1044	unsigned LastVal = ~0U;
				1045	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1046	CCValAssign &VA = ArgLocs[i];
				1047	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1048	// places.
				1049	assert(VA.getValNo() != LastVal &&
				1050	"Don't support value assigned to multiple locs yet");
				1051	LastVal = VA.getValNo();
				1052
				1053	if (VA.isRegLoc()) {
				1054	MVT::ValueType RegVT = VA.getLocVT();
				1055	TargetRegisterClass *RC;
				1056	if (RegVT == MVT::i32)
				1057	RC = X86::GR32RegisterClass;
				1058	else {
				1059	assert(MVT::isVector(RegVT));
				1060	RC = X86::VR128RegisterClass;
				1061	}
				1062
				1063	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1064	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1065
				1066	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1067	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1068	// right size.
				1069	if (VA.getLocInfo() == CCValAssign::SExt)
				1070	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1071	DAG.getValueType(VA.getValVT()));
				1072	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1073	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1074	DAG.getValueType(VA.getValVT()));
				1075
				1076	if (VA.getLocInfo() != CCValAssign::Full)
				1077	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1078
				1079	ArgValues.push_back(ArgValue);
				1080	} else {
				1081	assert(VA.isMemLoc());
Rafael Espindola	b53ef12	2007-09-21 14:55:38 +0000	[diff] [blame]	1082	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1083	}
				1084	}
				1085
				1086	ArgValues.push_back(Root);
				1087
				1088	unsigned StackSize = CCInfo.getNextStackOffset();
				1089
				1090	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1091	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1092	// arguments and the arguments after the retaddr has been pushed are aligned.
				1093	if ((StackSize & 7) == 0)
				1094	StackSize += 4;
				1095	}
				1096
				1097	VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
				1098	RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1099	BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
				1100	BytesCallerReserves = 0;
				1101
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1102	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1103	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1104
				1105	// Return the new list of results.
				1106	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1107	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1108	}
				1109
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1110	SDOperand
				1111	X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
				1112	const SDOperand &StackPtr,
				1113	const CCValAssign &VA,
				1114	SDOperand Chain,
				1115	SDOperand Arg) {
				1116	SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
				1117	PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
				1118	SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
				1119	unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
				1120	if (Flags & ISD::ParamFlags::ByVal) {
				1121	unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
				1122	ISD::ParamFlags::ByValAlignOffs);
				1123
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1124	unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
				1125	ISD::ParamFlags::ByValSizeOffs;
				1126
				1127	SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
				1128	SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
				1129
				1130	return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
				1131	AlignNode);
				1132	} else {
				1133	return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
				1134	}
				1135	}
				1136
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1137	SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1138	unsigned CC) {
				1139	SDOperand Chain = Op.getOperand(0);
				1140	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1141	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1142	SDOperand Callee = Op.getOperand(4);
				1143
				1144	// Analyze operands of the call, assigning locations to each operand.
				1145	SmallVector<CCValAssign, 16> ArgLocs;
				1146	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1147	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
				1148
				1149	// Get a count of how many bytes are to be pushed on the stack.
				1150	unsigned NumBytes = CCInfo.getNextStackOffset();
				1151
				1152	if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
				1153	// Make sure the instruction takes 8n+4 bytes to make sure the start of the
				1154	// arguments and the arguments after the retaddr has been pushed are aligned.
				1155	if ((NumBytes & 7) == 0)
				1156	NumBytes += 4;
				1157	}
				1158
				1159	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1160
				1161	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1162	SmallVector<SDOperand, 8> MemOpChains;
				1163
				1164	SDOperand StackPtr;
				1165
				1166	// Walk the register/memloc assignments, inserting copies/loads.
				1167	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1168	CCValAssign &VA = ArgLocs[i];
				1169	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1170
				1171	// Promote the value if needed.
				1172	switch (VA.getLocInfo()) {
				1173	default: assert(0 && "Unknown loc info!");
				1174	case CCValAssign::Full: break;
				1175	case CCValAssign::SExt:
				1176	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1177	break;
				1178	case CCValAssign::ZExt:
				1179	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1180	break;
				1181	case CCValAssign::AExt:
				1182	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1183	break;
				1184	}
				1185
				1186	if (VA.isRegLoc()) {
				1187	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1188	} else {
				1189	assert(VA.isMemLoc());
				1190	if (StackPtr.Val == 0)
				1191	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	007b714	2007-09-21 15:50:22 +0000	[diff] [blame]	1192
				1193	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				1194	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1195	}
				1196	}
				1197
				1198	if (!MemOpChains.empty())
				1199	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1200	&MemOpChains[0], MemOpChains.size());
				1201
				1202	// Build a sequence of copy-to-reg nodes chained together with token chain
				1203	// and flag operands which copy the outgoing args into registers.
				1204	SDOperand InFlag;
				1205	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1206	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1207	InFlag);
				1208	InFlag = Chain.getValue(1);
				1209	}
				1210
				1211	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1212	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1213	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1214	// We should use extra load for direct calls to dllimported functions in
				1215	// non-JIT mode.
				1216	if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1217	getTargetMachine(), true))
				1218	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1219	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1220	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1221
				1222	// ELF / PIC requires GOT in the EBX register before function calls via PLT
				1223	// GOT pointer.
				1224	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1225	Subtarget->isPICStyleGOT()) {
				1226	Chain = DAG.getCopyToReg(Chain, X86::EBX,
				1227	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				1228	InFlag);
				1229	InFlag = Chain.getValue(1);
				1230	}
				1231
				1232	// Returns a chain & a flag for retval copy to use.
				1233	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1234	SmallVector<SDOperand, 8> Ops;
				1235	Ops.push_back(Chain);
				1236	Ops.push_back(Callee);
				1237
				1238	// Add argument registers to the end of the list so that they are known live
				1239	// into the call.
				1240	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1241	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1242	RegsToPass[i].second.getValueType()));
				1243
				1244	// Add an implicit use GOT pointer in EBX.
				1245	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				1246	Subtarget->isPICStyleGOT())
				1247	Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
				1248
				1249	if (InFlag.Val)
				1250	Ops.push_back(InFlag);
				1251
				1252	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1253	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1254	NodeTys, &Ops[0], Ops.size());
				1255	InFlag = Chain.getValue(1);
				1256
				1257	// Returns a flag for retval copy to use.
				1258	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1259	Ops.clear();
				1260	Ops.push_back(Chain);
				1261	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1262	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1263	Ops.push_back(InFlag);
				1264	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1265	InFlag = Chain.getValue(1);
				1266
				1267	// Handle result values, copying them out of physregs into vregs that we
				1268	// return.
				1269	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1270	}
				1271
				1272
				1273	//===----------------------------------------------------------------------===//
				1274	// X86-64 C Calling Convention implementation
				1275	//===----------------------------------------------------------------------===//
				1276
				1277	SDOperand
				1278	X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
				1279	MachineFunction &MF = DAG.getMachineFunction();
				1280	MachineFrameInfo *MFI = MF.getFrameInfo();
				1281	SDOperand Root = Op.getOperand(0);
				1282	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1283
				1284	static const unsigned GPR64ArgRegs[] = {
				1285	X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
				1286	};
				1287	static const unsigned XMMArgRegs[] = {
				1288	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1289	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1290	};
				1291
				1292
				1293	// Assign locations to all of the incoming arguments.
				1294	SmallVector<CCValAssign, 16> ArgLocs;
				1295	CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
				1296	getTargetMachine(), ArgLocs);
				1297	CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
				1298
				1299	SmallVector<SDOperand, 8> ArgValues;
				1300	unsigned LastVal = ~0U;
				1301	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1302	CCValAssign &VA = ArgLocs[i];
				1303	// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
				1304	// places.
				1305	assert(VA.getValNo() != LastVal &&
				1306	"Don't support value assigned to multiple locs yet");
				1307	LastVal = VA.getValNo();
				1308
				1309	if (VA.isRegLoc()) {
				1310	MVT::ValueType RegVT = VA.getLocVT();
				1311	TargetRegisterClass *RC;
				1312	if (RegVT == MVT::i32)
				1313	RC = X86::GR32RegisterClass;
				1314	else if (RegVT == MVT::i64)
				1315	RC = X86::GR64RegisterClass;
				1316	else if (RegVT == MVT::f32)
				1317	RC = X86::FR32RegisterClass;
				1318	else if (RegVT == MVT::f64)
				1319	RC = X86::FR64RegisterClass;
				1320	else {
				1321	assert(MVT::isVector(RegVT));
				1322	if (MVT::getSizeInBits(RegVT) == 64) {
				1323	RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
				1324	RegVT = MVT::i64;
				1325	} else
				1326	RC = X86::VR128RegisterClass;
				1327	}
				1328
				1329	unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
				1330	SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
				1331
				1332	// If this is an 8 or 16-bit value, it is really passed promoted to 32
				1333	// bits. Insert an assert[sz]ext to capture this, then truncate to the
				1334	// right size.
				1335	if (VA.getLocInfo() == CCValAssign::SExt)
				1336	ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
				1337	DAG.getValueType(VA.getValVT()));
				1338	else if (VA.getLocInfo() == CCValAssign::ZExt)
				1339	ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
				1340	DAG.getValueType(VA.getValVT()));
				1341
				1342	if (VA.getLocInfo() != CCValAssign::Full)
				1343	ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
				1344
				1345	// Handle MMX values passed in GPRs.
				1346	if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
				1347	MVT::getSizeInBits(RegVT) == 64)
				1348	ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
				1349
				1350	ArgValues.push_back(ArgValue);
				1351	} else {
				1352	assert(VA.isMemLoc());
Rafael Espindola	03cbeb7	2007-09-14 15:48:13 +0000	[diff] [blame]	1353	ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1354	}
				1355	}
				1356
				1357	unsigned StackSize = CCInfo.getNextStackOffset();
				1358
				1359	// If the function takes variable number of arguments, make a frame index for
				1360	// the start of the first vararg value... for expansion of llvm.va_start.
				1361	if (isVarArg) {
				1362	unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
				1363	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1364
				1365	// For X86-64, if there are vararg parameters that are passed via
				1366	// registers, then we must store them to their spots on the stack so they
				1367	// may be loaded by deferencing the result of va_next.
				1368	VarArgsGPOffset = NumIntRegs * 8;
				1369	VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
				1370	VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
				1371	RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
				1372
				1373	// Store the integer parameter registers.
				1374	SmallVector<SDOperand, 8> MemOps;
				1375	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				1376	SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1377	DAG.getConstant(VarArgsGPOffset, getPointerTy()));
				1378	for (; NumIntRegs != 6; ++NumIntRegs) {
				1379	unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
				1380	X86::GR64RegisterClass);
				1381	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
				1382	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1383	MemOps.push_back(Store);
				1384	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1385	DAG.getConstant(8, getPointerTy()));
				1386	}
				1387
				1388	// Now store the XMM (fp + vector) parameter registers.
				1389	FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
				1390	DAG.getConstant(VarArgsFPOffset, getPointerTy()));
				1391	for (; NumXMMRegs != 8; ++NumXMMRegs) {
				1392	unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
				1393	X86::VR128RegisterClass);
				1394	SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
				1395	SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
				1396	MemOps.push_back(Store);
				1397	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				1398	DAG.getConstant(16, getPointerTy()));
				1399	}
				1400	if (!MemOps.empty())
				1401	Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1402	&MemOps[0], MemOps.size());
				1403	}
				1404
				1405	ArgValues.push_back(Root);
				1406
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1407	BytesToPopOnReturn = 0; // Callee pops nothing.
				1408	BytesCallerReserves = StackSize;
				1409
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1410	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1411	FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
				1412
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1413	// Return the new list of results.
				1414	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
				1415	&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
				1416	}
				1417
				1418	SDOperand
				1419	X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
				1420	unsigned CC) {
				1421	SDOperand Chain = Op.getOperand(0);
				1422	bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
				1423	bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
				1424	SDOperand Callee = Op.getOperand(4);
				1425
				1426	// Analyze operands of the call, assigning locations to each operand.
				1427	SmallVector<CCValAssign, 16> ArgLocs;
				1428	CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
				1429	CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
				1430
				1431	// Get a count of how many bytes are to be pushed on the stack.
				1432	unsigned NumBytes = CCInfo.getNextStackOffset();
				1433	Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
				1434
				1435	SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
				1436	SmallVector<SDOperand, 8> MemOpChains;
				1437
				1438	SDOperand StackPtr;
				1439
				1440	// Walk the register/memloc assignments, inserting copies/loads.
				1441	for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
				1442	CCValAssign &VA = ArgLocs[i];
				1443	SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
				1444
				1445	// Promote the value if needed.
				1446	switch (VA.getLocInfo()) {
				1447	default: assert(0 && "Unknown loc info!");
				1448	case CCValAssign::Full: break;
				1449	case CCValAssign::SExt:
				1450	Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
				1451	break;
				1452	case CCValAssign::ZExt:
				1453	Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
				1454	break;
				1455	case CCValAssign::AExt:
				1456	Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
				1457	break;
				1458	}
				1459
				1460	if (VA.isRegLoc()) {
				1461	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
				1462	} else {
				1463	assert(VA.isMemLoc());
				1464	if (StackPtr.Val == 0)
				1465	StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
Rafael Espindola	b8bcfcd	2007-08-20 15:18:24 +0000	[diff] [blame]	1466
Rafael Espindola	ddb88da	2007-08-31 15:06:30 +0000	[diff] [blame]	1467	MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
				1468	Arg));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1469	}
				1470	}
				1471
				1472	if (!MemOpChains.empty())
				1473	Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
				1474	&MemOpChains[0], MemOpChains.size());
				1475
				1476	// Build a sequence of copy-to-reg nodes chained together with token chain
				1477	// and flag operands which copy the outgoing args into registers.
				1478	SDOperand InFlag;
				1479	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
				1480	Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
				1481	InFlag);
				1482	InFlag = Chain.getValue(1);
				1483	}
				1484
				1485	if (isVarArg) {
				1486	// From AMD64 ABI document:
				1487	// For calls that may call functions that use varargs or stdargs
				1488	// (prototype-less calls or calls to functions containing ellipsis (...) in
				1489	// the declaration) %al is used as hidden argument to specify the number
				1490	// of SSE registers used. The contents of %al do not need to match exactly
				1491	// the number of registers, but must be an ubound on the number of SSE
				1492	// registers used and is in the range 0 - 8 inclusive.
				1493
				1494	// Count the number of XMM registers allocated.
				1495	static const unsigned XMMArgRegs[] = {
				1496	X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
				1497	X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
				1498	};
				1499	unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
				1500
				1501	Chain = DAG.getCopyToReg(Chain, X86::AL,
				1502	DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
				1503	InFlag = Chain.getValue(1);
				1504	}
				1505
				1506	// If the callee is a GlobalAddress node (quite common, every direct call is)
				1507	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
				1508	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
				1509	// We should use extra load for direct calls to dllimported functions in
				1510	// non-JIT mode.
				1511	if (getTargetMachine().getCodeModel() != CodeModel::Large
				1512	&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
				1513	getTargetMachine(), true))
				1514	Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
				1515	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
				1516	if (getTargetMachine().getCodeModel() != CodeModel::Large)
				1517	Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
				1518
				1519	// Returns a chain & a flag for retval copy to use.
				1520	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1521	SmallVector<SDOperand, 8> Ops;
				1522	Ops.push_back(Chain);
				1523	Ops.push_back(Callee);
				1524
				1525	// Add argument registers to the end of the list so that they are known live
				1526	// into the call.
				1527	for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
				1528	Ops.push_back(DAG.getRegister(RegsToPass[i].first,
				1529	RegsToPass[i].second.getValueType()));
				1530
				1531	if (InFlag.Val)
				1532	Ops.push_back(InFlag);
				1533
				1534	// FIXME: Do not generate X86ISD::TAILCALL for now.
				1535	Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
				1536	NodeTys, &Ops[0], Ops.size());
				1537	InFlag = Chain.getValue(1);
				1538
				1539	// Returns a flag for retval copy to use.
				1540	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				1541	Ops.clear();
				1542	Ops.push_back(Chain);
				1543	Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
				1544	Ops.push_back(DAG.getConstant(0, getPointerTy()));
				1545	Ops.push_back(InFlag);
				1546	Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
				1547	InFlag = Chain.getValue(1);
				1548
				1549	// Handle result values, copying them out of physregs into vregs that we
				1550	// return.
				1551	return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
				1552	}
				1553
				1554
				1555	//===----------------------------------------------------------------------===//
				1556	// Other Lowering Hooks
				1557	//===----------------------------------------------------------------------===//
				1558
				1559
				1560	SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1561	MachineFunction &MF = DAG.getMachineFunction();
				1562	X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
				1563	int ReturnAddrIndex = FuncInfo->getRAIndex();
				1564
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1565	if (ReturnAddrIndex == 0) {
				1566	// Set up a frame object for the return address.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1567	if (Subtarget->is64Bit())
				1568	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
				1569	else
				1570	ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
Anton Korobeynikov	e844e47	2007-08-15 17:12:32 +0000	[diff] [blame]	1571
				1572	FuncInfo->setRAIndex(ReturnAddrIndex);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1573	}
				1574
				1575	return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
				1576	}
				1577
				1578
				1579
				1580	/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
				1581	/// specific condition code. It returns a false if it cannot do a direct
				1582	/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
				1583	/// needed.
				1584	static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
				1585	unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
				1586	SelectionDAG &DAG) {
				1587	X86CC = X86::COND_INVALID;
				1588	if (!isFP) {
				1589	if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
				1590	if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
				1591	// X > -1 -> X == 0, jump !sign.
				1592	RHS = DAG.getConstant(0, RHS.getValueType());
				1593	X86CC = X86::COND_NS;
				1594	return true;
				1595	} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
				1596	// X < 0 -> X == 0, jump on sign.
				1597	X86CC = X86::COND_S;
				1598	return true;
Dan Gohman	37b3426	2007-09-17 14:49:27 +0000	[diff] [blame]	1599	} else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
				1600	// X < 1 -> X <= 0
				1601	RHS = DAG.getConstant(0, RHS.getValueType());
				1602	X86CC = X86::COND_LE;
				1603	return true;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1604	}
				1605	}
				1606
				1607	switch (SetCCOpcode) {
				1608	default: break;
				1609	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1610	case ISD::SETGT: X86CC = X86::COND_G; break;
				1611	case ISD::SETGE: X86CC = X86::COND_GE; break;
				1612	case ISD::SETLT: X86CC = X86::COND_L; break;
				1613	case ISD::SETLE: X86CC = X86::COND_LE; break;
				1614	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1615	case ISD::SETULT: X86CC = X86::COND_B; break;
				1616	case ISD::SETUGT: X86CC = X86::COND_A; break;
				1617	case ISD::SETULE: X86CC = X86::COND_BE; break;
				1618	case ISD::SETUGE: X86CC = X86::COND_AE; break;
				1619	}
				1620	} else {
				1621	// On a floating point condition, the flags are set as follows:
				1622	// ZF PF CF op
				1623	// 0 \| 0 \| 0 \| X > Y
				1624	// 0 \| 0 \| 1 \| X < Y
				1625	// 1 \| 0 \| 0 \| X == Y
				1626	// 1 \| 1 \| 1 \| unordered
				1627	bool Flip = false;
				1628	switch (SetCCOpcode) {
				1629	default: break;
				1630	case ISD::SETUEQ:
				1631	case ISD::SETEQ: X86CC = X86::COND_E; break;
				1632	case ISD::SETOLT: Flip = true; // Fallthrough
				1633	case ISD::SETOGT:
				1634	case ISD::SETGT: X86CC = X86::COND_A; break;
				1635	case ISD::SETOLE: Flip = true; // Fallthrough
				1636	case ISD::SETOGE:
				1637	case ISD::SETGE: X86CC = X86::COND_AE; break;
				1638	case ISD::SETUGT: Flip = true; // Fallthrough
				1639	case ISD::SETULT:
				1640	case ISD::SETLT: X86CC = X86::COND_B; break;
				1641	case ISD::SETUGE: Flip = true; // Fallthrough
				1642	case ISD::SETULE:
				1643	case ISD::SETLE: X86CC = X86::COND_BE; break;
				1644	case ISD::SETONE:
				1645	case ISD::SETNE: X86CC = X86::COND_NE; break;
				1646	case ISD::SETUO: X86CC = X86::COND_P; break;
				1647	case ISD::SETO: X86CC = X86::COND_NP; break;
				1648	}
				1649	if (Flip)
				1650	std::swap(LHS, RHS);
				1651	}
				1652
				1653	return X86CC != X86::COND_INVALID;
				1654	}
				1655
				1656	/// hasFPCMov - is there a floating point cmov for the specific X86 condition
				1657	/// code. Current x86 isa includes the following FP cmov instructions:
				1658	/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
				1659	static bool hasFPCMov(unsigned X86CC) {
				1660	switch (X86CC) {
				1661	default:
				1662	return false;
				1663	case X86::COND_B:
				1664	case X86::COND_BE:
				1665	case X86::COND_E:
				1666	case X86::COND_P:
				1667	case X86::COND_A:
				1668	case X86::COND_AE:
				1669	case X86::COND_NE:
				1670	case X86::COND_NP:
				1671	return true;
				1672	}
				1673	}
				1674
				1675	/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
				1676	/// true if Op is undef or if its value falls within the specified range (L, H].
				1677	static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
				1678	if (Op.getOpcode() == ISD::UNDEF)
				1679	return true;
				1680
				1681	unsigned Val = cast<ConstantSDNode>(Op)->getValue();
				1682	return (Val >= Low && Val < Hi);
				1683	}
				1684
				1685	/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
				1686	/// true if Op is undef or if its value equal to the specified value.
				1687	static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
				1688	if (Op.getOpcode() == ISD::UNDEF)
				1689	return true;
				1690	return cast<ConstantSDNode>(Op)->getValue() == Val;
				1691	}
				1692
				1693	/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
				1694	/// specifies a shuffle of elements that is suitable for input to PSHUFD.
				1695	bool X86::isPSHUFDMask(SDNode *N) {
				1696	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1697
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1698	if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1699	return false;
				1700
				1701	// Check if the value doesn't reference the second vector.
				1702	for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
				1703	SDOperand Arg = N->getOperand(i);
				1704	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1705	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	1706	if (cast<ConstantSDNode>(Arg)->getValue() >= e)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1707	return false;
				1708	}
				1709
				1710	return true;
				1711	}
				1712
				1713	/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
				1714	/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
				1715	bool X86::isPSHUFHWMask(SDNode *N) {
				1716	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1717
				1718	if (N->getNumOperands() != 8)
				1719	return false;
				1720
				1721	// Lower quadword copied in order.
				1722	for (unsigned i = 0; i != 4; ++i) {
				1723	SDOperand Arg = N->getOperand(i);
				1724	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1725	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1726	if (cast<ConstantSDNode>(Arg)->getValue() != i)
				1727	return false;
				1728	}
				1729
				1730	// Upper quadword shuffled.
				1731	for (unsigned i = 4; i != 8; ++i) {
				1732	SDOperand Arg = N->getOperand(i);
				1733	if (Arg.getOpcode() == ISD::UNDEF) continue;
				1734	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				1735	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				1736	if (Val < 4 \|\| Val > 7)
				1737	return false;
				1738	}
				1739
				1740	return true;
				1741	}
				1742
				1743	/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
				1744	/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
				1745	bool X86::isPSHUFLWMask(SDNode *N) {
				1746	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1747
				1748	if (N->getNumOperands() != 8)
				1749	return false;
				1750
				1751	// Upper quadword copied in order.
				1752	for (unsigned i = 4; i != 8; ++i)
				1753	if (!isUndefOrEqual(N->getOperand(i), i))
				1754	return false;
				1755
				1756	// Lower quadword shuffled.
				1757	for (unsigned i = 0; i != 4; ++i)
				1758	if (!isUndefOrInRange(N->getOperand(i), 0, 4))
				1759	return false;
				1760
				1761	return true;
				1762	}
				1763
				1764	/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
				1765	/// specifies a shuffle of elements that is suitable for input to SHUFP*.
				1766	static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
				1767	if (NumElems != 2 && NumElems != 4) return false;
				1768
				1769	unsigned Half = NumElems / 2;
				1770	for (unsigned i = 0; i < Half; ++i)
				1771	if (!isUndefOrInRange(Elems[i], 0, NumElems))
				1772	return false;
				1773	for (unsigned i = Half; i < NumElems; ++i)
				1774	if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
				1775	return false;
				1776
				1777	return true;
				1778	}
				1779
				1780	bool X86::isSHUFPMask(SDNode *N) {
				1781	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1782	return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
				1783	}
				1784
				1785	/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
				1786	/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
				1787	/// half elements to come from vector 1 (which would equal the dest.) and
				1788	/// the upper half to come from vector 2.
				1789	static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
				1790	if (NumOps != 2 && NumOps != 4) return false;
				1791
				1792	unsigned Half = NumOps / 2;
				1793	for (unsigned i = 0; i < Half; ++i)
				1794	if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
				1795	return false;
				1796	for (unsigned i = Half; i < NumOps; ++i)
				1797	if (!isUndefOrInRange(Ops[i], 0, NumOps))
				1798	return false;
				1799	return true;
				1800	}
				1801
				1802	static bool isCommutedSHUFP(SDNode *N) {
				1803	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1804	return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
				1805	}
				1806
				1807	/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
				1808	/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
				1809	bool X86::isMOVHLPSMask(SDNode *N) {
				1810	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1811
				1812	if (N->getNumOperands() != 4)
				1813	return false;
				1814
				1815	// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
				1816	return isUndefOrEqual(N->getOperand(0), 6) &&
				1817	isUndefOrEqual(N->getOperand(1), 7) &&
				1818	isUndefOrEqual(N->getOperand(2), 2) &&
				1819	isUndefOrEqual(N->getOperand(3), 3);
				1820	}
				1821
				1822	/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
				1823	/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
				1824	/// <2, 3, 2, 3>
				1825	bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
				1826	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1827
				1828	if (N->getNumOperands() != 4)
				1829	return false;
				1830
				1831	// Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
				1832	return isUndefOrEqual(N->getOperand(0), 2) &&
				1833	isUndefOrEqual(N->getOperand(1), 3) &&
				1834	isUndefOrEqual(N->getOperand(2), 2) &&
				1835	isUndefOrEqual(N->getOperand(3), 3);
				1836	}
				1837
				1838	/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
				1839	/// specifies a shuffle of elements that is suitable for input to MOVLP{S\|D}.
				1840	bool X86::isMOVLPMask(SDNode *N) {
				1841	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1842
				1843	unsigned NumElems = N->getNumOperands();
				1844	if (NumElems != 2 && NumElems != 4)
				1845	return false;
				1846
				1847	for (unsigned i = 0; i < NumElems/2; ++i)
				1848	if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
				1849	return false;
				1850
				1851	for (unsigned i = NumElems/2; i < NumElems; ++i)
				1852	if (!isUndefOrEqual(N->getOperand(i), i))
				1853	return false;
				1854
				1855	return true;
				1856	}
				1857
				1858	/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
				1859	/// specifies a shuffle of elements that is suitable for input to MOVHP{S\|D}
				1860	/// and MOVLHPS.
				1861	bool X86::isMOVHPMask(SDNode *N) {
				1862	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1863
				1864	unsigned NumElems = N->getNumOperands();
				1865	if (NumElems != 2 && NumElems != 4)
				1866	return false;
				1867
				1868	for (unsigned i = 0; i < NumElems/2; ++i)
				1869	if (!isUndefOrEqual(N->getOperand(i), i))
				1870	return false;
				1871
				1872	for (unsigned i = 0; i < NumElems/2; ++i) {
				1873	SDOperand Arg = N->getOperand(i + NumElems/2);
				1874	if (!isUndefOrEqual(Arg, i + NumElems))
				1875	return false;
				1876	}
				1877
				1878	return true;
				1879	}
				1880
				1881	/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
				1882	/// specifies a shuffle of elements that is suitable for input to UNPCKL.
				1883	bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
				1884	bool V2IsSplat = false) {
				1885	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1886	return false;
				1887
				1888	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1889	SDOperand BitI = Elts[i];
				1890	SDOperand BitI1 = Elts[i+1];
				1891	if (!isUndefOrEqual(BitI, j))
				1892	return false;
				1893	if (V2IsSplat) {
				1894	if (isUndefOrEqual(BitI1, NumElts))
				1895	return false;
				1896	} else {
				1897	if (!isUndefOrEqual(BitI1, j + NumElts))
				1898	return false;
				1899	}
				1900	}
				1901
				1902	return true;
				1903	}
				1904
				1905	bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
				1906	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1907	return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1908	}
				1909
				1910	/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
				1911	/// specifies a shuffle of elements that is suitable for input to UNPCKH.
				1912	bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
				1913	bool V2IsSplat = false) {
				1914	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1915	return false;
				1916
				1917	for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
				1918	SDOperand BitI = Elts[i];
				1919	SDOperand BitI1 = Elts[i+1];
				1920	if (!isUndefOrEqual(BitI, j + NumElts/2))
				1921	return false;
				1922	if (V2IsSplat) {
				1923	if (isUndefOrEqual(BitI1, NumElts))
				1924	return false;
				1925	} else {
				1926	if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
				1927	return false;
				1928	}
				1929	}
				1930
				1931	return true;
				1932	}
				1933
				1934	bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
				1935	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1936	return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
				1937	}
				1938
				1939	/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
				1940	/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
				1941	/// <0, 0, 1, 1>
				1942	bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
				1943	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1944
				1945	unsigned NumElems = N->getNumOperands();
				1946	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1947	return false;
				1948
				1949	for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
				1950	SDOperand BitI = N->getOperand(i);
				1951	SDOperand BitI1 = N->getOperand(i+1);
				1952
				1953	if (!isUndefOrEqual(BitI, j))
				1954	return false;
				1955	if (!isUndefOrEqual(BitI1, j))
				1956	return false;
				1957	}
				1958
				1959	return true;
				1960	}
				1961
				1962	/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
				1963	/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
				1964	/// <2, 2, 3, 3>
				1965	bool X86::isUNPCKH_v_undef_Mask(SDNode *N) {
				1966	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				1967
				1968	unsigned NumElems = N->getNumOperands();
				1969	if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
				1970	return false;
				1971
				1972	for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
				1973	SDOperand BitI = N->getOperand(i);
				1974	SDOperand BitI1 = N->getOperand(i + 1);
				1975
				1976	if (!isUndefOrEqual(BitI, j))
				1977	return false;
				1978	if (!isUndefOrEqual(BitI1, j))
				1979	return false;
				1980	}
				1981
				1982	return true;
				1983	}
				1984
				1985	/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
				1986	/// specifies a shuffle of elements that is suitable for input to MOVSS,
				1987	/// MOVSD, and MOVD, i.e. setting the lowest element.
				1988	static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
				1989	if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
				1990	return false;
				1991
				1992	if (!isUndefOrEqual(Elts[0], NumElts))
				1993	return false;
				1994
				1995	for (unsigned i = 1; i < NumElts; ++i) {
				1996	if (!isUndefOrEqual(Elts[i], i))
				1997	return false;
				1998	}
				1999
				2000	return true;
				2001	}
				2002
				2003	bool X86::isMOVLMask(SDNode *N) {
				2004	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2005	return ::isMOVLMask(N->op_begin(), N->getNumOperands());
				2006	}
				2007
				2008	/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
				2009	/// of what x86 movss want. X86 movs requires the lowest element to be lowest
				2010	/// element of vector 2 and the other elements to come from vector 1 in order.
				2011	static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
				2012	bool V2IsSplat = false,
				2013	bool V2IsUndef = false) {
				2014	if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
				2015	return false;
				2016
				2017	if (!isUndefOrEqual(Ops[0], 0))
				2018	return false;
				2019
				2020	for (unsigned i = 1; i < NumOps; ++i) {
				2021	SDOperand Arg = Ops[i];
				2022	if (!(isUndefOrEqual(Arg, i+NumOps) \|\|
				2023	(V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) \|\|
				2024	(V2IsSplat && isUndefOrEqual(Arg, NumOps))))
				2025	return false;
				2026	}
				2027
				2028	return true;
				2029	}
				2030
				2031	static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
				2032	bool V2IsUndef = false) {
				2033	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2034	return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
				2035	V2IsSplat, V2IsUndef);
				2036	}
				2037
				2038	/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				2039	/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
				2040	bool X86::isMOVSHDUPMask(SDNode *N) {
				2041	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2042
				2043	if (N->getNumOperands() != 4)
				2044	return false;
				2045
				2046	// Expect 1, 1, 3, 3
				2047	for (unsigned i = 0; i < 2; ++i) {
				2048	SDOperand Arg = N->getOperand(i);
				2049	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2050	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2051	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2052	if (Val != 1) return false;
				2053	}
				2054
				2055	bool HasHi = false;
				2056	for (unsigned i = 2; i < 4; ++i) {
				2057	SDOperand Arg = N->getOperand(i);
				2058	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2059	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2060	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2061	if (Val != 3) return false;
				2062	HasHi = true;
				2063	}
				2064
				2065	// Don't use movshdup if it can be done with a shufps.
				2066	return HasHi;
				2067	}
				2068
				2069	/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
				2070	/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
				2071	bool X86::isMOVSLDUPMask(SDNode *N) {
				2072	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2073
				2074	if (N->getNumOperands() != 4)
				2075	return false;
				2076
				2077	// Expect 0, 0, 2, 2
				2078	for (unsigned i = 0; i < 2; ++i) {
				2079	SDOperand Arg = N->getOperand(i);
				2080	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2081	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2082	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2083	if (Val != 0) return false;
				2084	}
				2085
				2086	bool HasHi = false;
				2087	for (unsigned i = 2; i < 4; ++i) {
				2088	SDOperand Arg = N->getOperand(i);
				2089	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2090	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2091	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2092	if (Val != 2) return false;
				2093	HasHi = true;
				2094	}
				2095
				2096	// Don't use movshdup if it can be done with a shufps.
				2097	return HasHi;
				2098	}
				2099
				2100	/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand
				2101	/// specifies a identity operation on the LHS or RHS.
				2102	static bool isIdentityMask(SDNode *N, bool RHS = false) {
				2103	unsigned NumElems = N->getNumOperands();
				2104	for (unsigned i = 0; i < NumElems; ++i)
				2105	if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0)))
				2106	return false;
				2107	return true;
				2108	}
				2109
				2110	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2111	/// a splat of a single element.
				2112	static bool isSplatMask(SDNode *N) {
				2113	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2114
				2115	// This is a splat operation if each element of the permute is the same, and
				2116	// if the value doesn't reference the second vector.
				2117	unsigned NumElems = N->getNumOperands();
				2118	SDOperand ElementBase;
				2119	unsigned i = 0;
				2120	for (; i != NumElems; ++i) {
				2121	SDOperand Elt = N->getOperand(i);
				2122	if (isa<ConstantSDNode>(Elt)) {
				2123	ElementBase = Elt;
				2124	break;
				2125	}
				2126	}
				2127
				2128	if (!ElementBase.Val)
				2129	return false;
				2130
				2131	for (; i != NumElems; ++i) {
				2132	SDOperand Arg = N->getOperand(i);
				2133	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2134	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2135	if (Arg != ElementBase) return false;
				2136	}
				2137
				2138	// Make sure it is a splat of the first vector operand.
				2139	return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
				2140	}
				2141
				2142	/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
				2143	/// a splat of a single element and it's a 2 or 4 element mask.
				2144	bool X86::isSplatMask(SDNode *N) {
				2145	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2146
				2147	// We can only splat 64-bit, and 32-bit quantities with a single instruction.
				2148	if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
				2149	return false;
				2150	return ::isSplatMask(N);
				2151	}
				2152
				2153	/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
				2154	/// specifies a splat of zero element.
				2155	bool X86::isSplatLoMask(SDNode *N) {
				2156	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2157
				2158	for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
				2159	if (!isUndefOrEqual(N->getOperand(i), 0))
				2160	return false;
				2161	return true;
				2162	}
				2163
				2164	/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
				2165	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
				2166	/// instructions.
				2167	unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
				2168	unsigned NumOperands = N->getNumOperands();
				2169	unsigned Shift = (NumOperands == 4) ? 2 : 1;
				2170	unsigned Mask = 0;
				2171	for (unsigned i = 0; i < NumOperands; ++i) {
				2172	unsigned Val = 0;
				2173	SDOperand Arg = N->getOperand(NumOperands-i-1);
				2174	if (Arg.getOpcode() != ISD::UNDEF)
				2175	Val = cast<ConstantSDNode>(Arg)->getValue();
				2176	if (Val >= NumOperands) Val -= NumOperands;
				2177	Mask \|= Val;
				2178	if (i != NumOperands - 1)
				2179	Mask <<= Shift;
				2180	}
				2181
				2182	return Mask;
				2183	}
				2184
				2185	/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
				2186	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
				2187	/// instructions.
				2188	unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
				2189	unsigned Mask = 0;
				2190	// 8 nodes, but we only care about the last 4.
				2191	for (unsigned i = 7; i >= 4; --i) {
				2192	unsigned Val = 0;
				2193	SDOperand Arg = N->getOperand(i);
				2194	if (Arg.getOpcode() != ISD::UNDEF)
				2195	Val = cast<ConstantSDNode>(Arg)->getValue();
				2196	Mask \|= (Val - 4);
				2197	if (i != 4)
				2198	Mask <<= 2;
				2199	}
				2200
				2201	return Mask;
				2202	}
				2203
				2204	/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
				2205	/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
				2206	/// instructions.
				2207	unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
				2208	unsigned Mask = 0;
				2209	// 8 nodes, but we only care about the first 4.
				2210	for (int i = 3; i >= 0; --i) {
				2211	unsigned Val = 0;
				2212	SDOperand Arg = N->getOperand(i);
				2213	if (Arg.getOpcode() != ISD::UNDEF)
				2214	Val = cast<ConstantSDNode>(Arg)->getValue();
				2215	Mask \|= Val;
				2216	if (i != 0)
				2217	Mask <<= 2;
				2218	}
				2219
				2220	return Mask;
				2221	}
				2222
				2223	/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
				2224	/// specifies a 8 element shuffle that can be broken into a pair of
				2225	/// PSHUFHW and PSHUFLW.
				2226	static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
				2227	assert(N->getOpcode() == ISD::BUILD_VECTOR);
				2228
				2229	if (N->getNumOperands() != 8)
				2230	return false;
				2231
				2232	// Lower quadword shuffled.
				2233	for (unsigned i = 0; i != 4; ++i) {
				2234	SDOperand Arg = N->getOperand(i);
				2235	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2236	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2237	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2238	if (Val > 4)
				2239	return false;
				2240	}
				2241
				2242	// Upper quadword shuffled.
				2243	for (unsigned i = 4; i != 8; ++i) {
				2244	SDOperand Arg = N->getOperand(i);
				2245	if (Arg.getOpcode() == ISD::UNDEF) continue;
				2246	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2247	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2248	if (Val < 4 \|\| Val > 7)
				2249	return false;
				2250	}
				2251
				2252	return true;
				2253	}
				2254
				2255	/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
				2256	/// values in ther permute mask.
				2257	static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
				2258	SDOperand &V2, SDOperand &Mask,
				2259	SelectionDAG &DAG) {
				2260	MVT::ValueType VT = Op.getValueType();
				2261	MVT::ValueType MaskVT = Mask.getValueType();
				2262	MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
				2263	unsigned NumElems = Mask.getNumOperands();
				2264	SmallVector<SDOperand, 8> MaskVec;
				2265
				2266	for (unsigned i = 0; i != NumElems; ++i) {
				2267	SDOperand Arg = Mask.getOperand(i);
				2268	if (Arg.getOpcode() == ISD::UNDEF) {
				2269	MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
				2270	continue;
				2271	}
				2272	assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
				2273	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2274	if (Val < NumElems)
				2275	MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
				2276	else
				2277	MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
				2278	}
				2279
				2280	std::swap(V1, V2);
				2281	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2282	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2283	}
				2284
				2285	/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
				2286	/// match movhlps. The lower half elements should come from upper half of
				2287	/// V1 (and in order), and the upper half elements should come from the upper
				2288	/// half of V2 (and in order).
				2289	static bool ShouldXformToMOVHLPS(SDNode *Mask) {
				2290	unsigned NumElems = Mask->getNumOperands();
				2291	if (NumElems != 4)
				2292	return false;
				2293	for (unsigned i = 0, e = 2; i != e; ++i)
				2294	if (!isUndefOrEqual(Mask->getOperand(i), i+2))
				2295	return false;
				2296	for (unsigned i = 2; i != 4; ++i)
				2297	if (!isUndefOrEqual(Mask->getOperand(i), i+4))
				2298	return false;
				2299	return true;
				2300	}
				2301
				2302	/// isScalarLoadToVector - Returns true if the node is a scalar load that
				2303	/// is promoted to a vector.
				2304	static inline bool isScalarLoadToVector(SDNode *N) {
				2305	if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
				2306	N = N->getOperand(0).Val;
				2307	return ISD::isNON_EXTLoad(N);
				2308	}
				2309	return false;
				2310	}
				2311
				2312	/// ShouldXformToMOVLP{S\|D} - Return true if the node should be transformed to
				2313	/// match movlp{s\|d}. The lower half elements should come from lower half of
				2314	/// V1 (and in order), and the upper half elements should come from the upper
				2315	/// half of V2 (and in order). And since V1 will become the source of the
				2316	/// MOVLP, it must be either a vector load or a scalar load to vector.
				2317	static bool ShouldXformToMOVLP(SDNode V1, SDNode V2, SDNode *Mask) {
				2318	if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
				2319	return false;
				2320	// Is V2 is a vector load, don't do this transformation. We will try to use
				2321	// load folding shufps op.
				2322	if (ISD::isNON_EXTLoad(V2))
				2323	return false;
				2324
				2325	unsigned NumElems = Mask->getNumOperands();
				2326	if (NumElems != 2 && NumElems != 4)
				2327	return false;
				2328	for (unsigned i = 0, e = NumElems/2; i != e; ++i)
				2329	if (!isUndefOrEqual(Mask->getOperand(i), i))
				2330	return false;
				2331	for (unsigned i = NumElems/2; i != NumElems; ++i)
				2332	if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
				2333	return false;
				2334	return true;
				2335	}
				2336
				2337	/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
				2338	/// all the same.
				2339	static bool isSplatVector(SDNode *N) {
				2340	if (N->getOpcode() != ISD::BUILD_VECTOR)
				2341	return false;
				2342
				2343	SDOperand SplatValue = N->getOperand(0);
				2344	for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
				2345	if (N->getOperand(i) != SplatValue)
				2346	return false;
				2347	return true;
				2348	}
				2349
				2350	/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2351	/// to an undef.
				2352	static bool isUndefShuffle(SDNode *N) {
				2353	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2354	return false;
				2355
				2356	SDOperand V1 = N->getOperand(0);
				2357	SDOperand V2 = N->getOperand(1);
				2358	SDOperand Mask = N->getOperand(2);
				2359	unsigned NumElems = Mask.getNumOperands();
				2360	for (unsigned i = 0; i != NumElems; ++i) {
				2361	SDOperand Arg = Mask.getOperand(i);
				2362	if (Arg.getOpcode() != ISD::UNDEF) {
				2363	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2364	if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
				2365	return false;
				2366	else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
				2367	return false;
				2368	}
				2369	}
				2370	return true;
				2371	}
				2372
				2373	/// isZeroNode - Returns true if Elt is a constant zero or a floating point
				2374	/// constant +0.0.
				2375	static inline bool isZeroNode(SDOperand Elt) {
				2376	return ((isa<ConstantSDNode>(Elt) &&
				2377	cast<ConstantSDNode>(Elt)->getValue() == 0) \|\|
				2378	(isa<ConstantFPSDNode>(Elt) &&
Dale Johannesen	df8a831	2007-08-31 04:03:46 +0000	[diff] [blame]	2379	cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2380	}
				2381
				2382	/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
				2383	/// to an zero vector.
				2384	static bool isZeroShuffle(SDNode *N) {
				2385	if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
				2386	return false;
				2387
				2388	SDOperand V1 = N->getOperand(0);
				2389	SDOperand V2 = N->getOperand(1);
				2390	SDOperand Mask = N->getOperand(2);
				2391	unsigned NumElems = Mask.getNumOperands();
				2392	for (unsigned i = 0; i != NumElems; ++i) {
				2393	SDOperand Arg = Mask.getOperand(i);
				2394	if (Arg.getOpcode() != ISD::UNDEF) {
				2395	unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
				2396	if (Idx < NumElems) {
				2397	unsigned Opc = V1.Val->getOpcode();
				2398	if (Opc == ISD::UNDEF)
				2399	continue;
				2400	if (Opc != ISD::BUILD_VECTOR \|\|
				2401	!isZeroNode(V1.Val->getOperand(Idx)))
				2402	return false;
				2403	} else if (Idx >= NumElems) {
				2404	unsigned Opc = V2.Val->getOpcode();
				2405	if (Opc == ISD::UNDEF)
				2406	continue;
				2407	if (Opc != ISD::BUILD_VECTOR \|\|
				2408	!isZeroNode(V2.Val->getOperand(Idx - NumElems)))
				2409	return false;
				2410	}
				2411	}
				2412	}
				2413	return true;
				2414	}
				2415
				2416	/// getZeroVector - Returns a vector of specified type with all zero elements.
				2417	///
				2418	static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
				2419	assert(MVT::isVector(VT) && "Expected a vector type");
				2420	unsigned NumElems = MVT::getVectorNumElements(VT);
				2421	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2422	bool isFP = MVT::isFloatingPoint(EVT);
				2423	SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
				2424	SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
				2425	return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
				2426	}
				2427
				2428	/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
				2429	/// that point to V2 points to its first element.
				2430	static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
				2431	assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
				2432
				2433	bool Changed = false;
				2434	SmallVector<SDOperand, 8> MaskVec;
				2435	unsigned NumElems = Mask.getNumOperands();
				2436	for (unsigned i = 0; i != NumElems; ++i) {
				2437	SDOperand Arg = Mask.getOperand(i);
				2438	if (Arg.getOpcode() != ISD::UNDEF) {
				2439	unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
				2440	if (Val > NumElems) {
				2441	Arg = DAG.getConstant(NumElems, Arg.getValueType());
				2442	Changed = true;
				2443	}
				2444	}
				2445	MaskVec.push_back(Arg);
				2446	}
				2447
				2448	if (Changed)
				2449	Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
				2450	&MaskVec[0], MaskVec.size());
				2451	return Mask;
				2452	}
				2453
				2454	/// getMOVLMask - Returns a vector_shuffle mask for an movs{s\|d}, movd
				2455	/// operation of specified width.
				2456	static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
				2457	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2458	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2459
				2460	SmallVector<SDOperand, 8> MaskVec;
				2461	MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
				2462	for (unsigned i = 1; i != NumElems; ++i)
				2463	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2464	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2465	}
				2466
				2467	/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
				2468	/// of specified width.
				2469	static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
				2470	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2471	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2472	SmallVector<SDOperand, 8> MaskVec;
				2473	for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
				2474	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2475	MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
				2476	}
				2477	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2478	}
				2479
				2480	/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
				2481	/// of specified width.
				2482	static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
				2483	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2484	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2485	unsigned Half = NumElems/2;
				2486	SmallVector<SDOperand, 8> MaskVec;
				2487	for (unsigned i = 0; i != Half; ++i) {
				2488	MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
				2489	MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
				2490	}
				2491	return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
				2492	}
				2493
				2494	/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
				2495	///
				2496	static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
				2497	SDOperand V1 = Op.getOperand(0);
				2498	SDOperand Mask = Op.getOperand(2);
				2499	MVT::ValueType VT = Op.getValueType();
				2500	unsigned NumElems = Mask.getNumOperands();
				2501	Mask = getUnpacklMask(NumElems, DAG);
				2502	while (NumElems != 4) {
				2503	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
				2504	NumElems >>= 1;
				2505	}
				2506	V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
				2507
				2508	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				2509	Mask = getZeroVector(MaskVT, DAG);
				2510	SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
				2511	DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
				2512	return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
				2513	}
				2514
				2515	/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
				2516	/// vector of zero or undef vector.
				2517	static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
				2518	unsigned NumElems, unsigned Idx,
				2519	bool isZero, SelectionDAG &DAG) {
				2520	SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
				2521	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2522	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2523	SDOperand Zero = DAG.getConstant(0, EVT);
				2524	SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
				2525	MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
				2526	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2527	&MaskVec[0], MaskVec.size());
				2528	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2529	}
				2530
				2531	/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
				2532	///
				2533	static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
				2534	unsigned NumNonZero, unsigned NumZero,
				2535	SelectionDAG &DAG, TargetLowering &TLI) {
				2536	if (NumNonZero > 8)
				2537	return SDOperand();
				2538
				2539	SDOperand V(0, 0);
				2540	bool First = true;
				2541	for (unsigned i = 0; i < 16; ++i) {
				2542	bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
				2543	if (ThisIsNonZero && First) {
				2544	if (NumZero)
				2545	V = getZeroVector(MVT::v8i16, DAG);
				2546	else
				2547	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2548	First = false;
				2549	}
				2550
				2551	if ((i & 1) != 0) {
				2552	SDOperand ThisElt(0, 0), LastElt(0, 0);
				2553	bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
				2554	if (LastIsNonZero) {
				2555	LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
				2556	}
				2557	if (ThisIsNonZero) {
				2558	ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
				2559	ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
				2560	ThisElt, DAG.getConstant(8, MVT::i8));
				2561	if (LastIsNonZero)
				2562	ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
				2563	} else
				2564	ThisElt = LastElt;
				2565
				2566	if (ThisElt.Val)
				2567	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
				2568	DAG.getConstant(i/2, TLI.getPointerTy()));
				2569	}
				2570	}
				2571
				2572	return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
				2573	}
				2574
				2575	/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
				2576	///
				2577	static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
				2578	unsigned NumNonZero, unsigned NumZero,
				2579	SelectionDAG &DAG, TargetLowering &TLI) {
				2580	if (NumNonZero > 4)
				2581	return SDOperand();
				2582
				2583	SDOperand V(0, 0);
				2584	bool First = true;
				2585	for (unsigned i = 0; i < 8; ++i) {
				2586	bool isNonZero = (NonZeros & (1 << i)) != 0;
				2587	if (isNonZero) {
				2588	if (First) {
				2589	if (NumZero)
				2590	V = getZeroVector(MVT::v8i16, DAG);
				2591	else
				2592	V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
				2593	First = false;
				2594	}
				2595	V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
				2596	DAG.getConstant(i, TLI.getPointerTy()));
				2597	}
				2598	}
				2599
				2600	return V;
				2601	}
				2602
				2603	SDOperand
				2604	X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				2605	// All zero's are handled with pxor.
				2606	if (ISD::isBuildVectorAllZeros(Op.Val))
				2607	return Op;
				2608
				2609	// All one's are handled with pcmpeqd.
				2610	if (ISD::isBuildVectorAllOnes(Op.Val))
				2611	return Op;
				2612
				2613	MVT::ValueType VT = Op.getValueType();
				2614	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				2615	unsigned EVTBits = MVT::getSizeInBits(EVT);
				2616
				2617	unsigned NumElems = Op.getNumOperands();
				2618	unsigned NumZero = 0;
				2619	unsigned NumNonZero = 0;
				2620	unsigned NonZeros = 0;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2621	unsigned NumNonZeroImms = 0;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2622	std::set<SDOperand> Values;
				2623	for (unsigned i = 0; i < NumElems; ++i) {
				2624	SDOperand Elt = Op.getOperand(i);
				2625	if (Elt.getOpcode() != ISD::UNDEF) {
				2626	Values.insert(Elt);
				2627	if (isZeroNode(Elt))
				2628	NumZero++;
				2629	else {
				2630	NonZeros \|= (1 << i);
				2631	NumNonZero++;
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2632	if (Elt.getOpcode() == ISD::Constant \|\|
				2633	Elt.getOpcode() == ISD::ConstantFP)
				2634	NumNonZeroImms++;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2635	}
				2636	}
				2637	}
				2638
				2639	if (NumNonZero == 0) {
				2640	if (NumZero == 0)
				2641	// All undef vector. Return an UNDEF.
				2642	return DAG.getNode(ISD::UNDEF, VT);
				2643	else
				2644	// A mix of zero and undef. Return a zero vector.
				2645	return getZeroVector(VT, DAG);
				2646	}
				2647
				2648	// Splat is obviously ok. Let legalizer expand it to a shuffle.
				2649	if (Values.size() == 1)
				2650	return SDOperand();
				2651
				2652	// Special case for single non-zero element.
				2653	if (NumNonZero == 1) {
				2654	unsigned Idx = CountTrailingZeros_32(NonZeros);
				2655	SDOperand Item = Op.getOperand(Idx);
				2656	Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
				2657	if (Idx == 0)
				2658	// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
				2659	return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
				2660	NumZero > 0, DAG);
				2661
				2662	if (EVTBits == 32) {
				2663	// Turn it into a shuffle of zero and zero-extended scalar to vector.
				2664	Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
				2665	DAG);
				2666	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2667	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2668	SmallVector<SDOperand, 8> MaskVec;
				2669	for (unsigned i = 0; i < NumElems; i++)
				2670	MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
				2671	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2672	&MaskVec[0], MaskVec.size());
				2673	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
				2674	DAG.getNode(ISD::UNDEF, VT), Mask);
				2675	}
				2676	}
				2677
Dan Gohman	2146324	2007-07-24 22:55:08 +0000	[diff] [blame]	2678	// A vector full of immediates; various special cases are already
				2679	// handled, so this is best done with a single constant-pool load.
				2680	if (NumNonZero == NumNonZeroImms)
				2681	return SDOperand();
				2682
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2683	// Let legalizer expand 2-wide build_vectors.
				2684	if (EVTBits == 64)
				2685	return SDOperand();
				2686
				2687	// If element VT is < 32 bits, convert it to inserts into a zero vector.
				2688	if (EVTBits == 8 && NumElems == 16) {
				2689	SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
				2690	*this);
				2691	if (V.Val) return V;
				2692	}
				2693
				2694	if (EVTBits == 16 && NumElems == 8) {
				2695	SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
				2696	*this);
				2697	if (V.Val) return V;
				2698	}
				2699
				2700	// If element VT is == 32 bits, turn it into a number of shuffles.
				2701	SmallVector<SDOperand, 8> V;
				2702	V.resize(NumElems);
				2703	if (NumElems == 4 && NumZero > 0) {
				2704	for (unsigned i = 0; i < 4; ++i) {
				2705	bool isZero = !(NonZeros & (1 << i));
				2706	if (isZero)
				2707	V[i] = getZeroVector(VT, DAG);
				2708	else
				2709	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2710	}
				2711
				2712	for (unsigned i = 0; i < 2; ++i) {
				2713	switch ((NonZeros & (0x3 << i2)) >> (i2)) {
				2714	default: break;
				2715	case 0:
				2716	V[i] = V[i*2]; // Must be a zero vector.
				2717	break;
				2718	case 1:
				2719	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2+1], V[i2],
				2720	getMOVLMask(NumElems, DAG));
				2721	break;
				2722	case 2:
				2723	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2724	getMOVLMask(NumElems, DAG));
				2725	break;
				2726	case 3:
				2727	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i2], V[i2+1],
				2728	getUnpacklMask(NumElems, DAG));
				2729	break;
				2730	}
				2731	}
				2732
				2733	// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
				2734	// clears the upper bits.
				2735	// FIXME: we can do the same for v4f32 case when we know both parts of
				2736	// the lower half come from scalar_to_vector (loadf32). We should do
				2737	// that in post legalizer dag combiner with target specific hooks.
				2738	if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
				2739	return V[0];
				2740	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2741	MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
				2742	SmallVector<SDOperand, 8> MaskVec;
				2743	bool Reverse = (NonZeros & 0x3) == 2;
				2744	for (unsigned i = 0; i < 2; ++i)
				2745	if (Reverse)
				2746	MaskVec.push_back(DAG.getConstant(1-i, EVT));
				2747	else
				2748	MaskVec.push_back(DAG.getConstant(i, EVT));
				2749	Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
				2750	for (unsigned i = 0; i < 2; ++i)
				2751	if (Reverse)
				2752	MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
				2753	else
				2754	MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
				2755	SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2756	&MaskVec[0], MaskVec.size());
				2757	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
				2758	}
				2759
				2760	if (Values.size() > 2) {
				2761	// Expand into a number of unpckl*.
				2762	// e.g. for v4f32
				2763	// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
				2764	// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
				2765	// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
				2766	SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
				2767	for (unsigned i = 0; i < NumElems; ++i)
				2768	V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
				2769	NumElems >>= 1;
				2770	while (NumElems != 0) {
				2771	for (unsigned i = 0; i < NumElems; ++i)
				2772	V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
				2773	UnpckMask);
				2774	NumElems >>= 1;
				2775	}
				2776	return V[0];
				2777	}
				2778
				2779	return SDOperand();
				2780	}
				2781
				2782	SDOperand
				2783	X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
				2784	SDOperand V1 = Op.getOperand(0);
				2785	SDOperand V2 = Op.getOperand(1);
				2786	SDOperand PermMask = Op.getOperand(2);
				2787	MVT::ValueType VT = Op.getValueType();
				2788	unsigned NumElems = PermMask.getNumOperands();
				2789	bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
				2790	bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
				2791	bool V1IsSplat = false;
				2792	bool V2IsSplat = false;
				2793
				2794	if (isUndefShuffle(Op.Val))
				2795	return DAG.getNode(ISD::UNDEF, VT);
				2796
				2797	if (isZeroShuffle(Op.Val))
				2798	return getZeroVector(VT, DAG);
				2799
				2800	if (isIdentityMask(PermMask.Val))
				2801	return V1;
				2802	else if (isIdentityMask(PermMask.Val, true))
				2803	return V2;
				2804
				2805	if (isSplatMask(PermMask.Val)) {
				2806	if (NumElems <= 4) return Op;
				2807	// Promote it to a v4i32 splat.
				2808	return PromoteSplat(Op, DAG);
				2809	}
				2810
				2811	if (X86::isMOVLMask(PermMask.Val))
				2812	return (V1IsUndef) ? V2 : Op;
				2813
				2814	if (X86::isMOVSHDUPMask(PermMask.Val) \|\|
				2815	X86::isMOVSLDUPMask(PermMask.Val) \|\|
				2816	X86::isMOVHLPSMask(PermMask.Val) \|\|
				2817	X86::isMOVHPMask(PermMask.Val) \|\|
				2818	X86::isMOVLPMask(PermMask.Val))
				2819	return Op;
				2820
				2821	if (ShouldXformToMOVHLPS(PermMask.Val) \|\|
				2822	ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
				2823	return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2824
				2825	bool Commuted = false;
				2826	V1IsSplat = isSplatVector(V1.Val);
				2827	V2IsSplat = isSplatVector(V2.Val);
				2828	if ((V1IsSplat \|\| V1IsUndef) && !(V2IsSplat \|\| V2IsUndef)) {
				2829	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2830	std::swap(V1IsSplat, V2IsSplat);
				2831	std::swap(V1IsUndef, V2IsUndef);
				2832	Commuted = true;
				2833	}
				2834
				2835	if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
				2836	if (V2IsUndef) return V1;
				2837	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2838	if (V2IsSplat) {
				2839	// V2 is a splat, so the mask may be malformed. That is, it may point
				2840	// to any V2 element. The instruction selectior won't like this. Get
				2841	// a corrected mask and commute to form a proper MOVS{S\|D}.
				2842	SDOperand NewMask = getMOVLMask(NumElems, DAG);
				2843	if (NewMask.Val != PermMask.Val)
				2844	Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2845	}
				2846	return Op;
				2847	}
				2848
				2849	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2850	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2851	X86::isUNPCKLMask(PermMask.Val) \|\|
				2852	X86::isUNPCKHMask(PermMask.Val))
				2853	return Op;
				2854
				2855	if (V2IsSplat) {
				2856	// Normalize mask so all entries that point to V2 points to its first
				2857	// element then try to match unpck{h\|l} again. If match, return a
				2858	// new vector_shuffle with the corrected mask.
				2859	SDOperand NewMask = NormalizeMask(PermMask, DAG);
				2860	if (NewMask.Val != PermMask.Val) {
				2861	if (X86::isUNPCKLMask(PermMask.Val, true)) {
				2862	SDOperand NewMask = getUnpacklMask(NumElems, DAG);
				2863	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2864	} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
				2865	SDOperand NewMask = getUnpackhMask(NumElems, DAG);
				2866	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
				2867	}
				2868	}
				2869	}
				2870
				2871	// Normalize the node to match x86 shuffle ops if needed
				2872	if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
				2873	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2874
				2875	if (Commuted) {
				2876	// Commute is back and try unpck* again.
				2877	Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
				2878	if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) \|\|
				2879	X86::isUNPCKH_v_undef_Mask(PermMask.Val) \|\|
				2880	X86::isUNPCKLMask(PermMask.Val) \|\|
				2881	X86::isUNPCKHMask(PermMask.Val))
				2882	return Op;
				2883	}
				2884
				2885	// If VT is integer, try PSHUF* first, then SHUFP*.
				2886	if (MVT::isInteger(VT)) {
Dan Gohman	7dc1901	2007-08-02 21:17:01 +0000	[diff] [blame]	2887	// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
				2888	// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
				2889	if (((MVT::getSizeInBits(VT) != 64 \|\| NumElems == 4) &&
				2890	X86::isPSHUFDMask(PermMask.Val)) \|\|
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	2891	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2892	X86::isPSHUFLWMask(PermMask.Val)) {
				2893	if (V2.getOpcode() != ISD::UNDEF)
				2894	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2895	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2896	return Op;
				2897	}
				2898
				2899	if (X86::isSHUFPMask(PermMask.Val) &&
				2900	MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
				2901	return Op;
				2902
				2903	// Handle v8i16 shuffle high / low shuffle node pair.
				2904	if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
				2905	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
				2906	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				2907	SmallVector<SDOperand, 8> MaskVec;
				2908	for (unsigned i = 0; i != 4; ++i)
				2909	MaskVec.push_back(PermMask.getOperand(i));
				2910	for (unsigned i = 4; i != 8; ++i)
				2911	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2912	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2913	&MaskVec[0], MaskVec.size());
				2914	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2915	MaskVec.clear();
				2916	for (unsigned i = 0; i != 4; ++i)
				2917	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				2918	for (unsigned i = 4; i != 8; ++i)
				2919	MaskVec.push_back(PermMask.getOperand(i));
				2920	Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
				2921	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
				2922	}
				2923	} else {
				2924	// Floating point cases in the other order.
				2925	if (X86::isSHUFPMask(PermMask.Val))
				2926	return Op;
				2927	if (X86::isPSHUFDMask(PermMask.Val) \|\|
				2928	X86::isPSHUFHWMask(PermMask.Val) \|\|
				2929	X86::isPSHUFLWMask(PermMask.Val)) {
				2930	if (V2.getOpcode() != ISD::UNDEF)
				2931	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
				2932	DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
				2933	return Op;
				2934	}
				2935	}
				2936
				2937	if (NumElems == 4 &&
				2938	// Don't do this for MMX.
				2939	MVT::getSizeInBits(VT) != 64) {
				2940	MVT::ValueType MaskVT = PermMask.getValueType();
				2941	MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
				2942	SmallVector<std::pair<int, int>, 8> Locs;
				2943	Locs.reserve(NumElems);
				2944	SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2945	SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2946	unsigned NumHi = 0;
				2947	unsigned NumLo = 0;
				2948	// If no more than two elements come from either vector. This can be
				2949	// implemented with two shuffles. First shuffle gather the elements.
				2950	// The second shuffle, which takes the first shuffle as both of its
				2951	// vector operands, put the elements into the right order.
				2952	for (unsigned i = 0; i != NumElems; ++i) {
				2953	SDOperand Elt = PermMask.getOperand(i);
				2954	if (Elt.getOpcode() == ISD::UNDEF) {
				2955	Locs[i] = std::make_pair(-1, -1);
				2956	} else {
				2957	unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
				2958	if (Val < NumElems) {
				2959	Locs[i] = std::make_pair(0, NumLo);
				2960	Mask1[NumLo] = Elt;
				2961	NumLo++;
				2962	} else {
				2963	Locs[i] = std::make_pair(1, NumHi);
				2964	if (2+NumHi < NumElems)
				2965	Mask1[2+NumHi] = Elt;
				2966	NumHi++;
				2967	}
				2968	}
				2969	}
				2970	if (NumLo <= 2 && NumHi <= 2) {
				2971	V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				2972	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2973	&Mask1[0], Mask1.size()));
				2974	for (unsigned i = 0; i != NumElems; ++i) {
				2975	if (Locs[i].first == -1)
				2976	continue;
				2977	else {
				2978	unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
				2979	Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
				2980	Mask2[i] = DAG.getConstant(Idx, MaskEVT);
				2981	}
				2982	}
				2983
				2984	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
				2985	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				2986	&Mask2[0], Mask2.size()));
				2987	}
				2988
				2989	// Break it into (shuffle shuffle_hi, shuffle_lo).
				2990	Locs.clear();
				2991	SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2992	SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
				2993	SmallVector<SDOperand,8> *MaskPtr = &LoMask;
				2994	unsigned MaskIdx = 0;
				2995	unsigned LoIdx = 0;
				2996	unsigned HiIdx = NumElems/2;
				2997	for (unsigned i = 0; i != NumElems; ++i) {
				2998	if (i == NumElems/2) {
				2999	MaskPtr = &HiMask;
				3000	MaskIdx = 1;
				3001	LoIdx = 0;
				3002	HiIdx = NumElems/2;
				3003	}
				3004	SDOperand Elt = PermMask.getOperand(i);
				3005	if (Elt.getOpcode() == ISD::UNDEF) {
				3006	Locs[i] = std::make_pair(-1, -1);
				3007	} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
				3008	Locs[i] = std::make_pair(MaskIdx, LoIdx);
				3009	(*MaskPtr)[LoIdx] = Elt;
				3010	LoIdx++;
				3011	} else {
				3012	Locs[i] = std::make_pair(MaskIdx, HiIdx);
				3013	(*MaskPtr)[HiIdx] = Elt;
				3014	HiIdx++;
				3015	}
				3016	}
				3017
				3018	SDOperand LoShuffle =
				3019	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				3020	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3021	&LoMask[0], LoMask.size()));
				3022	SDOperand HiShuffle =
				3023	DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
				3024	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3025	&HiMask[0], HiMask.size()));
				3026	SmallVector<SDOperand, 8> MaskOps;
				3027	for (unsigned i = 0; i != NumElems; ++i) {
				3028	if (Locs[i].first == -1) {
				3029	MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
				3030	} else {
				3031	unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
				3032	MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
				3033	}
				3034	}
				3035	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
				3036	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3037	&MaskOps[0], MaskOps.size()));
				3038	}
				3039
				3040	return SDOperand();
				3041	}
				3042
				3043	SDOperand
				3044	X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				3045	if (!isa<ConstantSDNode>(Op.getOperand(1)))
				3046	return SDOperand();
				3047
				3048	MVT::ValueType VT = Op.getValueType();
				3049	// TODO: handle v16i8.
				3050	if (MVT::getSizeInBits(VT) == 16) {
				3051	// Transform it so it match pextrw which produces a 32-bit result.
				3052	MVT::ValueType EVT = (MVT::ValueType)(VT+1);
				3053	SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
				3054	Op.getOperand(0), Op.getOperand(1));
				3055	SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
				3056	DAG.getValueType(VT));
				3057	return DAG.getNode(ISD::TRUNCATE, VT, Assert);
				3058	} else if (MVT::getSizeInBits(VT) == 32) {
				3059	SDOperand Vec = Op.getOperand(0);
				3060	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3061	if (Idx == 0)
				3062	return Op;
				3063	// SHUFPS the element to the lowest double word, then movss.
				3064	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3065	SmallVector<SDOperand, 8> IdxVec;
				3066	IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
				3067	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3068	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3069	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3070	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3071	&IdxVec[0], IdxVec.size());
				3072	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				3073	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				3074	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				3075	DAG.getConstant(0, getPointerTy()));
				3076	} else if (MVT::getSizeInBits(VT) == 64) {
				3077	SDOperand Vec = Op.getOperand(0);
				3078	unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3079	if (Idx == 0)
				3080	return Op;
				3081
				3082	// UNPCKHPD the element to the lowest double word, then movsd.
				3083	// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
				3084	// to a f64mem, the whole operation is folded into a single MOVHPDmr.
				3085	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3086	SmallVector<SDOperand, 8> IdxVec;
				3087	IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
				3088	IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
				3089	SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3090	&IdxVec[0], IdxVec.size());
				3091	Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
				3092	Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
				3093	return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
				3094	DAG.getConstant(0, getPointerTy()));
				3095	}
				3096
				3097	return SDOperand();
				3098	}
				3099
				3100	SDOperand
				3101	X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
				3102	// Transform it so it match pinsrw which expects a 16-bit value in a GR32
				3103	// as its second argument.
				3104	MVT::ValueType VT = Op.getValueType();
				3105	MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
				3106	SDOperand N0 = Op.getOperand(0);
				3107	SDOperand N1 = Op.getOperand(1);
				3108	SDOperand N2 = Op.getOperand(2);
				3109	if (MVT::getSizeInBits(BaseVT) == 16) {
				3110	if (N1.getValueType() != MVT::i32)
				3111	N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
				3112	if (N2.getValueType() != MVT::i32)
				3113	N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
				3114	return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
				3115	} else if (MVT::getSizeInBits(BaseVT) == 32) {
				3116	unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
				3117	if (Idx == 0) {
				3118	// Use a movss.
				3119	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
				3120	MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
				3121	MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
				3122	SmallVector<SDOperand, 8> MaskVec;
				3123	MaskVec.push_back(DAG.getConstant(4, BaseVT));
				3124	for (unsigned i = 1; i <= 3; ++i)
				3125	MaskVec.push_back(DAG.getConstant(i, BaseVT));
				3126	return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
				3127	DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
				3128	&MaskVec[0], MaskVec.size()));
				3129	} else {
				3130	// Use two pinsrw instructions to insert a 32 bit value.
				3131	Idx <<= 1;
				3132	if (MVT::isFloatingPoint(N1.getValueType())) {
Evan Cheng	1eea675	2007-07-31 06:21:44 +0000	[diff] [blame]	3133	N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
				3134	N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
				3135	N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
				3136	DAG.getConstant(0, getPointerTy()));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3137	}
				3138	N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
				3139	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3140	DAG.getConstant(Idx, getPointerTy()));
				3141	N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
				3142	N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
				3143	DAG.getConstant(Idx+1, getPointerTy()));
				3144	return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
				3145	}
				3146	}
				3147
				3148	return SDOperand();
				3149	}
				3150
				3151	SDOperand
				3152	X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
				3153	SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
				3154	return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
				3155	}
				3156
				3157	// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
				3158	// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
				3159	// one of the above mentioned nodes. It has to be wrapped because otherwise
				3160	// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
				3161	// be used to form addressing mode. These wrapped nodes will be selected
				3162	// into MOV32ri.
				3163	SDOperand
				3164	X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
				3165	ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
				3166	SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
				3167	getPointerTy(),
				3168	CP->getAlignment());
				3169	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3170	// With PIC, the address is actually $g + Offset.
				3171	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3172	!Subtarget->isPICStyleRIPRel()) {
				3173	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3174	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3175	Result);
				3176	}
				3177
				3178	return Result;
				3179	}
				3180
				3181	SDOperand
				3182	X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
				3183	GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
				3184	SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
				3185	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3186	// With PIC, the address is actually $g + Offset.
				3187	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3188	!Subtarget->isPICStyleRIPRel()) {
				3189	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3190	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3191	Result);
				3192	}
				3193
				3194	// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
				3195	// load the value at address GV, not the value of GV itself. This means that
				3196	// the GlobalAddress must be in the base or index register of the address, not
				3197	// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
				3198	// The same applies for external symbols during PIC codegen
				3199	if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
				3200	Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
				3201
				3202	return Result;
				3203	}
				3204
				3205	// Lower ISD::GlobalTLSAddress using the "general dynamic" model
				3206	static SDOperand
				3207	LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3208	const MVT::ValueType PtrVT) {
				3209	SDOperand InFlag;
				3210	SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
				3211	DAG.getNode(X86ISD::GlobalBaseReg,
				3212	PtrVT), InFlag);
				3213	InFlag = Chain.getValue(1);
				3214
				3215	// emit leal symbol@TLSGD(,%ebx,1), %eax
				3216	SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
				3217	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3218	GA->getValueType(0),
				3219	GA->getOffset());
				3220	SDOperand Ops[] = { Chain, TGA, InFlag };
				3221	SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
				3222	InFlag = Result.getValue(2);
				3223	Chain = Result.getValue(1);
				3224
				3225	// call ___tls_get_addr. This function receives its argument in
				3226	// the register EAX.
				3227	Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
				3228	InFlag = Chain.getValue(1);
				3229
				3230	NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3231	SDOperand Ops1[] = { Chain,
				3232	DAG.getTargetExternalSymbol("___tls_get_addr",
				3233	PtrVT),
				3234	DAG.getRegister(X86::EAX, PtrVT),
				3235	DAG.getRegister(X86::EBX, PtrVT),
				3236	InFlag };
				3237	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
				3238	InFlag = Chain.getValue(1);
				3239
				3240	return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
				3241	}
				3242
				3243	// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
				3244	// "local exec" model.
				3245	static SDOperand
				3246	LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
				3247	const MVT::ValueType PtrVT) {
				3248	// Get the Thread Pointer
				3249	SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
				3250	// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
				3251	// exec)
				3252	SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
				3253	GA->getValueType(0),
				3254	GA->getOffset());
				3255	SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
				3256
				3257	if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
				3258	Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
				3259
				3260	// The address of the thread local variable is the add of the thread
				3261	// pointer with the offset of the variable.
				3262	return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
				3263	}
				3264
				3265	SDOperand
				3266	X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
				3267	// TODO: implement the "local dynamic" model
				3268	// TODO: implement the "initial exec"model for pic executables
				3269	assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
				3270	"TLS not implemented for non-ELF and 64-bit targets");
				3271	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
				3272	// If the relocation model is PIC, use the "General Dynamic" TLS Model,
				3273	// otherwise use the "Local Exec"TLS Model
				3274	if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
				3275	return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
				3276	else
				3277	return LowerToTLSExecModel(GA, DAG, getPointerTy());
				3278	}
				3279
				3280	SDOperand
				3281	X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
				3282	const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
				3283	SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
				3284	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3285	// With PIC, the address is actually $g + Offset.
				3286	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3287	!Subtarget->isPICStyleRIPRel()) {
				3288	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3289	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3290	Result);
				3291	}
				3292
				3293	return Result;
				3294	}
				3295
				3296	SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
				3297	JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
				3298	SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
				3299	Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
				3300	// With PIC, the address is actually $g + Offset.
				3301	if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
				3302	!Subtarget->isPICStyleRIPRel()) {
				3303	Result = DAG.getNode(ISD::ADD, getPointerTy(),
				3304	DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
				3305	Result);
				3306	}
				3307
				3308	return Result;
				3309	}
				3310
				3311	SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
				3312	assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
				3313	"Not an i64 shift!");
				3314	bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
				3315	SDOperand ShOpLo = Op.getOperand(0);
				3316	SDOperand ShOpHi = Op.getOperand(1);
				3317	SDOperand ShAmt = Op.getOperand(2);
				3318	SDOperand Tmp1 = isSRA ?
				3319	DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
				3320	DAG.getConstant(0, MVT::i32);
				3321
				3322	SDOperand Tmp2, Tmp3;
				3323	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3324	Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
				3325	Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
				3326	} else {
				3327	Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
				3328	Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
				3329	}
				3330
				3331	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3332	SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
				3333	DAG.getConstant(32, MVT::i8));
				3334	SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
				3335	SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
				3336
				3337	SDOperand Hi, Lo;
				3338	SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3339
				3340	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
				3341	SmallVector<SDOperand, 4> Ops;
				3342	if (Op.getOpcode() == ISD::SHL_PARTS) {
				3343	Ops.push_back(Tmp2);
				3344	Ops.push_back(Tmp3);
				3345	Ops.push_back(CC);
				3346	Ops.push_back(InFlag);
				3347	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3348	InFlag = Hi.getValue(1);
				3349
				3350	Ops.clear();
				3351	Ops.push_back(Tmp3);
				3352	Ops.push_back(Tmp1);
				3353	Ops.push_back(CC);
				3354	Ops.push_back(InFlag);
				3355	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3356	} else {
				3357	Ops.push_back(Tmp2);
				3358	Ops.push_back(Tmp3);
				3359	Ops.push_back(CC);
				3360	Ops.push_back(InFlag);
				3361	Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3362	InFlag = Lo.getValue(1);
				3363
				3364	Ops.clear();
				3365	Ops.push_back(Tmp3);
				3366	Ops.push_back(Tmp1);
				3367	Ops.push_back(CC);
				3368	Ops.push_back(InFlag);
				3369	Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3370	}
				3371
				3372	VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
				3373	Ops.clear();
				3374	Ops.push_back(Lo);
				3375	Ops.push_back(Hi);
				3376	return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
				3377	}
				3378
				3379	SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
				3380	assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
				3381	Op.getOperand(0).getValueType() >= MVT::i16 &&
				3382	"Unknown SINT_TO_FP to lower!");
				3383
				3384	SDOperand Result;
				3385	MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
				3386	unsigned Size = MVT::getSizeInBits(SrcVT)/8;
				3387	MachineFunction &MF = DAG.getMachineFunction();
				3388	int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
				3389	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3390	SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
				3391	StackSlot, NULL, 0);
				3392
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3393	// These are really Legal; caller falls through into that case.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	3394	if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
				3395	return Result;
				3396	if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3397	return Result;
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	3398	if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
				3399	Subtarget->is64Bit())
				3400	return Result;
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3401
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3402	// Build the FILD
				3403	SDVTList Tys;
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	3404	bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) \|\|
				3405	(X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3406	if (useSSE)
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3407	Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
				3408	else
				3409	Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
				3410	SmallVector<SDOperand, 8> Ops;
				3411	Ops.push_back(Chain);
				3412	Ops.push_back(StackSlot);
				3413	Ops.push_back(DAG.getValueType(SrcVT));
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3414	Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3415	Tys, &Ops[0], Ops.size());
				3416
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3417	if (useSSE) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3418	Chain = Result.getValue(1);
				3419	SDOperand InFlag = Result.getValue(2);
				3420
				3421	// FIXME: Currently the FST is flagged to the FILD_FLAG. This
				3422	// shouldn't be necessary except that RFP cannot be live across
				3423	// multiple blocks. When stackifier is fixed, they can be uncoupled.
				3424	MachineFunction &MF = DAG.getMachineFunction();
				3425	int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
				3426	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3427	Tys = DAG.getVTList(MVT::Other);
				3428	SmallVector<SDOperand, 8> Ops;
				3429	Ops.push_back(Chain);
				3430	Ops.push_back(Result);
				3431	Ops.push_back(StackSlot);
				3432	Ops.push_back(DAG.getValueType(Op.getValueType()));
				3433	Ops.push_back(InFlag);
				3434	Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
				3435	Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
				3436	}
				3437
				3438	return Result;
				3439	}
				3440
				3441	SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
				3442	assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
				3443	"Unknown FP_TO_SINT to lower!");
				3444	// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
				3445	// stack slot.
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3446	SDOperand Result;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3447	MachineFunction &MF = DAG.getMachineFunction();
				3448	unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
				3449	int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3450	SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3451
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3452	// These are really Legal.
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	3453	if (Op.getValueType() == MVT::i32 &&
				3454	X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
				3455	return Result;
				3456	if (Op.getValueType() == MVT::i32 &&
				3457	X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3458	return Result;
Dale Johannesen	958b08b	2007-09-19 23:55:34 +0000	[diff] [blame]	3459	if (Subtarget->is64Bit() &&
				3460	Op.getValueType() == MVT::i64 &&
				3461	Op.getOperand(0).getValueType() != MVT::f80)
				3462	return Result;
Dale Johannesen	2fc2078	2007-09-14 22:26:36 +0000	[diff] [blame]	3463
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3464	unsigned Opc;
				3465	switch (Op.getValueType()) {
				3466	default: assert(0 && "Invalid FP_TO_SINT to lower!");
				3467	case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
				3468	case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
				3469	case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
				3470	}
				3471
				3472	SDOperand Chain = DAG.getEntryNode();
				3473	SDOperand Value = Op.getOperand(0);
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	3474	if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) \|\|
				3475	(X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3476	assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
				3477	Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
				3478	SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
				3479	SDOperand Ops[] = {
				3480	Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
				3481	};
				3482	Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
				3483	Chain = Value.getValue(1);
				3484	SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
				3485	StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
				3486	}
				3487
				3488	// Build the FP_TO_INT*_IN_MEM
				3489	SDOperand Ops[] = { Chain, Value, StackSlot };
				3490	SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
				3491
				3492	// Load the result.
				3493	return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
				3494	}
				3495
				3496	SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
				3497	MVT::ValueType VT = Op.getValueType();
				3498	MVT::ValueType EltVT = VT;
				3499	if (MVT::isVector(VT))
				3500	EltVT = MVT::getVectorElementType(VT);
				3501	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3502	std::vector<Constant*> CV;
				3503	if (EltVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3504	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3505	CV.push_back(C);
				3506	CV.push_back(C);
				3507	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3508	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3509	CV.push_back(C);
				3510	CV.push_back(C);
				3511	CV.push_back(C);
				3512	CV.push_back(C);
				3513	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3514	Constant *C = ConstantVector::get(CV);
				3515	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3516	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3517	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3518	return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
				3519	}
				3520
				3521	SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
				3522	MVT::ValueType VT = Op.getValueType();
				3523	MVT::ValueType EltVT = VT;
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3524	unsigned EltNum = 1;
				3525	if (MVT::isVector(VT)) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3526	EltVT = MVT::getVectorElementType(VT);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3527	EltNum = MVT::getVectorNumElements(VT);
				3528	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3529	const Type *OpNTy = MVT::getTypeForValueType(EltVT);
				3530	std::vector<Constant*> CV;
				3531	if (EltVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3532	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3533	CV.push_back(C);
				3534	CV.push_back(C);
				3535	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3536	Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3537	CV.push_back(C);
				3538	CV.push_back(C);
				3539	CV.push_back(C);
				3540	CV.push_back(C);
				3541	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3542	Constant *C = ConstantVector::get(CV);
				3543	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3544	SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3545	false, 16);
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3546	if (MVT::isVector(VT)) {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3547	return DAG.getNode(ISD::BIT_CONVERT, VT,
				3548	DAG.getNode(ISD::XOR, MVT::v2i64,
				3549	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
				3550	DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
				3551	} else {
Evan Cheng	92b8f78	2007-07-19 23:36:01 +0000	[diff] [blame]	3552	return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
				3553	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3554	}
				3555
				3556	SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
				3557	SDOperand Op0 = Op.getOperand(0);
				3558	SDOperand Op1 = Op.getOperand(1);
				3559	MVT::ValueType VT = Op.getValueType();
				3560	MVT::ValueType SrcVT = Op1.getValueType();
				3561	const Type *SrcTy = MVT::getTypeForValueType(SrcVT);
				3562
				3563	// If second operand is smaller, extend it first.
				3564	if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
				3565	Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
				3566	SrcVT = VT;
Dale Johannesen	b9de9f0	2007-09-06 18:13:44 +0000	[diff] [blame]	3567	SrcTy = MVT::getTypeForValueType(SrcVT);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3568	}
				3569
				3570	// First get the sign bit of second operand.
				3571	std::vector<Constant*> CV;
				3572	if (SrcVT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3573	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
				3574	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3575	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3576	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
				3577	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3578	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3579	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3580	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3581	Constant *C = ConstantVector::get(CV);
				3582	SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3583	SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3584	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3585	SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
				3586
				3587	// Shift sign bit right or left if the two operands have different types.
				3588	if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
				3589	// Op0 is MVT::f32, Op1 is MVT::f64.
				3590	SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
				3591	SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
				3592	DAG.getConstant(32, MVT::i32));
				3593	SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
				3594	SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
				3595	DAG.getConstant(0, getPointerTy()));
				3596	}
				3597
				3598	// Clear first operand sign bit.
				3599	CV.clear();
				3600	if (VT == MVT::f64) {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3601	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
				3602	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3603	} else {
Dale Johannesen	1616e90	2007-09-11 18:32:33 +0000	[diff] [blame]	3604	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
				3605	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3606	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
				3607	CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3608	}
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	3609	C = ConstantVector::get(CV);
				3610	CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
				3611	SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
				3612	false, 16);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3613	SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
				3614
				3615	// Or the value with the sign bit.
				3616	return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
				3617	}
				3618
				3619	SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
				3620	SDOperand Chain) {
				3621	assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
				3622	SDOperand Cond;
				3623	SDOperand Op0 = Op.getOperand(0);
				3624	SDOperand Op1 = Op.getOperand(1);
				3625	SDOperand CC = Op.getOperand(2);
				3626	ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
				3627	const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3628	const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				3629	bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
				3630	unsigned X86CC;
				3631
				3632	if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
				3633	Op0, Op1, DAG)) {
				3634	SDOperand Ops1[] = { Chain, Op0, Op1 };
				3635	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
				3636	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				3637	return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3638	}
				3639
				3640	assert(isFP && "Illegal integer SetCC!");
				3641
				3642	SDOperand COps[] = { Chain, Op0, Op1 };
				3643	Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
				3644
				3645	switch (SetCCOpcode) {
				3646	default: assert(false && "Illegal floating point SetCC!");
				3647	case ISD::SETOEQ: { // !PF & ZF
				3648	SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
				3649	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3650	SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
				3651	Tmp1.getValue(1) };
				3652	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3653	return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
				3654	}
				3655	case ISD::SETUNE: { // PF \| !ZF
				3656	SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
				3657	SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
				3658	SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
				3659	Tmp1.getValue(1) };
				3660	SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
				3661	return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
				3662	}
				3663	}
				3664	}
				3665
				3666	SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
				3667	bool addTest = true;
				3668	SDOperand Chain = DAG.getEntryNode();
				3669	SDOperand Cond = Op.getOperand(0);
				3670	SDOperand CC;
				3671	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3672
				3673	if (Cond.getOpcode() == ISD::SETCC)
				3674	Cond = LowerSETCC(Cond, DAG, Chain);
				3675
				3676	if (Cond.getOpcode() == X86ISD::SETCC) {
				3677	CC = Cond.getOperand(0);
				3678
				3679	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3680	// (since flag operand cannot be shared). Use it as the condition setting
				3681	// operand in place of the X86ISD::SETCC.
				3682	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3683	// to use a test instead of duplicating the X86ISD::CMP (for register
				3684	// pressure reason)?
				3685	SDOperand Cmp = Cond.getOperand(1);
				3686	unsigned Opc = Cmp.getOpcode();
Dale Johannesen	e0e0fd0	2007-09-23 14:52:20 +0000	[diff] [blame]	3687	bool IllegalFPCMov =
				3688	! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) \|\|
				3689	(X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3690	!hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
				3691	if ((Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) &&
				3692	!IllegalFPCMov) {
				3693	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3694	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3695	addTest = false;
				3696	}
				3697	}
				3698
				3699	if (addTest) {
				3700	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3701	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3702	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3703	}
				3704
				3705	VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
				3706	SmallVector<SDOperand, 4> Ops;
				3707	// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
				3708	// condition is true.
				3709	Ops.push_back(Op.getOperand(2));
				3710	Ops.push_back(Op.getOperand(1));
				3711	Ops.push_back(CC);
				3712	Ops.push_back(Cond.getValue(1));
				3713	return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
				3714	}
				3715
				3716	SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
				3717	bool addTest = true;
				3718	SDOperand Chain = Op.getOperand(0);
				3719	SDOperand Cond = Op.getOperand(1);
				3720	SDOperand Dest = Op.getOperand(2);
				3721	SDOperand CC;
				3722	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				3723
				3724	if (Cond.getOpcode() == ISD::SETCC)
				3725	Cond = LowerSETCC(Cond, DAG, Chain);
				3726
				3727	if (Cond.getOpcode() == X86ISD::SETCC) {
				3728	CC = Cond.getOperand(0);
				3729
				3730	// If condition flag is set by a X86ISD::CMP, then make a copy of it
				3731	// (since flag operand cannot be shared). Use it as the condition setting
				3732	// operand in place of the X86ISD::SETCC.
				3733	// If the X86ISD::SETCC has more than one use, then perhaps it's better
				3734	// to use a test instead of duplicating the X86ISD::CMP (for register
				3735	// pressure reason)?
				3736	SDOperand Cmp = Cond.getOperand(1);
				3737	unsigned Opc = Cmp.getOpcode();
				3738	if (Opc == X86ISD::CMP \|\| Opc == X86ISD::COMI \|\| Opc == X86ISD::UCOMI) {
				3739	SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
				3740	Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
				3741	addTest = false;
				3742	}
				3743	}
				3744
				3745	if (addTest) {
				3746	CC = DAG.getConstant(X86::COND_NE, MVT::i8);
				3747	SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
				3748	Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
				3749	}
				3750	return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
				3751	Cond, Op.getOperand(2), CC, Cond.getValue(1));
				3752	}
				3753
				3754	SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
				3755	unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3756
				3757	if (Subtarget->is64Bit())
				3758	return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
				3759	else
				3760	switch (CallingConv) {
				3761	default:
				3762	assert(0 && "Unsupported calling convention");
				3763	case CallingConv::Fast:
				3764	// TODO: Implement fastcc
				3765	// Falls through
				3766	case CallingConv::C:
				3767	case CallingConv::X86_StdCall:
				3768	return LowerCCCCallTo(Op, DAG, CallingConv);
				3769	case CallingConv::X86_FastCall:
				3770	return LowerFastCCCallTo(Op, DAG, CallingConv);
				3771	}
				3772	}
				3773
				3774
				3775	// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
				3776	// Calls to _alloca is needed to probe the stack when allocating more than 4k
				3777	// bytes in one go. Touching the stack at 4K increments is necessary to ensure
				3778	// that the guard pages used by the OS virtual memory manager are allocated in
				3779	// correct sequence.
				3780	SDOperand
				3781	X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
				3782	SelectionDAG &DAG) {
				3783	assert(Subtarget->isTargetCygMing() &&
				3784	"This should be used only on Cygwin/Mingw targets");
				3785
				3786	// Get the inputs.
				3787	SDOperand Chain = Op.getOperand(0);
				3788	SDOperand Size = Op.getOperand(1);
				3789	// FIXME: Ensure alignment here
				3790
				3791	SDOperand Flag;
				3792
				3793	MVT::ValueType IntPtr = getPointerTy();
				3794	MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
				3795
				3796	Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
				3797	Flag = Chain.getValue(1);
				3798
				3799	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
				3800	SDOperand Ops[] = { Chain,
				3801	DAG.getTargetExternalSymbol("_alloca", IntPtr),
				3802	DAG.getRegister(X86::EAX, IntPtr),
				3803	Flag };
				3804	Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
				3805	Flag = Chain.getValue(1);
				3806
				3807	Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
				3808
				3809	std::vector<MVT::ValueType> Tys;
				3810	Tys.push_back(SPTy);
				3811	Tys.push_back(MVT::Other);
				3812	SDOperand Ops1[2] = { Chain.getValue(0), Chain };
				3813	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
				3814	}
				3815
				3816	SDOperand
				3817	X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
				3818	MachineFunction &MF = DAG.getMachineFunction();
				3819	const Function* Fn = MF.getFunction();
				3820	if (Fn->hasExternalLinkage() &&
				3821	Subtarget->isTargetCygMing() &&
				3822	Fn->getName() == "main")
				3823	MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
				3824
				3825	unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
				3826	if (Subtarget->is64Bit())
				3827	return LowerX86_64CCCArguments(Op, DAG);
				3828	else
				3829	switch(CC) {
				3830	default:
				3831	assert(0 && "Unsupported calling convention");
				3832	case CallingConv::Fast:
				3833	// TODO: implement fastcc.
				3834
				3835	// Falls through
				3836	case CallingConv::C:
				3837	return LowerCCCArguments(Op, DAG);
				3838	case CallingConv::X86_StdCall:
				3839	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
				3840	return LowerCCCArguments(Op, DAG, true);
				3841	case CallingConv::X86_FastCall:
				3842	MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
				3843	return LowerFastCCArguments(Op, DAG);
				3844	}
				3845	}
				3846
				3847	SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
				3848	SDOperand InFlag(0, 0);
				3849	SDOperand Chain = Op.getOperand(0);
				3850	unsigned Align =
				3851	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				3852	if (Align == 0) Align = 1;
				3853
				3854	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3855	// If not DWORD aligned or size is more than the threshold, call memset.
Rafael Espindola	b2e7a6b	2007-08-27 17:48:26 +0000	[diff] [blame]	3856	// The libc version is likely to be faster for these cases. It can use the
				3857	// address value and run time information about the CPU.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3858	if ((Align & 3) != 0 \|\|
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	3859	(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	3860	MVT::ValueType IntPtr = getPointerTy();
				3861	const Type *IntPtrTy = getTargetData()->getIntPtrType();
				3862	TargetLowering::ArgListTy Args;
				3863	TargetLowering::ArgListEntry Entry;
				3864	Entry.Node = Op.getOperand(1);
				3865	Entry.Ty = IntPtrTy;
				3866	Args.push_back(Entry);
				3867	// Extend the unsigned i8 argument to be an int value for the call.
				3868	Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
				3869	Entry.Ty = IntPtrTy;
				3870	Args.push_back(Entry);
				3871	Entry.Node = Op.getOperand(3);
				3872	Args.push_back(Entry);
				3873	std::pair<SDOperand,SDOperand> CallResult =
				3874	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				3875	DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
				3876	return CallResult.second;
				3877	}
				3878
				3879	MVT::ValueType AVT;
				3880	SDOperand Count;
				3881	ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
				3882	unsigned BytesLeft = 0;
				3883	bool TwoRepStos = false;
				3884	if (ValC) {
				3885	unsigned ValReg;
				3886	uint64_t Val = ValC->getValue() & 255;
				3887
				3888	// If the value is a constant, then we can potentially use larger sets.
				3889	switch (Align & 3) {
				3890	case 2: // WORD aligned
				3891	AVT = MVT::i16;
				3892	ValReg = X86::AX;
				3893	Val = (Val << 8) \| Val;
				3894	break;
				3895	case 0: // DWORD aligned
				3896	AVT = MVT::i32;
				3897	ValReg = X86::EAX;
				3898	Val = (Val << 8) \| Val;
				3899	Val = (Val << 16) \| Val;
				3900	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
				3901	AVT = MVT::i64;
				3902	ValReg = X86::RAX;
				3903	Val = (Val << 32) \| Val;
				3904	}
				3905	break;
				3906	default: // Byte aligned
				3907	AVT = MVT::i8;
				3908	ValReg = X86::AL;
				3909	Count = Op.getOperand(3);
				3910	break;
				3911	}
				3912
				3913	if (AVT > MVT::i8) {
				3914	if (I) {
				3915	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				3916	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				3917	BytesLeft = I->getValue() % UBytes;
				3918	} else {
				3919	assert(AVT >= MVT::i32 &&
				3920	"Do not use rep;stos if not at least DWORD aligned");
				3921	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				3922	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				3923	TwoRepStos = true;
				3924	}
				3925	}
				3926
				3927	Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
				3928	InFlag);
				3929	InFlag = Chain.getValue(1);
				3930	} else {
				3931	AVT = MVT::i8;
				3932	Count = Op.getOperand(3);
				3933	Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
				3934	InFlag = Chain.getValue(1);
				3935	}
				3936
				3937	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				3938	Count, InFlag);
				3939	InFlag = Chain.getValue(1);
				3940	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				3941	Op.getOperand(1), InFlag);
				3942	InFlag = Chain.getValue(1);
				3943
				3944	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3945	SmallVector<SDOperand, 8> Ops;
				3946	Ops.push_back(Chain);
				3947	Ops.push_back(DAG.getValueType(AVT));
				3948	Ops.push_back(InFlag);
				3949	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3950
				3951	if (TwoRepStos) {
				3952	InFlag = Chain.getValue(1);
				3953	Count = Op.getOperand(3);
				3954	MVT::ValueType CVT = Count.getValueType();
				3955	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				3956	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				3957	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				3958	Left, InFlag);
				3959	InFlag = Chain.getValue(1);
				3960	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				3961	Ops.clear();
				3962	Ops.push_back(Chain);
				3963	Ops.push_back(DAG.getValueType(MVT::i8));
				3964	Ops.push_back(InFlag);
				3965	Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
				3966	} else if (BytesLeft) {
				3967	// Issue stores for the last 1 - 7 bytes.
				3968	SDOperand Value;
				3969	unsigned Val = ValC->getValue() & 255;
				3970	unsigned Offset = I->getValue() - BytesLeft;
				3971	SDOperand DstAddr = Op.getOperand(1);
				3972	MVT::ValueType AddrVT = DstAddr.getValueType();
				3973	if (BytesLeft >= 4) {
				3974	Val = (Val << 8) \| Val;
				3975	Val = (Val << 16) \| Val;
				3976	Value = DAG.getConstant(Val, MVT::i32);
				3977	Chain = DAG.getStore(Chain, Value,
				3978	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3979	DAG.getConstant(Offset, AddrVT)),
				3980	NULL, 0);
				3981	BytesLeft -= 4;
				3982	Offset += 4;
				3983	}
				3984	if (BytesLeft >= 2) {
				3985	Value = DAG.getConstant((Val << 8) \| Val, MVT::i16);
				3986	Chain = DAG.getStore(Chain, Value,
				3987	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3988	DAG.getConstant(Offset, AddrVT)),
				3989	NULL, 0);
				3990	BytesLeft -= 2;
				3991	Offset += 2;
				3992	}
				3993	if (BytesLeft == 1) {
				3994	Value = DAG.getConstant(Val, MVT::i8);
				3995	Chain = DAG.getStore(Chain, Value,
				3996	DAG.getNode(ISD::ADD, AddrVT, DstAddr,
				3997	DAG.getConstant(Offset, AddrVT)),
				3998	NULL, 0);
				3999	}
				4000	}
				4001
				4002	return Chain;
				4003	}
				4004
				4005	SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
				4006	SDOperand Chain = Op.getOperand(0);
				4007	unsigned Align =
				4008	(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
				4009	if (Align == 0) Align = 1;
				4010
				4011	ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	4012	// If not DWORD aligned or size is more than the threshold, call memcpy.
Rafael Espindola	b2e7a6b	2007-08-27 17:48:26 +0000	[diff] [blame]	4013	// The libc version is likely to be faster for these cases. It can use the
				4014	// address value and run time information about the CPU.
				4015	// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4016	if ((Align & 3) != 0 \|\|
Rafael Espindola	5d3e762	2007-08-27 10:18:20 +0000	[diff] [blame]	4017	(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4018	MVT::ValueType IntPtr = getPointerTy();
				4019	TargetLowering::ArgListTy Args;
				4020	TargetLowering::ArgListEntry Entry;
				4021	Entry.Ty = getTargetData()->getIntPtrType();
				4022	Entry.Node = Op.getOperand(1); Args.push_back(Entry);
				4023	Entry.Node = Op.getOperand(2); Args.push_back(Entry);
				4024	Entry.Node = Op.getOperand(3); Args.push_back(Entry);
				4025	std::pair<SDOperand,SDOperand> CallResult =
				4026	LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
				4027	DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
				4028	return CallResult.second;
				4029	}
				4030
				4031	MVT::ValueType AVT;
				4032	SDOperand Count;
				4033	unsigned BytesLeft = 0;
				4034	bool TwoRepMovs = false;
				4035	switch (Align & 3) {
				4036	case 2: // WORD aligned
				4037	AVT = MVT::i16;
				4038	break;
				4039	case 0: // DWORD aligned
				4040	AVT = MVT::i32;
				4041	if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
				4042	AVT = MVT::i64;
				4043	break;
				4044	default: // Byte aligned
				4045	AVT = MVT::i8;
				4046	Count = Op.getOperand(3);
				4047	break;
				4048	}
				4049
				4050	if (AVT > MVT::i8) {
				4051	if (I) {
				4052	unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
				4053	Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
				4054	BytesLeft = I->getValue() % UBytes;
				4055	} else {
				4056	assert(AVT >= MVT::i32 &&
				4057	"Do not use rep;movs if not at least DWORD aligned");
				4058	Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
				4059	Op.getOperand(3), DAG.getConstant(2, MVT::i8));
				4060	TwoRepMovs = true;
				4061	}
				4062	}
				4063
				4064	SDOperand InFlag(0, 0);
				4065	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
				4066	Count, InFlag);
				4067	InFlag = Chain.getValue(1);
				4068	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
				4069	Op.getOperand(1), InFlag);
				4070	InFlag = Chain.getValue(1);
				4071	Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
				4072	Op.getOperand(2), InFlag);
				4073	InFlag = Chain.getValue(1);
				4074
				4075	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4076	SmallVector<SDOperand, 8> Ops;
				4077	Ops.push_back(Chain);
				4078	Ops.push_back(DAG.getValueType(AVT));
				4079	Ops.push_back(InFlag);
				4080	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				4081
				4082	if (TwoRepMovs) {
				4083	InFlag = Chain.getValue(1);
				4084	Count = Op.getOperand(3);
				4085	MVT::ValueType CVT = Count.getValueType();
				4086	SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
				4087	DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
				4088	Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
				4089	Left, InFlag);
				4090	InFlag = Chain.getValue(1);
				4091	Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4092	Ops.clear();
				4093	Ops.push_back(Chain);
				4094	Ops.push_back(DAG.getValueType(MVT::i8));
				4095	Ops.push_back(InFlag);
				4096	Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
				4097	} else if (BytesLeft) {
				4098	// Issue loads and stores for the last 1 - 7 bytes.
				4099	unsigned Offset = I->getValue() - BytesLeft;
				4100	SDOperand DstAddr = Op.getOperand(1);
				4101	MVT::ValueType DstVT = DstAddr.getValueType();
				4102	SDOperand SrcAddr = Op.getOperand(2);
				4103	MVT::ValueType SrcVT = SrcAddr.getValueType();
				4104	SDOperand Value;
				4105	if (BytesLeft >= 4) {
				4106	Value = DAG.getLoad(MVT::i32, Chain,
				4107	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4108	DAG.getConstant(Offset, SrcVT)),
				4109	NULL, 0);
				4110	Chain = Value.getValue(1);
				4111	Chain = DAG.getStore(Chain, Value,
				4112	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4113	DAG.getConstant(Offset, DstVT)),
				4114	NULL, 0);
				4115	BytesLeft -= 4;
				4116	Offset += 4;
				4117	}
				4118	if (BytesLeft >= 2) {
				4119	Value = DAG.getLoad(MVT::i16, Chain,
				4120	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4121	DAG.getConstant(Offset, SrcVT)),
				4122	NULL, 0);
				4123	Chain = Value.getValue(1);
				4124	Chain = DAG.getStore(Chain, Value,
				4125	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4126	DAG.getConstant(Offset, DstVT)),
				4127	NULL, 0);
				4128	BytesLeft -= 2;
				4129	Offset += 2;
				4130	}
				4131
				4132	if (BytesLeft == 1) {
				4133	Value = DAG.getLoad(MVT::i8, Chain,
				4134	DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
				4135	DAG.getConstant(Offset, SrcVT)),
				4136	NULL, 0);
				4137	Chain = Value.getValue(1);
				4138	Chain = DAG.getStore(Chain, Value,
				4139	DAG.getNode(ISD::ADD, DstVT, DstAddr,
				4140	DAG.getConstant(Offset, DstVT)),
				4141	NULL, 0);
				4142	}
				4143	}
				4144
				4145	return Chain;
				4146	}
				4147
				4148	SDOperand
				4149	X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
				4150	SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
				4151	SDOperand TheOp = Op.getOperand(0);
				4152	SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
				4153	if (Subtarget->is64Bit()) {
				4154	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
				4155	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
				4156	MVT::i64, Copy1.getValue(2));
				4157	SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
				4158	DAG.getConstant(32, MVT::i8));
				4159	SDOperand Ops[] = {
				4160	DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
				4161	};
				4162
				4163	Tys = DAG.getVTList(MVT::i64, MVT::Other);
				4164	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
				4165	}
				4166
				4167	SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
				4168	SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
				4169	MVT::i32, Copy1.getValue(2));
				4170	SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
				4171	Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
				4172	return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
				4173	}
				4174
				4175	SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
				4176	SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
				4177
				4178	if (!Subtarget->is64Bit()) {
				4179	// vastart just stores the address of the VarArgsFrameIndex slot into the
				4180	// memory location argument.
				4181	SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4182	return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
				4183	SV->getOffset());
				4184	}
				4185
				4186	// __va_list_tag:
				4187	// gp_offset (0 - 6 * 8)
				4188	// fp_offset (48 - 48 + 8 * 16)
				4189	// overflow_arg_area (point to parameters coming in memory).
				4190	// reg_save_area
				4191	SmallVector<SDOperand, 8> MemOps;
				4192	SDOperand FIN = Op.getOperand(1);
				4193	// Store gp_offset
				4194	SDOperand Store = DAG.getStore(Op.getOperand(0),
				4195	DAG.getConstant(VarArgsGPOffset, MVT::i32),
				4196	FIN, SV->getValue(), SV->getOffset());
				4197	MemOps.push_back(Store);
				4198
				4199	// Store fp_offset
				4200	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4201	DAG.getConstant(4, getPointerTy()));
				4202	Store = DAG.getStore(Op.getOperand(0),
				4203	DAG.getConstant(VarArgsFPOffset, MVT::i32),
				4204	FIN, SV->getValue(), SV->getOffset());
				4205	MemOps.push_back(Store);
				4206
				4207	// Store ptr to overflow_arg_area
				4208	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4209	DAG.getConstant(4, getPointerTy()));
				4210	SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
				4211	Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
				4212	SV->getOffset());
				4213	MemOps.push_back(Store);
				4214
				4215	// Store ptr to reg_save_area.
				4216	FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
				4217	DAG.getConstant(8, getPointerTy()));
				4218	SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
				4219	Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
				4220	SV->getOffset());
				4221	MemOps.push_back(Store);
				4222	return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
				4223	}
				4224
				4225	SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
				4226	// X86-64 va_list is a struct { i32, i32, i8, i8 }.
				4227	SDOperand Chain = Op.getOperand(0);
				4228	SDOperand DstPtr = Op.getOperand(1);
				4229	SDOperand SrcPtr = Op.getOperand(2);
				4230	SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
				4231	SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4232
				4233	SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
				4234	SrcSV->getValue(), SrcSV->getOffset());
				4235	Chain = SrcPtr.getValue(1);
				4236	for (unsigned i = 0; i < 3; ++i) {
				4237	SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
				4238	SrcSV->getValue(), SrcSV->getOffset());
				4239	Chain = Val.getValue(1);
				4240	Chain = DAG.getStore(Chain, Val, DstPtr,
				4241	DstSV->getValue(), DstSV->getOffset());
				4242	if (i == 2)
				4243	break;
				4244	SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
				4245	DAG.getConstant(8, getPointerTy()));
				4246	DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
				4247	DAG.getConstant(8, getPointerTy()));
				4248	}
				4249	return Chain;
				4250	}
				4251
				4252	SDOperand
				4253	X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
				4254	unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
				4255	switch (IntNo) {
				4256	default: return SDOperand(); // Don't custom lower most intrinsics.
				4257	// Comparison intrinsics.
				4258	case Intrinsic::x86_sse_comieq_ss:
				4259	case Intrinsic::x86_sse_comilt_ss:
				4260	case Intrinsic::x86_sse_comile_ss:
				4261	case Intrinsic::x86_sse_comigt_ss:
				4262	case Intrinsic::x86_sse_comige_ss:
				4263	case Intrinsic::x86_sse_comineq_ss:
				4264	case Intrinsic::x86_sse_ucomieq_ss:
				4265	case Intrinsic::x86_sse_ucomilt_ss:
				4266	case Intrinsic::x86_sse_ucomile_ss:
				4267	case Intrinsic::x86_sse_ucomigt_ss:
				4268	case Intrinsic::x86_sse_ucomige_ss:
				4269	case Intrinsic::x86_sse_ucomineq_ss:
				4270	case Intrinsic::x86_sse2_comieq_sd:
				4271	case Intrinsic::x86_sse2_comilt_sd:
				4272	case Intrinsic::x86_sse2_comile_sd:
				4273	case Intrinsic::x86_sse2_comigt_sd:
				4274	case Intrinsic::x86_sse2_comige_sd:
				4275	case Intrinsic::x86_sse2_comineq_sd:
				4276	case Intrinsic::x86_sse2_ucomieq_sd:
				4277	case Intrinsic::x86_sse2_ucomilt_sd:
				4278	case Intrinsic::x86_sse2_ucomile_sd:
				4279	case Intrinsic::x86_sse2_ucomigt_sd:
				4280	case Intrinsic::x86_sse2_ucomige_sd:
				4281	case Intrinsic::x86_sse2_ucomineq_sd: {
				4282	unsigned Opc = 0;
				4283	ISD::CondCode CC = ISD::SETCC_INVALID;
				4284	switch (IntNo) {
				4285	default: break;
				4286	case Intrinsic::x86_sse_comieq_ss:
				4287	case Intrinsic::x86_sse2_comieq_sd:
				4288	Opc = X86ISD::COMI;
				4289	CC = ISD::SETEQ;
				4290	break;
				4291	case Intrinsic::x86_sse_comilt_ss:
				4292	case Intrinsic::x86_sse2_comilt_sd:
				4293	Opc = X86ISD::COMI;
				4294	CC = ISD::SETLT;
				4295	break;
				4296	case Intrinsic::x86_sse_comile_ss:
				4297	case Intrinsic::x86_sse2_comile_sd:
				4298	Opc = X86ISD::COMI;
				4299	CC = ISD::SETLE;
				4300	break;
				4301	case Intrinsic::x86_sse_comigt_ss:
				4302	case Intrinsic::x86_sse2_comigt_sd:
				4303	Opc = X86ISD::COMI;
				4304	CC = ISD::SETGT;
				4305	break;
				4306	case Intrinsic::x86_sse_comige_ss:
				4307	case Intrinsic::x86_sse2_comige_sd:
				4308	Opc = X86ISD::COMI;
				4309	CC = ISD::SETGE;
				4310	break;
				4311	case Intrinsic::x86_sse_comineq_ss:
				4312	case Intrinsic::x86_sse2_comineq_sd:
				4313	Opc = X86ISD::COMI;
				4314	CC = ISD::SETNE;
				4315	break;
				4316	case Intrinsic::x86_sse_ucomieq_ss:
				4317	case Intrinsic::x86_sse2_ucomieq_sd:
				4318	Opc = X86ISD::UCOMI;
				4319	CC = ISD::SETEQ;
				4320	break;
				4321	case Intrinsic::x86_sse_ucomilt_ss:
				4322	case Intrinsic::x86_sse2_ucomilt_sd:
				4323	Opc = X86ISD::UCOMI;
				4324	CC = ISD::SETLT;
				4325	break;
				4326	case Intrinsic::x86_sse_ucomile_ss:
				4327	case Intrinsic::x86_sse2_ucomile_sd:
				4328	Opc = X86ISD::UCOMI;
				4329	CC = ISD::SETLE;
				4330	break;
				4331	case Intrinsic::x86_sse_ucomigt_ss:
				4332	case Intrinsic::x86_sse2_ucomigt_sd:
				4333	Opc = X86ISD::UCOMI;
				4334	CC = ISD::SETGT;
				4335	break;
				4336	case Intrinsic::x86_sse_ucomige_ss:
				4337	case Intrinsic::x86_sse2_ucomige_sd:
				4338	Opc = X86ISD::UCOMI;
				4339	CC = ISD::SETGE;
				4340	break;
				4341	case Intrinsic::x86_sse_ucomineq_ss:
				4342	case Intrinsic::x86_sse2_ucomineq_sd:
				4343	Opc = X86ISD::UCOMI;
				4344	CC = ISD::SETNE;
				4345	break;
				4346	}
				4347
				4348	unsigned X86CC;
				4349	SDOperand LHS = Op.getOperand(1);
				4350	SDOperand RHS = Op.getOperand(2);
				4351	translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
				4352
				4353	const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
				4354	SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
				4355	SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
				4356	VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
				4357	SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
				4358	SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
				4359	return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
				4360	}
				4361	}
				4362	}
				4363
				4364	SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
				4365	// Depths > 0 not supported yet!
				4366	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4367	return SDOperand();
				4368
				4369	// Just load the return address
				4370	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4371	return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
				4372	}
				4373
				4374	SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
				4375	// Depths > 0 not supported yet!
				4376	if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
				4377	return SDOperand();
				4378
				4379	SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
				4380	return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
				4381	DAG.getConstant(4, getPointerTy()));
				4382	}
				4383
				4384	SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
				4385	SelectionDAG &DAG) {
				4386	// Is not yet supported on x86-64
				4387	if (Subtarget->is64Bit())
				4388	return SDOperand();
				4389
				4390	return DAG.getConstant(8, getPointerTy());
				4391	}
				4392
				4393	SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
				4394	{
				4395	assert(!Subtarget->is64Bit() &&
				4396	"Lowering of eh_return builtin is not supported yet on x86-64");
				4397
				4398	MachineFunction &MF = DAG.getMachineFunction();
				4399	SDOperand Chain = Op.getOperand(0);
				4400	SDOperand Offset = Op.getOperand(1);
				4401	SDOperand Handler = Op.getOperand(2);
				4402
				4403	SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF),
				4404	getPointerTy());
				4405
				4406	SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
				4407	DAG.getConstant(-4UL, getPointerTy()));
				4408	StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
				4409	Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
				4410	Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
				4411	MF.addLiveOut(X86::ECX);
				4412
				4413	return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
				4414	Chain, DAG.getRegister(X86::ECX, getPointerTy()));
				4415	}
				4416
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4417	SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
				4418	SelectionDAG &DAG) {
				4419	SDOperand Root = Op.getOperand(0);
				4420	SDOperand Trmp = Op.getOperand(1); // trampoline
				4421	SDOperand FPtr = Op.getOperand(2); // nested function
				4422	SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
				4423
				4424	SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
				4425
				4426	if (Subtarget->is64Bit()) {
				4427	return SDOperand(); // not yet supported
				4428	} else {
				4429	Function Func = (Function )
				4430	cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
				4431	unsigned CC = Func->getCallingConv();
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4432	unsigned NestReg;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4433
				4434	switch (CC) {
				4435	default:
				4436	assert(0 && "Unsupported calling convention");
				4437	case CallingConv::C:
				4438	case CallingConv::Fast:
				4439	case CallingConv::X86_StdCall: {
				4440	// Pass 'nest' parameter in ECX.
				4441	// Must be kept in sync with X86CallingConv.td
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4442	NestReg = X86::ECX;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4443
				4444	// Check that ECX wasn't needed by an 'inreg' parameter.
				4445	const FunctionType *FTy = Func->getFunctionType();
				4446	const ParamAttrsList *Attrs = FTy->getParamAttrs();
				4447
				4448	if (Attrs && !Func->isVarArg()) {
				4449	unsigned InRegCount = 0;
				4450	unsigned Idx = 1;
				4451
				4452	for (FunctionType::param_iterator I = FTy->param_begin(),
				4453	E = FTy->param_end(); I != E; ++I, ++Idx)
				4454	if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
				4455	// FIXME: should only count parameters that are lowered to integers.
				4456	InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
				4457
				4458	if (InRegCount > 2) {
				4459	cerr << "Nest register in use - reduce number of inreg parameters!\n";
				4460	abort();
				4461	}
				4462	}
				4463	break;
				4464	}
				4465	case CallingConv::X86_FastCall:
				4466	// Pass 'nest' parameter in EAX.
				4467	// Must be kept in sync with X86CallingConv.td
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4468	NestReg = X86::EAX;
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4469	break;
				4470	}
				4471
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4472	const X86InstrInfo *TII =
				4473	((X86TargetMachine&)getTargetMachine()).getInstrInfo();
				4474
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4475	SDOperand OutChains[4];
				4476	SDOperand Addr, Disp;
				4477
				4478	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
				4479	Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
				4480
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4481	unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
				4482	unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg);
				4483	OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri\|N86Reg, MVT::i8),
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4484	Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
				4485
				4486	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
				4487	OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
				4488	TrmpSV->getOffset() + 1, false, 1);
				4489
Duncan Sands	466eadd	2007-08-29 19:01:20 +0000	[diff] [blame]	4490	unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4491	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
				4492	OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
				4493	TrmpSV->getValue() + 5, TrmpSV->getOffset());
				4494
				4495	Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
				4496	OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
				4497	TrmpSV->getOffset() + 6, false, 1);
				4498
Duncan Sands	7407a9f	2007-09-11 14:10:23 +0000	[diff] [blame]	4499	SDOperand Ops[] =
				4500	{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
				4501	return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4502	}
				4503	}
				4504
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4505	/// LowerOperation - Provide custom lowering hooks for some operations.
				4506	///
				4507	SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
				4508	switch (Op.getOpcode()) {
				4509	default: assert(0 && "Should not custom lower this!");
				4510	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
				4511	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
				4512	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
				4513	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
				4514	case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
				4515	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
				4516	case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
				4517	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
				4518	case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
				4519	case ISD::SHL_PARTS:
				4520	case ISD::SRA_PARTS:
				4521	case ISD::SRL_PARTS: return LowerShift(Op, DAG);
				4522	case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
				4523	case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
				4524	case ISD::FABS: return LowerFABS(Op, DAG);
				4525	case ISD::FNEG: return LowerFNEG(Op, DAG);
				4526	case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
				4527	case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
				4528	case ISD::SELECT: return LowerSELECT(Op, DAG);
				4529	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				4530	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
				4531	case ISD::CALL: return LowerCALL(Op, DAG);
				4532	case ISD::RET: return LowerRET(Op, DAG);
				4533	case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
				4534	case ISD::MEMSET: return LowerMEMSET(Op, DAG);
				4535	case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
				4536	case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
				4537	case ISD::VASTART: return LowerVASTART(Op, DAG);
				4538	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
				4539	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				4540	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
				4541	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
				4542	case ISD::FRAME_TO_ARGS_OFFSET:
				4543	return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
				4544	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
				4545	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
Duncan Sands	d8455ca	2007-07-27 20:02:49 +0000	[diff] [blame]	4546	case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4547	}
				4548	return SDOperand();
				4549	}
				4550
				4551	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
				4552	switch (Opcode) {
				4553	default: return NULL;
				4554	case X86ISD::SHLD: return "X86ISD::SHLD";
				4555	case X86ISD::SHRD: return "X86ISD::SHRD";
				4556	case X86ISD::FAND: return "X86ISD::FAND";
				4557	case X86ISD::FOR: return "X86ISD::FOR";
				4558	case X86ISD::FXOR: return "X86ISD::FXOR";
				4559	case X86ISD::FSRL: return "X86ISD::FSRL";
				4560	case X86ISD::FILD: return "X86ISD::FILD";
				4561	case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
				4562	case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
				4563	case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
				4564	case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
				4565	case X86ISD::FLD: return "X86ISD::FLD";
				4566	case X86ISD::FST: return "X86ISD::FST";
				4567	case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
				4568	case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
				4569	case X86ISD::CALL: return "X86ISD::CALL";
				4570	case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
				4571	case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
				4572	case X86ISD::CMP: return "X86ISD::CMP";
				4573	case X86ISD::COMI: return "X86ISD::COMI";
				4574	case X86ISD::UCOMI: return "X86ISD::UCOMI";
				4575	case X86ISD::SETCC: return "X86ISD::SETCC";
				4576	case X86ISD::CMOV: return "X86ISD::CMOV";
				4577	case X86ISD::BRCOND: return "X86ISD::BRCOND";
				4578	case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
				4579	case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
				4580	case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4581	case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
				4582	case X86ISD::Wrapper: return "X86ISD::Wrapper";
				4583	case X86ISD::S2VEC: return "X86ISD::S2VEC";
				4584	case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
				4585	case X86ISD::PINSRW: return "X86ISD::PINSRW";
				4586	case X86ISD::FMAX: return "X86ISD::FMAX";
				4587	case X86ISD::FMIN: return "X86ISD::FMIN";
				4588	case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
				4589	case X86ISD::FRCP: return "X86ISD::FRCP";
				4590	case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
				4591	case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
				4592	case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
				4593	}
				4594	}
				4595
				4596	// isLegalAddressingMode - Return true if the addressing mode represented
				4597	// by AM is legal for this target, for a load/store of the specified type.
				4598	bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
				4599	const Type *Ty) const {
				4600	// X86 supports extremely general addressing modes.
				4601
				4602	// X86 allows a sign-extended 32-bit immediate field as a displacement.
				4603	if (AM.BaseOffs <= -(1LL << 32) \|\| AM.BaseOffs >= (1LL << 32)-1)
				4604	return false;
				4605
				4606	if (AM.BaseGV) {
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4607	// We can only fold this if we don't need an extra load.
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4608	if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
				4609	return false;
Evan Cheng	6a1f3f1	2007-08-01 23:46:47 +0000	[diff] [blame]	4610
				4611	// X86-64 only supports addr of globals in small code model.
				4612	if (Subtarget->is64Bit()) {
				4613	if (getTargetMachine().getCodeModel() != CodeModel::Small)
				4614	return false;
				4615	// If lower 4G is not available, then we must use rip-relative addressing.
				4616	if (AM.BaseOffs \|\| AM.Scale > 1)
				4617	return false;
				4618	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4619	}
				4620
				4621	switch (AM.Scale) {
				4622	case 0:
				4623	case 1:
				4624	case 2:
				4625	case 4:
				4626	case 8:
				4627	// These scales always work.
				4628	break;
				4629	case 3:
				4630	case 5:
				4631	case 9:
				4632	// These scales are formed with basereg+scalereg. Only accept if there is
				4633	// no basereg yet.
				4634	if (AM.HasBaseReg)
				4635	return false;
				4636	break;
				4637	default: // Other stuff never works.
				4638	return false;
				4639	}
				4640
				4641	return true;
				4642	}
				4643
				4644
				4645	/// isShuffleMaskLegal - Targets can use this to indicate that they only
				4646	/// support some VECTOR_SHUFFLE operations, those with specific masks.
				4647	/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
				4648	/// are assumed to be legal.
				4649	bool
				4650	X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
				4651	// Only do shuffles on 128-bit vector types for now.
				4652	if (MVT::getSizeInBits(VT) == 64) return false;
				4653	return (Mask.Val->getNumOperands() <= 4 \|\|
				4654	isIdentityMask(Mask.Val) \|\|
				4655	isIdentityMask(Mask.Val, true) \|\|
				4656	isSplatMask(Mask.Val) \|\|
				4657	isPSHUFHW_PSHUFLWMask(Mask.Val) \|\|
				4658	X86::isUNPCKLMask(Mask.Val) \|\|
				4659	X86::isUNPCKHMask(Mask.Val) \|\|
				4660	X86::isUNPCKL_v_undef_Mask(Mask.Val) \|\|
				4661	X86::isUNPCKH_v_undef_Mask(Mask.Val));
				4662	}
				4663
				4664	bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
				4665	MVT::ValueType EVT,
				4666	SelectionDAG &DAG) const {
				4667	unsigned NumElts = BVOps.size();
				4668	// Only do shuffles on 128-bit vector types for now.
				4669	if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
				4670	if (NumElts == 2) return true;
				4671	if (NumElts == 4) {
				4672	return (isMOVLMask(&BVOps[0], 4) \|\|
				4673	isCommutedMOVL(&BVOps[0], 4, true) \|\|
				4674	isSHUFPMask(&BVOps[0], 4) \|\|
				4675	isCommutedSHUFP(&BVOps[0], 4));
				4676	}
				4677	return false;
				4678	}
				4679
				4680	//===----------------------------------------------------------------------===//
				4681	// X86 Scheduler Hooks
				4682	//===----------------------------------------------------------------------===//
				4683
				4684	MachineBasicBlock *
				4685	X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
				4686	MachineBasicBlock *BB) {
				4687	const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
				4688	switch (MI->getOpcode()) {
				4689	default: assert(false && "Unexpected instr type to insert");
				4690	case X86::CMOV_FR32:
				4691	case X86::CMOV_FR64:
				4692	case X86::CMOV_V4F32:
				4693	case X86::CMOV_V2F64:
				4694	case X86::CMOV_V2I64: {
				4695	// To "insert" a SELECT_CC instruction, we actually have to insert the
				4696	// diamond control-flow pattern. The incoming instruction knows the
				4697	// destination vreg to set, the condition code register to branch on, the
				4698	// true/false values to select between, and a branch opcode to use.
				4699	const BasicBlock *LLVM_BB = BB->getBasicBlock();
				4700	ilist<MachineBasicBlock>::iterator It = BB;
				4701	++It;
				4702
				4703	// thisMBB:
				4704	// ...
				4705	// TrueVal = ...
				4706	// cmpTY ccX, r1, r2
				4707	// bCC copy1MBB
				4708	// fallthrough --> copy0MBB
				4709	MachineBasicBlock *thisMBB = BB;
				4710	MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
				4711	MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
				4712	unsigned Opc =
				4713	X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
				4714	BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
				4715	MachineFunction *F = BB->getParent();
				4716	F->getBasicBlockList().insert(It, copy0MBB);
				4717	F->getBasicBlockList().insert(It, sinkMBB);
				4718	// Update machine-CFG edges by first adding all successors of the current
				4719	// block to the new block which will contain the Phi node for the select.
				4720	for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
				4721	e = BB->succ_end(); i != e; ++i)
				4722	sinkMBB->addSuccessor(*i);
				4723	// Next, remove all successors of the current block, and add the true
				4724	// and fallthrough blocks as its successors.
				4725	while(!BB->succ_empty())
				4726	BB->removeSuccessor(BB->succ_begin());
				4727	BB->addSuccessor(copy0MBB);
				4728	BB->addSuccessor(sinkMBB);
				4729
				4730	// copy0MBB:
				4731	// %FalseValue = ...
				4732	// # fallthrough to sinkMBB
				4733	BB = copy0MBB;
				4734
				4735	// Update machine-CFG edges
				4736	BB->addSuccessor(sinkMBB);
				4737
				4738	// sinkMBB:
				4739	// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
				4740	// ...
				4741	BB = sinkMBB;
				4742	BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
				4743	.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
				4744	.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
				4745
				4746	delete MI; // The pseudo instruction is gone now.
				4747	return BB;
				4748	}
				4749
				4750	case X86::FP32_TO_INT16_IN_MEM:
				4751	case X86::FP32_TO_INT32_IN_MEM:
				4752	case X86::FP32_TO_INT64_IN_MEM:
				4753	case X86::FP64_TO_INT16_IN_MEM:
				4754	case X86::FP64_TO_INT32_IN_MEM:
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4755	case X86::FP64_TO_INT64_IN_MEM:
				4756	case X86::FP80_TO_INT16_IN_MEM:
				4757	case X86::FP80_TO_INT32_IN_MEM:
				4758	case X86::FP80_TO_INT64_IN_MEM: {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4759	// Change the floating point control register to use "round towards zero"
				4760	// mode when truncating to an integer value.
				4761	MachineFunction *F = BB->getParent();
				4762	int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
				4763	addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
				4764
				4765	// Load the old value of the high byte of the control word...
				4766	unsigned OldCW =
				4767	F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
				4768	addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
				4769
				4770	// Set the high part to be round to zero...
				4771	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
				4772	.addImm(0xC7F);
				4773
				4774	// Reload the modified control word now...
				4775	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4776
				4777	// Restore the memory image of control word to original value
				4778	addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
				4779	.addReg(OldCW);
				4780
				4781	// Get the X86 opcode to use.
				4782	unsigned Opc;
				4783	switch (MI->getOpcode()) {
				4784	default: assert(0 && "illegal opcode!");
				4785	case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
				4786	case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
				4787	case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
				4788	case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
				4789	case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
				4790	case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
Dale Johannesen	6d0e36a	2007-08-07 01:17:37 +0000	[diff] [blame]	4791	case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
				4792	case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
				4793	case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4794	}
				4795
				4796	X86AddressMode AM;
				4797	MachineOperand &Op = MI->getOperand(0);
				4798	if (Op.isRegister()) {
				4799	AM.BaseType = X86AddressMode::RegBase;
				4800	AM.Base.Reg = Op.getReg();
				4801	} else {
				4802	AM.BaseType = X86AddressMode::FrameIndexBase;
				4803	AM.Base.FrameIndex = Op.getFrameIndex();
				4804	}
				4805	Op = MI->getOperand(1);
				4806	if (Op.isImmediate())
				4807	AM.Scale = Op.getImm();
				4808	Op = MI->getOperand(2);
				4809	if (Op.isImmediate())
				4810	AM.IndexReg = Op.getImm();
				4811	Op = MI->getOperand(3);
				4812	if (Op.isGlobalAddress()) {
				4813	AM.GV = Op.getGlobal();
				4814	} else {
				4815	AM.Disp = Op.getImm();
				4816	}
				4817	addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
				4818	.addReg(MI->getOperand(4).getReg());
				4819
				4820	// Reload the original control word now.
				4821	addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
				4822
				4823	delete MI; // The pseudo instruction is gone now.
				4824	return BB;
				4825	}
				4826	}
				4827	}
				4828
				4829	//===----------------------------------------------------------------------===//
				4830	// X86 Optimization Hooks
				4831	//===----------------------------------------------------------------------===//
				4832
				4833	void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
				4834	uint64_t Mask,
				4835	uint64_t &KnownZero,
				4836	uint64_t &KnownOne,
				4837	const SelectionDAG &DAG,
				4838	unsigned Depth) const {
				4839	unsigned Opc = Op.getOpcode();
				4840	assert((Opc >= ISD::BUILTIN_OP_END \|\|
				4841	Opc == ISD::INTRINSIC_WO_CHAIN \|\|
				4842	Opc == ISD::INTRINSIC_W_CHAIN \|\|
				4843	Opc == ISD::INTRINSIC_VOID) &&
				4844	"Should use MaskedValueIsZero if you don't know whether Op"
				4845	" is a target node!");
				4846
				4847	KnownZero = KnownOne = 0; // Don't know anything.
				4848	switch (Opc) {
				4849	default: break;
				4850	case X86ISD::SETCC:
				4851	KnownZero \|= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
				4852	break;
				4853	}
				4854	}
				4855
				4856	/// getShuffleScalarElt - Returns the scalar element that will make up the ith
				4857	/// element of the result of the vector shuffle.
				4858	static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
				4859	MVT::ValueType VT = N->getValueType(0);
				4860	SDOperand PermMask = N->getOperand(2);
				4861	unsigned NumElems = PermMask.getNumOperands();
				4862	SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
				4863	i %= NumElems;
				4864	if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
				4865	return (i == 0)
				4866	? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4867	} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
				4868	SDOperand Idx = PermMask.getOperand(i);
				4869	if (Idx.getOpcode() == ISD::UNDEF)
				4870	return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
				4871	return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
				4872	}
				4873	return SDOperand();
				4874	}
				4875
				4876	/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
				4877	/// node is a GlobalAddress + an offset.
				4878	static bool isGAPlusOffset(SDNode N, GlobalValue &GA, int64_t &Offset) {
				4879	unsigned Opc = N->getOpcode();
				4880	if (Opc == X86ISD::Wrapper) {
				4881	if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
				4882	GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
				4883	return true;
				4884	}
				4885	} else if (Opc == ISD::ADD) {
				4886	SDOperand N1 = N->getOperand(0);
				4887	SDOperand N2 = N->getOperand(1);
				4888	if (isGAPlusOffset(N1.Val, GA, Offset)) {
				4889	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
				4890	if (V) {
				4891	Offset += V->getSignExtended();
				4892	return true;
				4893	}
				4894	} else if (isGAPlusOffset(N2.Val, GA, Offset)) {
				4895	ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
				4896	if (V) {
				4897	Offset += V->getSignExtended();
				4898	return true;
				4899	}
				4900	}
				4901	}
				4902	return false;
				4903	}
				4904
				4905	/// isConsecutiveLoad - Returns true if N is loading from an address of Base
				4906	/// + Dist * Size.
				4907	static bool isConsecutiveLoad(SDNode N, SDNode Base, int Dist, int Size,
				4908	MachineFrameInfo *MFI) {
				4909	if (N->getOperand(0).Val != Base->getOperand(0).Val)
				4910	return false;
				4911
				4912	SDOperand Loc = N->getOperand(1);
				4913	SDOperand BaseLoc = Base->getOperand(1);
				4914	if (Loc.getOpcode() == ISD::FrameIndex) {
				4915	if (BaseLoc.getOpcode() != ISD::FrameIndex)
				4916	return false;
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4917	int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
				4918	int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4919	int FS = MFI->getObjectSize(FI);
				4920	int BFS = MFI->getObjectSize(BFI);
				4921	if (FS != BFS \|\| FS != Size) return false;
				4922	return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
				4923	} else {
				4924	GlobalValue *GV1 = NULL;
				4925	GlobalValue *GV2 = NULL;
				4926	int64_t Offset1 = 0;
				4927	int64_t Offset2 = 0;
				4928	bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
				4929	bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
				4930	if (isGA1 && isGA2 && GV1 == GV2)
				4931	return Offset1 == (Offset2 + Dist*Size);
				4932	}
				4933
				4934	return false;
				4935	}
				4936
				4937	static bool isBaseAlignment16(SDNode Base, MachineFrameInfo MFI,
				4938	const X86Subtarget *Subtarget) {
				4939	GlobalValue *GV;
				4940	int64_t Offset;
				4941	if (isGAPlusOffset(Base, GV, Offset))
				4942	return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
				4943	else {
				4944	assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
Dan Gohman	53491e9	2007-07-23 20:24:29 +0000	[diff] [blame]	4945	int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4946	if (BFI < 0)
				4947	// Fixed objects do not specify alignment, however the offsets are known.
				4948	return ((Subtarget->getStackAlignment() % 16) == 0 &&
				4949	(MFI->getObjectOffset(BFI) % 16) == 0);
				4950	else
				4951	return MFI->getObjectAlignment(BFI) >= 16;
				4952	}
				4953	return false;
				4954	}
				4955
				4956
				4957	/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
				4958	/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
				4959	/// if the load addresses are consecutive, non-overlapping, and in the right
				4960	/// order.
				4961	static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
				4962	const X86Subtarget *Subtarget) {
				4963	MachineFunction &MF = DAG.getMachineFunction();
				4964	MachineFrameInfo *MFI = MF.getFrameInfo();
				4965	MVT::ValueType VT = N->getValueType(0);
				4966	MVT::ValueType EVT = MVT::getVectorElementType(VT);
				4967	SDOperand PermMask = N->getOperand(2);
				4968	int NumElems = (int)PermMask.getNumOperands();
				4969	SDNode *Base = NULL;
				4970	for (int i = 0; i < NumElems; ++i) {
				4971	SDOperand Idx = PermMask.getOperand(i);
				4972	if (Idx.getOpcode() == ISD::UNDEF) {
				4973	if (!Base) return SDOperand();
				4974	} else {
				4975	SDOperand Arg =
				4976	getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
				4977	if (!Arg.Val \|\| !ISD::isNON_EXTLoad(Arg.Val))
				4978	return SDOperand();
				4979	if (!Base)
				4980	Base = Arg.Val;
				4981	else if (!isConsecutiveLoad(Arg.Val, Base,
				4982	i, MVT::getSizeInBits(EVT)/8,MFI))
				4983	return SDOperand();
				4984	}
				4985	}
				4986
				4987	bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4988	LoadSDNode *LD = cast<LoadSDNode>(Base);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4989	if (isAlign16) {
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4990	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4991	LD->getSrcValueOffset(), LD->isVolatile());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4992	} else {
Dan Gohman	1182170	2007-07-27 17:16:43 +0000	[diff] [blame]	4993	return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
				4994	LD->getSrcValueOffset(), LD->isVolatile(),
				4995	LD->getAlignment());
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	4996	}
				4997	}
				4998
				4999	/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
				5000	static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
				5001	const X86Subtarget *Subtarget) {
				5002	SDOperand Cond = N->getOperand(0);
				5003
				5004	// If we have SSE[12] support, try to form min/max nodes.
				5005	if (Subtarget->hasSSE2() &&
				5006	(N->getValueType(0) == MVT::f32 \|\| N->getValueType(0) == MVT::f64)) {
				5007	if (Cond.getOpcode() == ISD::SETCC) {
				5008	// Get the LHS/RHS of the select.
				5009	SDOperand LHS = N->getOperand(1);
				5010	SDOperand RHS = N->getOperand(2);
				5011	ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
				5012
				5013	unsigned Opcode = 0;
				5014	if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
				5015	switch (CC) {
				5016	default: break;
				5017	case ISD::SETOLE: // (X <= Y) ? X : Y -> min
				5018	case ISD::SETULE:
				5019	case ISD::SETLE:
				5020	if (!UnsafeFPMath) break;
				5021	// FALL THROUGH.
				5022	case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
				5023	case ISD::SETLT:
				5024	Opcode = X86ISD::FMIN;
				5025	break;
				5026
				5027	case ISD::SETOGT: // (X > Y) ? X : Y -> max
				5028	case ISD::SETUGT:
				5029	case ISD::SETGT:
				5030	if (!UnsafeFPMath) break;
				5031	// FALL THROUGH.
				5032	case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
				5033	case ISD::SETGE:
				5034	Opcode = X86ISD::FMAX;
				5035	break;
				5036	}
				5037	} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
				5038	switch (CC) {
				5039	default: break;
				5040	case ISD::SETOGT: // (X > Y) ? Y : X -> min
				5041	case ISD::SETUGT:
				5042	case ISD::SETGT:
				5043	if (!UnsafeFPMath) break;
				5044	// FALL THROUGH.
				5045	case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
				5046	case ISD::SETGE:
				5047	Opcode = X86ISD::FMIN;
				5048	break;
				5049
				5050	case ISD::SETOLE: // (X <= Y) ? Y : X -> max
				5051	case ISD::SETULE:
				5052	case ISD::SETLE:
				5053	if (!UnsafeFPMath) break;
				5054	// FALL THROUGH.
				5055	case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
				5056	case ISD::SETLT:
				5057	Opcode = X86ISD::FMAX;
				5058	break;
				5059	}
				5060	}
				5061
				5062	if (Opcode)
				5063	return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
				5064	}
				5065
				5066	}
				5067
				5068	return SDOperand();
				5069	}
				5070
				5071
				5072	SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
				5073	DAGCombinerInfo &DCI) const {
				5074	SelectionDAG &DAG = DCI.DAG;
				5075	switch (N->getOpcode()) {
				5076	default: break;
				5077	case ISD::VECTOR_SHUFFLE:
				5078	return PerformShuffleCombine(N, DAG, Subtarget);
				5079	case ISD::SELECT:
				5080	return PerformSELECTCombine(N, DAG, Subtarget);
				5081	}
				5082
				5083	return SDOperand();
				5084	}
				5085
				5086	//===----------------------------------------------------------------------===//
				5087	// X86 Inline Assembly Support
				5088	//===----------------------------------------------------------------------===//
				5089
				5090	/// getConstraintType - Given a constraint letter, return the type of
				5091	/// constraint it is for this target.
				5092	X86TargetLowering::ConstraintType
				5093	X86TargetLowering::getConstraintType(const std::string &Constraint) const {
				5094	if (Constraint.size() == 1) {
				5095	switch (Constraint[0]) {
				5096	case 'A':
				5097	case 'r':
				5098	case 'R':
				5099	case 'l':
				5100	case 'q':
				5101	case 'Q':
				5102	case 'x':
				5103	case 'Y':
				5104	return C_RegisterClass;
				5105	default:
				5106	break;
				5107	}
				5108	}
				5109	return TargetLowering::getConstraintType(Constraint);
				5110	}
				5111
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5112	/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
				5113	/// vector. If it is invalid, don't add anything to Ops.
				5114	void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
				5115	char Constraint,
				5116	std::vector<SDOperand>&Ops,
				5117	SelectionDAG &DAG) {
				5118	SDOperand Result(0, 0);
				5119
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5120	switch (Constraint) {
				5121	default: break;
				5122	case 'I':
				5123	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5124	if (C->getValue() <= 31) {
				5125	Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
				5126	break;
				5127	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5128	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5129	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5130	case 'N':
				5131	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5132	if (C->getValue() <= 255) {
				5133	Result = DAG.getTargetConstant(C->getValue(), Op.getValueType());
				5134	break;
				5135	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5136	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5137	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5138	case 'i': {
				5139	// Literal immediates are always ok.
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5140	if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
				5141	Result = DAG.getTargetConstant(CST->getValue(), Op.getValueType());
				5142	break;
				5143	}
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5144
				5145	// If we are in non-pic codegen mode, we allow the address of a global (with
				5146	// an optional displacement) to be used with 'i'.
				5147	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
				5148	int64_t Offset = 0;
				5149
				5150	// Match either (GA) or (GA+C)
				5151	if (GA) {
				5152	Offset = GA->getOffset();
				5153	} else if (Op.getOpcode() == ISD::ADD) {
				5154	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5155	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5156	if (C && GA) {
				5157	Offset = GA->getOffset()+C->getValue();
				5158	} else {
				5159	C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
				5160	GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
				5161	if (C && GA)
				5162	Offset = GA->getOffset()+C->getValue();
				5163	else
				5164	C = 0, GA = 0;
				5165	}
				5166	}
				5167
				5168	if (GA) {
				5169	// If addressing this global requires a load (e.g. in PIC mode), we can't
				5170	// match.
				5171	if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
				5172	false))
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5173	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5174
				5175	Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
				5176	Offset);
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5177	Result = Op;
				5178	break;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5179	}
				5180
				5181	// Otherwise, not valid for this mode.
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5182	return;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5183	}
				5184	}
Chris Lattner	a531abc	2007-08-25 00:47:38 +0000	[diff] [blame]	5185
				5186	if (Result.Val) {
				5187	Ops.push_back(Result);
				5188	return;
				5189	}
				5190	return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5191	}
				5192
				5193	std::vector<unsigned> X86TargetLowering::
				5194	getRegClassForInlineAsmConstraint(const std::string &Constraint,
				5195	MVT::ValueType VT) const {
				5196	if (Constraint.size() == 1) {
				5197	// FIXME: not handling fp-stack yet!
				5198	switch (Constraint[0]) { // GCC X86 Constraint Letters
				5199	default: break; // Unknown constraint letter
				5200	case 'A': // EAX/EDX
				5201	if (VT == MVT::i32 \|\| VT == MVT::i64)
				5202	return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
				5203	break;
				5204	case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
				5205	case 'Q': // Q_REGS
				5206	if (VT == MVT::i32)
				5207	return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
				5208	else if (VT == MVT::i16)
				5209	return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
				5210	else if (VT == MVT::i8)
Evan Cheng	f85c10f	2007-08-13 23:27:11 +0000	[diff] [blame]	5211	return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	5212	break;
				5213	}
				5214	}
				5215
				5216	return std::vector<unsigned>();
				5217	}
				5218
				5219	std::pair<unsigned, const TargetRegisterClass*>
				5220	X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
				5221	MVT::ValueType VT) const {
				5222	// First, see if this is a constraint that directly corresponds to an LLVM
				5223	// register class.
				5224	if (Constraint.size() == 1) {
				5225	// GCC Constraint Letters
				5226	switch (Constraint[0]) {
				5227	default: break;
				5228	case 'r': // GENERAL_REGS
				5229	case 'R': // LEGACY_REGS
				5230	case 'l': // INDEX_REGS
				5231	if (VT == MVT::i64 && Subtarget->is64Bit())
				5232	return std::make_pair(0U, X86::GR64RegisterClass);
				5233	if (VT == MVT::i32)
				5234	return std::make_pair(0U, X86::GR32RegisterClass);
				5235	else if (VT == MVT::i16)
				5236	return std::make_pair(0U, X86::GR16RegisterClass);
				5237	else if (VT == MVT::i8)
				5238	return std::make_pair(0U, X86::GR8RegisterClass);
				5239	break;
				5240	case 'y': // MMX_REGS if MMX allowed.
				5241	if (!Subtarget->hasMMX()) break;
				5242	return std::make_pair(0U, X86::VR64RegisterClass);
				5243	break;
				5244	case 'Y': // SSE_REGS if SSE2 allowed
				5245	if (!Subtarget->hasSSE2()) break;
				5246	// FALL THROUGH.
				5247	case 'x': // SSE_REGS if SSE1 allowed
				5248	if (!Subtarget->hasSSE1()) break;
				5249
				5250	switch (VT) {
				5251	default: break;
				5252	// Scalar SSE types.
				5253	case MVT::f32:
				5254	case MVT::i32:
				5255	return std::make_pair(0U, X86::FR32RegisterClass);
				5256	case MVT::f64:
				5257	case MVT::i64:
				5258	return std::make_pair(0U, X86::FR64RegisterClass);
				5259	// Vector types.
				5260	case MVT::v16i8:
				5261	case MVT::v8i16:
				5262	case MVT::v4i32:
				5263	case MVT::v2i64:
				5264	case MVT::v4f32:
				5265	case MVT::v2f64:
				5266	return std::make_pair(0U, X86::VR128RegisterClass);
				5267	}
				5268	break;
				5269	}
				5270	}
				5271
				5272	// Use the default implementation in TargetLowering to convert the register
				5273	// constraint into a member of a register class.
				5274	std::pair<unsigned, const TargetRegisterClass*> Res;
				5275	Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
				5276
				5277	// Not found as a standard register?
				5278	if (Res.second == 0) {
				5279	// GCC calls "st(0)" just plain "st".
				5280	if (StringsEqualNoCase("{st}", Constraint)) {
				5281	Res.first = X86::ST0;
				5282	Res.second = X86::RSTRegisterClass;
				5283	}
				5284
				5285	return Res;
				5286	}
				5287
				5288	// Otherwise, check to see if this is a register class of the wrong value
				5289	// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
				5290	// turn into {ax},{dx}.
				5291	if (Res.second->hasType(VT))
				5292	return Res; // Correct type already, nothing to do.
				5293
				5294	// All of the single-register GCC register classes map their values onto
				5295	// 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
				5296	// really want an 8-bit or 32-bit register, map to the appropriate register
				5297	// class and return the appropriate register.
				5298	if (Res.second != X86::GR16RegisterClass)
				5299	return Res;
				5300
				5301	if (VT == MVT::i8) {
				5302	unsigned DestReg = 0;
				5303	switch (Res.first) {
				5304	default: break;
				5305	case X86::AX: DestReg = X86::AL; break;
				5306	case X86::DX: DestReg = X86::DL; break;
				5307	case X86::CX: DestReg = X86::CL; break;
				5308	case X86::BX: DestReg = X86::BL; break;
				5309	}
				5310	if (DestReg) {
				5311	Res.first = DestReg;
				5312	Res.second = Res.second = X86::GR8RegisterClass;
				5313	}
				5314	} else if (VT == MVT::i32) {
				5315	unsigned DestReg = 0;
				5316	switch (Res.first) {
				5317	default: break;
				5318	case X86::AX: DestReg = X86::EAX; break;
				5319	case X86::DX: DestReg = X86::EDX; break;
				5320	case X86::CX: DestReg = X86::ECX; break;
				5321	case X86::BX: DestReg = X86::EBX; break;
				5322	case X86::SI: DestReg = X86::ESI; break;
				5323	case X86::DI: DestReg = X86::EDI; break;
				5324	case X86::BP: DestReg = X86::EBP; break;
				5325	case X86::SP: DestReg = X86::ESP; break;
				5326	}
				5327	if (DestReg) {
				5328	Res.first = DestReg;
				5329	Res.second = Res.second = X86::GR32RegisterClass;
				5330	}
				5331	} else if (VT == MVT::i64) {
				5332	unsigned DestReg = 0;
				5333	switch (Res.first) {
				5334	default: break;
				5335	case X86::AX: DestReg = X86::RAX; break;
				5336	case X86::DX: DestReg = X86::RDX; break;
				5337	case X86::CX: DestReg = X86::RCX; break;
				5338	case X86::BX: DestReg = X86::RBX; break;
				5339	case X86::SI: DestReg = X86::RSI; break;
				5340	case X86::DI: DestReg = X86::RDI; break;
				5341	case X86::BP: DestReg = X86::RBP; break;
				5342	case X86::SP: DestReg = X86::RSP; break;
				5343	}
				5344	if (DestReg) {
				5345	Res.first = DestReg;
				5346	Res.second = Res.second = X86::GR64RegisterClass;
				5347	}
				5348	}
				5349
				5350	return Res;
				5351	}