Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

blob: 912a0fb356ed4666e333897d90a021cfc0c3918e [file] [log] [blame]

Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1	//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -- tablegen --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file describes the various pseudo instructions used by the compiler,
				11	// as well as Pat patterns used during instruction selection.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	//===----------------------------------------------------------------------===//
				16	// Pattern Matching Support
				17
				18	def GetLo32XForm : SDNodeXForm<imm, [{
				19	// Transformation function: get the low 32 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame]	20	return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	21	}]>;
				22
				23	def GetLo8XForm : SDNodeXForm<imm, [{
				24	// Transformation function: get the low 8 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame]	25	return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	26	}]>;
				27
				28
				29	//===----------------------------------------------------------------------===//
				30	// Random Pseudo Instructions.
				31
				32	// PIC base construction. This expands to code that looks like this:
				33	// call $next_inst
				34	// popl %destreg
				35	let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
				36	def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
				37	"", []>;
				38
				39
				40	// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
				41	// a stack adjustment and the codegen must know that they may modify the stack
				42	// pointer before prolog-epilog rewriting occurs.
				43	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				44	// sub / add which can clobber EFLAGS.
				45	let Defs = [ESP, EFLAGS], Uses = [ESP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	46	def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	47	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	48	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	49	Requires<[NotLP64]>;
				50	def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				51	"#ADJCALLSTACKUP",
				52	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				53	Requires<[NotLP64]>;
				54	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	55	def : Pat<(X86callseq_start timm:$amt1),
					// ADJCALLSTACKDOWN32 is defined with an empty pattern list, so it is
					// selected through this Pat; its second operand ($amt2) is fixed to 0.
				56	(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
				57
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	58
				59	// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
				60	// a stack adjustment and the codegen must know that they may modify the stack
				61	// pointer before prolog-epilog rewriting occurs.
				62	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				63	// sub / add which can clobber EFLAGS.
				64	let Defs = [RSP, EFLAGS], Uses = [RSP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	65	def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	66	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	67	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	68	Requires<[IsLP64]>;
				69	def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				70	"#ADJCALLSTACKUP",
				71	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				72	Requires<[IsLP64]>;
				73	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	74	def : Pat<(X86callseq_start timm:$amt1),
					// ADJCALLSTACKDOWN64 is defined with an empty pattern list, so it is
					// selected through this Pat; its second operand ($amt2) is fixed to 0.
				75	(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	76
				77
				78	// x86-64 va_start lowering magic.
				79	let usesCustomInserter = 1, Defs = [EFLAGS] in {
				80	def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
				81	(outs),
				82	(ins GR8:$al,
				83	i64imm:$regsavefi, i64imm:$offset,
				84	variable_ops),
				85	"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
				86	[(X86vastart_save_xmm_regs GR8:$al,
				87	imm:$regsavefi,
				88	imm:$offset),
				89	(implicit EFLAGS)]>;
				90
				91	// The VAARG_64 pseudo-instruction takes the address of the va_list,
				92	// and places the address of the next argument into a register.
				93	let Defs = [EFLAGS] in
				94	def VAARG_64 : I<0, Pseudo,
				95	(outs GR64:$dst),
				96	(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
				97	"#VAARG_64 $dst, $ap, $size, $mode, $align",
				98	[(set GR64:$dst,
				99	(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
				100	(implicit EFLAGS)]>;
				101
				102	// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
				103	// targets. These calls are needed to probe the stack when allocating more than
				104	// 4k bytes in one go. Touching the stack at 4K increments is necessary to
				105	// ensure that the guard pages used by the OS virtual memory manager are
				106	// allocated in correct sequence.
				107	// The main point of having a separate instruction is the extra unmodelled
				108	// effects (compared to ordinary calls), such as the stack pointer change.
				109
				110	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				111	def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
				112	"# dynamic stack allocation",
				113	[(X86WinAlloca)]>;
				114
				115	// When using segmented stacks these are lowered into instructions which first
				116	// check if the current stacklet has enough free memory. If it does, memory is
				117	// allocated by bumping the stack pointer. Otherwise memory is allocated from
				118	// the heap.
				119
				120	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				121	def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
				122	"# variable sized alloca for segmented stacks",
				123	[(set GR32:$dst,
				124	(X86SegAlloca GR32:$size))]>,
				125	Requires<[NotLP64]>;
				126
				127	let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
				128	def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
				129	"# variable sized alloca for segmented stacks",
				130	[(set GR64:$dst,
				131	(X86SegAlloca GR64:$size))]>,
				132	Requires<[In64BitMode]>;
				133	}
				134
				135	// The MSVC runtime contains an _ftol2 routine for converting floating-point
				136	// to integer values. It has a strange calling convention: the input is
				137	// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
				138	// used as a temporary register. No other registers (aside from flags) are
				139	// touched.
				140	// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
				141	// variant is unnecessary.
				142
				143	let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
				144	def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
				145	"# win32 fptoui",
				146	[(X86WinFTOL RFP32:$src)]>,
				147	Requires<[Not64BitMode]>;
				148
				149	def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
				150	"# win32 fptoui",
				151	[(X86WinFTOL RFP64:$src)]>,
				152	Requires<[Not64BitMode]>;
				153	}
				154
				155	//===----------------------------------------------------------------------===//
				156	// EH Pseudo Instructions
				157	//
				158	let SchedRW = [WriteSystem] in {
				159	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				160	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				161	def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
				162	"ret\t#eh_return, addr: $addr",
				163	[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				164
				165	}
				166
				167	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				168	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				169	def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
				170	"ret\t#eh_return, addr: $addr",
				171	[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				172
				173	}
				174
				175	let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
				176	usesCustomInserter = 1 in {
				177	def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
				178	"#EH_SJLJ_SETJMP32",
				179	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				180	Requires<[Not64BitMode]>;
				181	def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
				182	"#EH_SJLJ_SETJMP64",
				183	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				184	Requires<[In64BitMode]>;
				185	let isTerminator = 1 in {
				186	def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
				187	"#EH_SJLJ_LONGJMP32",
				188	[(X86eh_sjlj_longjmp addr:$buf)]>,
				189	Requires<[Not64BitMode]>;
				190	def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
				191	"#EH_SJLJ_LONGJMP64",
				192	[(X86eh_sjlj_longjmp addr:$buf)]>,
				193	Requires<[In64BitMode]>;
				194	}
				195	}
				196	} // SchedRW
				197
				198	let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
				199	def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
				200	"#EH_SjLj_Setup\t$dst", []>;
				201	}
				202
				203	//===----------------------------------------------------------------------===//
				204	// Pseudo instructions used by unwind info.
				205	//
				206	let isPseudo = 1 in {
				207	def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
				208	"#SEH_PushReg $reg", []>;
				209	def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				210	"#SEH_SaveReg $reg, $dst", []>;
				211	def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				212	"#SEH_SaveXMM $reg, $dst", []>;
				213	def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
				214	"#SEH_StackAlloc $size", []>;
				215	def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
				216	"#SEH_SetFrame $reg, $offset", []>;
				217	def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
				218	"#SEH_PushFrame $mode", []>;
				219	def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
				220	"#SEH_EndPrologue", []>;
				221	def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
				222	"#SEH_Epilogue", []>;
				223	}
				224
				225	//===----------------------------------------------------------------------===//
				226	// Pseudo instructions used by segmented stacks.
				227	//
				228
				229	// This is lowered into a RET instruction by MCInstLower. We need
				230	// this so that we don't have to have a MachineBasicBlock which ends
				231	// with a RET and also has successors.
				232	let isPseudo = 1 in {
				233	def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
				234	"", []>;
				235
				236	// This instruction is lowered to a RET followed by a MOV. The two
				237	// instructions are not generated on a higher level since then the
				238	// verifier sees a MachineBasicBlock ending with a non-terminator.
				239	def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
				240	"", []>;
				241	}
				242
				243	//===----------------------------------------------------------------------===//
				244	// Alias Instructions
				245	//===----------------------------------------------------------------------===//
				246
				247	// Alias instruction mapping movr0 to xor.
				248	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
				249	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
				250	isPseudo = 1 in
				251	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				252	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
				253
				254	// Other widths can also make use of the 32-bit xor, which may have a smaller
				255	// encoding and avoid partial register updates.
				256	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
				257	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
				258	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
				259	let AddedComplexity = 20;
				260	}
				261
				262	// Materialize i64 constant where top 32-bits are zero. This could theoretically
				263	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
				264	// that would make it more difficult to rematerialize.
				265	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
				266	isCodeGenOnly = 1, hasSideEffects = 0 in
				267	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
				268	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
				269
				270	// This 64-bit pseudo-move can be used for both a 64-bit constant that is
				271	// actually the zero-extension of a 32-bit constant, and for labels in the
				272	// x86-64 small code model.
				273	def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
				274
				275	let AddedComplexity = 1 in
				276	def : Pat<(i64 mov64imm32:$src),
				277	(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
				278
				279	// Use sbb to materialize the carry bit.
				280	let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
				281	// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
				282	// However, Pat<> can't replicate the destination reg into the inputs of the
				283	// result.
				284	def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
				285	[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				286	def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
				287	[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				288	def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				289	[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				290	def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
				291	[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				292	} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]
				293
				294
				295	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				296	(SETB_C16r)>;
				297	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				298	(SETB_C32r)>;
				299	def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				300	(SETB_C64r)>;
				301
				302	def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				303	(SETB_C16r)>;
				304	def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				305	(SETB_C32r)>;
				306	def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				307	(SETB_C64r)>;
				308
				309	// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
				310	// will be eliminated and that the sbb can be extended up to a wider type. When
				311	// this happens, it is great. However, if we are left with an 8-bit sbb and an
				312	// and, we might as well just match it as a setb.
				313	def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
				314	(SETBr)>;
				315
				316	// (add OP, SETB) -> (adc OP, 0)
				317	def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
				318	(ADC8ri GR8:$op, 0)>;
				319	def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
				320	(ADC32ri8 GR32:$op, 0)>;
				321	def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
				322	(ADC64ri8 GR64:$op, 0)>;
				323
				324	// (sub OP, SETB) -> (sbb OP, 0)
				325	def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				326	(SBB8ri GR8:$op, 0)>;
				327	def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				328	(SBB32ri8 GR32:$op, 0)>;
				329	def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				330	(SBB64ri8 GR64:$op, 0)>;
				331
				332	// (sub OP, SETCC_CARRY) -> (adc OP, 0)
				333	def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
				334	(ADC8ri GR8:$op, 0)>;
				335	def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
				336	(ADC32ri8 GR32:$op, 0)>;
				337	def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
				338	(ADC64ri8 GR64:$op, 0)>;
				339
				340	//===----------------------------------------------------------------------===//
				341	// String Pseudo Instructions
				342	//
				343	let SchedRW = [WriteMicrocoded] in {
				344	let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
				345	def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb\|rep movsb}",
				346	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				347	Requires<[Not64BitMode]>;
				348	def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw\|rep movsw}",
				349	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				350	Requires<[Not64BitMode]>;
				351	def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl\|rep movsd}",
				352	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				353	Requires<[Not64BitMode]>;
				354	}
				355
				356	let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
				357	def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb\|rep movsb}",
				358	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				359	Requires<[In64BitMode]>;
				360	def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw\|rep movsw}",
				361	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				362	Requires<[In64BitMode]>;
				363	def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl\|rep movsd}",
				364	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				365	Requires<[In64BitMode]>;
				366	def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq\|rep movsq}",
				367	[(X86rep_movs i64)], IIC_REP_MOVS>, REP,
				368	Requires<[In64BitMode]>;
				369	}
				370
				371	// FIXME: Should use "(X86rep_stos AL)" as the pattern.
				372	let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
				373	let Uses = [AL,ECX,EDI] in
				374	def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb\|rep stosb}",
				375	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				376	Requires<[Not64BitMode]>;
				377	let Uses = [AX,ECX,EDI] in
				378	def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw\|rep stosw}",
				379	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				380	Requires<[Not64BitMode]>;
				381	let Uses = [EAX,ECX,EDI] in
				382	def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl\|rep stosd}",
				383	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				384	Requires<[Not64BitMode]>;
				385	}
				386
				387	let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
				388	let Uses = [AL,RCX,RDI] in
				389	def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb\|rep stosb}",
				390	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				391	Requires<[In64BitMode]>;
				392	let Uses = [AX,RCX,RDI] in
				393	def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw\|rep stosw}",
				394	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				395	Requires<[In64BitMode]>;
				396	let Uses = [RAX,RCX,RDI] in
				397	def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl\|rep stosd}",
				398	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				399	Requires<[In64BitMode]>;
				400
				401	let Uses = [RAX,RCX,RDI] in
				402	def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq\|rep stosq}",
				403	[(X86rep_stos i64)], IIC_REP_STOS>, REP,
				404	Requires<[In64BitMode]>;
				405	}
				406	} // SchedRW
				407
				408	//===----------------------------------------------------------------------===//
				409	// Thread Local Storage Instructions
				410	//
				411
				412	// ELF TLS Support
				413	// All calls clobber the non-callee saved registers. ESP is marked as
				414	// a use to prevent stack-pointer assignments that appear immediately
				415	// before calls from potentially appearing dead.
				416	let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				417	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				418	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				419	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				420	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				421	Uses = [ESP] in {
				422	def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				423	"# TLS_addr32",
				424	[(X86tlsaddr tls32addr:$sym)]>,
				425	Requires<[Not64BitMode]>;
				426	def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				427	"# TLS_base_addr32",
				428	[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
				429	Requires<[Not64BitMode]>;
				430	}
				431
				432	// All calls clobber the non-callee saved registers. RSP is marked as
				433	// a use to prevent stack-pointer assignments that appear immediately
				434	// before calls from potentially appearing dead.
				435	let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
				436	FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				437	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				438	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				439	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				440	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				441	Uses = [RSP] in {
				442	def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				443	"# TLS_addr64",
				444	[(X86tlsaddr tls64addr:$sym)]>,
				445	Requires<[In64BitMode]>;
				446	def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				447	"# TLS_base_addr64",
				448	[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
				449	Requires<[In64BitMode]>;
				450	}
				451
				452	// Darwin TLS Support
				453	// For i386, the address of the thunk is passed on the stack, on return the
				454	// address of the variable is in %eax. %ecx is trashed during the function
				455	// call. All other registers are preserved.
				456	let Defs = [EAX, ECX, EFLAGS],
				457	Uses = [ESP],
				458	usesCustomInserter = 1 in
				459	def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				460	"# TLSCall_32",
				461	[(X86TLSCall addr:$sym)]>,
				462	Requires<[Not64BitMode]>;
				463
				464	// For x86_64, the address of the thunk is passed in %rdi, on return
				465	// the address of the variable is in %rax. All other registers are preserved.
				466	let Defs = [RAX, EFLAGS],
				467	Uses = [RSP, RDI],
				468	usesCustomInserter = 1 in
				469	def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				470	"# TLSCall_64",
				471	[(X86TLSCall addr:$sym)]>,
				472	Requires<[In64BitMode]>;
				473
				474
				475	//===----------------------------------------------------------------------===//
				476	// Conditional Move Pseudo Instructions
				477
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	478	// CMOV* - Used to implement the SELECT DAG operation. Expanded after
				479	// instruction selection into a branch sequence.
				480	multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
					// RC - register class used for $dst and for both value inputs ($t, $f).
					// VT - value type given to the X86cmov result in the selection pattern.
					// Each instantiation produces one record named CMOV#NAME whose pattern
					// takes an immediate $cond operand and reads EFLAGS.
				481	def CMOV#NAME : I<0, Pseudo,
				482	(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
				483	"#CMOV_"#NAME#" PSEUDO!",
				484	[(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,
				485	EFLAGS)))]>;
				486	}
				487
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	488	let usesCustomInserter = 1, Uses = [EFLAGS] in {
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	489	// X86 doesn't have 8-bit conditional moves. Use a customInserter to
				490	// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
				491	// however that requires promoting the operands, and can induce additional
				492	// i8 register pressure.
				493	defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	494
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	495	let Predicates = [NoCMov] in {
				496	defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
				497	defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
				498	} // Predicates = [NoCMov]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	499
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	500	// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
				501	// SSE1/SSE2.
				502	let Predicates = [FPStackf32] in
				503	defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	504
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	505	let Predicates = [FPStackf64] in
				506	defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
				507
				508	defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
				509
				510	defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
				511	defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
				512	defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
				513	defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
				514	defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
				515	defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
				516	defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
				517	defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
				518	defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
				519	defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
				520	defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
Elena Demikhovsky	c1ac5d7	2015-05-12 09:36:52 +0000	[diff] [blame]	521	defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;
				522	defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;
				523	defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;
				524	defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	525	} // usesCustomInserter = 1, Uses = [EFLAGS]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	526
				527	//===----------------------------------------------------------------------===//
				528	// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
				529	//===----------------------------------------------------------------------===//
				530
				531	// FIXME: Use normal instructions and add lock prefix dynamically.
				532
				533	// Memory barriers
				534
				535	// TODO: Get this to fold the constant into the instruction.
				536	let isCodeGenOnly = 1, Defs = [EFLAGS] in
				537	def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
				538	"or{l}\t{$zero, $dst\|$dst, $zero}",
				539	[], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK,
				540	Sched<[WriteALULd, WriteRMW]>;
				541
				542	let hasSideEffects = 1 in
				543	def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
				544	"#MEMBARRIER",
				545	[(X86MemBarrier)]>, Sched<[WriteLoad]>;
				546
				547	// RegOpc corresponds to the mr version of the instruction
				548	// ImmOpc corresponds to the mi version of the instruction
				549	// ImmOpc8 corresponds to the mi8 version of the instruction
				550	// ImmMod corresponds to the instruction format of the mi and mi8 versions
				551	multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
				552	Format ImmMod, string mnemonic> {
				553	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				554	SchedRW = [WriteALULd, WriteRMW] in {
				555
				556	def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				557	RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
				558	MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
				559	!strconcat(mnemonic, "{b}\t",
				560	"{$src2, $dst\|$dst, $src2}"),
				561	[], IIC_ALU_NONMEM>, LOCK;
				562	def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				563	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				564	MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
				565	!strconcat(mnemonic, "{w}\t",
				566	"{$src2, $dst\|$dst, $src2}"),
				567	[], IIC_ALU_NONMEM>, OpSize16, LOCK;
				568	def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				569	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				570	MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
				571	!strconcat(mnemonic, "{l}\t",
				572	"{$src2, $dst\|$dst, $src2}"),
				573	[], IIC_ALU_NONMEM>, OpSize32, LOCK;
				574	def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				575	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				576	MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
				577	!strconcat(mnemonic, "{q}\t",
				578	"{$src2, $dst\|$dst, $src2}"),
				579	[], IIC_ALU_NONMEM>, LOCK;
				580
				581	def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				582	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
				583	ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
				584	!strconcat(mnemonic, "{b}\t",
				585	"{$src2, $dst\|$dst, $src2}"),
				586	[], IIC_ALU_MEM>, LOCK;
				587
				588	def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				589	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				590	ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
				591	!strconcat(mnemonic, "{w}\t",
				592	"{$src2, $dst\|$dst, $src2}"),
				593	[], IIC_ALU_MEM>, OpSize16, LOCK;
				594
				595	def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				596	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				597	ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
				598	!strconcat(mnemonic, "{l}\t",
				599	"{$src2, $dst\|$dst, $src2}"),
				600	[], IIC_ALU_MEM>, OpSize32, LOCK;
				601
				602	def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				603	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				604	ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
				605	!strconcat(mnemonic, "{q}\t",
				606	"{$src2, $dst\|$dst, $src2}"),
				607	[], IIC_ALU_MEM>, LOCK;
				608
				609	def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				610	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				611	ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
				612	!strconcat(mnemonic, "{w}\t",
				613	"{$src2, $dst\|$dst, $src2}"),
				614	[], IIC_ALU_MEM>, OpSize16, LOCK;
				615	def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				616	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				617	ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
				618	!strconcat(mnemonic, "{l}\t",
				619	"{$src2, $dst\|$dst, $src2}"),
				620	[], IIC_ALU_MEM>, OpSize32, LOCK;
				621	def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				622	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				623	ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
				624	!strconcat(mnemonic, "{q}\t",
				625	"{$src2, $dst\|$dst, $src2}"),
				626	[], IIC_ALU_MEM>, LOCK;
				627
				628	}
				629
				630	}
				631
					// LOCK_ArithBinOp uses only bits 7..1 of each opcode argument and supplies
					// bit 0 itself: 0 for the 8-bit forms, 1 for the 16/32/64-bit forms (the
					// mi8 forms always get 1). The instantiations below share ImmOpc (0x80) and
					// ImmOpc8 (0x83) and differ in RegOpc, the ImmMod format and the mnemonic.
				632	defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
				633	defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
				634	defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
				635	defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
				636	defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
				637
				638	// LOCK-prefixed unary memory ops: optimized codegen when the non-memory output is not used.
				639	multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
				640	string mnemonic> {
				641	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				642	SchedRW = [WriteALULd, WriteRMW] in {
				643	// One record per width (8/16/32/64); Opc8 is used for the 8-bit form, Opc for the rest. The pattern list is empty ([]).
				644	def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
				645	!strconcat(mnemonic, "{b}\t$dst"),
				646	[], IIC_UNARY_MEM>, LOCK;
				647	def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
				648	!strconcat(mnemonic, "{w}\t$dst"),
				649	[], IIC_UNARY_MEM>, OpSize16, LOCK;
				650	def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
				651	!strconcat(mnemonic, "{l}\t$dst"),
				652	[], IIC_UNARY_MEM>, OpSize32, LOCK;
				653	def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
				654	!strconcat(mnemonic, "{q}\t$dst"),
				655	[], IIC_UNARY_MEM>, LOCK;
				656	} // Defs, mayLoad, mayStore, isCodeGenOnly, SchedRW
				657	} // multiclass LOCK_ArithUnOp
				658
				659	defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
				660	defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
				661
				662	// Atomic compare and swap.
				663	multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
				664	SDPatternOperator frag, X86MemOperand x86memop,
				665	InstrItinClass itin> {
				666	let isCodeGenOnly = 1 in {
				667	def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
				668	!strconcat(mnemonic, "\t$ptr"),
				669	[(frag addr:$ptr)], itin>, TB, LOCK;
				670	}
				671	}
				672
				673	multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
				674	string mnemonic, SDPatternOperator frag,
				675	InstrItinClass itin8, InstrItinClass itin> {
				676	let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
				677	let Defs = [AL, EFLAGS], Uses = [AL] in
				678	def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
				679	!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),
				680	[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
				681	let Defs = [AX, EFLAGS], Uses = [AX] in
				682	def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
				683	!strconcat(mnemonic, "{w}\t{$swap, $ptr\|$ptr, $swap}"),
				684	[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
				685	let Defs = [EAX, EFLAGS], Uses = [EAX] in
				686	def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
				687	!strconcat(mnemonic, "{l}\t{$swap, $ptr\|$ptr, $swap}"),
				688	[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
				689	let Defs = [RAX, EFLAGS], Uses = [RAX] in
				690	def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
				691	!strconcat(mnemonic, "{q}\t{$swap, $ptr\|$ptr, $swap}"),
				692	[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
				693	}
				694	}
				695
				696	let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
				697	SchedRW = [WriteALULd, WriteRMW] in {
				698	defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
				699	X86cas8, i64mem,
				700	IIC_CMPX_LOCK_8B>;
				701	}
				702
				703	let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
				704	Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
				705	defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
				706	X86cas16, i128mem,
				707	IIC_CMPX_LOCK_16B>, REX_W;
				708	}
				709
				710	defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
				711	X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
				712
				713	// Atomic exchange and add
				714	multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
				715	string frag,
				716	InstrItinClass itin8, InstrItinClass itin> {
				717	let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
				718	SchedRW = [WriteALULd, WriteRMW] in {
				719	def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
				720	(ins GR8:$val, i8mem:$ptr),
				721	!strconcat(mnemonic, "{b}\t{$val, $ptr\|$ptr, $val}"),
				722	[(set GR8:$dst,
				723	(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
				724	itin8>;
				725	def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
				726	(ins GR16:$val, i16mem:$ptr),
				727	!strconcat(mnemonic, "{w}\t{$val, $ptr\|$ptr, $val}"),
				728	[(set
				729	GR16:$dst,
				730	(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
				731	itin>, OpSize16;
				732	def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
				733	(ins GR32:$val, i32mem:$ptr),
				734	!strconcat(mnemonic, "{l}\t{$val, $ptr\|$ptr, $val}"),
				735	[(set
				736	GR32:$dst,
				737	(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
				738	itin>, OpSize32;
				739	def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
				740	(ins GR64:$val, i64mem:$ptr),
				741	!strconcat(mnemonic, "{q}\t{$val, $ptr\|$ptr, $val}"),
				742	[(set
				743	GR64:$dst,
				744	(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
				745	itin>;
				746	}
				747	}
				748
				749	defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
				750	IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
				751	TB, LOCK;
				752
				753	/* The following multiclass tries to make sure that in code like
				754	* x.store (immediate op x.load(acquire), release)
				755	* an operation directly on memory is generated instead of wasting a register.
				756	* It is not automatic as atomic_store/load are only lowered to MOV instructions
				757	* extremely late to prevent them from being accidentally reordered in the backend
				758	* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
				759	*/
				760	multiclass RELEASE_BINOP_MI<string op> {
				761	def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				762	"#RELEASE_BINOP PSEUDO!",
				763	[(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
				764	(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
				765	// NAME#16 is not generated as 16-bit arithmetic instructions are considered
				766	// costly and avoided as far as possible by this backend anyway
				767	def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				768	"#RELEASE_BINOP PSEUDO!",
				769	[(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
				770	(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
				771	def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				772	"#RELEASE_BINOP PSEUDO!",
				773	[(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
				774	(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
				775	}
				776	defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
				777	defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
				778	defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
				779	defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
				780	// Note: we don't deal with sub, because subtractions of constants are
				781	// optimized into additions before this code can run
				782
				783	multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
				784	def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
				785	"#RELEASE_UNOP PSEUDO!",
				786	[(atomic_store_8 addr:$dst, dag8)]>;
				787	def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
				788	"#RELEASE_UNOP PSEUDO!",
				789	[(atomic_store_16 addr:$dst, dag16)]>;
				790	def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
				791	"#RELEASE_UNOP PSEUDO!",
				792	[(atomic_store_32 addr:$dst, dag32)]>;
				793	def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
				794	"#RELEASE_UNOP PSEUDO!",
				795	[(atomic_store_64 addr:$dst, dag64)]>;
				796	}
				797
				798	defm RELEASE_INC : RELEASE_UNOP<
				799	(add (atomic_load_8 addr:$dst), (i8 1)),
				800	(add (atomic_load_16 addr:$dst), (i16 1)),
				801	(add (atomic_load_32 addr:$dst), (i32 1)),
				802	(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
				803	defm RELEASE_DEC : RELEASE_UNOP<
				804	(add (atomic_load_8 addr:$dst), (i8 -1)),
				805	(add (atomic_load_16 addr:$dst), (i16 -1)),
				806	(add (atomic_load_32 addr:$dst), (i32 -1)),
				807	(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
				808	/*
				809	TODO: These don't work because the type inference of TableGen fails.
				810	TODO: find a way to fix it.
				811	defm RELEASE_NEG : RELEASE_UNOP<
				812	(ineg (atomic_load_8 addr:$dst)),
				813	(ineg (atomic_load_16 addr:$dst)),
				814	(ineg (atomic_load_32 addr:$dst)),
				815	(ineg (atomic_load_64 addr:$dst))>;
				816	defm RELEASE_NOT : RELEASE_UNOP<
				817	(not (atomic_load_8 addr:$dst)),
				818	(not (atomic_load_16 addr:$dst)),
				819	(not (atomic_load_32 addr:$dst)),
				820	(not (atomic_load_64 addr:$dst))>;
				821	*/
				822
				823	def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				824	"#RELEASE_MOV PSEUDO !",
				825	[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
				826	def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
				827	"#RELEASE_MOV PSEUDO !",
				828	[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
				829	def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				830	"#RELEASE_MOV PSEUDO !",
				831	[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
				832	def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				833	"#RELEASE_MOV PSEUDO !",
				834	[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
				835
				836	def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
				837	"#RELEASE_MOV PSEUDO!",
				838	[(atomic_store_8 addr:$dst, GR8 :$src)]>;
				839	def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
				840	"#RELEASE_MOV PSEUDO!",
				841	[(atomic_store_16 addr:$dst, GR16:$src)]>;
				842	def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
				843	"#RELEASE_MOV PSEUDO!",
				844	[(atomic_store_32 addr:$dst, GR32:$src)]>;
				845	def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
				846	"#RELEASE_MOV PSEUDO!",
				847	[(atomic_store_64 addr:$dst, GR64:$src)]>;
				848
				849	def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
				850	"#ACQUIRE_MOV PSEUDO!",
				851	[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
				852	def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
				853	"#ACQUIRE_MOV PSEUDO!",
				854	[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
				855	def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
				856	"#ACQUIRE_MOV PSEUDO!",
				857	[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
				858	def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
				859	"#ACQUIRE_MOV PSEUDO!",
				860	[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	861
				862	//===----------------------------------------------------------------------===//
				863	// DAG Pattern Matching Rules
				864	//===----------------------------------------------------------------------===//
				865
				866	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
				867	def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
				868	def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
				869	def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
				870	def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
				871	def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				872	def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
				873
				874	def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
				875	(ADD32ri GR32:$src1, tconstpool:$src2)>;
				876	def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
				877	(ADD32ri GR32:$src1, tjumptable:$src2)>;
				878	def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
				879	(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
				880	def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
				881	(ADD32ri GR32:$src1, texternalsym:$src2)>;
				882	def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
				883	(ADD32ri GR32:$src1, tblockaddress:$src2)>;
				884
				885	def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				886	(MOV32mi addr:$dst, tglobaladdr:$src)>;
				887	def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
				888	(MOV32mi addr:$dst, texternalsym:$src)>;
				889	def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
				890	(MOV32mi addr:$dst, tblockaddress:$src)>;
				891
				892	// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
				893	// code model mode, should use 'movabs'. FIXME: This is really a hack, the
				894	// 'movabs' predicate should handle this sort of thing.
				895	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				896	(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
				897	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				898	(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
				899	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				900	(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
				901	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				902	(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
				903	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				904	(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
				905
				906	// In kernel code model, we can get the address of a label
				907	// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
				908	// the MOV64ri32 should accept these.
				909	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				910	(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
				911	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				912	(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
				913	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				914	(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
				915	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				916	(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
				917	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				918	(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
				919
				920	// If we have small model and -static mode, it is safe to store global addresses
				921	// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
				922	// for MOV64mi32 should handle this sort of thing.
				923	def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
				924	(MOV64mi32 addr:$dst, tconstpool:$src)>,
				925	Requires<[NearData, IsStatic]>;
				926	def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
				927	(MOV64mi32 addr:$dst, tjumptable:$src)>,
				928	Requires<[NearData, IsStatic]>;
				929	def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				930	(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
				931	Requires<[NearData, IsStatic]>;
				932	def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
				933	(MOV64mi32 addr:$dst, texternalsym:$src)>,
				934	Requires<[NearData, IsStatic]>;
				935	def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
				936	(MOV64mi32 addr:$dst, tblockaddress:$src)>,
				937	Requires<[NearData, IsStatic]>;
				938
				939	def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				940	def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
				941
				942	// Calls
				943
				944	// tls has some funny stuff here...
				945	// This corresponds to movq $foo@tpoff, %rax
				946	def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
				947	(MOV64ri32 tglobaltlsaddr :$dst)>;
				948	// This corresponds to add $foo@tpoff, %rax
				949	def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
				950	(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
				951
				952
				953	// Direct PC relative function call for small code model. 32-bit displacement
				954	// sign extended to 64-bit.
				955	def : Pat<(X86call (i64 tglobaladdr:$dst)),
				956	(CALL64pcrel32 tglobaladdr:$dst)>;
				957	def : Pat<(X86call (i64 texternalsym:$dst)),
				958	(CALL64pcrel32 texternalsym:$dst)>;
				959
				960	// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
				961	// can never use callee-saved registers. That is the purpose of the GR64_TC
				962	// register classes.
				963	//
				964	// The only volatile register that is never used by the calling convention is
				965	// %r11. This happens when calling a vararg function with 6 arguments.
				966	//
				967	// Match an X86tcret that uses fewer than 7 volatile registers (at most 6).
				968	def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
				969	(X86tcret node:$ptr, node:$off), [{
				970	// X86tcret args: (*chain, ptr, imm, regs..., glue)
				971	unsigned NumRegs = 0; // number of RegisterSDNode operands seen so far
				972	for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) // start at 3: skip chain, ptr, imm
				973	if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
				974	return false; // a 7th register operand was found: do not match
				975	return true;
				976	}]>;
				977
				978	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				979	(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
				980	Requires<[Not64BitMode]>;
				981
				982	// FIXME: This is disabled for 32-bit PIC mode because the global base
				983	// register which is part of the address mode may be assigned a
				984	// callee-saved register.
				985	def : Pat<(X86tcret (load addr:$dst), imm:$off),
				986	(TCRETURNmi addr:$dst, imm:$off)>,
				987	Requires<[Not64BitMode, IsNotPIC]>;
				988
				989	def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
				990	(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
				991	Requires<[NotLP64]>;
				992
				993	def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
				994	(TCRETURNdi texternalsym:$dst, imm:$off)>,
				995	Requires<[NotLP64]>;
				996
				997	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				998	(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
				999	Requires<[In64BitMode]>;
				1000
				1001	// Don't fold loads into X86tcret requiring more than 6 regs.
				1002	// There wouldn't be enough scratch registers for base+index.
				1003	def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
				1004	(TCRETURNmi64 addr:$dst, imm:$off)>,
				1005	Requires<[In64BitMode]>;
				1006
				1007	def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
				1008	(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
				1009	Requires<[IsLP64]>;
				1010
				1011	def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
				1012	(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
				1013	Requires<[IsLP64]>;
				1014
				1015	// Normal calls, with various flavors of addresses.
				1016	def : Pat<(X86call (i32 tglobaladdr:$dst)),
				1017	(CALLpcrel32 tglobaladdr:$dst)>;
				1018	def : Pat<(X86call (i32 texternalsym:$dst)),
				1019	(CALLpcrel32 texternalsym:$dst)>;
				1020	def : Pat<(X86call (i32 imm:$dst)),
				1021	(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
				1022
				1023	// Comparisons.
				1024
				1025	// TEST R,R is smaller than CMP R,0
				1026	def : Pat<(X86cmp GR8:$src1, 0),
				1027	(TEST8rr GR8:$src1, GR8:$src1)>;
				1028	def : Pat<(X86cmp GR16:$src1, 0),
				1029	(TEST16rr GR16:$src1, GR16:$src1)>;
				1030	def : Pat<(X86cmp GR32:$src1, 0),
				1031	(TEST32rr GR32:$src1, GR32:$src1)>;
				1032	def : Pat<(X86cmp GR64:$src1, 0),
				1033	(TEST64rr GR64:$src1, GR64:$src1)>;
				1034
				1035	// Conditional moves with folded loads with operands swapped and conditions
				1036	// inverted: the load is X86cmov's first operand but the instruction's last.
				1037	multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
				1038	Instruction Inst64> {
				1039	let Predicates = [HasCMov] in { // all three patterns require HasCMov
				1040	def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
				1041	(Inst16 GR16:$src2, addr:$src1)>;
				1042	def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
				1043	(Inst32 GR32:$src2, addr:$src1)>;
				1044	def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
				1045	(Inst64 GR64:$src2, addr:$src1)>;
				1046	} // Predicates = [HasCMov]
				1047	} // multiclass CMOVmr
				1048
				1049	defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
				1050	defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
				1051	defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
				1052	defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
				1053	defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
				1054	defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
				1055	defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
				1056	defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
				1057	defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
				1058	defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
				1059	defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
				1060	defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
				1061	defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
				1062	defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
				1063	defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
				1064	defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
				1065
				1066	// zextload bool -> zextload byte
Elena Demikhovsky	f61727d	2015-05-20 14:32:03 +0000	[diff] [blame]	1067	def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
				1068	def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
				1069	def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1070	def : Pat<(zextloadi64i1 addr:$src),
Elena Demikhovsky	f61727d	2015-05-20 14:32:03 +0000	[diff] [blame]	1071	(SUBREG_TO_REG (i64 0),
				1072	(AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1073
				1074	// extload bool -> extload byte
				1075	// When extloading from 16-bit and smaller memory locations into 64-bit
				1076	// registers, use zero-extending loads so that the entire 64-bit register is
				1077	// defined, avoiding partial-register updates.
				1078
				1079	def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1080	def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1081	def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1082	def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
				1083	def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
				1084	def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
				1085
				1086	// For other extloads, use subregs, since the high contents of the register are
				1087	// defined after an extload.
				1088	def : Pat<(extloadi64i1 addr:$src),
				1089	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1090	def : Pat<(extloadi64i8 addr:$src),
				1091	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1092	def : Pat<(extloadi64i16 addr:$src),
				1093	(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
				1094	def : Pat<(extloadi64i32 addr:$src),
				1095	(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
				1096
				1097	// anyext. Define these to do an explicit zero-extend to
				1098	// avoid partial-register updates.
				1099	def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
				1100	(MOVZX32rr8 GR8 :$src), sub_16bit)>;
				1101	def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
				1102
				1103	// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
				1104	def : Pat<(i32 (anyext GR16:$src)),
				1105	(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
				1106
				1107	def : Pat<(i64 (anyext GR8 :$src)),
				1108	(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
				1109	def : Pat<(i64 (anyext GR16:$src)),
				1110	(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
				1111	def : Pat<(i64 (anyext GR32:$src)),
				1112	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1113
				1114
				1115	// Any instruction that defines a 32-bit result zeroes the high half of the
				1116	// 64-bit register, with these exceptions: truncate can be lowered to
				1117	// EXTRACT_SUBREG, CopyFromReg may be copying from a truncate, and x86's cmov
				1118	// doesn't do anything if the condition is false. AssertSext nodes are also
				1119	// rejected. Any other 32-bit operation will zero-extend up to 64 bits.
				1120	def def32 : PatLeaf<(i32 GR32:$src), [{
				1121	return N->getOpcode() != ISD::TRUNCATE &&
				1122	N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
				1123	N->getOpcode() != ISD::CopyFromReg &&
				1124	N->getOpcode() != ISD::AssertSext &&
				1125	N->getOpcode() != X86ISD::CMOV;
				1126	}]>;
				1127
				1128	// In the case of a 32-bit def that is known to implicitly zero-extend,
				1129	// we can use a SUBREG_TO_REG.
				1130	def : Pat<(i64 (zext def32:$src)),
				1131	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1132
				1133	//===----------------------------------------------------------------------===//
				1134	// Pattern match OR as ADD
				1135	//===----------------------------------------------------------------------===//
				1136
				1137	// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
				1138	// 3-addressified into an LEA instruction to avoid copies. However, we also
				1139	// want to finally emit these instructions as an or at the end of the code
				1140	// generator to make the generated code easier to read. To do this, we select
				1141	// into "disjoint bits" pseudo ops.
				1142
				1143	// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
				1144	def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
				1145	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) // constant RHS: its bits must be known zero in the LHS
				1146	return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
				1147	// Non-constant RHS: compare the known-zero bits of both operands.
				1148	APInt KnownZero0, KnownOne0;
				1149	CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
				1150	APInt KnownZero1, KnownOne1;
				1151	CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
				1152	return (~KnownZero0 & ~KnownZero1) == 0; // no bit may be possibly-set in both operands
				1153	}]>;
				1154
				1155
				1156	// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
				1157	// Try this before selecting to OR.
				1158	let AddedComplexity = 5, SchedRW = [WriteALU] in {
				1159
				1160	let isConvertibleToThreeAddress = 1,
				1161	Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
				1162	let isCommutable = 1 in {
				1163	def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
				1164	"", // orw/addw REG, REG
				1165	[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
				1166	def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
				1167	"", // orl/addl REG, REG
				1168	[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
				1169	def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
				1170	"", // orq/addq REG, REG
				1171	[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
				1172	} // isCommutable
				1173
				1174	// NOTE: These are order specific, we want the ri8 forms to be listed
				1175	// first so that they are slightly preferred to the ri forms.
				1176
				1177	def ADD16ri8_DB : I<0, Pseudo,
				1178	(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
				1179	"", // orw/addw REG, imm8
				1180	[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
				1181	def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
				1182	"", // orw/addw REG, imm
				1183	[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
				1184
				1185	def ADD32ri8_DB : I<0, Pseudo,
				1186	(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
				1187	"", // orl/addl REG, imm8
				1188	[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
				1189	def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
				1190	"", // orl/addl REG, imm
				1191	[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
				1192
				1193
				1194	def ADD64ri8_DB : I<0, Pseudo,
				1195	(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
				1196	"", // orq/addq REG, imm8
				1197	[(set GR64:$dst, (or_is_add GR64:$src1,
				1198	i64immSExt8:$src2))]>;
				1199	def ADD64ri32_DB : I<0, Pseudo,
				1200	(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
				1201	"", // orq/addq REG, imm
				1202	[(set GR64:$dst, (or_is_add GR64:$src1,
				1203	i64immSExt32:$src2))]>;
				1204	}
				1205	} // AddedComplexity, SchedRW
				1206
				1207
				1208	//===----------------------------------------------------------------------===//
				1209	// Some peepholes
				1210	//===----------------------------------------------------------------------===//
				1211
				1212	// Odd encoding trick: -128 fits into an 8-bit immediate field while
				1213	// +128 doesn't, so in this special case use a sub instead of an add.
				1214	def : Pat<(add GR16:$src1, 128),
				1215	(SUB16ri8 GR16:$src1, -128)>;
				1216	def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
				1217	(SUB16mi8 addr:$dst, -128)>;
				1218
				1219	def : Pat<(add GR32:$src1, 128),
				1220	(SUB32ri8 GR32:$src1, -128)>;
				1221	def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
				1222	(SUB32mi8 addr:$dst, -128)>;
				1223
				1224	def : Pat<(add GR64:$src1, 128),
				1225	(SUB64ri8 GR64:$src1, -128)>;
				1226	def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
				1227	(SUB64mi8 addr:$dst, -128)>;
				1228
				1229	// The same trick applies for 32-bit immediate fields in 64-bit
				1230	// instructions: +2^31 does not fit a sign-extended imm32, but -2^31 does.
				1231	def : Pat<(add GR64:$src1, 0x0000000080000000),
				1232	(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
				1233	def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
				1234	(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
				1235
				1236	// To avoid needing to materialize an immediate in a register, use a 32-bit and
				1237	// with implicit zero-extension instead of a 64-bit and if the immediate has at
				1238	// least 32 bits of leading zeros. If in addition the last 32 bits can be
				1239	// represented with a sign extension of a 8 bit constant, use that.
Craig Topper	3d44178	2015-04-04 02:31:43 +0000	[diff] [blame]	1240	// This can also reduce instruction size by eliminating the need for the REX
				1241	// prefix.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1242
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1243	// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
				1244	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1245	def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
				1246	(SUBREG_TO_REG
				1247	(i64 0),
				1248	(AND32ri8
				1249	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1250	(i32 (GetLo8XForm imm:$imm))),
				1251	sub_32bit)>;
				1252
				1253	def : Pat<(and GR64:$src, i64immZExt32:$imm),
				1254	(SUBREG_TO_REG
				1255	(i64 0),
				1256	(AND32ri
				1257	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1258	(i32 (GetLo32XForm imm:$imm))),
				1259	sub_32bit)>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1260	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1261
				1262
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1263	// AddedComplexity is needed due to the increased complexity on the
				1264	// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
				1265	// the MOVZX patterns keeps them together in DAGIsel tables.
				1266	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1267	// r & (2^16-1) ==> movz
				1268	def : Pat<(and GR32:$src1, 0xffff),
				1269	(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
				1270	// r & (2^8-1) ==> movz
				1271	def : Pat<(and GR32:$src1, 0xff),
				1272	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
				1273	GR32_ABCD)),
				1274	sub_8bit))>,
				1275	Requires<[Not64BitMode]>;
				1276	// r & (2^8-1) ==> movz
				1277	def : Pat<(and GR16:$src1, 0xff),
				1278	(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
				1279	(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
				1280	sub_16bit)>,
				1281	Requires<[Not64BitMode]>;
				1282
				1283	// r & (2^32-1) ==> movz
				1284	def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
				1285	(SUBREG_TO_REG (i64 0),
				1286	(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
				1287	sub_32bit)>;
				1288	// r & (2^16-1) ==> movz
Craig Topper	901202873	2015-04-04 02:08:20 +0000	[diff] [blame]	1289	let AddedComplexity = 1 in // Give priority over i64immZExt32.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1290	def : Pat<(and GR64:$src, 0xffff),
				1291	(SUBREG_TO_REG (i64 0),
				1292	(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
				1293	sub_32bit)>;
				1294	// r & (2^8-1) ==> movz
				1295	def : Pat<(and GR64:$src, 0xff),
				1296	(SUBREG_TO_REG (i64 0),
				1297	(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
				1298	sub_32bit)>;
				1299	// r & (2^8-1) ==> movz
				1300	def : Pat<(and GR32:$src1, 0xff),
				1301	(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
				1302	Requires<[In64BitMode]>;
				1303	// r & (2^8-1) ==> movz
				1304	def : Pat<(and GR16:$src1, 0xff),
				1305	(EXTRACT_SUBREG (MOVZX32rr8 (i8
				1306	(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
				1307	Requires<[In64BitMode]>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1308	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1309
				1310
				1311	// sext_inreg patterns
				1312	def : Pat<(sext_inreg GR32:$src, i16),
				1313	(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
				1314	def : Pat<(sext_inreg GR32:$src, i8),
				1315	(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1316	GR32_ABCD)),
				1317	sub_8bit))>,
				1318	Requires<[Not64BitMode]>;
				1319	// i16 sext_inreg from i8 (not 64-bit mode): copy into GR16_ABCD, sign-extend its sub_8bit with MOVSX32rr8, keep sub_16bit. The copy is a 16-bit value, hence i16.
				1320	def : Pat<(sext_inreg GR16:$src, i8),
				1321	(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
				1322	(i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
				1323	sub_16bit)>,
				1324	Requires<[Not64BitMode]>;
				1325
				1326	def : Pat<(sext_inreg GR64:$src, i32),
				1327	(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
				1328	def : Pat<(sext_inreg GR64:$src, i16),
				1329	(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
				1330	def : Pat<(sext_inreg GR64:$src, i8),
				1331	(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
				1332	def : Pat<(sext_inreg GR32:$src, i8),
				1333	(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
				1334	Requires<[In64BitMode]>;
				1335	def : Pat<(sext_inreg GR16:$src, i8),
				1336	(EXTRACT_SUBREG (MOVSX32rr8
				1337	(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
				1338	Requires<[In64BitMode]>;
				1339
				1340	// sext, sext_load, zext, zext_load
				1341	def: Pat<(i16 (sext GR8:$src)),
				1342	(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
				1343	def: Pat<(sextloadi16i8 addr:$src),
				1344	(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
				1345	def: Pat<(i16 (zext GR8:$src)),
				1346	(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
				1347	def: Pat<(zextloadi16i8 addr:$src),
				1348	(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
				1349
				1350	// trunc patterns
				1351	def : Pat<(i16 (trunc GR32:$src)),
				1352	(EXTRACT_SUBREG GR32:$src, sub_16bit)>;
				1353	def : Pat<(i8 (trunc GR32:$src)),
				1354	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1355	sub_8bit)>,
				1356	Requires<[Not64BitMode]>;
				1357	def : Pat<(i8 (trunc GR16:$src)),
				1358	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1359	sub_8bit)>,
				1360	Requires<[Not64BitMode]>;
				1361	def : Pat<(i32 (trunc GR64:$src)),
				1362	(EXTRACT_SUBREG GR64:$src, sub_32bit)>;
				1363	def : Pat<(i16 (trunc GR64:$src)),
				1364	(EXTRACT_SUBREG GR64:$src, sub_16bit)>;
				1365	def : Pat<(i8 (trunc GR64:$src)),
				1366	(EXTRACT_SUBREG GR64:$src, sub_8bit)>;
				1367	def : Pat<(i8 (trunc GR32:$src)),
				1368	(EXTRACT_SUBREG GR32:$src, sub_8bit)>,
				1369	Requires<[In64BitMode]>;
				1370	def : Pat<(i8 (trunc GR16:$src)),
				1371	(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
				1372	Requires<[In64BitMode]>;
				1373
				1374	// h-register tricks
				1375	def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
				1376	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1377	sub_8bit_hi)>,
				1378	Requires<[Not64BitMode]>;
				1379	def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
				1380	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1381	sub_8bit_hi)>,
				1382	Requires<[Not64BitMode]>;
				1383	def : Pat<(srl GR16:$src, (i8 8)),
				1384	(EXTRACT_SUBREG
				1385	(MOVZX32rr8
				1386	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1387	sub_8bit_hi)),
				1388	sub_16bit)>,
				1389	Requires<[Not64BitMode]>;
				1390	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1391	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1392	GR16_ABCD)),
				1393	sub_8bit_hi))>,
				1394	Requires<[Not64BitMode]>;
				1395	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1396	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1397	GR16_ABCD)),
				1398	sub_8bit_hi))>,
				1399	Requires<[Not64BitMode]>;
				1400	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1401	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1402	GR32_ABCD)),
				1403	sub_8bit_hi))>,
				1404	Requires<[Not64BitMode]>;
				1405	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1406	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1407	GR32_ABCD)),
				1408	sub_8bit_hi))>,
				1409	Requires<[Not64BitMode]>;
				1410
				1411	// h-register tricks.
				1412	// For now, be conservative on x86-64 and use an h-register extract only if the
				1413	// value is immediately zero-extended or stored, which are somewhat common
				1414	// cases. This uses a bunch of code to prevent a register requiring a REX prefix
				1415	// from being allocated in the same instruction as the h register, as there's
				1416	// currently no way to describe this requirement to the register allocator.
				1417
				1418	// h-register extract and zero-extend.
				1419	def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
				1420	(SUBREG_TO_REG
				1421	(i64 0),
				1422	(MOVZX32_NOREXrr8
				1423	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1424	sub_8bit_hi)),
				1425	sub_32bit)>;
				1426	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1427	(MOVZX32_NOREXrr8
				1428	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1429	sub_8bit_hi))>,
				1430	Requires<[In64BitMode]>;
				1431	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1432	(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1433	GR32_ABCD)),
				1434	sub_8bit_hi))>,
				1435	Requires<[In64BitMode]>;
				1436	def : Pat<(srl GR16:$src, (i8 8)),
				1437	(EXTRACT_SUBREG
				1438	(MOVZX32_NOREXrr8
				1439	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1440	sub_8bit_hi)),
				1441	sub_16bit)>,
				1442	Requires<[In64BitMode]>;
				1443	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1444	(MOVZX32_NOREXrr8
				1445	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1446	sub_8bit_hi))>,
				1447	Requires<[In64BitMode]>;
				1448	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1449	(MOVZX32_NOREXrr8
				1450	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1451	sub_8bit_hi))>,
				1452	Requires<[In64BitMode]>;
				1453	def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
				1454	(SUBREG_TO_REG
				1455	(i64 0),
				1456	(MOVZX32_NOREXrr8
				1457	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1458	sub_8bit_hi)),
				1459	sub_32bit)>;
				1460	def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
				1461	(SUBREG_TO_REG
				1462	(i64 0),
				1463	(MOVZX32_NOREXrr8
				1464	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1465	sub_8bit_hi)),
				1466	sub_32bit)>;
				1467
				1468	// h-register extract and store.
				1469	def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
				1470	(MOV8mr_NOREX
				1471	addr:$dst,
				1472	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1473	sub_8bit_hi))>;
				1474	def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
				1475	(MOV8mr_NOREX
				1476	addr:$dst,
				1477	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1478	sub_8bit_hi))>,
				1479	Requires<[In64BitMode]>;
				1480	def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
				1481	(MOV8mr_NOREX
				1482	addr:$dst,
				1483	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1484	sub_8bit_hi))>,
				1485	Requires<[In64BitMode]>;
				1486
				1487
				1488	// (shl x, 1) ==> (add x, x)
				1489	// Note that if x is undef (immediate or otherwise), we could theoretically
				1490	// end up with the two uses of x getting different values, producing a result
				1491	// where the least significant bit is not 0. However, the probability of this
				1492	// happening is considered low enough that this is officially not a
				1493	// "real problem".
				1494	def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
				1495	def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
				1496	def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
				1497	def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
				1498
				1499	// Helper imms that check if a mask doesn't change significant shift bits: the mask's low 5 bits (immShift32) or low 6 bits (immShift64) must all be ones.
Benjamin Kramer	5f6a907	2015-02-12 15:35:40 +0000	[diff] [blame]	1500	def immShift32 : ImmLeaf<i8, [{
				1501	return countTrailingOnes<uint64_t>(Imm) >= 5;
				1502	}]>;
				1503	def immShift64 : ImmLeaf<i8, [{
				1504	return countTrailingOnes<uint64_t>(Imm) >= 6;
				1505	}]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1506
				1507	// Shift amount is implicitly masked, so an explicit mask on CL can be dropped. 'frag' is the shift/rotate node to match; 'name' is the instruction name prefix ("SHL", "SHR", ...) to which the size and operand suffix is appended.
				1508	multiclass MaskedShiftAmountPats<SDNode frag, string name> {
				1509	// (shift x (and y, m)) ==> (shift x, y) when m satisfies immShift32, e.g. m == 31.
				1510	def : Pat<(frag GR8:$src1, (and CL, immShift32)),
				1511	(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
				1512	def : Pat<(frag GR16:$src1, (and CL, immShift32)),
				1513	(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
				1514	def : Pat<(frag GR32:$src1, (and CL, immShift32)),
				1515	(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
				1516	def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
				1517	(!cast<Instruction>(name # "8mCL") addr:$dst)>;
				1518	def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
				1519	(!cast<Instruction>(name # "16mCL") addr:$dst)>;
				1520	def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
				1521	(!cast<Instruction>(name # "32mCL") addr:$dst)>;
				1522
				1523	// (shift x (and y, m)) ==> (shift x, y) when m satisfies immShift64, e.g. m == 63. The memory form uses the same leaf as the register form.
				1524	def : Pat<(frag GR64:$src1, (and CL, immShift64)),
				1525	(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
				1526	def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
				1527	(!cast<Instruction>(name # "64mCL") addr:$dst)>;
				1528	}
				1529
				1530	defm : MaskedShiftAmountPats<shl, "SHL">;
				1531	defm : MaskedShiftAmountPats<srl, "SHR">;
				1532	defm : MaskedShiftAmountPats<sra, "SAR">;
				1533	defm : MaskedShiftAmountPats<rotl, "ROL">;
				1534	defm : MaskedShiftAmountPats<rotr, "ROR">;
				1535
				1536	// (anyext (setcc_carry)) -> (setcc_carry)
				1537	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1538	(SETB_C16r)>;
				1539	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1540	(SETB_C32r)>;
				1541	def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
				1542	(SETB_C32r)>;
				1543
				1544
				1545
				1546
				1547	//===----------------------------------------------------------------------===//
				1548	// EFLAGS-defining Patterns
				1549	//===----------------------------------------------------------------------===//
				1550
				1551	// add reg, reg
				1552	def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
				1553	def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
				1554	def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
				1555
				1556	// add reg, mem
				1557	def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
				1558	(ADD8rm GR8:$src1, addr:$src2)>;
				1559	def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
				1560	(ADD16rm GR16:$src1, addr:$src2)>;
				1561	def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
				1562	(ADD32rm GR32:$src1, addr:$src2)>;
				1563
				1564	// add reg, imm
				1565	def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
				1566	def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
				1567	def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
				1568	def : Pat<(add GR16:$src1, i16immSExt8:$src2),
				1569	(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1570	def : Pat<(add GR32:$src1, i32immSExt8:$src2),
				1571	(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1572
				1573	// sub reg, reg
				1574	def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
				1575	def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
				1576	def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
				1577
				1578	// sub reg, mem
				1579	def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
				1580	(SUB8rm GR8:$src1, addr:$src2)>;
				1581	def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
				1582	(SUB16rm GR16:$src1, addr:$src2)>;
				1583	def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
				1584	(SUB32rm GR32:$src1, addr:$src2)>;
				1585
				1586	// sub reg, imm
				1587	def : Pat<(sub GR8:$src1, imm:$src2),
				1588	(SUB8ri GR8:$src1, imm:$src2)>;
				1589	def : Pat<(sub GR16:$src1, imm:$src2),
				1590	(SUB16ri GR16:$src1, imm:$src2)>;
				1591	def : Pat<(sub GR32:$src1, imm:$src2),
				1592	(SUB32ri GR32:$src1, imm:$src2)>;
				1593	def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
				1594	(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1595	def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
				1596	(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1597
				1598	// sub 0, reg
				1599	def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
				1600	def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
				1601	def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
				1602	def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
				1603
				1604	// mul reg, reg
				1605	def : Pat<(mul GR16:$src1, GR16:$src2),
				1606	(IMUL16rr GR16:$src1, GR16:$src2)>;
				1607	def : Pat<(mul GR32:$src1, GR32:$src2),
				1608	(IMUL32rr GR32:$src1, GR32:$src2)>;
				1609
				1610	// mul reg, mem
				1611	def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
				1612	(IMUL16rm GR16:$src1, addr:$src2)>;
				1613	def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
				1614	(IMUL32rm GR32:$src1, addr:$src2)>;
				1615
				1616	// mul reg, imm
				1617	def : Pat<(mul GR16:$src1, imm:$src2),
				1618	(IMUL16rri GR16:$src1, imm:$src2)>;
				1619	def : Pat<(mul GR32:$src1, imm:$src2),
				1620	(IMUL32rri GR32:$src1, imm:$src2)>;
				1621	def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
				1622	(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
				1623	def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
				1624	(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
				1625
				1626	// reg = mul mem, imm
				1627	def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
				1628	(IMUL16rmi addr:$src1, imm:$src2)>;
				1629	def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
				1630	(IMUL32rmi addr:$src1, imm:$src2)>;
				1631	def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
				1632	(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
				1633	def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
				1634	(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
				1635
				1636	// Patterns for nodes that do not produce flags, for instructions that do.
				1637
				1638	// addition
				1639	def : Pat<(add GR64:$src1, GR64:$src2),
				1640	(ADD64rr GR64:$src1, GR64:$src2)>;
				1641	def : Pat<(add GR64:$src1, i64immSExt8:$src2),
				1642	(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1643	def : Pat<(add GR64:$src1, i64immSExt32:$src2),
				1644	(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1645	def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
				1646	(ADD64rm GR64:$src1, addr:$src2)>;
				1647
				1648	// subtraction
				1649	def : Pat<(sub GR64:$src1, GR64:$src2),
				1650	(SUB64rr GR64:$src1, GR64:$src2)>;
				1651	def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
				1652	(SUB64rm GR64:$src1, addr:$src2)>;
				1653	def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
				1654	(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1655	def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
				1656	(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1657
				1658	// Multiply
				1659	def : Pat<(mul GR64:$src1, GR64:$src2),
				1660	(IMUL64rr GR64:$src1, GR64:$src2)>;
				1661	def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
				1662	(IMUL64rm GR64:$src1, addr:$src2)>;
				1663	def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
				1664	(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
				1665	def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
				1666	(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
				1667	def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
				1668	(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
				1669	def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
				1670	(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
				1671
				1672	// Increment/Decrement reg.
				1673	// Do not make INC/DEC if it is slow
				1674	let Predicates = [NotSlowIncDec] in {
				1675	def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
				1676	def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
				1677	def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
				1678	def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
				1679	def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
				1680	def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
				1681	def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
				1682	def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
				1683	}
				1684
				1685	// or reg/reg.
				1686	def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
				1687	def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
				1688	def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
				1689	def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
				1690
				1691	// or reg/mem
				1692	def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
				1693	(OR8rm GR8:$src1, addr:$src2)>;
				1694	def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
				1695	(OR16rm GR16:$src1, addr:$src2)>;
				1696	def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
				1697	(OR32rm GR32:$src1, addr:$src2)>;
				1698	def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
				1699	(OR64rm GR64:$src1, addr:$src2)>;
				1700
				1701	// or reg/imm
				1702	def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
				1703	def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
				1704	def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
				1705	def : Pat<(or GR16:$src1, i16immSExt8:$src2),
				1706	(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1707	def : Pat<(or GR32:$src1, i32immSExt8:$src2),
				1708	(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1709	def : Pat<(or GR64:$src1, i64immSExt8:$src2),
				1710	(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1711	def : Pat<(or GR64:$src1, i64immSExt32:$src2),
				1712	(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1713
				1714	// xor reg/reg
				1715	def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
				1716	def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
				1717	def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
				1718	def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
				1719
				1720	// xor reg/mem
				1721	def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
				1722	(XOR8rm GR8:$src1, addr:$src2)>;
				1723	def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
				1724	(XOR16rm GR16:$src1, addr:$src2)>;
				1725	def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
				1726	(XOR32rm GR32:$src1, addr:$src2)>;
				1727	def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
				1728	(XOR64rm GR64:$src1, addr:$src2)>;
				1729
				1730	// xor reg/imm
				1731	def : Pat<(xor GR8:$src1, imm:$src2),
				1732	(XOR8ri GR8:$src1, imm:$src2)>;
				1733	def : Pat<(xor GR16:$src1, imm:$src2),
				1734	(XOR16ri GR16:$src1, imm:$src2)>;
				1735	def : Pat<(xor GR32:$src1, imm:$src2),
				1736	(XOR32ri GR32:$src1, imm:$src2)>;
				1737	def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
				1738	(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1739	def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
				1740	(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1741	def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
				1742	(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1743	def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
				1744	(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1745
				1746	// and reg/reg
				1747	def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
				1748	def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
				1749	def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
				1750	def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
				1751
				1752	// and reg/mem
				1753	def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
				1754	(AND8rm GR8:$src1, addr:$src2)>;
				1755	def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
				1756	(AND16rm GR16:$src1, addr:$src2)>;
				1757	def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
				1758	(AND32rm GR32:$src1, addr:$src2)>;
				1759	def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
				1760	(AND64rm GR64:$src1, addr:$src2)>;
				1761
				1762	// and reg/imm
				1763	def : Pat<(and GR8:$src1, imm:$src2),
				1764	(AND8ri GR8:$src1, imm:$src2)>;
				1765	def : Pat<(and GR16:$src1, imm:$src2),
				1766	(AND16ri GR16:$src1, imm:$src2)>;
				1767	def : Pat<(and GR32:$src1, imm:$src2),
				1768	(AND32ri GR32:$src1, imm:$src2)>;
				1769	def : Pat<(and GR16:$src1, i16immSExt8:$src2),
				1770	(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1771	def : Pat<(and GR32:$src1, i32immSExt8:$src2),
				1772	(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1773	def : Pat<(and GR64:$src1, i64immSExt8:$src2),
				1774	(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1775	def : Pat<(and GR64:$src1, i64immSExt32:$src2),
				1776	(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1777
				1778	// Bit scan instruction patterns to match explicit zero-undef behavior.
				1779	def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
				1780	def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
				1781	def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
				1782	def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
				1783	def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
				1784	def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
				1785
				1786	// When HasMOVBE is enabled it is possible to get a non-legalized
				1787	// register-register 16 bit bswap. This maps it to a ROL instruction.
				1788	let Predicates = [HasMOVBE] in {
				1789	def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
				1790	}