Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

blob: 6abb035f688b9dc65f3946add8874db51172a1cc [file] [log] [blame]

Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1	//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -- tablegen --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file describes the various pseudo instructions used by the compiler,
				11	// as well as Pat patterns used during instruction selection.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	//===----------------------------------------------------------------------===//
				16	// Pattern Matching Support
				17
				18	def GetLo32XForm : SDNodeXForm<imm, [{
				19	// Transformation function: get the low 32 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame^]	20	return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	21	}]>;
				22
				23	def GetLo8XForm : SDNodeXForm<imm, [{
				24	// Transformation function: get the low 8 bits.
Sergey Dmitrouk	842a51b	2015-04-28 14:05:47 +0000	[diff] [blame^]	25	return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	26	}]>;
				27
				28
				29	//===----------------------------------------------------------------------===//
				30	// Random Pseudo Instructions.
				31
				32	// PIC base construction. This expands to code that looks like this:
				33	// call $next_inst
				34	// popl %destreg
				35	let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in
				36	def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
				37	"", []>;
				38
				39
				40	// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
				41	// a stack adjustment and the codegen must know that they may modify the stack
				42	// pointer before prolog-epilog rewriting occurs.
				43	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				44	// sub / add which can clobber EFLAGS.
				45	let Defs = [ESP, EFLAGS], Uses = [ESP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	46	def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	47	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	48	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	49	Requires<[NotLP64]>;
				50	def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				51	"#ADJCALLSTACKUP",
				52	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				53	Requires<[NotLP64]>;
				54	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	55	def : Pat<(X86callseq_start timm:$amt1),
				56	(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
				57
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	58
				59	// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
				60	// a stack adjustment and the codegen must know that they may modify the stack
				61	// pointer before prolog-epilog rewriting occurs.
				62	// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
				63	// sub / add which can clobber EFLAGS.
				64	let Defs = [RSP, EFLAGS], Uses = [RSP] in {
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	65	def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	66	"#ADJCALLSTACKDOWN",
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	67	[]>,
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	68	Requires<[IsLP64]>;
				69	def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
				70	"#ADJCALLSTACKUP",
				71	[(X86callseq_end timm:$amt1, timm:$amt2)]>,
				72	Requires<[IsLP64]>;
				73	}
Michael Kuperstein	13fbd45	2015-02-01 16:56:04 +0000	[diff] [blame]	74	def : Pat<(X86callseq_start timm:$amt1),
				75	(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	76
				77
				78	// x86-64 va_start lowering magic.
				79	let usesCustomInserter = 1, Defs = [EFLAGS] in {
				80	def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
				81	(outs),
				82	(ins GR8:$al,
				83	i64imm:$regsavefi, i64imm:$offset,
				84	variable_ops),
				85	"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
				86	[(X86vastart_save_xmm_regs GR8:$al,
				87	imm:$regsavefi,
				88	imm:$offset),
				89	(implicit EFLAGS)]>;
				90
				91	// The VAARG_64 pseudo-instruction takes the address of the va_list,
				92	// and places the address of the next argument into a register.
				93	let Defs = [EFLAGS] in
				94	def VAARG_64 : I<0, Pseudo,
				95	(outs GR64:$dst),
				96	(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
				97	"#VAARG_64 $dst, $ap, $size, $mode, $align",
				98	[(set GR64:$dst,
				99	(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
				100	(implicit EFLAGS)]>;
				101
				102	// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
				103	// targets. These calls are needed to probe the stack when allocating more than
				104	// 4k bytes in one go. Touching the stack at 4K increments is necessary to
				105	// ensure that the guard pages used by the OS virtual memory manager are
				106	// allocated in correct sequence.
				107	// The main reason for having a separate instruction is its extra unmodelled
				108	// effects (compared to ordinary calls), such as the stack pointer change.
				109
				110	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				111	def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
				112	"# dynamic stack allocation",
				113	[(X86WinAlloca)]>;
				114
				115	// When using segmented stacks these are lowered into instructions which first
				116	// check if the current stacklet has enough free memory. If it does, memory is
				117	// allocated by bumping the stack pointer. Otherwise memory is allocated from
				118	// the heap.
				119
				120	let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
				121	def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
				122	"# variable sized alloca for segmented stacks",
				123	[(set GR32:$dst,
				124	(X86SegAlloca GR32:$size))]>,
				125	Requires<[NotLP64]>;
				126
				127	let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
				128	def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
				129	"# variable sized alloca for segmented stacks",
				130	[(set GR64:$dst,
				131	(X86SegAlloca GR64:$size))]>,
				132	Requires<[In64BitMode]>;
				133	}
				134
				135	// The MSVC runtime contains an _ftol2 routine for converting floating-point
				136	// to integer values. It has a strange calling convention: the input is
				137	// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
				138	// used as a temporary register. No other registers (aside from flags) are
				139	// touched.
				140	// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
				141	// variant is unnecessary.
				142
				143	let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
				144	def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
				145	"# win32 fptoui",
				146	[(X86WinFTOL RFP32:$src)]>,
				147	Requires<[Not64BitMode]>;
				148
				149	def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
				150	"# win32 fptoui",
				151	[(X86WinFTOL RFP64:$src)]>,
				152	Requires<[Not64BitMode]>;
				153	}
				154
				155	//===----------------------------------------------------------------------===//
				156	// EH Pseudo Instructions
				157	//
				158	let SchedRW = [WriteSystem] in {
				159	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				160	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				161	def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
				162	"ret\t#eh_return, addr: $addr",
				163	[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				164
				165	}
				166
				167	let isTerminator = 1, isReturn = 1, isBarrier = 1,
				168	hasCtrlDep = 1, isCodeGenOnly = 1 in {
				169	def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
				170	"ret\t#eh_return, addr: $addr",
				171	[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
				172
				173	}
				174
				175	let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
				176	usesCustomInserter = 1 in {
				177	def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
				178	"#EH_SJLJ_SETJMP32",
				179	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				180	Requires<[Not64BitMode]>;
				181	def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
				182	"#EH_SJLJ_SETJMP64",
				183	[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
				184	Requires<[In64BitMode]>;
				185	let isTerminator = 1 in {
				186	def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
				187	"#EH_SJLJ_LONGJMP32",
				188	[(X86eh_sjlj_longjmp addr:$buf)]>,
				189	Requires<[Not64BitMode]>;
				190	def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
				191	"#EH_SJLJ_LONGJMP64",
				192	[(X86eh_sjlj_longjmp addr:$buf)]>,
				193	Requires<[In64BitMode]>;
				194	}
				195	}
				196	} // SchedRW
				197
				198	let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
				199	def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
				200	"#EH_SjLj_Setup\t$dst", []>;
				201	}
				202
				203	//===----------------------------------------------------------------------===//
				204	// Pseudo instructions used by unwind info.
				205	//
				206	let isPseudo = 1 in {
				207	def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
				208	"#SEH_PushReg $reg", []>;
				209	def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				210	"#SEH_SaveReg $reg, $dst", []>;
				211	def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
				212	"#SEH_SaveXMM $reg, $dst", []>;
				213	def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
				214	"#SEH_StackAlloc $size", []>;
				215	def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
				216	"#SEH_SetFrame $reg, $offset", []>;
				217	def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
				218	"#SEH_PushFrame $mode", []>;
				219	def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
				220	"#SEH_EndPrologue", []>;
				221	def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
				222	"#SEH_Epilogue", []>;
				223	}
				224
				225	//===----------------------------------------------------------------------===//
				226	// Pseudo instructions used by segmented stacks.
				227	//
				228
				229	// This is lowered into a RET instruction by MCInstLower. We need
				230	// this so that we don't have to have a MachineBasicBlock which ends
				231	// with a RET and also has successors.
				232	let isPseudo = 1 in {
				233	def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
				234	"", []>;
				235
				236	// This instruction is lowered to a RET followed by a MOV. The two
				237	// instructions are not generated on a higher level since then the
				238	// verifier sees a MachineBasicBlock ending with a non-terminator.
				239	def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
				240	"", []>;
				241	}
				242
				243	//===----------------------------------------------------------------------===//
				244	// Alias Instructions
				245	//===----------------------------------------------------------------------===//
				246
				247	// Alias instruction mapping movr0 to xor.
				248	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
				249	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
				250	isPseudo = 1 in
				251	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				252	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
				253
				254	// Other widths can also make use of the 32-bit xor, which may have a smaller
				255	// encoding and avoid partial register updates.
				256	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
				257	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
				258	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
				259	let AddedComplexity = 20;
				260	}
				261
				262	// Materialize i64 constant where top 32-bits are zero. This could theoretically
				263	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
				264	// that would make it more difficult to rematerialize.
				265	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
				266	isCodeGenOnly = 1, hasSideEffects = 0 in
				267	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
				268	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
				269
				270	// This 64-bit pseudo-move can be used for both a 64-bit constant that is
				271	// actually the zero-extension of a 32-bit constant, and for labels in the
				272	// x86-64 small code model.
				273	def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
				274
				275	let AddedComplexity = 1 in
				276	def : Pat<(i64 mov64imm32:$src),
				277	(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;
				278
				279	// Use sbb to materialize the carry bit.
				280	let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
				281	// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
				282	// However, Pat<> can't replicate the destination reg into the inputs of the
				283	// result.
				284	def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",
				285	[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				286	def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",
				287	[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				288	def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",
				289	[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				290	def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",
				291	[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
				292	} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]
				293
				294
				295	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				296	(SETB_C16r)>;
				297	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				298	(SETB_C32r)>;
				299	def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				300	(SETB_C64r)>;
				301
				302	def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				303	(SETB_C16r)>;
				304	def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				305	(SETB_C32r)>;
				306	def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				307	(SETB_C64r)>;
				308
				309	// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and
				310	// will be eliminated and that the sbb can be extended up to a wider type. When
				311	// this happens, it is great. However, if we are left with an 8-bit sbb and an
				312	// and, we might as well just match it as a setb.
				313	def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
				314	(SETBr)>;
				315
				316	// (add OP, SETB) -> (adc OP, 0)
				317	def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
				318	(ADC8ri GR8:$op, 0)>;
				319	def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
				320	(ADC32ri8 GR32:$op, 0)>;
				321	def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
				322	(ADC64ri8 GR64:$op, 0)>;
				323
				324	// (sub OP, SETB) -> (sbb OP, 0)
				325	def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				326	(SBB8ri GR8:$op, 0)>;
				327	def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				328	(SBB32ri8 GR32:$op, 0)>;
				329	def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
				330	(SBB64ri8 GR64:$op, 0)>;
				331
				332	// (sub OP, SETCC_CARRY) -> (adc OP, 0)
				333	def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
				334	(ADC8ri GR8:$op, 0)>;
				335	def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
				336	(ADC32ri8 GR32:$op, 0)>;
				337	def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
				338	(ADC64ri8 GR64:$op, 0)>;
				339
				340	//===----------------------------------------------------------------------===//
				341	// String Pseudo Instructions
				342	//
				343	let SchedRW = [WriteMicrocoded] in {
				344	let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
				345	def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
				346	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				347	Requires<[Not64BitMode]>;
				348	def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
				349	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				350	Requires<[Not64BitMode]>;
				351	def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
				352	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				353	Requires<[Not64BitMode]>;
				354	}
				355
				356	let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
				357	def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
				358	[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
				359	Requires<[In64BitMode]>;
				360	def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
				361	[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,
				362	Requires<[In64BitMode]>;
				363	def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
				364	[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,
				365	Requires<[In64BitMode]>;
				366	def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
				367	[(X86rep_movs i64)], IIC_REP_MOVS>, REP,
				368	Requires<[In64BitMode]>;
				369	}
				370
				371	// FIXME: Should use "(X86rep_stos AL)" as the pattern.
				372	let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
				373	let Uses = [AL,ECX,EDI] in
				374	def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
				375	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				376	Requires<[Not64BitMode]>;
				377	let Uses = [AX,ECX,EDI] in
				378	def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
				379	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				380	Requires<[Not64BitMode]>;
				381	let Uses = [EAX,ECX,EDI] in
				382	def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
				383	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				384	Requires<[Not64BitMode]>;
				385	}
				386
				387	let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
				388	let Uses = [AL,RCX,RDI] in
				389	def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
				390	[(X86rep_stos i8)], IIC_REP_STOS>, REP,
				391	Requires<[In64BitMode]>;
				392	let Uses = [AX,RCX,RDI] in
				393	def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
				394	[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,
				395	Requires<[In64BitMode]>;
				396	let Uses = [RAX,RCX,RDI] in
				397	def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
				398	[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
				399	Requires<[In64BitMode]>;
				400
				401	let Uses = [RAX,RCX,RDI] in
				402	def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
				403	[(X86rep_stos i64)], IIC_REP_STOS>, REP,
				404	Requires<[In64BitMode]>;
				405	}
				406	} // SchedRW
				407
				408	//===----------------------------------------------------------------------===//
				409	// Thread Local Storage Instructions
				410	//
				411
				412	// ELF TLS Support
				413	// All calls clobber the non-callee saved registers. ESP is marked as
				414	// a use to prevent stack-pointer assignments that appear immediately
				415	// before calls from potentially appearing dead.
				416	let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				417	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				418	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				419	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				420	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				421	Uses = [ESP] in {
				422	def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				423	"# TLS_addr32",
				424	[(X86tlsaddr tls32addr:$sym)]>,
				425	Requires<[Not64BitMode]>;
				426	def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				427	"# TLS_base_addr32",
				428	[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
				429	Requires<[Not64BitMode]>;
				430	}
				431
				432	// All calls clobber the non-callee saved registers. RSP is marked as
				433	// a use to prevent stack-pointer assignments that appear immediately
				434	// before calls from potentially appearing dead.
				435	let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
				436	FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
				437	ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
				438	MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
				439	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
				440	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
				441	Uses = [RSP] in {
				442	def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				443	"# TLS_addr64",
				444	[(X86tlsaddr tls64addr:$sym)]>,
				445	Requires<[In64BitMode]>;
				446	def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				447	"# TLS_base_addr64",
				448	[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
				449	Requires<[In64BitMode]>;
				450	}
				451
				452	// Darwin TLS Support
				453	// For i386, the address of the thunk is passed on the stack, on return the
				454	// address of the variable is in %eax. %ecx is trashed during the function
				455	// call. All other registers are preserved.
				456	let Defs = [EAX, ECX, EFLAGS],
				457	Uses = [ESP],
				458	usesCustomInserter = 1 in
				459	def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
				460	"# TLSCall_32",
				461	[(X86TLSCall addr:$sym)]>,
				462	Requires<[Not64BitMode]>;
				463
				464	// For x86_64, the address of the thunk is passed in %rdi, on return
				465	// the address of the variable is in %rax. All other registers are preserved.
				466	let Defs = [RAX, EFLAGS],
				467	Uses = [RSP, RDI],
				468	usesCustomInserter = 1 in
				469	def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
				470	"# TLSCall_64",
				471	[(X86TLSCall addr:$sym)]>,
				472	Requires<[In64BitMode]>;
				473
				474
				475	//===----------------------------------------------------------------------===//
				476	// Conditional Move Pseudo Instructions
				477
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	478	// CMOV* - Used to implement the SELECT DAG operation. Expanded after
				479	// instruction selection into a branch sequence.
				480	multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
				481	def CMOV#NAME : I<0, Pseudo,
				482	(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
				483	"#CMOV_"#NAME#" PSEUDO!",
				484	[(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,
				485	EFLAGS)))]>;
				486	}
				487
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	488	let usesCustomInserter = 1, Uses = [EFLAGS] in {
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	489	// X86 doesn't have 8-bit conditional moves. Use a customInserter to
				490	// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
				491	// however that requires promoting the operands, and can induce additional
				492	// i8 register pressure.
				493	defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	494
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	495	let Predicates = [NoCMov] in {
				496	defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
				497	defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
				498	} // Predicates = [NoCMov]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	499
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	500	// fcmov doesn't handle all possible EFLAGS conditions; provide a fallback if
				501	// there is no SSE1/SSE2.
				502	let Predicates = [FPStackf32] in
				503	defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	504
Ahmed Bougacha	8f2b4f0	2015-02-14 01:36:53 +0000	[diff] [blame]	505	let Predicates = [FPStackf64] in
				506	defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
				507
				508	defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
				509
				510	defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
				511	defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
				512	defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
				513	defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
				514	defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
				515	defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;
				516	defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;
				517	defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;
				518	defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;
				519	defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;
				520	defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;
				521	} // usesCustomInserter = 1, Uses = [EFLAGS]
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	522
				523	//===----------------------------------------------------------------------===//
				524	// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
				525	//===----------------------------------------------------------------------===//
				526
				527	// FIXME: Use normal instructions and add lock prefix dynamically.
				528
				529	// Memory barriers
				530
				531	// TODO: Get this to fold the constant into the instruction.
				532	let isCodeGenOnly = 1, Defs = [EFLAGS] in
				533	def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
				534	"or{l}\t{$zero, $dst|$dst, $zero}",
				535	[], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK,
				536	Sched<[WriteALULd, WriteRMW]>;
				537
				538	let hasSideEffects = 1 in
				539	def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
				540	"#MEMBARRIER",
				541	[(X86MemBarrier)]>, Sched<[WriteLoad]>;
				542
				543	// RegOpc corresponds to the mr version of the instruction
				544	// ImmOpc corresponds to the mi version of the instruction
				545	// ImmOpc8 corresponds to the mi8 version of the instruction
				546	// ImmMod corresponds to the instruction format of the mi and mi8 versions
				547	multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
				548	Format ImmMod, string mnemonic> {
				549	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				550	SchedRW = [WriteALULd, WriteRMW] in {
				551
				552	def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				553	RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
				554	MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
				555	!strconcat(mnemonic, "{b}\t",
				556	"{$src2, $dst|$dst, $src2}"),
				557	[], IIC_ALU_NONMEM>, LOCK;
				558	def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				559	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				560	MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
				561	!strconcat(mnemonic, "{w}\t",
				562	"{$src2, $dst|$dst, $src2}"),
				563	[], IIC_ALU_NONMEM>, OpSize16, LOCK;
				564	def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				565	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				566	MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
				567	!strconcat(mnemonic, "{l}\t",
				568	"{$src2, $dst|$dst, $src2}"),
				569	[], IIC_ALU_NONMEM>, OpSize32, LOCK;
				570	def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
				571	RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
				572	MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
				573	!strconcat(mnemonic, "{q}\t",
				574	"{$src2, $dst|$dst, $src2}"),
				575	[], IIC_ALU_NONMEM>, LOCK;
				576
				577	def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				578	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
				579	ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
				580	!strconcat(mnemonic, "{b}\t",
				581	"{$src2, $dst|$dst, $src2}"),
				582	[], IIC_ALU_MEM>, LOCK;
				583
				584	def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				585	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				586	ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
				587	!strconcat(mnemonic, "{w}\t",
				588	"{$src2, $dst|$dst, $src2}"),
				589	[], IIC_ALU_MEM>, OpSize16, LOCK;
				590
				591	def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				592	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				593	ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
				594	!strconcat(mnemonic, "{l}\t",
				595	"{$src2, $dst|$dst, $src2}"),
				596	[], IIC_ALU_MEM>, OpSize32, LOCK;
				597
				598	def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
				599	ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
				600	ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
				601	!strconcat(mnemonic, "{q}\t",
				602	"{$src2, $dst|$dst, $src2}"),
				603	[], IIC_ALU_MEM>, LOCK;
				604
				605	def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				606	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				607	ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
				608	!strconcat(mnemonic, "{w}\t",
				609	"{$src2, $dst|$dst, $src2}"),
				610	[], IIC_ALU_MEM>, OpSize16, LOCK;
				611	def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				612	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				613	ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
				614	!strconcat(mnemonic, "{l}\t",
				615	"{$src2, $dst|$dst, $src2}"),
				616	[], IIC_ALU_MEM>, OpSize32, LOCK;
				617	def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
				618	ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
				619	ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
				620	!strconcat(mnemonic, "{q}\t",
				621	"{$src2, $dst|$dst, $src2}"),
				622	[], IIC_ALU_MEM>, LOCK;
				623
				624	}
				625
				626	}
				627
				628	defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
				629	defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
				630	defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
				631	defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
				632	defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
				633
				634	// Optimized codegen when the non-memory output is not used.
				635	multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
				636	string mnemonic> {
				637	let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
				638	SchedRW = [WriteALULd, WriteRMW] in {
				639
				640	def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
				641	!strconcat(mnemonic, "{b}\t$dst"),
				642	[], IIC_UNARY_MEM>, LOCK;
				643	def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
				644	!strconcat(mnemonic, "{w}\t$dst"),
				645	[], IIC_UNARY_MEM>, OpSize16, LOCK;
				646	def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
				647	!strconcat(mnemonic, "{l}\t$dst"),
				648	[], IIC_UNARY_MEM>, OpSize32, LOCK;
				649	def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
				650	!strconcat(mnemonic, "{q}\t$dst"),
				651	[], IIC_UNARY_MEM>, LOCK;
				652	}
				653	}
				654
				655	defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">;
				656	defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">;
				657
				658	// Atomic compare and swap.
					// LCMPXCHG_UnOp defines one instruction, NAME, tagged TB and LOCK, whose
					// only explicit operand is the memory location $ptr. It is selected for
					// (frag addr:$ptr).
					//   Opc, Form - opcode byte and instruction format.
					//   mnemonic  - assembly mnemonic, printed followed by a tab and $ptr.
					//   frag      - pattern operator matched against addr:$ptr.
					//   x86memop  - memory operand class used for $ptr.
					//   itin      - itinerary class attached to the instruction.
					// No implicit register Defs/Uses are set inside the multiclass; the
					// instantiations in this file wrap the defm in a let that supplies them.
				659	multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
				660	SDPatternOperator frag, X86MemOperand x86memop,
				661	InstrItinClass itin> {
				662	let isCodeGenOnly = 1 in {
				663	def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
				664	!strconcat(mnemonic, "\t$ptr"),
				665	[(frag addr:$ptr)], itin>, TB, LOCK;
				666	}
				667	}
				668
					// LCMPXCHG_BinOp defines the 8/16/32/64-bit forms (NAME#8 .. NAME#64) of a
					// TB/LOCK-tagged instruction taking a memory operand $ptr and a register
					// operand $swap.
					//   Opc8       - opcode byte of the 8-bit form.
					//   Opc        - opcode byte shared by the 16/32/64-bit forms.
					//   Form       - instruction format used by every width.
					//   mnemonic   - base mnemonic; a {b}/{w}/{l}/{q} size suffix is appended.
					//   frag       - pattern operator, matched as (frag addr:$ptr, reg:$swap, N)
					//                with N = 1, 2, 4 or 8 for the respective width
					//                (presumably the access size in bytes - confirm against frag).
					//   itin8/itin - itinerary class of the 8-bit / wider forms.
					// Each width implicitly uses and defines the A register of matching size
					// (AL/AX/EAX/RAX) and additionally defines EFLAGS.
				669	multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
				670	string mnemonic, SDPatternOperator frag,
				671	InstrItinClass itin8, InstrItinClass itin> {
				672	let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
				673	let Defs = [AL, EFLAGS], Uses = [AL] in
				674	def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
				675	!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),
				676	[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
				677	let Defs = [AX, EFLAGS], Uses = [AX] in
				678	def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
				679	!strconcat(mnemonic, "{w}\t{$swap, $ptr\|$ptr, $swap}"),
				680	[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
				681	let Defs = [EAX, EFLAGS], Uses = [EAX] in
				682	def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
				683	!strconcat(mnemonic, "{l}\t{$swap, $ptr\|$ptr, $swap}"),
				684	[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
				685	let Defs = [RAX, EFLAGS], Uses = [RAX] in
				686	def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
				687	!strconcat(mnemonic, "{q}\t{$swap, $ptr\|$ptr, $swap}"),
				688	[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
				689	}
				690	}
				691
				692	let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
				693	SchedRW = [WriteALULd, WriteRMW] in {
				694	defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
				695	X86cas8, i64mem,
				696	IIC_CMPX_LOCK_8B>;
				697	}
				698
				699	let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
				700	Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
				701	defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
				702	X86cas16, i128mem,
				703	IIC_CMPX_LOCK_16B>, REX_W;
				704	}
				705
				706	defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
				707	X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;
				708
				709	// Atomic exchange and add
					// ATOMIC_LOAD_BINOP defines the 8/16/32/64-bit register/memory forms
					// (NAME#8 .. NAME#64) of an instruction selected for the pattern fragment
					// named frag # "_<width>" applied to (addr:$ptr, reg:$val).
					//   opc8/opc   - opcode byte of the 8-bit / wider forms.
					//   mnemonic   - base mnemonic; a {b}/{w}/{l}/{q} size suffix is appended.
					//   frag       - name prefix of the PatFrag; "_8", "_16", "_32" or "_64" is
					//                appended and the record is looked up via !cast<PatFrag>.
					//   itin8/itin - itinerary class of the 8-bit / wider forms.
					// $val is tied to $dst ("$val = $dst"), so the input register is also the
					// result register. EFLAGS is marked as defined.
				710	multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
				711	string frag,
				712	InstrItinClass itin8, InstrItinClass itin> {
				713	let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
				714	SchedRW = [WriteALULd, WriteRMW] in {
				715	def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
				716	(ins GR8:$val, i8mem:$ptr),
				717	!strconcat(mnemonic, "{b}\t{$val, $ptr\|$ptr, $val}"),
				718	[(set GR8:$dst,
				719	(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
				720	itin8>;
				721	def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
				722	(ins GR16:$val, i16mem:$ptr),
				723	!strconcat(mnemonic, "{w}\t{$val, $ptr\|$ptr, $val}"),
				724	[(set
				725	GR16:$dst,
				726	(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
				727	itin>, OpSize16;
				728	def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
				729	(ins GR32:$val, i32mem:$ptr),
				730	!strconcat(mnemonic, "{l}\t{$val, $ptr\|$ptr, $val}"),
				731	[(set
				732	GR32:$dst,
				733	(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
				734	itin>, OpSize32;
				735	def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
				736	(ins GR64:$val, i64mem:$ptr),
				737	!strconcat(mnemonic, "{q}\t{$val, $ptr\|$ptr, $val}"),
				738	[(set
				739	GR64:$dst,
				740	(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
				741	itin>;
				742	}
				743	}
				744
				745	defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
				746	IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
				747	TB, LOCK;
				748
				749	/* The following multiclass tries to make sure that in code like
				750	* x.store (immediate op x.load(acquire), release)
				751	* an operation directly on memory is generated instead of wasting a register.
				752	* It is not automatic as atomic_store/load are only lowered to MOV instructions
				753	* extremely late to prevent them from being accidentally reordered in the backend
				754	* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
				755	*/
				756	multiclass RELEASE_BINOP_MI<string op> {
				757	def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				758	"#RELEASE_BINOP PSEUDO!",
				759	[(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
				760	(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
				761	// NAME#16 is not generated as 16-bit arithmetic instructions are considered
				762	// costly and avoided as far as possible by this backend anyway
				763	def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				764	"#RELEASE_BINOP PSEUDO!",
				765	[(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
				766	(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
				767	def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				768	"#RELEASE_BINOP PSEUDO!",
				769	[(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
				770	(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
				771	}
				772	defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
				773	defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
				774	defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
				775	defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
				776	// Note: we don't deal with sub, because subtractions of constants are
				777	// optimized into additions before this code can run
				778
					// RELEASE_UNOP defines four pseudo instructions (NAME#8m .. NAME#64m), each
					// taking only a memory operand $dst. The pseudo of width W is selected for
					// (atomic_store_W addr:$dst, dagW), where dag8/dag16/dag32/dag64 are the
					// caller-supplied value expressions for the respective widths. The
					// instantiations in this file pass expressions that atomically load from the
					// same $dst, so each pseudo stands for a load/modify/store of one location.
				779	multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
				780	def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
				781	"#RELEASE_UNOP PSEUDO!",
				782	[(atomic_store_8 addr:$dst, dag8)]>;
				783	def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
				784	"#RELEASE_UNOP PSEUDO!",
				785	[(atomic_store_16 addr:$dst, dag16)]>;
				786	def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
				787	"#RELEASE_UNOP PSEUDO!",
				788	[(atomic_store_32 addr:$dst, dag32)]>;
				789	def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
				790	"#RELEASE_UNOP PSEUDO!",
				791	[(atomic_store_64 addr:$dst, dag64)]>;
				792	}
				793
				794	defm RELEASE_INC : RELEASE_UNOP<
				795	(add (atomic_load_8 addr:$dst), (i8 1)),
				796	(add (atomic_load_16 addr:$dst), (i16 1)),
				797	(add (atomic_load_32 addr:$dst), (i32 1)),
				798	(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
				799	defm RELEASE_DEC : RELEASE_UNOP<
				800	(add (atomic_load_8 addr:$dst), (i8 -1)),
				801	(add (atomic_load_16 addr:$dst), (i16 -1)),
				802	(add (atomic_load_32 addr:$dst), (i32 -1)),
				803	(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
				804	/*
				805	TODO: These don't work because the type inference of TableGen fails.
				806	TODO: find a way to fix it.
				807	defm RELEASE_NEG : RELEASE_UNOP<
				808	(ineg (atomic_load_8 addr:$dst)),
				809	(ineg (atomic_load_16 addr:$dst)),
				810	(ineg (atomic_load_32 addr:$dst)),
				811	(ineg (atomic_load_64 addr:$dst))>;
				812	defm RELEASE_NOT : RELEASE_UNOP<
				813	(not (atomic_load_8 addr:$dst)),
				814	(not (atomic_load_16 addr:$dst)),
				815	(not (atomic_load_32 addr:$dst)),
				816	(not (atomic_load_64 addr:$dst))>;
				817	*/
				818
				819	def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
				820	"#RELEASE_MOV PSEUDO !",
				821	[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
				822	def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
				823	"#RELEASE_MOV PSEUDO !",
				824	[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
				825	def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
				826	"#RELEASE_MOV PSEUDO !",
				827	[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
				828	def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
				829	"#RELEASE_MOV PSEUDO !",
				830	[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
				831
				832	def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
				833	"#RELEASE_MOV PSEUDO!",
				834	[(atomic_store_8 addr:$dst, GR8 :$src)]>;
				835	def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
				836	"#RELEASE_MOV PSEUDO!",
				837	[(atomic_store_16 addr:$dst, GR16:$src)]>;
				838	def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
				839	"#RELEASE_MOV PSEUDO!",
				840	[(atomic_store_32 addr:$dst, GR32:$src)]>;
				841	def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
				842	"#RELEASE_MOV PSEUDO!",
				843	[(atomic_store_64 addr:$dst, GR64:$src)]>;
				844
				845	def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
				846	"#ACQUIRE_MOV PSEUDO!",
				847	[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
				848	def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
				849	"#ACQUIRE_MOV PSEUDO!",
				850	[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
				851	def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
				852	"#ACQUIRE_MOV PSEUDO!",
				853	[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
				854	def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
				855	"#ACQUIRE_MOV PSEUDO!",
				856	[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	857
				858	//===----------------------------------------------------------------------===//
				859	// DAG Pattern Matching Rules
				860	//===----------------------------------------------------------------------===//
				861
				862	// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
				863	def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
				864	def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
				865	def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
				866	def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
				867	def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				868	def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
				869
				870	def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
				871	(ADD32ri GR32:$src1, tconstpool:$src2)>;
				872	def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
				873	(ADD32ri GR32:$src1, tjumptable:$src2)>;
				874	def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
				875	(ADD32ri GR32:$src1, tglobaladdr:$src2)>;
				876	def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
				877	(ADD32ri GR32:$src1, texternalsym:$src2)>;
				878	def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
				879	(ADD32ri GR32:$src1, tblockaddress:$src2)>;
				880
				881	def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				882	(MOV32mi addr:$dst, tglobaladdr:$src)>;
				883	def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
				884	(MOV32mi addr:$dst, texternalsym:$src)>;
				885	def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
				886	(MOV32mi addr:$dst, tblockaddress:$src)>;
				887
				888	// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable when not in small
				889	// code model mode, should use 'movabs'. FIXME: This is really a hack, the
				890	// 'movabs' predicate should handle this sort of thing.
				891	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				892	(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
				893	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				894	(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
				895	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				896	(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
				897	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				898	(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
				899	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				900	(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
				901
				902	// In kernel code model, we can get the address of a label
				903	// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
				904	// the MOV64ri32 should accept these.
				905	def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
				906	(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
				907	def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
				908	(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
				909	def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
				910	(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
				911	def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
				912	(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
				913	def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
				914	(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
				915
				916	// If we have small model and -static mode, it is safe to store global addresses
				917	// directly as immediates. FIXME: This is really a hack, the 'imm' predicate
				918	// for MOV64mi32 should handle this sort of thing.
				919	def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
				920	(MOV64mi32 addr:$dst, tconstpool:$src)>,
				921	Requires<[NearData, IsStatic]>;
				922	def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
				923	(MOV64mi32 addr:$dst, tjumptable:$src)>,
				924	Requires<[NearData, IsStatic]>;
				925	def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
				926	(MOV64mi32 addr:$dst, tglobaladdr:$src)>,
				927	Requires<[NearData, IsStatic]>;
				928	def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
				929	(MOV64mi32 addr:$dst, texternalsym:$src)>,
				930	Requires<[NearData, IsStatic]>;
				931	def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
				932	(MOV64mi32 addr:$dst, tblockaddress:$src)>,
				933	Requires<[NearData, IsStatic]>;
				934
				935	def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
				936	def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
				937
				938	// Calls
				939
				940	// tls has some funny stuff here...
				941	// This corresponds to movq $foo@tpoff, %rax
				942	def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
				943	(MOV64ri32 tglobaltlsaddr :$dst)>;
				944	// This corresponds to add $foo@tpoff, %rax
				945	def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
				946	(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
				947
				948
				949	// Direct PC relative function call for small code model. 32-bit displacement
				950	// sign extended to 64-bit.
				951	def : Pat<(X86call (i64 tglobaladdr:$dst)),
				952	(CALL64pcrel32 tglobaladdr:$dst)>;
				953	def : Pat<(X86call (i64 texternalsym:$dst)),
				954	(CALL64pcrel32 texternalsym:$dst)>;
				955
				956	// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
				957	// can never use callee-saved registers. That is the purpose of the GR64_TC
				958	// register classes.
				959	//
				960	// The only volatile register that is never used by the calling convention is
				961	// %r11. This happens when calling a vararg function with 6 arguments.
				962	//
				963	// Match an X86tcret that uses less than 7 volatile registers.
					// The predicate walks the node's operands starting at index 3 (operands 0-2
					// are chain, ptr and imm per the layout noted inside the predicate), counts
					// those that are RegisterSDNodes, and rejects the node as soon as a seventh
					// register operand is found. Otherwise the node matches.
				964	def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
				965	(X86tcret node:$ptr, node:$off), [{
				966	// X86tcret args: (*chain, ptr, imm, regs..., glue)
				967	unsigned NumRegs = 0;
				968	for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
				969	if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
				970	return false;
				971	return true;
				972	}]>;
				973
				974	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				975	(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
				976	Requires<[Not64BitMode]>;
				977
				978	// FIXME: This is disabled for 32-bit PIC mode because the global base
				979	// register which is part of the address mode may be assigned a
				980	// callee-saved register.
				981	def : Pat<(X86tcret (load addr:$dst), imm:$off),
				982	(TCRETURNmi addr:$dst, imm:$off)>,
				983	Requires<[Not64BitMode, IsNotPIC]>;
				984
				985	def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
				986	(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
				987	Requires<[NotLP64]>;
				988
				989	def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
				990	(TCRETURNdi texternalsym:$dst, imm:$off)>,
				991	Requires<[NotLP64]>;
				992
				993	def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
				994	(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
				995	Requires<[In64BitMode]>;
				996
				997	// Don't fold loads into X86tcret requiring more than 6 regs.
				998	// There wouldn't be enough scratch registers for base+index.
				999	def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
				1000	(TCRETURNmi64 addr:$dst, imm:$off)>,
				1001	Requires<[In64BitMode]>;
				1002
				1003	def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
				1004	(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
				1005	Requires<[IsLP64]>;
				1006
				1007	def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
				1008	(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
				1009	Requires<[IsLP64]>;
				1010
				1011	// Normal calls, with various flavors of addresses.
				1012	def : Pat<(X86call (i32 tglobaladdr:$dst)),
				1013	(CALLpcrel32 tglobaladdr:$dst)>;
				1014	def : Pat<(X86call (i32 texternalsym:$dst)),
				1015	(CALLpcrel32 texternalsym:$dst)>;
				1016	def : Pat<(X86call (i32 imm:$dst)),
				1017	(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
				1018
				1019	// Comparisons.
				1020
				1021	// TEST R,R is smaller than CMP R,0
				1022	def : Pat<(X86cmp GR8:$src1, 0),
				1023	(TEST8rr GR8:$src1, GR8:$src1)>;
				1024	def : Pat<(X86cmp GR16:$src1, 0),
				1025	(TEST16rr GR16:$src1, GR16:$src1)>;
				1026	def : Pat<(X86cmp GR32:$src1, 0),
				1027	(TEST32rr GR32:$src1, GR32:$src1)>;
				1028	def : Pat<(X86cmp GR64:$src1, 0),
				1029	(TEST64rr GR64:$src1, GR64:$src1)>;
				1030
				1031	// Conditional moves with folded loads with operands swapped and conditions
				1032	// inverted.
					// For each of the 16/32/64-bit widths, CMOVmr emits a pattern that matches an
					// X86cmov whose first value operand is a load from addr:$src1, whose second
					// is the register $src2 and whose condition is InvertedCond, and selects
					// Inst16/Inst32/Inst64 with the register first and the address second.
					// Instantiations pass instructions for the opposite condition (e.g.
					// X86_COND_B is paired with CMOVAE*rm), which compensates for the swapped
					// operand order. All three patterns are predicated on HasCMov.
				1033	multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
				1034	Instruction Inst64> {
				1035	let Predicates = [HasCMov] in {
				1036	def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
				1037	(Inst16 GR16:$src2, addr:$src1)>;
				1038	def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
				1039	(Inst32 GR32:$src2, addr:$src1)>;
				1040	def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
				1041	(Inst64 GR64:$src2, addr:$src1)>;
				1042	}
				1043	}
				1044
				1045	defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
				1046	defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
				1047	defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
				1048	defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
				1049	defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
				1050	defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
				1051	defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
				1052	defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
				1053	defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
				1054	defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
				1055	defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
				1056	defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
				1057	defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
				1058	defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
				1059	defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
				1060	defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
				1061
				1062	// zextload bool -> zextload byte
				1063	def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1064	def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1065	def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1066	def : Pat<(zextloadi64i1 addr:$src),
				1067	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1068
				1069	// extload bool -> extload byte
				1070	// When extloading from 16-bit and smaller memory locations into 64-bit
				1071	// registers, use zero-extending loads so that the entire 64-bit register is
				1072	// defined, avoiding partial-register updates.
				1073
				1074	def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
				1075	def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
				1076	def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
				1077	def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
				1078	def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
				1079	def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
				1080
				1081	// For other extloads, use subregs, since the high contents of the register are
				1082	// defined after an extload.
				1083	def : Pat<(extloadi64i1 addr:$src),
				1084	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1085	def : Pat<(extloadi64i8 addr:$src),
				1086	(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
				1087	def : Pat<(extloadi64i16 addr:$src),
				1088	(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
				1089	def : Pat<(extloadi64i32 addr:$src),
				1090	(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
				1091
				1092	// anyext. Define these to do an explicit zero-extend to
				1093	// avoid partial-register updates.
				1094	def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
				1095	(MOVZX32rr8 GR8 :$src), sub_16bit)>;
				1096	def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
				1097
				1098	// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
				1099	def : Pat<(i32 (anyext GR16:$src)),
				1100	(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
				1101
				1102	def : Pat<(i64 (anyext GR8 :$src)),
				1103	(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
				1104	def : Pat<(i64 (anyext GR16:$src)),
				1105	(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
				1106	def : Pat<(i64 (anyext GR32:$src)),
				1107	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1108
				1109
				1110	// def32 matches a 32-bit GR32 value whose defining node is assumed to have
				1111	// zero-extended its result up to 64 bits. Nodes that give no such guarantee
				1112	// are rejected: truncate (it can be lowered to EXTRACT_SUBREG), EXTRACT_SUBREG
				1113	// itself, CopyFromReg (it may be copying from a truncate) and X86ISD::CMOV
				1114	// (x86's cmov doesn't do anything if the condition is false).
					// AssertSext is rejected as well (NOTE(review): the reason is not stated
					// here - confirm). Any other 32-bit operation will zero-extend up to 64 bits.
				1115	def def32 : PatLeaf<(i32 GR32:$src), [{
				1116	return N->getOpcode() != ISD::TRUNCATE &&
				1117	N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
				1118	N->getOpcode() != ISD::CopyFromReg &&
				1119	N->getOpcode() != ISD::AssertSext &&
				1120	N->getOpcode() != X86ISD::CMOV;
				1121	}]>;
				1122
				1123	// In the case of a 32-bit def that is known to implicitly zero-extend,
				1124	// we can use a SUBREG_TO_REG.
				1125	def : Pat<(i64 (zext def32:$src)),
				1126	(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
				1127
				1128	//===----------------------------------------------------------------------===//
				1129	// Pattern match OR as ADD
				1130	//===----------------------------------------------------------------------===//
				1131
				1132	// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
				1133	// 3-addressified into an LEA instruction to avoid copies. However, we also
				1134	// want to finally emit these instructions as an or at the end of the code
				1135	// generator to make the generated code easier to read. To do this, we select
				1136	// into "disjoint bits" pseudo ops.
				1137
				1138	// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
					// When the right-hand operand is a constant, the fragment matches if every
					// bit set in that constant is known to be zero in the left-hand operand.
					// Otherwise it computes the known-zero bits of both operands and matches
					// only when every bit position is known zero in at least one of them, i.e.
					// the two operands can never both have a 1 in the same position.
				1139	def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
				1140	if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
				1141	return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
				1142
				1143	APInt KnownZero0, KnownOne0;
				1144	CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
				1145	APInt KnownZero1, KnownOne1;
				1146	CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
				1147	return (~KnownZero0 & ~KnownZero1) == 0;
				1148	}]>;
				1149
				1150
				1151	// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
				1152	// Try this before selecting to OR.
				1153	let AddedComplexity = 5, SchedRW = [WriteALU] in {
				1154
				1155	let isConvertibleToThreeAddress = 1,
				1156	Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
				1157	let isCommutable = 1 in {
				1158	def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
				1159	"", // orw/addw REG, REG
				1160	[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
				1161	def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
				1162	"", // orl/addl REG, REG
				1163	[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
				1164	def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
				1165	"", // orq/addq REG, REG
				1166	[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
				1167	} // isCommutable
				1168
				1169	// NOTE: These are order specific, we want the ri8 forms to be listed
				1170	// first so that they are slightly preferred to the ri forms.
				1171
				1172	def ADD16ri8_DB : I<0, Pseudo,
				1173	(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
				1174	"", // orw/addw REG, imm8
				1175	[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
				1176	def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
				1177	"", // orw/addw REG, imm
				1178	[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
				1179
				1180	def ADD32ri8_DB : I<0, Pseudo,
				1181	(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
				1182	"", // orl/addl REG, imm8
				1183	[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
				1184	def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
				1185	"", // orl/addl REG, imm
				1186	[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
				1187
				1188
				1189	def ADD64ri8_DB : I<0, Pseudo,
				1190	(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
				1191	"", // orq/addq REG, imm8
				1192	[(set GR64:$dst, (or_is_add GR64:$src1,
				1193	i64immSExt8:$src2))]>;
				1194	def ADD64ri32_DB : I<0, Pseudo,
				1195	(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
				1196	"", // orq/addq REG, imm
				1197	[(set GR64:$dst, (or_is_add GR64:$src1,
				1198	i64immSExt32:$src2))]>;
				1199	}
				1200	} // AddedComplexity, SchedRW
				1201
				1202
				1203	//===----------------------------------------------------------------------===//
				1204	// Some peepholes
				1205	//===----------------------------------------------------------------------===//
				1206
				1207	// Odd encoding trick: -128 fits into an 8-bit immediate field while
				1208	// +128 doesn't, so in this special case use a sub instead of an add.
				1209	def : Pat<(add GR16:$src1, 128),
				1210	(SUB16ri8 GR16:$src1, -128)>;
				1211	def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
				1212	(SUB16mi8 addr:$dst, -128)>;
				1213
				1214	def : Pat<(add GR32:$src1, 128),
				1215	(SUB32ri8 GR32:$src1, -128)>;
				1216	def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
				1217	(SUB32mi8 addr:$dst, -128)>;
				1218
				1219	def : Pat<(add GR64:$src1, 128),
				1220	(SUB64ri8 GR64:$src1, -128)>;
				1221	def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
				1222	(SUB64mi8 addr:$dst, -128)>;
				1223
				1224	// The same trick applies for 32-bit immediate fields in 64-bit
				1225	// instructions.
					// +2^31 (0x80000000) is not representable as a sign-extended 32-bit
					// immediate but -2^31 (0xffffffff80000000) is, so an add of 2^31 is selected
					// as a sub of -2^31. The register form and the load/add/store form must both
					// match exactly 0x0000000080000000 (16 hex digits); matching any other
					// constant would make the rewritten sub compute a different value.
				1226	def : Pat<(add GR64:$src1, 0x0000000080000000),
				1227	(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
				1228	def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
				1229	(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
				1230
				1231	// To avoid needing to materialize an immediate in a register, use a 32-bit and
				1232	// with implicit zero-extension instead of a 64-bit and if the immediate has at
				1233	// least 32 bits of leading zeros. If in addition the last 32 bits can be
				1234	// represented with a sign extension of a 8 bit constant, use that.
Craig Topper	3d44178	2015-04-04 02:31:43 +0000	[diff] [blame]	1235	// This can also reduce instruction size by eliminating the need for the REX
				1236	// prefix.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1237
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1238	// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
				1239	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1240	def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
				1241	(SUBREG_TO_REG
				1242	(i64 0),
				1243	(AND32ri8
				1244	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1245	(i32 (GetLo8XForm imm:$imm))),
				1246	sub_32bit)>;
				1247
				1248	def : Pat<(and GR64:$src, i64immZExt32:$imm),
				1249	(SUBREG_TO_REG
				1250	(i64 0),
				1251	(AND32ri
				1252	(EXTRACT_SUBREG GR64:$src, sub_32bit),
				1253	(i32 (GetLo32XForm imm:$imm))),
				1254	sub_32bit)>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1255	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1256
				1257
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1258	// AddedComplexity is needed due to the increased complexity on the
				1259	// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
				1260	// the MOVZX patterns keeps them together in DAGIsel tables.
				1261	let AddedComplexity = 1 in {
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1262	// r & (2^16-1) ==> movz
				1263	def : Pat<(and GR32:$src1, 0xffff),
				1264	(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
				1265	// r & (2^8-1) ==> movz
				1266	def : Pat<(and GR32:$src1, 0xff),
				1267	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
				1268	GR32_ABCD)),
				1269	sub_8bit))>,
				1270	Requires<[Not64BitMode]>;
				1271	// r & (2^8-1) ==> movz
				1272	def : Pat<(and GR16:$src1, 0xff),
				1273	(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
				1274	(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
				1275	sub_16bit)>,
				1276	Requires<[Not64BitMode]>;
				1277
				1278	// r & (2^32-1) ==> movz
				1279	def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
				1280	(SUBREG_TO_REG (i64 0),
				1281	(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
				1282	sub_32bit)>;
				1283	// r & (2^16-1) ==> movz
Craig Topper	901202873	2015-04-04 02:08:20 +0000	[diff] [blame]	1284	let AddedComplexity = 1 in // Give priority over i64immZExt32.
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1285	def : Pat<(and GR64:$src, 0xffff),
				1286	(SUBREG_TO_REG (i64 0),
				1287	(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
				1288	sub_32bit)>;
				1289	// r & (2^8-1) ==> movz
				1290	def : Pat<(and GR64:$src, 0xff),
				1291	(SUBREG_TO_REG (i64 0),
				1292	(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
				1293	sub_32bit)>;
				1294	// r & (2^8-1) ==> movz
				1295	def : Pat<(and GR32:$src1, 0xff),
				1296	(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
				1297	Requires<[In64BitMode]>;
				1298	// r & (2^8-1) ==> movz
				1299	def : Pat<(and GR16:$src1, 0xff),
				1300	(EXTRACT_SUBREG (MOVZX32rr8 (i8
				1301	(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
				1302	Requires<[In64BitMode]>;
Craig Topper	7ea899a	2015-04-04 04:22:12 +0000	[diff] [blame]	1303	} // AddedComplexity = 1
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1304
				1305
				1306	// sext_inreg patterns
				1307	def : Pat<(sext_inreg GR32:$src, i16),
				1308	(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
				1309	def : Pat<(sext_inreg GR32:$src, i8),
				1310	(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1311	GR32_ABCD)),
				1312	sub_8bit))>,
				1313	Requires<[Not64BitMode]>;
				1314
				1315	def : Pat<(sext_inreg GR16:$src, i8),
				1316	(EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
				1317	(i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
				1318	sub_16bit)>,
				1319	Requires<[Not64BitMode]>;
				1320
				1321	def : Pat<(sext_inreg GR64:$src, i32),
				1322	(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
				1323	def : Pat<(sext_inreg GR64:$src, i16),
				1324	(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
				1325	def : Pat<(sext_inreg GR64:$src, i8),
				1326	(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
				1327	def : Pat<(sext_inreg GR32:$src, i8),
				1328	(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
				1329	Requires<[In64BitMode]>;
				1330	def : Pat<(sext_inreg GR16:$src, i8),
				1331	(EXTRACT_SUBREG (MOVSX32rr8
				1332	(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
				1333	Requires<[In64BitMode]>;
				1334
				1335	// sext, sext_load, zext, zext_load
				1336	def: Pat<(i16 (sext GR8:$src)),
				1337	(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
				1338	def: Pat<(sextloadi16i8 addr:$src),
				1339	(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
				1340	def: Pat<(i16 (zext GR8:$src)),
				1341	(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
				1342	def: Pat<(zextloadi16i8 addr:$src),
				1343	(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
				1344
				1345	// trunc patterns
				1346	def : Pat<(i16 (trunc GR32:$src)),
				1347	(EXTRACT_SUBREG GR32:$src, sub_16bit)>;
				1348	def : Pat<(i8 (trunc GR32:$src)),
				1349	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1350	sub_8bit)>,
				1351	Requires<[Not64BitMode]>;
				1352	def : Pat<(i8 (trunc GR16:$src)),
				1353	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1354	sub_8bit)>,
				1355	Requires<[Not64BitMode]>;
				1356	def : Pat<(i32 (trunc GR64:$src)),
				1357	(EXTRACT_SUBREG GR64:$src, sub_32bit)>;
				1358	def : Pat<(i16 (trunc GR64:$src)),
				1359	(EXTRACT_SUBREG GR64:$src, sub_16bit)>;
				1360	def : Pat<(i8 (trunc GR64:$src)),
				1361	(EXTRACT_SUBREG GR64:$src, sub_8bit)>;
				1362	def : Pat<(i8 (trunc GR32:$src)),
				1363	(EXTRACT_SUBREG GR32:$src, sub_8bit)>,
				1364	Requires<[In64BitMode]>;
				1365	def : Pat<(i8 (trunc GR16:$src)),
				1366	(EXTRACT_SUBREG GR16:$src, sub_8bit)>,
				1367	Requires<[In64BitMode]>;
				1368
				1369	// h-register tricks
				1370	def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
				1371	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1372	sub_8bit_hi)>,
				1373	Requires<[Not64BitMode]>;
				1374	def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
				1375	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1376	sub_8bit_hi)>,
				1377	Requires<[Not64BitMode]>;
				1378	def : Pat<(srl GR16:$src, (i8 8)),
				1379	(EXTRACT_SUBREG
				1380	(MOVZX32rr8
				1381	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1382	sub_8bit_hi)),
				1383	sub_16bit)>,
				1384	Requires<[Not64BitMode]>;
				1385	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1386	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1387	GR16_ABCD)),
				1388	sub_8bit_hi))>,
				1389	Requires<[Not64BitMode]>;
				1390	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1391	(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
				1392	GR16_ABCD)),
				1393	sub_8bit_hi))>,
				1394	Requires<[Not64BitMode]>;
				1395	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1396	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1397	GR32_ABCD)),
				1398	sub_8bit_hi))>,
				1399	Requires<[Not64BitMode]>;
				1400	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1401	(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1402	GR32_ABCD)),
				1403	sub_8bit_hi))>,
				1404	Requires<[Not64BitMode]>;
				1405
				1406	// h-register tricks.
				1407	// For now, be conservative on x86-64 and use an h-register extract only if the
				1408	// value is immediately zero-extended or stored, which are somewhat common
				1409	// cases. This uses a bunch of code to prevent a register requiring a REX prefix
				1410	// from being allocated in the same instruction as the h register, as there's
				1411	// currently no way to describe this requirement to the register allocator.
				1412
				1413	// h-register extract and zero-extend.
				1414	def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
				1415	(SUBREG_TO_REG
				1416	(i64 0),
				1417	(MOVZX32_NOREXrr8
				1418	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1419	sub_8bit_hi)),
				1420	sub_32bit)>;
				1421	def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
				1422	(MOVZX32_NOREXrr8
				1423	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1424	sub_8bit_hi))>,
				1425	Requires<[In64BitMode]>;
				1426	def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
				1427	(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
				1428	GR32_ABCD)),
				1429	sub_8bit_hi))>,
				1430	Requires<[In64BitMode]>;
				1431	def : Pat<(srl GR16:$src, (i8 8)),
				1432	(EXTRACT_SUBREG
				1433	(MOVZX32_NOREXrr8
				1434	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1435	sub_8bit_hi)),
				1436	sub_16bit)>,
				1437	Requires<[In64BitMode]>;
				1438	def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
				1439	(MOVZX32_NOREXrr8
				1440	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1441	sub_8bit_hi))>,
				1442	Requires<[In64BitMode]>;
				1443	def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
				1444	(MOVZX32_NOREXrr8
				1445	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1446	sub_8bit_hi))>,
				1447	Requires<[In64BitMode]>;
				1448	def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
				1449	(SUBREG_TO_REG
				1450	(i64 0),
				1451	(MOVZX32_NOREXrr8
				1452	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1453	sub_8bit_hi)),
				1454	sub_32bit)>;
				1455	def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
				1456	(SUBREG_TO_REG
				1457	(i64 0),
				1458	(MOVZX32_NOREXrr8
				1459	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1460	sub_8bit_hi)),
				1461	sub_32bit)>;
				1462
				1463	// h-register extract and store.
				1464	def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
				1465	(MOV8mr_NOREX
				1466	addr:$dst,
				1467	(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
				1468	sub_8bit_hi))>;
				1469	def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
				1470	(MOV8mr_NOREX
				1471	addr:$dst,
				1472	(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
				1473	sub_8bit_hi))>,
				1474	Requires<[In64BitMode]>;
				1475	def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
				1476	(MOV8mr_NOREX
				1477	addr:$dst,
				1478	(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
				1479	sub_8bit_hi))>,
				1480	Requires<[In64BitMode]>;
				1481
				1482
				1483	// (shl x, 1) ==> (add x, x)
				1484	// Note that if x is undef (immediate or otherwise), we could theoretically
				1485	// end up with the two uses of x getting different values, producing a result
				1486	// where the least significant bit is not 0. However, the probability of this
				1487	// happening is considered low enough that this is officially not a
				1488	// "real problem".
				1489	def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
				1490	def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
				1491	def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
				1492	def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
				1493
				1494	// Helper imms that check if a mask doesn't change significant shift bits.
					// immShift32 matches an i8 immediate whose five least-significant bits are all
					// ones (e.g. 31, 63, 255); it is used below for 8-, 16- and 32-bit shifts.
Benjamin Kramer	5f6a907	2015-02-12 15:35:40 +0000	[diff] [blame]	1495	def immShift32 : ImmLeaf<i8, [{
				1496	return countTrailingOnes<uint64_t>(Imm) >= 5;
				1497	}]>;
					// immShift64 matches an i8 immediate whose six least-significant bits are all
					// ones (e.g. 63, 127, 255); it is used below for 64-bit shifts.
				1498	def immShift64 : ImmLeaf<i8, [{
				1499	return countTrailingOnes<uint64_t>(Imm) >= 6;
				1500	}]>;
Michael Kuperstein	e86aa9a	2015-02-01 16:15:07 +0000	[diff] [blame]	1501
				1502	// Shift amount is implicitly masked.
				1503	multiclass MaskedShiftAmountPats<SDNode frag, string name> {
				1504	// (shift x (and y, 31)) ==> (shift x, y)
				1505	def : Pat<(frag GR8:$src1, (and CL, immShift32)),
				1506	(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
				1507	def : Pat<(frag GR16:$src1, (and CL, immShift32)),
				1508	(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
				1509	def : Pat<(frag GR32:$src1, (and CL, immShift32)),
				1510	(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
				1511	def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
				1512	(!cast<Instruction>(name # "8mCL") addr:$dst)>;
				1513	def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
				1514	(!cast<Instruction>(name # "16mCL") addr:$dst)>;
				1515	def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
				1516	(!cast<Instruction>(name # "32mCL") addr:$dst)>;
				1517
				1518	// (shift x (and y, 63)) ==> (shift x, y)
				1519	def : Pat<(frag GR64:$src1, (and CL, immShift64)),
				1520	(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
				1521	def : Pat<(store (frag (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
				1522	(!cast<Instruction>(name # "64mCL") addr:$dst)>;
				1523	}
				1524
				1525	defm : MaskedShiftAmountPats<shl, "SHL">;
				1526	defm : MaskedShiftAmountPats<srl, "SHR">;
				1527	defm : MaskedShiftAmountPats<sra, "SAR">;
				1528	defm : MaskedShiftAmountPats<rotl, "ROL">;
				1529	defm : MaskedShiftAmountPats<rotr, "ROR">;
				1530
				1531	// (anyext (setcc_carry)) -> (setcc_carry)
				1532	def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1533	(SETB_C16r)>;
				1534	def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
				1535	(SETB_C32r)>;
				1536	def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
				1537	(SETB_C32r)>;
				1538
				1539
				1540
				1541
				1542	//===----------------------------------------------------------------------===//
				1543	// EFLAGS-defining Patterns
				1544	//===----------------------------------------------------------------------===//
				1545
				1546	// add reg, reg
				1547	def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
				1548	def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
				1549	def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
				1550
				1551	// add reg, mem
				1552	def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
				1553	(ADD8rm GR8:$src1, addr:$src2)>;
				1554	def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
				1555	(ADD16rm GR16:$src1, addr:$src2)>;
				1556	def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
				1557	(ADD32rm GR32:$src1, addr:$src2)>;
				1558
				1559	// add reg, imm
				1560	def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
				1561	def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
				1562	def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
				1563	def : Pat<(add GR16:$src1, i16immSExt8:$src2),
				1564	(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1565	def : Pat<(add GR32:$src1, i32immSExt8:$src2),
				1566	(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1567
				1568	// sub reg, reg
				1569	def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
				1570	def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
				1571	def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
				1572
				1573	// sub reg, mem
				1574	def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
				1575	(SUB8rm GR8:$src1, addr:$src2)>;
				1576	def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
				1577	(SUB16rm GR16:$src1, addr:$src2)>;
				1578	def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
				1579	(SUB32rm GR32:$src1, addr:$src2)>;
				1580
				1581	// sub reg, imm
				1582	def : Pat<(sub GR8:$src1, imm:$src2),
				1583	(SUB8ri GR8:$src1, imm:$src2)>;
				1584	def : Pat<(sub GR16:$src1, imm:$src2),
				1585	(SUB16ri GR16:$src1, imm:$src2)>;
				1586	def : Pat<(sub GR32:$src1, imm:$src2),
				1587	(SUB32ri GR32:$src1, imm:$src2)>;
				1588	def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
				1589	(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1590	def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
				1591	(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1592
				1593	// sub 0, reg
				1594	def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
				1595	def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
				1596	def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
				1597	def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
				1598
				1599	// mul reg, reg
				1600	def : Pat<(mul GR16:$src1, GR16:$src2),
				1601	(IMUL16rr GR16:$src1, GR16:$src2)>;
				1602	def : Pat<(mul GR32:$src1, GR32:$src2),
				1603	(IMUL32rr GR32:$src1, GR32:$src2)>;
				1604
				1605	// mul reg, mem
				1606	def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
				1607	(IMUL16rm GR16:$src1, addr:$src2)>;
				1608	def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
				1609	(IMUL32rm GR32:$src1, addr:$src2)>;
				1610
				1611	// mul reg, imm
				1612	def : Pat<(mul GR16:$src1, imm:$src2),
				1613	(IMUL16rri GR16:$src1, imm:$src2)>;
				1614	def : Pat<(mul GR32:$src1, imm:$src2),
				1615	(IMUL32rri GR32:$src1, imm:$src2)>;
				1616	def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
				1617	(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
				1618	def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
				1619	(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
				1620
				1621	// reg = mul mem, imm
				1622	def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
				1623	(IMUL16rmi addr:$src1, imm:$src2)>;
				1624	def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
				1625	(IMUL32rmi addr:$src1, imm:$src2)>;
				1626	def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
				1627	(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
				1628	def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
				1629	(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
				1630
				1631	// Patterns for nodes that do not produce flags, for instructions that do.
				1632
				1633	// addition
				1634	def : Pat<(add GR64:$src1, GR64:$src2),
				1635	(ADD64rr GR64:$src1, GR64:$src2)>;
				1636	def : Pat<(add GR64:$src1, i64immSExt8:$src2),
				1637	(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1638	def : Pat<(add GR64:$src1, i64immSExt32:$src2),
				1639	(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1640	def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
				1641	(ADD64rm GR64:$src1, addr:$src2)>;
				1642
				1643	// subtraction
				1644	def : Pat<(sub GR64:$src1, GR64:$src2),
				1645	(SUB64rr GR64:$src1, GR64:$src2)>;
				1646	def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
				1647	(SUB64rm GR64:$src1, addr:$src2)>;
				1648	def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
				1649	(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1650	def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
				1651	(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1652
				1653	// Multiply
				1654	def : Pat<(mul GR64:$src1, GR64:$src2),
				1655	(IMUL64rr GR64:$src1, GR64:$src2)>;
				1656	def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
				1657	(IMUL64rm GR64:$src1, addr:$src2)>;
				1658	def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
				1659	(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
				1660	def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
				1661	(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
				1662	def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
				1663	(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
				1664	def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
				1665	(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
				1666
				1667	// Increment/Decrement reg.
				1668	// Only select INC/DEC when the NotSlowIncDec predicate holds.
				1669	let Predicates = [NotSlowIncDec] in {
				1670	def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
				1671	def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
				1672	def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
				1673	def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
				1674	def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
				1675	def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
				1676	def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
				1677	def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
				1678	}
				1679
				1680	// or reg/reg.
				1681	def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
				1682	def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
				1683	def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
				1684	def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
				1685
				1686	// or reg/mem
				1687	def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
				1688	(OR8rm GR8:$src1, addr:$src2)>;
				1689	def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
				1690	(OR16rm GR16:$src1, addr:$src2)>;
				1691	def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
				1692	(OR32rm GR32:$src1, addr:$src2)>;
				1693	def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
				1694	(OR64rm GR64:$src1, addr:$src2)>;
				1695
				1696	// or reg/imm
				1697	def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
				1698	def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
				1699	def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
				1700	def : Pat<(or GR16:$src1, i16immSExt8:$src2),
				1701	(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1702	def : Pat<(or GR32:$src1, i32immSExt8:$src2),
				1703	(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1704	def : Pat<(or GR64:$src1, i64immSExt8:$src2),
				1705	(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1706	def : Pat<(or GR64:$src1, i64immSExt32:$src2),
				1707	(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1708
				1709	// xor reg/reg
				1710	def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
				1711	def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
				1712	def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
				1713	def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
				1714
				1715	// xor reg/mem
				1716	def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
				1717	(XOR8rm GR8:$src1, addr:$src2)>;
				1718	def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
				1719	(XOR16rm GR16:$src1, addr:$src2)>;
				1720	def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
				1721	(XOR32rm GR32:$src1, addr:$src2)>;
				1722	def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
				1723	(XOR64rm GR64:$src1, addr:$src2)>;
				1724
				1725	// xor reg/imm
				1726	def : Pat<(xor GR8:$src1, imm:$src2),
				1727	(XOR8ri GR8:$src1, imm:$src2)>;
				1728	def : Pat<(xor GR16:$src1, imm:$src2),
				1729	(XOR16ri GR16:$src1, imm:$src2)>;
				1730	def : Pat<(xor GR32:$src1, imm:$src2),
				1731	(XOR32ri GR32:$src1, imm:$src2)>;
				1732	def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
				1733	(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1734	def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
				1735	(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1736	def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
				1737	(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1738	def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
				1739	(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1740
				1741	// and reg/reg
				1742	def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
				1743	def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
				1744	def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
				1745	def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
				1746
				1747	// and reg/mem
				1748	def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
				1749	(AND8rm GR8:$src1, addr:$src2)>;
				1750	def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
				1751	(AND16rm GR16:$src1, addr:$src2)>;
				1752	def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
				1753	(AND32rm GR32:$src1, addr:$src2)>;
				1754	def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
				1755	(AND64rm GR64:$src1, addr:$src2)>;
				1756
				1757	// and reg/imm
				1758	def : Pat<(and GR8:$src1, imm:$src2),
				1759	(AND8ri GR8:$src1, imm:$src2)>;
				1760	def : Pat<(and GR16:$src1, imm:$src2),
				1761	(AND16ri GR16:$src1, imm:$src2)>;
				1762	def : Pat<(and GR32:$src1, imm:$src2),
				1763	(AND32ri GR32:$src1, imm:$src2)>;
				1764	def : Pat<(and GR16:$src1, i16immSExt8:$src2),
				1765	(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
				1766	def : Pat<(and GR32:$src1, i32immSExt8:$src2),
				1767	(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
				1768	def : Pat<(and GR64:$src1, i64immSExt8:$src2),
				1769	(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
				1770	def : Pat<(and GR64:$src1, i64immSExt32:$src2),
				1771	(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
				1772
				1773	// Bit scan instruction patterns to match explicit zero-undef behavior.
				1774	def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
				1775	def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
				1776	def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
				1777	def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
				1778	def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
				1779	def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
				1780
				1781	// When HasMOVBE is enabled it is possible to get a non-legalized
				1782	// register-register 16 bit bswap. This maps it to a ROL instruction.
					// Rotating a 16-bit value left by 8 exchanges its two bytes, which is the
					// result a 16-bit bswap must produce.
				1783	let Predicates = [HasMOVBE] in {
				1784	def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
				1785	}