Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

2015-02-01 16:15:07 +0000

[diff] [blame]

1

//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

//

10

// This file describes the various pseudo instructions used by the compiler,

11

// as well as Pat patterns used during instruction selection.

12

//

13

//===----------------------------------------------------------------------===//

14

15

//===----------------------------------------------------------------------===//

16

// Pattern Matching Support

17

18

def GetLo32XForm : SDNodeXForm<imm, [{

19

// Transformation function: get the low 32 bits.

Sergey Dmitrouk

842a51b

2015-04-28 14:05:47 +0000

[diff] [blame]

20

return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

21

}]>;

22

23

def GetLo8XForm : SDNodeXForm<imm, [{

24

// Transformation function: get the low 8 bits.

Sergey Dmitrouk

842a51b

2015-04-28 14:05:47 +0000

[diff] [blame]

25

return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}]>;

//===----------------------------------------------------------------------===//

30

// Random Pseudo Instructions.

31

32

// PIC base construction. This expands to code that looks like this:

33

// call $next_inst

34

// popl %destreg

35

let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in

36

def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),

"", []>;

// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into

41

// a stack adjustment and the codegen must know that they may modify the stack

42

// pointer before prolog-epilog rewriting occurs.

43

// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become

44

// sub / add which can clobber EFLAGS.

45

let Defs = [ESP, EFLAGS], Uses = [ESP] in {

Serge Pavlov

2017-05-09 13:35:13 +0000

[diff] [blame]

46

def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs),

47

(ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

48

"#ADJCALLSTACKDOWN",

Michael Kuperstein

13fbd45

2015-02-01 16:56:04 +0000

[diff] [blame]

49

[]>,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

50

Requires<[NotLP64]>;

51

def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

52

"#ADJCALLSTACKUP",

53

[(X86callseq_end timm:$amt1, timm:$amt2)]>,

54

Requires<[NotLP64]>;

55

}

Serge Pavlov

2017-05-09 13:35:13 +0000

[diff] [blame]

56

def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),

57

(ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[NotLP64]>;

Michael Kuperstein

13fbd45

2015-02-01 16:56:04 +0000

[diff] [blame]

58

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

59

60

// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into

61

// a stack adjustment and the codegen must know that they may modify the stack

62

// pointer before prolog-epilog rewriting occurs.

63

// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become

64

// sub / add which can clobber EFLAGS.

65

let Defs = [RSP, EFLAGS], Uses = [RSP] in {

Serge Pavlov

2017-05-09 13:35:13 +0000

[diff] [blame]

66

def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs),

67

(ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

68

"#ADJCALLSTACKDOWN",

Michael Kuperstein

13fbd45

2015-02-01 16:56:04 +0000

[diff] [blame]

69

[]>,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

70

Requires<[IsLP64]>;

71

def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

72

"#ADJCALLSTACKUP",

73

[(X86callseq_end timm:$amt1, timm:$amt2)]>,

74

Requires<[IsLP64]>;

75

}

Serge Pavlov

2017-05-09 13:35:13 +0000

[diff] [blame]

76

def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),

77

(ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[IsLP64]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

78

79

80

// x86-64 va_start lowering magic.

81

let usesCustomInserter = 1, Defs = [EFLAGS] in {

82

def VASTART_SAVE_XMM_REGS : I<0, Pseudo,

83

(outs),

84

(ins GR8:$al,

85

i64imm:$regsavefi, i64imm:$offset,

86

variable_ops),

87

"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",

88

[(X86vastart_save_xmm_regs GR8:$al,

imm:$regsavefi,

imm:$offset),

(implicit EFLAGS)]>;

// The VAARG_64 pseudo-instruction takes the address of the va_list,

94

// and places the address of the next argument into a register.

95

let Defs = [EFLAGS] in

96

def VAARG_64 : I<0, Pseudo,

97

(outs GR64:$dst),

98

(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),

99

"#VAARG_64 $dst, $ap, $size, $mode, $align",

100

[(set GR64:$dst,

101

(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),

102

(implicit EFLAGS)]>;

103

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

104

105

// When using segmented stacks these are lowered into instructions which first

106

// check if the current stacklet has enough free memory. If it does, memory is

107

// allocated by bumping the stack pointer. Otherwise memory is allocated from

108

// the heap.

109

110

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in

111

def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),

112

"# variable sized alloca for segmented stacks",

113

[(set GR32:$dst,

114

(X86SegAlloca GR32:$size))]>,

115

Requires<[NotLP64]>;

116

117

let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in

118

def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),

119

"# variable sized alloca for segmented stacks",

120

[(set GR64:$dst,

121

(X86SegAlloca GR64:$size))]>,

122

Requires<[In64BitMode]>;

123

}

124

Hans Wennborg

8eb336c

2016-05-18 16:10:17 +0000

[diff] [blame]

125

// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows

126

// targets. These calls are needed to probe the stack when allocating more than

127

// 4k bytes in one go. Touching the stack at 4K increments is necessary to

128

// ensure that the guard pages used by the OS virtual memory manager are

129

// allocated in correct sequence.

130

// The main point of separate instructions is the extra unmodelled effects

131

// (compared to ordinary calls) like stack pointer change.

132

133

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in

134

def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),

135

"# dynamic stack allocation",

136

[(X86WinAlloca GR32:$size)]>,

137

Requires<[NotLP64]>;

138

139

let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in

140

def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),

141

"# dynamic stack allocation",

142

[(X86WinAlloca GR64:$size)]>,

143

Requires<[In64BitMode]>;

144

145

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

146

//===----------------------------------------------------------------------===//

147

// EH Pseudo Instructions

148

//

149

let SchedRW = [WriteSystem] in {

150

let isTerminator = 1, isReturn = 1, isBarrier = 1,

151

hasCtrlDep = 1, isCodeGenOnly = 1 in {

152

def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),

153

"ret\t#eh_return, addr: $addr",

154

[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;

}

let isTerminator = 1, isReturn = 1, isBarrier = 1,

159

hasCtrlDep = 1, isCodeGenOnly = 1 in {

160

def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),

161

"ret\t#eh_return, addr: $addr",

162

[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;

}

Reid Kleckner

2015-11-06 01:49:05 +0000

[diff] [blame]

166

let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,

167

isCodeGenOnly = 1, isReturn = 1 in {

168

def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;

169

David Majnemer

2652b75

2015-11-09 23:07:48 +0000

[diff] [blame]

170

// CATCHRET needs a custom inserter for SEH.

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

171

let usesCustomInserter = 1 in

172

def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from),

173

"# CATCHRET",

174

[(catchret bb:$dst, bb:$from)]>;

Reid Kleckner

0e28823

2015-08-27 23:27:47 +0000

[diff] [blame]

175

}

176

Reid Kleckner

420f054

2015-11-09 23:34:42 +0000

[diff] [blame]

177

let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,

David Majnemer

2652b75

2015-11-09 23:07:48 +0000

[diff] [blame]

178

usesCustomInserter = 1 in

179

def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>;

180

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

181

// This instruction is responsible for re-establishing stack pointers after an

182

// exception has been caught and we are rejoining normal control flow in the

183

// parent function or funclet. It generally sets ESP and EBP, and optionally

184

// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us

185

// elsewhere.

Reid Kleckner

420f054

2015-11-09 23:34:42 +0000

[diff] [blame]

186

let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

187

def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;

188

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

189

let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,

190

usesCustomInserter = 1 in {

191

def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),

192

"#EH_SJLJ_SETJMP32",

193

[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,

194

Requires<[Not64BitMode]>;

195

def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),

196

"#EH_SJLJ_SETJMP64",

197

[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,

198

Requires<[In64BitMode]>;

199

let isTerminator = 1 in {

200

def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),

201

"#EH_SJLJ_LONGJMP32",

202

[(X86eh_sjlj_longjmp addr:$buf)]>,

203

Requires<[Not64BitMode]>;

204

def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),

205

"#EH_SJLJ_LONGJMP64",

206

[(X86eh_sjlj_longjmp addr:$buf)]>,

207

Requires<[In64BitMode]>;

}

}

} // SchedRW

let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {

213

def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),

214

"#EH_SjLj_Setup\t$dst", []>;

215

}

216

217

//===----------------------------------------------------------------------===//

218

// Pseudo instructions used by unwind info.

219

//

220

let isPseudo = 1 in {

221

def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),

222

"#SEH_PushReg $reg", []>;

223

def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),

224

"#SEH_SaveReg $reg, $dst", []>;

225

def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),

226

"#SEH_SaveXMM $reg, $dst", []>;

227

def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),

228

"#SEH_StackAlloc $size", []>;

229

def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),

230

"#SEH_SetFrame $reg, $offset", []>;

231

def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),

232

"#SEH_PushFrame $mode", []>;

233

def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),

234

"#SEH_EndPrologue", []>;

235

def SEH_Epilogue : I<0, Pseudo, (outs), (ins),

236

"#SEH_Epilogue", []>;

237

}

238

239

//===----------------------------------------------------------------------===//

240

// Pseudo instructions used by segmented stacks.

241

//

242

243

// This is lowered into a RET instruction by MCInstLower. We need

244

// this so that we don't have to have a MachineBasicBlock which ends

245

// with a RET and also has successors.

246

let isPseudo = 1 in {

247

def MORESTACK_RET: I<0, Pseudo, (outs), (ins),

248

"", []>;

249

250

// This instruction is lowered to a RET followed by a MOV. The two

251

// instructions are not generated on a higher level since then the

252

// verifier sees a MachineBasicBlock ending with a non-terminator.

253

def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),

"", []>;

}

//===----------------------------------------------------------------------===//

258

// Alias Instructions

259

//===----------------------------------------------------------------------===//

260

261

// Alias instruction mapping movr0 to xor.

262

// FIXME: remove when we can teach regalloc that xor reg, reg is ok.

263

let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,

Craig Topper

2017-03-17 05:59:54 +0000

[diff] [blame]

264

isPseudo = 1, AddedComplexity = 10 in

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

265

def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

266

[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;

267

268

// Other widths can also make use of the 32-bit xor, which may have a smaller

269

// encoding and avoid partial register updates.

Craig Topper

2017-03-17 05:59:54 +0000

[diff] [blame]

270

let AddedComplexity = 10 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

271

def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;

272

def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;

Craig Topper

2017-03-17 05:59:54 +0000

[diff] [blame]

273

def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

274

}

275

Hans Wennborg

08d5905

2015-12-15 17:10:28 +0000

[diff] [blame]

276

let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],

Craig Topper

2017-03-17 05:59:54 +0000

[diff] [blame]

277

AddedComplexity = 10 in {

Hans Wennborg

08d5905

2015-12-15 17:10:28 +0000

[diff] [blame]

278

// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,

279

// which only require 3 bytes compared to MOV32ri which requires 5.

280

let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {

281

def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

282

[(set GR32:$dst, 1)]>;

283

def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

284

[(set GR32:$dst, -1)]>;

285

}

286

287

// MOV16ri is 4 bytes, so the instructions above are smaller.

288

def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;

289

def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;

290

}

291

Craig Topper

2017-03-17 05:59:54 +0000

[diff] [blame]

292

let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5 in {

Hans Wennborg

4ae5119

2016-03-25 01:10:56 +0000

[diff] [blame]

293

// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.

294

// FIXME: Add itinerary class and Schedule.

295

def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",

296

[(set GR32:$dst, i32immSExt8:$src)]>,

297

Requires<[OptForMinSize, NotWin64WithoutFP]>;

298

def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",

299

[(set GR64:$dst, i64immSExt8:$src)]>,

300

Requires<[OptForMinSize, NotWin64WithoutFP]>;

301

}

302

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

303

// Materialize i64 constant where top 32 bits are zero. This could theoretically

304

// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however

305

// that would make it more difficult to rematerialize.

Craig Topper

e00bffb

2016-01-05 07:44:14 +0000

[diff] [blame]

306

let isReMaterializable = 1, isAsCheapAsAMove = 1,

307

isPseudo = 1, hasSideEffects = 0 in

308

def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

309

310

// This 64-bit pseudo-move can be used for both a 64-bit constant that is

Sanjay Patel

85030aa

2015-10-13 16:23:00 +0000

[diff] [blame]

311

// actually the zero-extension of a 32-bit constant and for labels in the

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

312

// x86-64 small code model.

Sanjay Patel

85030aa

2015-10-13 16:23:00 +0000

[diff] [blame]

313

def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

314

315

let AddedComplexity = 1 in

316

def : Pat<(i64 mov64imm32:$src),

317

(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;

318

319

// Use sbb to materialize carry bit.

320

let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {

321

// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.

322

// However, Pat<> can't replicate the destination reg into the inputs of the

323

// result.

324

def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",

325

[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

326

def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",

327

[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

328

def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",

329

[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

330

def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",

331

[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]

def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

336

(SETB_C16r)>;

337

def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

338

(SETB_C32r)>;

339

def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

340

(SETB_C64r)>;

341

342

def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

343

(SETB_C16r)>;

344

def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

345

(SETB_C32r)>;

346

def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

347

(SETB_C64r)>;

348

349

// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and

350

// will be eliminated and that the sbb can be extended up to a wider type. When

351

// this happens, it is great. However, if we are left with an 8-bit sbb and an

352

// and, we might as well just match it as a setb.

353

def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),

354

(SETBr)>;

355

356

// (add OP, SETB) -> (adc OP, 0)

357

def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),

358

(ADC8ri GR8:$op, 0)>;

359

def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),

360

(ADC32ri8 GR32:$op, 0)>;

361

def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),

362

(ADC64ri8 GR64:$op, 0)>;

363

364

// (sub OP, SETB) -> (sbb OP, 0)

365

def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

366

(SBB8ri GR8:$op, 0)>;

367

def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

368

(SBB32ri8 GR32:$op, 0)>;

369

def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

370

(SBB64ri8 GR64:$op, 0)>;

371

372

// (sub OP, SETCC_CARRY) -> (adc OP, 0)

373

def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),

374

(ADC8ri GR8:$op, 0)>;

375

def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),

376

(ADC32ri8 GR32:$op, 0)>;

377

def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),

378

(ADC64ri8 GR64:$op, 0)>;

379

380

//===----------------------------------------------------------------------===//

381

// String Pseudo Instructions

382

//

383

let SchedRW = [WriteMicrocoded] in {

384

let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {

385

def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",

386

[(X86rep_movs i8)], IIC_REP_MOVS>, REP,

387

Requires<[Not64BitMode]>;

388

def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",

389

[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,

390

Requires<[Not64BitMode]>;

391

def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",

392

[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,

393

Requires<[Not64BitMode]>;

394

}

395

396

let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {

397

def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",

398

[(X86rep_movs i8)], IIC_REP_MOVS>, REP,

399

Requires<[In64BitMode]>;

400

def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",

401

[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,

402

Requires<[In64BitMode]>;

403

def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",

404

[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,

405

Requires<[In64BitMode]>;

406

def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",

407

[(X86rep_movs i64)], IIC_REP_MOVS>, REP,

408

Requires<[In64BitMode]>;

409

}

410

411

// FIXME: Should use "(X86rep_stos AL)" as the pattern.

412

let Defs = [ECX,EDI], isCodeGenOnly = 1 in {

413

let Uses = [AL,ECX,EDI] in

414

def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",

415

[(X86rep_stos i8)], IIC_REP_STOS>, REP,

416

Requires<[Not64BitMode]>;

417

let Uses = [AX,ECX,EDI] in

418

def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",

419

[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,

420

Requires<[Not64BitMode]>;

421

let Uses = [EAX,ECX,EDI] in

422

def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",

423

[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,

424

Requires<[Not64BitMode]>;

425

}

426

427

let Defs = [RCX,RDI], isCodeGenOnly = 1 in {

428

let Uses = [AL,RCX,RDI] in

429

def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",

430

[(X86rep_stos i8)], IIC_REP_STOS>, REP,

431

Requires<[In64BitMode]>;

432

let Uses = [AX,RCX,RDI] in

433

def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",

434

[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,

435

Requires<[In64BitMode]>;

436

let Uses = [RAX,RCX,RDI] in

437

def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",

438

[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,

439

Requires<[In64BitMode]>;

440

441

let Uses = [RAX,RCX,RDI] in

442

def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",

443

[(X86rep_stos i64)], IIC_REP_STOS>, REP,

444

Requires<[In64BitMode]>;

}

} // SchedRW

//===----------------------------------------------------------------------===//

449

// Thread Local Storage Instructions

//

// ELF TLS Support

// All calls clobber the non-callee saved registers. ESP is marked as

454

// a use to prevent stack-pointer assignments that appear immediately

455

// before calls from potentially appearing dead.

456

let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,

457

ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,

458

MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,

459

XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,

460

XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],

Davide Italiano

228978c

2016-02-20 00:44:47 +0000

[diff] [blame]

461

usesCustomInserter = 1, Uses = [ESP] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

462

def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

463

"# TLS_addr32",

464

[(X86tlsaddr tls32addr:$sym)]>,

465

Requires<[Not64BitMode]>;

466

def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

467

"# TLS_base_addr32",

468

[(X86tlsbaseaddr tls32baseaddr:$sym)]>,

469

Requires<[Not64BitMode]>;

470

}

471

472

// All calls clobber the non-callee saved registers. RSP is marked as

473

// a use to prevent stack-pointer assignments that appear immediately

474

// before calls from potentially appearing dead.

475

let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,

476

FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,

477

ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,

478

MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,

479

XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,

480

XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],

Davide Italiano

228978c

2016-02-20 00:44:47 +0000

[diff] [blame]

481

usesCustomInserter = 1, Uses = [RSP] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

482

def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

483

"# TLS_addr64",

484

[(X86tlsaddr tls64addr:$sym)]>,

485

Requires<[In64BitMode]>;

486

def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

487

"# TLS_base_addr64",

488

[(X86tlsbaseaddr tls64baseaddr:$sym)]>,

489

Requires<[In64BitMode]>;

490

}

491

492

// Darwin TLS Support

493

// For i386, the address of the thunk is passed on the stack, on return the

494

// address of the variable is in %eax. %ecx is trashed during the function

495

// call. All other registers are preserved.

496

let Defs = [EAX, ECX, EFLAGS],

497

Uses = [ESP],

498

usesCustomInserter = 1 in

499

def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

500

"# TLSCall_32",

501

[(X86TLSCall addr:$sym)]>,

502

Requires<[Not64BitMode]>;

503

Quentin Colombet

d6dbec4

2016-04-27 21:37:37 +0000

[diff] [blame]

504

// For x86_64, the address of the thunk is passed in %rdi, but the

505

// pseudo directly uses the symbol, so do not add an implicit use of

506

// %rdi. The lowering will do the right thing with RDI.

507

// On return the address of the variable is in %rax. All other

508

// registers are preserved.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

509

let Defs = [RAX, EFLAGS],

Quentin Colombet

d6dbec4

2016-04-27 21:37:37 +0000

[diff] [blame]

510

Uses = [RSP],

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

511

usesCustomInserter = 1 in

512

def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

513

"# TLSCall_64",

514

[(X86TLSCall addr:$sym)]>,

515

Requires<[In64BitMode]>;

516

517

518

//===----------------------------------------------------------------------===//

519

// Conditional Move Pseudo Instructions

520

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

521

// CMOV* - Used to implement the SELECT DAG operation. Expanded after

522

// instruction selection into a branch sequence.

523

multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {

524

def CMOV#NAME : I<0, Pseudo,

525

(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),

526

"#CMOV_"#NAME#" PSEUDO!",

527

[(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,

EFLAGS)))]>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

531

let usesCustomInserter = 1, Uses = [EFLAGS] in {

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

532

// X86 doesn't have 8-bit conditional moves. Use a customInserter to

533

// emit control flow. An alternative to this is to mark i8 SELECT as Promote,

534

// however that requires promoting the operands, and can induce additional

535

// i8 register pressure.

536

defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

537

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

538

let Predicates = [NoCMov] in {

539

defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;

540

defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;

541

} // Predicates = [NoCMov]

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

542

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

543

// fcmov doesn't handle all possible EFLAGS; provide a fallback if there is no

544

// SSE1/SSE2.

545

let Predicates = [FPStackf32] in

546

defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

547

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

548

let Predicates = [FPStackf64] in

549

defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;

550

551

defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;

552

553

defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;

554

defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;

Chih-Hung Hsieh

7993e18

2015-12-14 22:08:36 +0000

[diff] [blame]

555

defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

556

defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;

557

defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;

558

defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;

559

defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;

560

defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;

561

defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;

562

defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;

563

defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;

564

defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;

Elena Demikhovsky

c1ac5d7

2015-05-12 09:36:52 +0000

[diff] [blame]

565

defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;

566

defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;

567

defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;

568

defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

569

} // usesCustomInserter = 1, Uses = [EFLAGS]

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

570

571

//===----------------------------------------------------------------------===//

572

// Normal-Instructions-With-Lock-Prefix Pseudo Instructions

573

//===----------------------------------------------------------------------===//

574

575

// FIXME: Use normal instructions and add lock prefix dynamically.

// Memory barriers

// TODO: Get this to fold the constant into the instruction.

580

let isCodeGenOnly = 1, Defs = [EFLAGS] in

581

def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),

Craig Topper

9583f51

2016-01-05 07:44:11 +0000

[diff] [blame]

582

"or{l}\t{$zero, $dst|$dst, $zero}", [],

583

IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

584

Sched<[WriteALULd, WriteRMW]>;

585

586

let hasSideEffects = 1 in

587

def Int_MemBarrier : I<0, Pseudo, (outs), (ins),

588

"#MEMBARRIER",

589

[(X86MemBarrier)]>, Sched<[WriteLoad]>;

590

591

// RegOpc corresponds to the mr version of the instruction

592

// ImmOpc corresponds to the mi version of the instruction

593

// ImmOpc8 corresponds to the mi8 version of the instruction

594

// ImmMod corresponds to the instruction format of the mi and mi8 versions

595

multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

596

Format ImmMod, SDPatternOperator Op, string mnemonic> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

597

let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,

598

SchedRW = [WriteALULd, WriteRMW] in {

599

600

def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

601

RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },

602

MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),

603

!strconcat(mnemonic, "{b}\t",

604

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

605

[(set EFLAGS, (Op addr:$dst, GR8:$src2))],

606

IIC_ALU_NONMEM>, LOCK;

607

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

608

def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

609

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

610

MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),

611

!strconcat(mnemonic, "{w}\t",

612

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

613

[(set EFLAGS, (Op addr:$dst, GR16:$src2))],

614

IIC_ALU_NONMEM>, OpSize16, LOCK;

615

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

616

def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

617

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

618

MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),

619

!strconcat(mnemonic, "{l}\t",

620

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

621

[(set EFLAGS, (Op addr:$dst, GR32:$src2))],

622

IIC_ALU_NONMEM>, OpSize32, LOCK;

623

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

624

def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

625

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

626

MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),

627

!strconcat(mnemonic, "{q}\t",

628

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

629

[(set EFLAGS, (Op addr:$dst, GR64:$src2))],

630

IIC_ALU_NONMEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

631

632

def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

633

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },

634

ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),

635

!strconcat(mnemonic, "{b}\t",

636

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

637

[(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))],

638

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

639

640

def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

641

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

642

ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),

643

!strconcat(mnemonic, "{w}\t",

644

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

645

[(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))],

646

IIC_ALU_MEM>, OpSize16, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

647

648

def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

649

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

650

ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),

651

!strconcat(mnemonic, "{l}\t",

652

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

653

[(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))],

654

IIC_ALU_MEM>, OpSize32, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

655

656

def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

657

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

658

ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),

659

!strconcat(mnemonic, "{q}\t",

660

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

661

[(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],

662

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

663

664

def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

665

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

666

ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),

667

!strconcat(mnemonic, "{w}\t",

668

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

669

[(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))],

670

IIC_ALU_MEM>, OpSize16, LOCK;

671

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

672

def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

673

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

674

ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),

675

!strconcat(mnemonic, "{l}\t",

676

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

677

[(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))],

678

IIC_ALU_MEM>, OpSize32, LOCK;

679

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

680

def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

681

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

682

ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),

683

!strconcat(mnemonic, "{q}\t",

684

"{$src2, $dst|$dst, $src2}"),

Craig Topper

7b5925a

2016-05-02 05:44:21 +0000

[diff] [blame]

685

[(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))],

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

686

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}

}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

692

defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;

693

defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;

694

defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;

695

defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;

696

defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

697

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

698

multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

699

int Increment, string mnemonic> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

700

let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

701

SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

702

def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),

703

!strconcat(mnemonic, "{b}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

704

[(set EFLAGS, (X86lock_add addr:$dst, (i8 Increment)))],

705

IIC_UNARY_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

706

def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),

707

!strconcat(mnemonic, "{w}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

708

[(set EFLAGS, (X86lock_add addr:$dst, (i16 Increment)))],

709

IIC_UNARY_MEM>, OpSize16, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

710

def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),

711

!strconcat(mnemonic, "{l}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

712

[(set EFLAGS, (X86lock_add addr:$dst, (i32 Increment)))],

713

IIC_UNARY_MEM>, OpSize32, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

714

def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),

715

!strconcat(mnemonic, "{q}\t$dst"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

716

[(set EFLAGS, (X86lock_add addr:$dst, (i64 Increment)))],

717

IIC_UNARY_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}

}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

721

defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, 1, "inc">;

722

defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

723

724

// Atomic compare and swap.

725

multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,

726

SDPatternOperator frag, X86MemOperand x86memop,

727

InstrItinClass itin> {

Nikolai Bozhenov

3a8d108

2016-11-24 13:23:35 +0000

[diff] [blame]

728

let isCodeGenOnly = 1, usesCustomInserter = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

729

def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),

730

!strconcat(mnemonic, "\t$ptr"),

731

[(frag addr:$ptr)], itin>, TB, LOCK;

}

}

multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,

736

string mnemonic, SDPatternOperator frag,

737

InstrItinClass itin8, InstrItinClass itin> {

738

let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {

739

let Defs = [AL, EFLAGS], Uses = [AL] in

740

def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),

741

!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),

742

[(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;

743

let Defs = [AX, EFLAGS], Uses = [AX] in

744

def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),

745

!strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),

746

[(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;

747

let Defs = [EAX, EFLAGS], Uses = [EAX] in

748

def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),

749

!strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),

750

[(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;

751

let Defs = [RAX, EFLAGS], Uses = [RAX] in

752

def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),

753

!strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),

754

[(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;

}

}

let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],

759

SchedRW = [WriteALULd, WriteRMW] in {

760

defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",

X86cas8, i64mem,

IIC_CMPX_LOCK_8B>;

}

Quentin Colombet

2016-03-12 02:25:27 +0000

[diff] [blame]

765

// This pseudo must be used when the frame uses RBX as

766

// the base pointer. Indeed, in such situation RBX is a reserved

767

// register and the register allocator will ignore any use/def of

768

// it. In other words, the register allocator will not fix the clobbering of

769

// RBX that will happen when setting the arguments for the instruction.

770

//

771

// Unlike the actual related instruction, we mark that this one

772

// defines EBX (instead of using EBX).

773

// The rationale is that we will define RBX during the expansion of

774

// the pseudo. The argument feeding EBX is ebx_input.

775

//

776

// The additional argument, $ebx_save, is a temporary register used to

Simon Pilgrim

68168d1

2017-03-30 12:59:53 +0000

[diff] [blame]

777

// save the value of RBX across the actual instruction.

Quentin Colombet

cf9732b

2016-03-12 02:25:27 +0000

[diff] [blame]

778

//

779

// To make sure the register assigned to $ebx_save does not interfere with

780

// the definition of the actual instruction, we use a definition $dst which

Simon Pilgrim

68168d1

2017-03-30 12:59:53 +0000

[diff] [blame]

781

// is tied to $ebx_save. That way, the live-range of $ebx_save spans across

Quentin Colombet

cf9732b

2016-03-12 02:25:27 +0000

[diff] [blame]

782

// the instruction and we are sure we will have a valid register to restore

783

// the value of RBX.

784

let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],

785

SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,

786

Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {

787

def LCMPXCHG8B_SAVE_EBX :

788

I<0, Pseudo, (outs GR32:$dst),

789

(ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),

790

!strconcat("cmpxchg8b", "\t$ptr"),

791

[(set GR32:$dst, (X86cas8save_ebx addr:$ptr, GR32:$ebx_input,

GR32:$ebx_save))],

IIC_CMPX_LOCK_8B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

797

let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],

798

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {

799

defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",

800

X86cas16, i128mem,

801

IIC_CMPX_LOCK_16B>, REX_W;

802

}

803

Quentin Colombet

cf9732b

2016-03-12 02:25:27 +0000

[diff] [blame]

804

// Same as LCMPXCHG8B_SAVE_EBX but for the 16-byte variant.

805

let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],

806

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],

807

isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",

808

usesCustomInserter = 1 in {

809

def LCMPXCHG16B_SAVE_RBX :

810

I<0, Pseudo, (outs GR64:$dst),

811

(ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),

812

!strconcat("cmpxchg16b", "\t$ptr"),

813

[(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,

GR64:$rbx_save))],

IIC_CMPX_LOCK_16B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

818

defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",

819

X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;

820

821

// Atomic exchange and add

822

multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,

823

string frag,

824

InstrItinClass itin8, InstrItinClass itin> {

825

let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,

826

SchedRW = [WriteALULd, WriteRMW] in {

827

def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),

828

(ins GR8:$val, i8mem:$ptr),

829

!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),

830

[(set GR8:$dst,

831

(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],

832

itin8>;

833

def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),

834

(ins GR16:$val, i16mem:$ptr),

835

!strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),

836

[(set

837

GR16:$dst,

838

(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],

839

itin>, OpSize16;

840

def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),

841

(ins GR32:$val, i32mem:$ptr),

842

!strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),

843

[(set

844

GR32:$dst,

845

(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],

846

itin>, OpSize32;

847

def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),

848

(ins GR64:$val, i64mem:$ptr),

849

!strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),

850

[(set

851

GR64:$dst,

852

(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],

itin>;

}

}

defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",

858

IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,

859

TB, LOCK;

860

861

/* The following multiclass tries to make sure that in code like

862

* x.store (immediate op x.load(acquire), release)

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

863

* and

864

* x.store (register op x.load(acquire), release)

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

865

* an operation directly on memory is generated instead of wasting a register.

866

* It is not automatic as atomic_store/load are only lowered to MOV instructions

867

* extremely late to prevent them from being accidentally reordered in the backend

868

* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)

869

*/

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

870

multiclass RELEASE_BINOP_MI<SDNode op> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

871

def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

872

"#BINOP "#NAME#"8mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

873

[(atomic_store_8 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

874

(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

875

def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),

876

"#BINOP "#NAME#"8mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

877

[(atomic_store_8 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

878

(atomic_load_8 addr:$dst), GR8:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

879

// NAME#16 is not generated as 16-bit arithmetic instructions are considered

880

// costly and avoided as far as possible by this backend anyway

881

def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

882

"#BINOP "#NAME#"32mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

883

[(atomic_store_32 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

884

(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

885

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

886

"#BINOP "#NAME#"32mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

887

[(atomic_store_32 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

888

(atomic_load_32 addr:$dst), GR32:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

889

def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

890

"#BINOP "#NAME#"64mi32 PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

891

[(atomic_store_64 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

892

(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

893

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

894

"#BINOP "#NAME#"64mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

895

[(atomic_store_64 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

896

(atomic_load_64 addr:$dst), GR64:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

897

}

JF Bastien

986ed68

2015-10-13 00:28:47 +0000

[diff] [blame]

898

let Defs = [EFLAGS] in {

899

defm RELEASE_ADD : RELEASE_BINOP_MI<add>;

900

defm RELEASE_AND : RELEASE_BINOP_MI<and>;

901

defm RELEASE_OR : RELEASE_BINOP_MI<or>;

902

defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;

903

// Note: we don't deal with sub, because subtractions of constants are

904

// optimized into additions before this code can run.

905

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

906

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

907

// Same as above, but for floating-point.

908

// FIXME: imm version.

909

// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.

910

// FIXME: This could also handle SIMD operations with *ps and *pd instructions.

911

let usesCustomInserter = 1 in {

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

912

multiclass RELEASE_FP_BINOP_MI<SDNode op> {

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

913

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),

914

"#BINOP "#NAME#"32mr PSEUDO!",

915

[(atomic_store_32 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

916

(i32 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

917

(f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),

918

FR32:$src))))]>, Requires<[HasSSE1]>;

919

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),

920

"#BINOP "#NAME#"64mr PSEUDO!",

921

[(atomic_store_64 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

922

(i64 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

923

(f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),

924

FR64:$src))))]>, Requires<[HasSSE2]>;

925

}

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

926

defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

927

// FIXME: Add fsub, fmul, fdiv, ...

928

}

929

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

930

multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {

931

def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

932

"#UNOP "#NAME#"8m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

933

[(atomic_store_8 addr:$dst, dag8)]>;

934

def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

935

"#UNOP "#NAME#"16m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

936

[(atomic_store_16 addr:$dst, dag16)]>;

937

def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

938

"#UNOP "#NAME#"32m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

939

[(atomic_store_32 addr:$dst, dag32)]>;

940

def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

941

"#UNOP "#NAME#"64m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

942

[(atomic_store_64 addr:$dst, dag64)]>;

943

}

944

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

945

let Defs = [EFLAGS] in {

946

defm RELEASE_INC : RELEASE_UNOP<

947

(add (atomic_load_8 addr:$dst), (i8 1)),

948

(add (atomic_load_16 addr:$dst), (i16 1)),

949

(add (atomic_load_32 addr:$dst), (i32 1)),

950

(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;

951

defm RELEASE_DEC : RELEASE_UNOP<

952

(add (atomic_load_8 addr:$dst), (i8 -1)),

953

(add (atomic_load_16 addr:$dst), (i16 -1)),

954

(add (atomic_load_32 addr:$dst), (i32 -1)),

955

(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;

956

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

957

/*

958

TODO: These don't work because the type inference of TableGen fails.

959

TODO: find a way to fix it.

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

960

let Defs = [EFLAGS] in {

961

defm RELEASE_NEG : RELEASE_UNOP<

962

(ineg (atomic_load_8 addr:$dst)),

963

(ineg (atomic_load_16 addr:$dst)),

964

(ineg (atomic_load_32 addr:$dst)),

965

(ineg (atomic_load_64 addr:$dst))>;

966

}

967

// NOT doesn't set flags.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

968

defm RELEASE_NOT : RELEASE_UNOP<

969

(not (atomic_load_8 addr:$dst)),

970

(not (atomic_load_16 addr:$dst)),

971

(not (atomic_load_32 addr:$dst)),

972

(not (atomic_load_64 addr:$dst))>;

973

*/

974

975

def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

976

"#RELEASE_MOV8mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

977

[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;

978

def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

979

"#RELEASE_MOV16mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

980

[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;

981

def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

982

"#RELEASE_MOV32mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

983

[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;

984

def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

985

"#RELEASE_MOV64mi32 PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

986

[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;

987

988

def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

989

"#RELEASE_MOV8mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

990

[(atomic_store_8 addr:$dst, GR8 :$src)]>;

991

def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

992

"#RELEASE_MOV16mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

993

[(atomic_store_16 addr:$dst, GR16:$src)]>;

994

def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

995

"#RELEASE_MOV32mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

996

[(atomic_store_32 addr:$dst, GR32:$src)]>;

997

def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

998

"#RELEASE_MOV64mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

999

[(atomic_store_64 addr:$dst, GR64:$src)]>;

1000

1001

def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1002

"#ACQUIRE_MOV8rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1003

[(set GR8:$dst, (atomic_load_8 addr:$src))]>;

1004

def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1005

"#ACQUIRE_MOV16rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1006

[(set GR16:$dst, (atomic_load_16 addr:$src))]>;

1007

def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1008

"#ACQUIRE_MOV32rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1009

[(set GR32:$dst, (atomic_load_32 addr:$src))]>;

1010

def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

1011

"#ACQUIRE_MOV64rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1012

[(set GR64:$dst, (atomic_load_64 addr:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1013

1014

//===----------------------------------------------------------------------===//

1015

// DAG Pattern Matching Rules

1016

//===----------------------------------------------------------------------===//

1017

Hans Wennborg

5f916d3

2016-03-25 18:11:31 +0000

[diff] [blame]

1018

// Use AND/OR to store 0/-1 in memory when optimizing for minsize. This saves

1019

// binary size compared to a regular MOV, but it introduces an unnecessary

1020

// load, so is not suitable for regular or optsize functions.

1021

let Predicates = [OptForMinSize] in {

1022

def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;

1023

def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;

1024

def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;

1025

def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;

1026

def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;

1027

def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;

1028

}

1029

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1030

// In kernel code model, we can get the address of a label

1031

// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of

1032

// the MOV64ri32 should accept these.

1033

def : Pat<(i64 (X86Wrapper tconstpool :$dst)),

1034

(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;

1035

def : Pat<(i64 (X86Wrapper tjumptable :$dst)),

1036

(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;

1037

def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),

1038

(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;

1039

def : Pat<(i64 (X86Wrapper texternalsym:$dst)),

1040

(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;

Rafael Espindola

36b718f

2015-06-22 17:46:53 +0000

[diff] [blame]

1041

def : Pat<(i64 (X86Wrapper mcsym:$dst)),

1042

(MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1043

def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),

1044

(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;

1045

1046

// If we have small model and -static mode, it is safe to store global addresses

1047

// directly as immediates. FIXME: This is really a hack, the 'imm' predicate

1048

// for MOV64mi32 should handle this sort of thing.

1049

def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),

1050

(MOV64mi32 addr:$dst, tconstpool:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1051

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1052

def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),

1053

(MOV64mi32 addr:$dst, tjumptable:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1054

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1055

def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),

1056

(MOV64mi32 addr:$dst, tglobaladdr:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1057

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1058

def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),

1059

(MOV64mi32 addr:$dst, texternalsym:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1060

Requires<[NearData, IsNotPIC]>;

Rafael Espindola

36b718f

2015-06-22 17:46:53 +0000

[diff] [blame]

1061

def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),

1062

(MOV64mi32 addr:$dst, mcsym:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1063

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1064

def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),

1065

(MOV64mi32 addr:$dst, tblockaddress:$src)>,

Rafael Espindola

2016-06-27 21:09:14 +0000

[diff] [blame]

1066

Requires<[NearData, IsNotPIC]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1067

Rafael Espindola

36b718f

2015-06-22 17:46:53 +0000

[diff] [blame]

1068

def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;

1069

def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

// Calls

// tls has some funny stuff here...

1074

// This corresponds to movabs $foo@tpoff, %rax

1075

def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),

1076

(MOV64ri32 tglobaltlsaddr :$dst)>;

1077

// This corresponds to add $foo@tpoff, %rax

1078

def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),

1079

(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;

1080

1081

1082

// Direct PC relative function call for small code model. 32-bit displacement

1083

// sign extended to 64-bit.

1084

def : Pat<(X86call (i64 tglobaladdr:$dst)),

1085

(CALL64pcrel32 tglobaladdr:$dst)>;

1086

def : Pat<(X86call (i64 texternalsym:$dst)),

1087

(CALL64pcrel32 texternalsym:$dst)>;

1088

1089

// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they

1090

// can never use callee-saved registers. That is the purpose of the GR64_TC

1091

// register classes.

1092

//

1093

// The only volatile register that is never used by the calling convention is

1094

// %r11. This happens when calling a vararg function with 6 arguments.

1095

//

1096

// Match an X86tcret that uses fewer than 7 volatile registers.

1097

def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),

1098

(X86tcret node:$ptr, node:$off), [{

1099

// X86tcret args: (*chain, ptr, imm, regs..., glue)

1100

unsigned NumRegs = 0;

1101

for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)

1102

if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)

return false;

return true;

}]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1108

(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,

1109

Requires<[Not64BitMode]>;

1110

1111

// FIXME: This is disabled for 32-bit PIC mode because the global base

1112

// register which is part of the address mode may be assigned a

1113

// callee-saved register.

1114

def : Pat<(X86tcret (load addr:$dst), imm:$off),

1115

(TCRETURNmi addr:$dst, imm:$off)>,

1116

Requires<[Not64BitMode, IsNotPIC]>;

1117

1118

def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),

1119

(TCRETURNdi tglobaladdr:$dst, imm:$off)>,

1120

Requires<[NotLP64]>;

1121

1122

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),

1123

(TCRETURNdi texternalsym:$dst, imm:$off)>,

1124

Requires<[NotLP64]>;

1125

1126

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1127

(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,

1128

Requires<[In64BitMode]>;

1129

1130

// Don't fold loads into X86tcret requiring more than 6 regs.

1131

// There wouldn't be enough scratch registers for base+index.

1132

def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),

1133

(TCRETURNmi64 addr:$dst, imm:$off)>,

1134

Requires<[In64BitMode]>;

1135

1136

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),

1137

(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,

1138

Requires<[IsLP64]>;

1139

1140

def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),

1141

(TCRETURNdi64 texternalsym:$dst, imm:$off)>,

1142

Requires<[IsLP64]>;

1143

1144

// Normal calls, with various flavors of addresses.

1145

def : Pat<(X86call (i32 tglobaladdr:$dst)),

1146

(CALLpcrel32 tglobaladdr:$dst)>;

1147

def : Pat<(X86call (i32 texternalsym:$dst)),

1148

(CALLpcrel32 texternalsym:$dst)>;

1149

def : Pat<(X86call (i32 imm:$dst)),

1150

(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;

// Comparisons.

// TEST R,R is smaller than CMP R,0

1155

def : Pat<(X86cmp GR8:$src1, 0),

1156

(TEST8rr GR8:$src1, GR8:$src1)>;

1157

def : Pat<(X86cmp GR16:$src1, 0),

1158

(TEST16rr GR16:$src1, GR16:$src1)>;

1159

def : Pat<(X86cmp GR32:$src1, 0),

1160

(TEST32rr GR32:$src1, GR32:$src1)>;

1161

def : Pat<(X86cmp GR64:$src1, 0),

1162

(TEST64rr GR64:$src1, GR64:$src1)>;

1163

1164

// Conditional moves with folded loads with operands swapped and conditions

1165

// inverted.

1166

//   InvertedCond  - condition code matched on the X86cmov node.
//   Inst16/32/64  - register-memory instruction emitted for the 16-, 32- and
//                   64-bit variants; the loaded operand becomes the memory
//                   operand and the register operand moves to first position.
// All three patterns are only available when HasCMov holds.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
                  Instruction Inst64> {
  def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
            (Inst16 GR16:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
  def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
            (Inst32 GR32:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
            (Inst64 GR64:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
}

defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;

1179

defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;

1180

defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;

1181

defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;

1182

defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;

1183

defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;

1184

defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;

1185

defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;

1186

defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;

1187

defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;

1188

defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;

1189

defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;

1190

defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;

1191

defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;

1192

defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;

1193

defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;

1194

1195

// zextload bool -> zextload byte

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1196

// i1 stored in one byte in zero-extended form.

1197

// Upper bits cleanup should be executed before Store.

1198

def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1199

def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1200

def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1201

def : Pat<(zextloadi64i1 addr:$src),

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1202

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1203

1204

// extload bool -> extload byte

1205

// When extloading from 16-bit and smaller memory locations into 64-bit

1206

// registers, use zero-extending loads so that the entire 64-bit register is

1207

// defined, avoiding partial-register updates.

1208

1209

def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1210

def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1211

def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

1212

def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;

1213

def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;

1214

def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

1215

1216

// For other extloads, use subregs, since the high contents of the register are

1217

// defined after an extload.

1218

def : Pat<(extloadi64i1 addr:$src),

1219

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1220

def : Pat<(extloadi64i8 addr:$src),

1221

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1222

def : Pat<(extloadi64i16 addr:$src),

1223

(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;

1224

def : Pat<(extloadi64i32 addr:$src),

1225

(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;

1226

1227

// anyext. Define these to do an explicit zero-extend to

1228

// avoid partial-register updates.

1229

def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG

1230

(MOVZX32rr8 GR8 :$src), sub_16bit)>;

1231

def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;

1232

1233

// Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.

1234

def : Pat<(i32 (anyext GR16:$src)),

1235

(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

1236

1237

def : Pat<(i64 (anyext GR8 :$src)),

1238

(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;

1239

def : Pat<(i64 (anyext GR16:$src)),

1240

(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;

1241

def : Pat<(i64 (anyext GR32:$src)),

1242

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1243

1244

1245

// Most instructions that define a 32-bit result zero the high half of the

1246

// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may

David L Kreitzer

8b959e5

2016-07-29 15:09:54 +0000

[diff] [blame]

1247

// be copying from a truncate. Any other 32-bit operation will zero-extend

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1248

// up to 64 bits.

1249

def def32 : PatLeaf<(i32 GR32:$src), [{
  // Reject the node kinds listed below; every other opcode is accepted.
  const unsigned Opc = N->getOpcode();
  if (Opc == ISD::TRUNCATE || Opc == TargetOpcode::EXTRACT_SUBREG)
    return false;
  if (Opc == ISD::CopyFromReg || Opc == ISD::AssertSext)
    return false;
  return true;
}]>;

1255

1256

// In the case of a 32-bit def that is known to implicitly zero-extend,

1257

// we can use a SUBREG_TO_REG.

1258

def : Pat<(i64 (zext def32:$src)),

1259

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1260

1261

//===----------------------------------------------------------------------===//

1262

// Pattern match OR as ADD

1263

//===----------------------------------------------------------------------===//

1264

1265

// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be

1266

// 3-addressified into an LEA instruction to avoid copies. However, we also

1267

// want to finally emit these instructions as an or at the end of the code

1268

// generator to make the generated code easier to read. To do this, we select

1269

// into "disjoint bits" pseudo ops.

1270

1271

// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.

1272

def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Constant right-hand side: ask directly whether the constant's set bits
  // are all known to be zero in the left-hand side.
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
    return CurDAG->MaskedValueIsZero(LHS, RHSC->getAPIntValue());

  // General case: compute known bits for both sides and require that no bit
  // position is possibly-one in both operands.
  KnownBits KnownLHS, KnownRHS;
  CurDAG->computeKnownBits(LHS, KnownLHS, 0);
  CurDAG->computeKnownBits(RHS, KnownRHS, 0);
  return (~KnownLHS.Zero & ~KnownRHS.Zero) == 0;
}]>;

// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.

1285

// Try this before selecting to OR.

1286

let AddedComplexity = 5, SchedRW = [WriteALU] in {

1287

1288

let isConvertibleToThreeAddress = 1,

1289

Constraints = "$src1 = $dst", Defs = [EFLAGS] in {

1290

let isCommutable = 1 in {

1291

def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),

1292

"", // orw/addw REG, REG

1293

[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;

1294

def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),

1295

"", // orl/addl REG, REG

1296

[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;

1297

def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),

1298

"", // orq/addq REG, REG

1299

[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;

1300

} // isCommutable

1301

1302

// NOTE: These are order specific, we want the ri8 forms to be listed

1303

// first so that they are slightly preferred to the ri forms.

1304

1305

def ADD16ri8_DB : I<0, Pseudo,

1306

(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),

1307

"", // orw/addw REG, imm8

1308

[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;

1309

def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),

1310

"", // orw/addw REG, imm

1311

[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;

1312

1313

def ADD32ri8_DB : I<0, Pseudo,

1314

(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),

1315

"", // orl/addl REG, imm8

1316

[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;

1317

def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),

1318

"", // orl/addl REG, imm

1319

[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;

1320

1321

1322

def ADD64ri8_DB : I<0, Pseudo,

1323

(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),

1324

"", // orq/addq REG, imm8

1325

[(set GR64:$dst, (or_is_add GR64:$src1,

1326

i64immSExt8:$src2))]>;

1327

def ADD64ri32_DB : I<0, Pseudo,

1328

(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),

1329

"", // orq/addq REG, imm

1330

[(set GR64:$dst, (or_is_add GR64:$src1,

1331

i64immSExt32:$src2))]>;

1332

}

1333

} // AddedComplexity, SchedRW

1334

1335

1336

//===----------------------------------------------------------------------===//

1337

// Some peepholes

1338

//===----------------------------------------------------------------------===//

1339

1340

// Odd encoding trick: -128 fits into an 8-bit immediate field while

1341

// +128 doesn't, so in this special case use a sub instead of an add.

1342

def : Pat<(add GR16:$src1, 128),

1343

(SUB16ri8 GR16:$src1, -128)>;

1344

def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),

1345

(SUB16mi8 addr:$dst, -128)>;

1346

1347

def : Pat<(add GR32:$src1, 128),

1348

(SUB32ri8 GR32:$src1, -128)>;

1349

def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),

1350

(SUB32mi8 addr:$dst, -128)>;

1351

1352

def : Pat<(add GR64:$src1, 128),

1353

(SUB64ri8 GR64:$src1, -128)>;

1354

def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),

1355

(SUB64mi8 addr:$dst, -128)>;

1356

1357

// The same trick applies for 32-bit immediate fields in 64-bit

1358

// instructions.

1359

def : Pat<(add GR64:$src1, 0x0000000080000000),

1360

(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;

Eli Friedman

17e8ea1

2016-07-14 05:48:25 +0000

[diff] [blame]

1361

def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1362

(SUB64mi32 addr:$dst, 0xffffffff80000000)>;

1363

1364

// To avoid needing to materialize an immediate in a register, use a 32-bit and

1365

// with implicit zero-extension instead of a 64-bit and if the immediate has at

1366

// least 32 bits of leading zeros. If in addition the last 32 bits can be

1367

// represented with a sign extension of an 8-bit constant, use that.

Craig Topper

3d44178

2015-04-04 02:31:43 +0000

[diff] [blame]

1368

// This can also reduce instruction size by eliminating the need for the REX

1369

// prefix.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1370

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1371

// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.

1372

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1373

def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri8

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1378

(i32 (GetLo8XForm imm:$imm))),

1379

sub_32bit)>;

1380

1381

def : Pat<(and GR64:$src, i64immZExt32:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1386

(i32 (GetLo32XForm imm:$imm))),

1387

sub_32bit)>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1388

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1389

1390

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1391

// AddedComplexity is needed due to the increased complexity on the

1392

// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all

1393

// the MOVZX patterns keeps them together in DAGIsel tables.

1394

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1395

// r & (2^16-1) ==> movz

1396

def : Pat<(and GR32:$src1, 0xffff),

1397

(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;

1398

// r & (2^8-1) ==> movz

1399

def : Pat<(and GR32:$src1, 0xff),

1400

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,

1401

GR32_ABCD)),

1402

sub_8bit))>,

1403

Requires<[Not64BitMode]>;

1404

// r & (2^8-1) ==> movz

1405

def : Pat<(and GR16:$src1, 0xff),

1406

(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG

1407

(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),

1408

sub_16bit)>,

1409

Requires<[Not64BitMode]>;

1410

1411

// r & (2^32-1) ==> movz

1412

def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),

1413

(SUBREG_TO_REG (i64 0),

1414

(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),

1415

sub_32bit)>;

1416

// r & (2^16-1) ==> movz

1417

def : Pat<(and GR64:$src, 0xffff),

1418

(SUBREG_TO_REG (i64 0),

1419

(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),

1420

sub_32bit)>;

1421

// r & (2^8-1) ==> movz

1422

def : Pat<(and GR64:$src, 0xff),

1423

(SUBREG_TO_REG (i64 0),

1424

(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),

1425

sub_32bit)>;

1426

// r & (2^8-1) ==> movz

1427

def : Pat<(and GR32:$src1, 0xff),

1428

(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,

1429

Requires<[In64BitMode]>;

1430

// r & (2^8-1) ==> movz

1431

def : Pat<(and GR16:$src1, 0xff),

1432

(EXTRACT_SUBREG (MOVZX32rr8 (i8

1433

(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,

1434

Requires<[In64BitMode]>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1435

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1436

1437

1438

// sext_inreg patterns

1439

def : Pat<(sext_inreg GR32:$src, i16),

1440

(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;

1441

def : Pat<(sext_inreg GR32:$src, i8),

1442

(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1443

GR32_ABCD)),

1444

sub_8bit))>,

1445

Requires<[Not64BitMode]>;

1446

1447

// sext_inreg of the low byte of a 16-bit register, outside 64-bit mode:
// copy the source into GR16_ABCD, extract its sub_8bit subregister,
// sign-extend that byte to 32 bits with MOVSX32rr8, and take sub_16bit of
// the result. The COPY_TO_REGCLASS value is a 16-bit register (GR16 source,
// GR16_ABCD class), so it is annotated i16 like the other GR16_ABCD copies
// in this file (it was previously annotated i32).
def : Pat<(sext_inreg GR16:$src, i8),
          (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
            (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
            sub_16bit)>,
      Requires<[Not64BitMode]>;

1452

1453

def : Pat<(sext_inreg GR64:$src, i32),

1454

(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;

1455

def : Pat<(sext_inreg GR64:$src, i16),

1456

(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;

1457

def : Pat<(sext_inreg GR64:$src, i8),

1458

(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;

1459

def : Pat<(sext_inreg GR32:$src, i8),

1460

(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,

1461

Requires<[In64BitMode]>;

1462

def : Pat<(sext_inreg GR16:$src, i8),

1463

(EXTRACT_SUBREG (MOVSX32rr8

1464

(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,

1465

Requires<[In64BitMode]>;

1466

1467

// sext, sext_load, zext, zext_load

1468

def: Pat<(i16 (sext GR8:$src)),

1469

(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;

1470

def: Pat<(sextloadi16i8 addr:$src),

1471

(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;

1472

def: Pat<(i16 (zext GR8:$src)),

1473

(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;

1474

def: Pat<(zextloadi16i8 addr:$src),

1475

(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;

1476

1477

// trunc patterns

1478

def : Pat<(i16 (trunc GR32:$src)),

1479

(EXTRACT_SUBREG GR32:$src, sub_16bit)>;

1480

def : Pat<(i8 (trunc GR32:$src)),

1481

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1482

sub_8bit)>,

1483

Requires<[Not64BitMode]>;

1484

def : Pat<(i8 (trunc GR16:$src)),

1485

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1486

sub_8bit)>,

1487

Requires<[Not64BitMode]>;

1488

def : Pat<(i32 (trunc GR64:$src)),

1489

(EXTRACT_SUBREG GR64:$src, sub_32bit)>;

1490

def : Pat<(i16 (trunc GR64:$src)),

1491

(EXTRACT_SUBREG GR64:$src, sub_16bit)>;

1492

def : Pat<(i8 (trunc GR64:$src)),

1493

(EXTRACT_SUBREG GR64:$src, sub_8bit)>;

1494

def : Pat<(i8 (trunc GR32:$src)),

1495

(EXTRACT_SUBREG GR32:$src, sub_8bit)>,

1496

Requires<[In64BitMode]>;

1497

def : Pat<(i8 (trunc GR16:$src)),

1498

(EXTRACT_SUBREG GR16:$src, sub_8bit)>,

1499

Requires<[In64BitMode]>;

1500

1501

// h-register tricks

1502

def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),

1503

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1504

sub_8bit_hi)>,

1505

Requires<[Not64BitMode]>;

Kevin B. Smith

ed0b620

2016-05-31 22:00:12 +0000

[diff] [blame]

1506

def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))),

1507

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1508

sub_8bit_hi)>,

1509

Requires<[Not64BitMode]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1510

def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),

1511

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1512

sub_8bit_hi)>,

1513

Requires<[Not64BitMode]>;

1514

def : Pat<(srl GR16:$src, (i8 8)),

1515

(EXTRACT_SUBREG

1516

(MOVZX32rr8

1517

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1518

sub_8bit_hi)),

1519

sub_16bit)>,

1520

Requires<[Not64BitMode]>;

1521

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1522

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1523

GR16_ABCD)),

1524

sub_8bit_hi))>,

1525

Requires<[Not64BitMode]>;

1526

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1527

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1528

GR16_ABCD)),

1529

sub_8bit_hi))>,

1530

Requires<[Not64BitMode]>;

1531

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1532

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1533

GR32_ABCD)),

1534

sub_8bit_hi))>,

1535

Requires<[Not64BitMode]>;

1536

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1537

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1538

GR32_ABCD)),

1539

sub_8bit_hi))>,

1540

Requires<[Not64BitMode]>;

1541

1542

// h-register tricks.

1543

// For now, be conservative on x86-64 and use an h-register extract only if the

1544

// value is immediately zero-extended or stored, which are somewhat common

1545

// cases. This uses a bunch of code to prevent a register requiring a REX prefix

1546

// from being allocated in the same instruction as the h register, as there's

1547

// currently no way to describe this requirement to the register allocator.

1548

1549

// h-register extract and zero-extend.

1550

def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1555

sub_8bit_hi)),

1556

sub_32bit)>;

1557

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1558

(MOVZX32_NOREXrr8

1559

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1560

sub_8bit_hi))>,

1561

Requires<[In64BitMode]>;

1562

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1563

(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1564

GR32_ABCD)),

1565

sub_8bit_hi))>,

1566

Requires<[In64BitMode]>;

1567

def : Pat<(srl GR16:$src, (i8 8)),

1568

(EXTRACT_SUBREG

1569

(MOVZX32_NOREXrr8

1570

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1571

sub_8bit_hi)),

1572

sub_16bit)>,

1573

Requires<[In64BitMode]>;

1574

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1575

(MOVZX32_NOREXrr8

1576

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1577

sub_8bit_hi))>,

1578

Requires<[In64BitMode]>;

1579

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1580

(MOVZX32_NOREXrr8

1581

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1582

sub_8bit_hi))>,

1583

Requires<[In64BitMode]>;

1584

def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1589

sub_8bit_hi)),

1590

sub_32bit)>;

1591

def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

sub_8bit_hi)),

sub_32bit)>;

// h-register extract and store.

1600

def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),

1601

(MOV8mr_NOREX

1602

addr:$dst,

1603

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1604

sub_8bit_hi))>;

1605

def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),

1606

(MOV8mr_NOREX

1607

addr:$dst,

1608

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1609

sub_8bit_hi))>,

1610

Requires<[In64BitMode]>;

1611

def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),

1612

(MOV8mr_NOREX

1613

addr:$dst,

1614

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1615

sub_8bit_hi))>,

1616

Requires<[In64BitMode]>;

1617

1618

1619

// (shl x, 1) ==> (add x, x)

1620

// Note that if x is undef (immediate or otherwise), we could theoretically

1621

// end up with the two uses of x getting different values, producing a result

1622

// where the least significant bit is not 0. However, the probability of this

1623

// happening is considered low enough that this is officially not a

1624

// "real problem".

1625

def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;

1626

def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;

1627

def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;

1628

def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

1629

1630

// Helper imms that check if a mask doesn't change significant shift bits.

Benjamin Kramer

5f6a907

2015-02-12 15:35:40 +0000

[diff] [blame]

1631

def immShift32 : ImmLeaf<i8, [{
  // True when the five least-significant bits of the mask are all set.
  return (Imm & 0x1f) == 0x1f;
}]>;

1634

def immShift64 : ImmLeaf<i8, [{
  // True when the six least-significant bits of the mask are all set.
  return (Imm & 0x3f) == 0x3f;
}]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1637

1638

// Shift amount is implicitly masked.

1639

multiclass MaskedShiftAmountPats<SDNode frag, string name> {
  // Drops an explicit 'and' on the CL shift amount when the mask keeps every
  // bit accepted by immShift32 / immShift64.
  //   frag - the shift or rotate node to match.
  //   name - instruction name prefix; "<width>rCL" or "<width>mCL" is appended
  //          to pick the register or read-modify-write memory instruction.

  // (shift x (and y, 31)) ==> (shift x, y)
  def : Pat<(frag GR8:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
  def : Pat<(frag GR16:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "8mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "16mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "32mCL") addr:$dst)>;

  // (shift x (and y, 63)) ==> (shift x, y)
  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
  // The memory form uses immShift64 too (it previously matched only the
  // literal 63), so it folds the same set of masks as the register form.
  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
            (!cast<Instruction>(name # "64mCL") addr:$dst)>;
}

1660

1661

defm : MaskedShiftAmountPats<shl, "SHL">;

1662

defm : MaskedShiftAmountPats<srl, "SHR">;

1663

defm : MaskedShiftAmountPats<sra, "SAR">;

1664

defm : MaskedShiftAmountPats<rotl, "ROL">;

1665

defm : MaskedShiftAmountPats<rotr, "ROR">;

1666

Simon Pilgrim

46f119a

2016-08-01 12:11:43 +0000

[diff] [blame]

1667

// Double shift amount is implicitly masked.

1668

multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
  //   frag - the double-shift node to match.
  //   name - instruction name prefix; "<width>rrCL" is appended to pick the
  //          register-register instruction that takes its count in CL.

  // 16- and 32-bit forms: (shift x (and y, 31)) ==> (shift x, y)
  def : Pat<(frag GR16:$src1, GR16:$src2, (and CL, immShift32)),
            (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
  def : Pat<(frag GR32:$src1, GR32:$src2, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;

  // 64-bit form: (shift x (and y, 63)) ==> (shift x, y)
  def : Pat<(frag GR64:$src1, GR64:$src2, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
}

1679

1680

defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;

1681

defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;

1682

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1683

// (anyext (setcc_carry)) -> (setcc_carry)

1684

def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

1685

(SETB_C16r)>;

1686

def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

1687

(SETB_C32r)>;

1688

def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),

1689

(SETB_C32r)>;

1690

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1691

//===----------------------------------------------------------------------===//

1692

// EFLAGS-defining Patterns

1693

//===----------------------------------------------------------------------===//

1694

1695

// add reg, reg

1696

def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;

1697

def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;

1698

def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;

1699

1700

// add reg, mem

1701

def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),

1702

(ADD8rm GR8:$src1, addr:$src2)>;

1703

def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),

1704

(ADD16rm GR16:$src1, addr:$src2)>;

1705

def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),

1706

(ADD32rm GR32:$src1, addr:$src2)>;

1707

1708

// add reg, imm

1709

def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;

1710

def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;

1711

def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;

1712

def : Pat<(add GR16:$src1, i16immSExt8:$src2),

1713

(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;

1714

def : Pat<(add GR32:$src1, i32immSExt8:$src2),

1715

(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;

1716

1717

// sub reg, reg

1718

def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;

1719

def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;

1720

def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;

1721

1722

// sub reg, mem

1723

def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),

1724

(SUB8rm GR8:$src1, addr:$src2)>;

1725

def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),

1726

(SUB16rm GR16:$src1, addr:$src2)>;

1727

def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),

1728

(SUB32rm GR32:$src1, addr:$src2)>;

1729

1730

// sub reg, imm

1731

def : Pat<(sub GR8:$src1, imm:$src2),

1732

(SUB8ri GR8:$src1, imm:$src2)>;

1733

def : Pat<(sub GR16:$src1, imm:$src2),

1734

(SUB16ri GR16:$src1, imm:$src2)>;

1735

def : Pat<(sub GR32:$src1, imm:$src2),

1736

(SUB32ri GR32:$src1, imm:$src2)>;

1737

def : Pat<(sub GR16:$src1, i16immSExt8:$src2),

1738

(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;

1739

def : Pat<(sub GR32:$src1, i32immSExt8:$src2),

1740

(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;

1741

1742

// sub 0, reg

1743

def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;

1744

def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;

1745

def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;

1746

def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;

1747

Peter Collingbourne

ef089bd

2017-02-09 22:02:28 +0000

[diff] [blame]

1748

// sub reg, relocImm

1749

def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),

1750

(SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;

1751

def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2),

1752

(SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;

1753

Michael Kuperstein