Blame - llvm/lib/Target/X86/X86InstrCompiler.td - toolchain/llvm-project

2015-02-01 16:15:07 +0000

[diff] [blame]

1

//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

//

10

// This file describes the various pseudo instructions used by the compiler,

11

// as well as Pat patterns used during instruction selection.

12

//

13

//===----------------------------------------------------------------------===//

14

15

//===----------------------------------------------------------------------===//

16

// Pattern Matching Support

17

18

def GetLo32XForm : SDNodeXForm<imm, [{

19

// Transformation function: get the low 32 bits.

Sergey Dmitrouk

842a51b

2015-04-28 14:05:47 +0000

[diff] [blame]

20

return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

21

}]>;

22

23

def GetLo8XForm : SDNodeXForm<imm, [{

24

// Transformation function: get the low 8 bits.

Sergey Dmitrouk

842a51b

2015-04-28 14:05:47 +0000

[diff] [blame]

25

return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}]>;

//===----------------------------------------------------------------------===//

30

// Random Pseudo Instructions.

31

32

// PIC base construction. This expands to code that looks like this:

33

// call $next_inst

34

// popl %destreg

35

let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in

36

def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),

"", []>;

// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into

41

// a stack adjustment and the codegen must know that they may modify the stack

42

// pointer before prolog-epilog rewriting occurs.

43

// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become

44

// sub / add which can clobber EFLAGS.

45

let Defs = [ESP, EFLAGS], Uses = [ESP] in {

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

46

def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

47

"#ADJCALLSTACKDOWN",

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

48

[]>,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

49

Requires<[NotLP64]>;

50

def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

51

"#ADJCALLSTACKUP",

52

[(X86callseq_end timm:$amt1, timm:$amt2)]>,

53

Requires<[NotLP64]>;

54

}

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

55

def : Pat<(X86callseq_start timm:$amt1),

56

(ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;

57

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

58

59

// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into

60

// a stack adjustment and the codegen must know that they may modify the stack

61

// pointer before prolog-epilog rewriting occurs.

62

// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become

63

// sub / add which can clobber EFLAGS.

64

let Defs = [RSP, EFLAGS], Uses = [RSP] in {

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

65

def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

66

"#ADJCALLSTACKDOWN",

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

67

[]>,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

68

Requires<[IsLP64]>;

69

def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),

70

"#ADJCALLSTACKUP",

71

[(X86callseq_end timm:$amt1, timm:$amt2)]>,

72

Requires<[IsLP64]>;

73

}

Michael Kuperstein

2015-02-01 16:56:04 +0000

[diff] [blame]

74

def : Pat<(X86callseq_start timm:$amt1),

75

(ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

76

77

78

// x86-64 va_start lowering magic.

79

let usesCustomInserter = 1, Defs = [EFLAGS] in {

80

def VASTART_SAVE_XMM_REGS : I<0, Pseudo,

81

(outs),

82

(ins GR8:$al,

83

i64imm:$regsavefi, i64imm:$offset,

84

variable_ops),

85

"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",

86

[(X86vastart_save_xmm_regs GR8:$al,

imm:$regsavefi,

imm:$offset),

(implicit EFLAGS)]>;

// The VAARG_64 pseudo-instruction takes the address of the va_list,

92

// and places the address of the next argument into a register.

93

let Defs = [EFLAGS] in

94

def VAARG_64 : I<0, Pseudo,

95

(outs GR64:$dst),

96

(ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),

97

"#VAARG_64 $dst, $ap, $size, $mode, $align",

98

[(set GR64:$dst,

99

(X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),

100

(implicit EFLAGS)]>;

101

102

// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows

103

// targets. These calls are needed to probe the stack when allocating more than

104

// 4k bytes in one go. Touching the stack at 4K increments is necessary to

105

// ensure that the guard pages used by the OS virtual memory manager are

106

// allocated in correct sequence.

107

// The main point of having a separate instruction is the extra unmodelled effects

108

// (compared to ordinary calls) like stack pointer change.

109

110

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in

111

def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),

112

"# dynamic stack allocation",

113

[(X86WinAlloca)]>;

114

115

// When using segmented stacks these are lowered into instructions which first

116

// check if the current stacklet has enough free memory. If it does, memory is

117

// allocated by bumping the stack pointer. Otherwise memory is allocated from

118

// the heap.

119

120

let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in

121

def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),

122

"# variable sized alloca for segmented stacks",

123

[(set GR32:$dst,

124

(X86SegAlloca GR32:$size))]>,

125

Requires<[NotLP64]>;

126

127

let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in

128

def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),

129

"# variable sized alloca for segmented stacks",

130

[(set GR64:$dst,

131

(X86SegAlloca GR64:$size))]>,

132

Requires<[In64BitMode]>;

133

}

134

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

135

//===----------------------------------------------------------------------===//

136

// EH Pseudo Instructions

137

//

138

let SchedRW = [WriteSystem] in {

139

let isTerminator = 1, isReturn = 1, isBarrier = 1,

140

hasCtrlDep = 1, isCodeGenOnly = 1 in {

141

def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),

142

"ret\t#eh_return, addr: $addr",

143

[(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;

}

let isTerminator = 1, isReturn = 1, isBarrier = 1,

148

hasCtrlDep = 1, isCodeGenOnly = 1 in {

149

def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),

150

"ret\t#eh_return, addr: $addr",

151

[(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;

}

Reid Kleckner

2015-11-06 01:49:05 +0000

[diff] [blame]

155

let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,

156

isCodeGenOnly = 1, isReturn = 1 in {

157

def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;

158

David Majnemer

2652b75

2015-11-09 23:07:48 +0000

[diff] [blame]

159

// CATCHRET needs a custom inserter for SEH.

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

160

let usesCustomInserter = 1 in

161

def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from),

162

"# CATCHRET",

163

[(catchret bb:$dst, bb:$from)]>;

Reid Kleckner

0e28823

2015-08-27 23:27:47 +0000

[diff] [blame]

164

}

165

Reid Kleckner

420f054

2015-11-09 23:34:42 +0000

[diff] [blame]

166

let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,

David Majnemer

2652b75

2015-11-09 23:07:48 +0000

[diff] [blame]

167

usesCustomInserter = 1 in

168

def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>;

169

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

170

// This instruction is responsible for re-establishing stack pointers after an

171

// exception has been caught and we are rejoining normal control flow in the

172

// parent function or funclet. It generally sets ESP and EBP, and optionally

173

// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us

174

// elsewhere.

Reid Kleckner

420f054

2015-11-09 23:34:42 +0000

[diff] [blame]

175

let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in

Reid Kleckner

51460c1

2015-11-06 01:49:05 +0000

[diff] [blame]

176

def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;

177

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

178

let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,

179

usesCustomInserter = 1 in {

180

def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),

181

"#EH_SJLJ_SETJMP32",

182

[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,

183

Requires<[Not64BitMode]>;

184

def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),

185

"#EH_SJLJ_SETJMP64",

186

[(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,

187

Requires<[In64BitMode]>;

188

let isTerminator = 1 in {

189

def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),

190

"#EH_SJLJ_LONGJMP32",

191

[(X86eh_sjlj_longjmp addr:$buf)]>,

192

Requires<[Not64BitMode]>;

193

def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),

194

"#EH_SJLJ_LONGJMP64",

195

[(X86eh_sjlj_longjmp addr:$buf)]>,

196

Requires<[In64BitMode]>;

}

}

} // SchedRW

let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {

202

def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),

203

"#EH_SjLj_Setup\t$dst", []>;

204

}

205

206

//===----------------------------------------------------------------------===//

207

// Pseudo instructions used by unwind info.

208

//

209

let isPseudo = 1 in {

210

def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),

211

"#SEH_PushReg $reg", []>;

212

def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),

213

"#SEH_SaveReg $reg, $dst", []>;

214

def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),

215

"#SEH_SaveXMM $reg, $dst", []>;

216

def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),

217

"#SEH_StackAlloc $size", []>;

218

def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),

219

"#SEH_SetFrame $reg, $offset", []>;

220

def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),

221

"#SEH_PushFrame $mode", []>;

222

def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),

223

"#SEH_EndPrologue", []>;

224

def SEH_Epilogue : I<0, Pseudo, (outs), (ins),

225

"#SEH_Epilogue", []>;

226

}

227

228

//===----------------------------------------------------------------------===//

229

// Pseudo instructions used by segmented stacks.

230

//

231

232

// This is lowered into a RET instruction by MCInstLower. We need

233

// this so that we don't have to have a MachineBasicBlock which ends

234

// with a RET and also has successors.

235

let isPseudo = 1 in {

236

def MORESTACK_RET: I<0, Pseudo, (outs), (ins),

237

"", []>;

238

239

// This instruction is lowered to a RET followed by a MOV. The two

240

// instructions are not generated on a higher level since then the

241

// verifier sees a MachineBasicBlock ending with a non-terminator.

242

def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),

"", []>;

}

//===----------------------------------------------------------------------===//

247

// Alias Instructions

248

//===----------------------------------------------------------------------===//

249

250

// Alias instruction mapping movr0 to xor.

251

// FIXME: remove when we can teach regalloc that xor reg, reg is ok.

252

let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,

David Majnemer

869be0a

2016-01-05 02:32:06 +0000

[diff] [blame]

253

isPseudo = 1 in

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

254

def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

255

[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;

256

257

// Other widths can also make use of the 32-bit xor, which may have a smaller

258

// encoding and avoid partial register updates.

259

def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;

260

def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;

261

def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {

262

let AddedComplexity = 20;

263

}

264

Hans Wennborg

08d5905

2015-12-15 17:10:28 +0000

[diff] [blame]

265

let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],

David Majnemer

869be0a

2016-01-05 02:32:06 +0000

[diff] [blame]

266

AddedComplexity = 1 in {

Hans Wennborg

08d5905

2015-12-15 17:10:28 +0000

[diff] [blame]

267

// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,

268

// which only require 3 bytes compared to MOV32ri which requires 5.

269

let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {

270

def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

271

[(set GR32:$dst, 1)]>;

272

def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",

273

[(set GR32:$dst, -1)]>;

274

}

275

276

// MOV16ri is 4 bytes, so the instructions above are smaller.

277

def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;

278

def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;

279

}

280

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

281

// Materialize i64 constant where top 32-bits are zero. This could theoretically

282

// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however

283

// that would make it more difficult to rematerialize.

Craig Topper

e00bffb

2016-01-05 07:44:14 +0000

[diff] [blame]

284

let isReMaterializable = 1, isAsCheapAsAMove = 1,

285

isPseudo = 1, hasSideEffects = 0 in

286

def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

287

288

// This 64-bit pseudo-move can be used for both a 64-bit constant that is

Sanjay Patel

85030aa

2015-10-13 16:23:00 +0000

[diff] [blame]

289

// actually the zero-extension of a 32-bit constant and for labels in the

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

290

// x86-64 small code model.

Sanjay Patel

85030aa

2015-10-13 16:23:00 +0000

[diff] [blame]

291

def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

292

293

let AddedComplexity = 1 in

294

def : Pat<(i64 mov64imm32:$src),

295

(SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>;

296

297

// Use sbb to materialize carry bit.

298

let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {

299

// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.

300

// However, Pat<> can't replicate the destination reg into the inputs of the

301

// result.
//
// Each def matches (X86setcc_c X86_COND_B, EFLAGS) into a result register of
// a different width: GR8, GR16, GR32 or GR64.

302

def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "",

303

[(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

304

def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "",

305

[(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

306

def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "",

307

[(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

308

def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "",

309

[(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;

} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU]

def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

314

(SETB_C16r)>;

315

def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

316

(SETB_C32r)>;

317

def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

318

(SETB_C64r)>;

319

320

def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

321

(SETB_C16r)>;

322

def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

323

(SETB_C32r)>;

324

def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),

325

(SETB_C64r)>;

326

327

// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and

328

// will be eliminated and that the sbb can be extended up to a wider type. When

329

// this happens, it is great. However, if we are left with an 8-bit sbb and an

330

// and, we might as well just match it as a setb.

331

def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),

332

(SETBr)>;

333

334

// (add OP, SETB) -> (adc OP, 0)

335

def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),

336

(ADC8ri GR8:$op, 0)>;

337

def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),

338

(ADC32ri8 GR32:$op, 0)>;

339

def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),

340

(ADC64ri8 GR64:$op, 0)>;

341

342

// (sub OP, SETB) -> (sbb OP, 0)

343

def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

344

(SBB8ri GR8:$op, 0)>;

345

def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

346

(SBB32ri8 GR32:$op, 0)>;

347

def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),

348

(SBB64ri8 GR64:$op, 0)>;

349

350

// (sub OP, SETCC_CARRY) -> (adc OP, 0)

351

def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),

352

(ADC8ri GR8:$op, 0)>;

353

def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),

354

(ADC32ri8 GR32:$op, 0)>;

355

def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),

356

(ADC64ri8 GR64:$op, 0)>;

357

358

//===----------------------------------------------------------------------===//

359

// String Pseudo Instructions

360

//

361

let SchedRW = [WriteMicrocoded] in {

362

let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {

363

def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",

364

[(X86rep_movs i8)], IIC_REP_MOVS>, REP,

365

Requires<[Not64BitMode]>;

366

def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",

367

[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,

368

Requires<[Not64BitMode]>;

369

def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",

370

[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,

371

Requires<[Not64BitMode]>;

372

}

373

374

let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {

375

def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",

376

[(X86rep_movs i8)], IIC_REP_MOVS>, REP,

377

Requires<[In64BitMode]>;

378

def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",

379

[(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16,

380

Requires<[In64BitMode]>;

381

def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",

382

[(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32,

383

Requires<[In64BitMode]>;

384

def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",

385

[(X86rep_movs i64)], IIC_REP_MOVS>, REP,

386

Requires<[In64BitMode]>;

387

}

388

389

// FIXME: Should use "(X86rep_stos AL)" as the pattern.

390

let Defs = [ECX,EDI], isCodeGenOnly = 1 in {

391

let Uses = [AL,ECX,EDI] in

392

def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",

393

[(X86rep_stos i8)], IIC_REP_STOS>, REP,

394

Requires<[Not64BitMode]>;

395

let Uses = [AX,ECX,EDI] in

396

def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",

397

[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,

398

Requires<[Not64BitMode]>;

399

let Uses = [EAX,ECX,EDI] in

400

def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",

401

[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,

402

Requires<[Not64BitMode]>;

403

}

404

405

let Defs = [RCX,RDI], isCodeGenOnly = 1 in {

406

let Uses = [AL,RCX,RDI] in

407

def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",

408

[(X86rep_stos i8)], IIC_REP_STOS>, REP,

409

Requires<[In64BitMode]>;

410

let Uses = [AX,RCX,RDI] in

411

def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",

412

[(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16,

413

Requires<[In64BitMode]>;

414

let Uses = [RAX,RCX,RDI] in

415

def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",

416

[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,

417

Requires<[In64BitMode]>;

418

419

let Uses = [RAX,RCX,RDI] in

420

def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",

421

[(X86rep_stos i64)], IIC_REP_STOS>, REP,

422

Requires<[In64BitMode]>;

}

} // SchedRW

//===----------------------------------------------------------------------===//

427

// Thread Local Storage Instructions

//

// ELF TLS Support

// All calls clobber the non-callee saved registers. ESP is marked as

432

// a use to prevent stack-pointer assignments that appear immediately

433

// before calls from potentially appearing dead.

434

let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,

435

ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,

436

MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,

437

XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,

438

XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],

Davide Italiano

228978c

2016-02-20 00:44:47 +0000

[diff] [blame]

439

usesCustomInserter = 1, Uses = [ESP] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

440

def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

441

"# TLS_addr32",

442

[(X86tlsaddr tls32addr:$sym)]>,

443

Requires<[Not64BitMode]>;

444

def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

445

"# TLS_base_addr32",

446

[(X86tlsbaseaddr tls32baseaddr:$sym)]>,

447

Requires<[Not64BitMode]>;

448

}

449

450

// All calls clobber the non-callee saved registers. RSP is marked as

451

// a use to prevent stack-pointer assignments that appear immediately

452

// before calls from potentially appearing dead.

453

let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,

454

FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,

455

ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,

456

MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,

457

XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,

458

XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],

Davide Italiano

228978c

2016-02-20 00:44:47 +0000

[diff] [blame]

459

usesCustomInserter = 1, Uses = [RSP] in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

460

def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

461

"# TLS_addr64",

462

[(X86tlsaddr tls64addr:$sym)]>,

463

Requires<[In64BitMode]>;

464

def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

465

"# TLS_base_addr64",

466

[(X86tlsbaseaddr tls64baseaddr:$sym)]>,

467

Requires<[In64BitMode]>;

468

}

469

470

// Darwin TLS Support

471

// For i386, the address of the thunk is passed on the stack, on return the

472

// address of the variable is in %eax. %ecx is trashed during the function

473

// call. All other registers are preserved.

474

let Defs = [EAX, ECX, EFLAGS],

475

Uses = [ESP],

476

usesCustomInserter = 1 in

477

def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),

478

"# TLSCall_32",

479

[(X86TLSCall addr:$sym)]>,

480

Requires<[Not64BitMode]>;

481

482

// For x86_64, the address of the thunk is passed in %rdi, on return

483

// the address of the variable is in %rax. All other registers are preserved.

484

let Defs = [RAX, EFLAGS],

485

Uses = [RSP, RDI],

486

usesCustomInserter = 1 in

487

def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),

488

"# TLSCall_64",

489

[(X86TLSCall addr:$sym)]>,

490

Requires<[In64BitMode]>;

491

492

493

//===----------------------------------------------------------------------===//

494

// Conditional Move Pseudo Instructions

495

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

496

// CMOV* - Used to implement the SELECT DAG operation. Expanded after

497

// instruction selection into a branch sequence.

498

multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
// RC - register class used for the result ($dst) and both value operands
//      ($t and $f).
// VT - value type the X86cmov node is matched as.
// Each defm of this multiclass yields one pseudo whose record name is "CMOV"
// followed by the defm name (e.g. defm _GR8 produces CMOV_GR8). $cond is an
// i8 immediate operand, and the pattern also takes EFLAGS as an input.

499

def CMOV#NAME : I<0, Pseudo,

500

(outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),

501

"#CMOV_"#NAME#" PSEUDO!",

502

[(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond,

EFLAGS)))]>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

506

let usesCustomInserter = 1, Uses = [EFLAGS] in {

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

507

// X86 doesn't have 8-bit conditional moves. Use a customInserter to

508

// emit control flow. An alternative to this is to mark i8 SELECT as Promote,

509

// however that requires promoting the operands, and can induce additional

510

// i8 register pressure.

511

defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

512

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

513

let Predicates = [NoCMov] in {

514

defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;

515

defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;

516

} // Predicates = [NoCMov]

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

517

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

518

// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no

519

// SSE1/SSE2.

520

let Predicates = [FPStackf32] in

521

defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

522

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

523

let Predicates = [FPStackf64] in

524

defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;

525

526

defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;

527

528

defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;

529

defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;

Chih-Hung Hsieh

7993e18

2015-12-14 22:08:36 +0000

[diff] [blame]

530

defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

531

defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;

532

defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;

533

defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;

534

defm _V8F32 : CMOVrr_PSEUDO<VR256, v8f32>;

535

defm _V4F64 : CMOVrr_PSEUDO<VR256, v4f64>;

536

defm _V4I64 : CMOVrr_PSEUDO<VR256, v4i64>;

537

defm _V8I64 : CMOVrr_PSEUDO<VR512, v8i64>;

538

defm _V8F64 : CMOVrr_PSEUDO<VR512, v8f64>;

539

defm _V16F32 : CMOVrr_PSEUDO<VR512, v16f32>;

Elena Demikhovsky

c1ac5d7

2015-05-12 09:36:52 +0000

[diff] [blame]

540

defm _V8I1 : CMOVrr_PSEUDO<VK8, v8i1>;

541

defm _V16I1 : CMOVrr_PSEUDO<VK16, v16i1>;

542

defm _V32I1 : CMOVrr_PSEUDO<VK32, v32i1>;

543

defm _V64I1 : CMOVrr_PSEUDO<VK64, v64i1>;

Ahmed Bougacha

2015-02-14 01:36:53 +0000

[diff] [blame]

544

} // usesCustomInserter = 1, Uses = [EFLAGS]

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

545

546

//===----------------------------------------------------------------------===//

547

// Normal-Instructions-With-Lock-Prefix Pseudo Instructions

548

//===----------------------------------------------------------------------===//

549

550

// FIXME: Use normal instructions and add lock prefix dynamically.

// Memory barriers

// TODO: Get this to fold the constant into the instruction.

555

let isCodeGenOnly = 1, Defs = [EFLAGS] in

556

def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),

Craig Topper

9583f51

2016-01-05 07:44:11 +0000

[diff] [blame]

557

"or{l}\t{$zero, $dst|$dst, $zero}", [],

558

IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK,

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

559

Sched<[WriteALULd, WriteRMW]>;

560

561

let hasSideEffects = 1 in

562

def Int_MemBarrier : I<0, Pseudo, (outs), (ins),

563

"#MEMBARRIER",

564

[(X86MemBarrier)]>, Sched<[WriteLoad]>;

565

566

// RegOpc corresponds to the mr version of the instruction

567

// ImmOpc corresponds to the mi version of the instruction

568

// ImmOpc8 corresponds to the mi8 version of the instruction

569

// ImmMod corresponds to the instruction format of the mi and mi8 versions

570

multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

571

Format ImmMod, SDPatternOperator Op, string mnemonic> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

572

let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,

573

SchedRW = [WriteALULd, WriteRMW] in {

574

575

def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

576

RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },

577

MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),

578

!strconcat(mnemonic, "{b}\t",

579

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

580

[(set EFLAGS, (Op addr:$dst, GR8:$src2))],

581

IIC_ALU_NONMEM>, LOCK;

582

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

583

def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

584

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

585

MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),

586

!strconcat(mnemonic, "{w}\t",

587

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

588

[(set EFLAGS, (Op addr:$dst, GR16:$src2))],

589

IIC_ALU_NONMEM>, OpSize16, LOCK;

590

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

591

def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

592

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

593

MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),

594

!strconcat(mnemonic, "{l}\t",

595

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

596

[(set EFLAGS, (Op addr:$dst, GR32:$src2))],

597

IIC_ALU_NONMEM>, OpSize32, LOCK;

598

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

599

def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},

600

RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },

601

MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),

602

!strconcat(mnemonic, "{q}\t",

603

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

604

[(set EFLAGS, (Op addr:$dst, GR64:$src2))],

605

IIC_ALU_NONMEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

606

607

def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

608

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },

609

ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),

610

!strconcat(mnemonic, "{b}\t",

611

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

612

[(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))],

613

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

614

615

def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

616

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

617

ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),

618

!strconcat(mnemonic, "{w}\t",

619

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

620

[(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))],

621

IIC_ALU_MEM>, OpSize16, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

622

623

def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

624

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

625

ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),

626

!strconcat(mnemonic, "{l}\t",

627

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

628

[(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))],

629

IIC_ALU_MEM>, OpSize32, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

630

631

def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},

632

ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },

633

ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),

634

!strconcat(mnemonic, "{q}\t",

635

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

636

[(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],

637

IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

638

639

def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

640

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

641

ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),

642

!strconcat(mnemonic, "{w}\t",

643

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

644

[(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))],

645

IIC_ALU_MEM>, OpSize16, LOCK;

646

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

647

def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},

648

ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },

649

ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),

650

!strconcat(mnemonic, "{l}\t",

651

"{$src2, $dst|$dst, $src2}"),

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

652

[(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))],

653

IIC_ALU_MEM>, OpSize32, LOCK;

654

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

655

// 64-bit memory destination with an 8-bit immediate (ImmOpc8 form, i64i8imm
// operand). The selection pattern must use i64immSExt8, like the 16mi8 and
// 32mi8 forms use i16immSExt8 / i32immSExt8: only immediates that fit the
// sign-extended 8-bit field may select this encoding. Wider immediates are
// matched by NAME#64mi32 through i64immSExt32.
def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                      ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
                      !strconcat(mnemonic, "{q}\t",
                                 "{$src2, $dst|$dst, $src2}"),
                      [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))],
                      IIC_ALU_MEM>, LOCK;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

}

}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

667

defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;

668

defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;

669

defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;

670

defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;

671

defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

672

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

673

// Locked unary arithmetic on a memory operand, in 8/16/32/64-bit widths.
//   Opc8      - opcode used by the 8-bit def.
//   Opc       - opcode shared by the 16/32/64-bit defs.
//   Form      - instruction Format forwarded to the I / RI classes.
//   Increment - integer constant used as the second operand of X86lock_add
//               in each selection pattern.
//   mnemonic  - assembly mnemonic; a {b}/{w}/{l}/{q} suffix is appended.
// Every def writes EFLAGS, is marked mayLoad/mayStore and isCodeGenOnly, and
// is predicated on NotSlowIncDec.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
                          int Increment, string mnemonic> {
  let Predicates = [NotSlowIncDec], SchedRW = [WriteALULd, WriteRMW],
      isCodeGenOnly = 1, mayStore = 1, mayLoad = 1, Defs = [EFLAGS] in {
    def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
                    mnemonic # "{b}\t$dst",
                    [(set EFLAGS,
                          (X86lock_add addr:$dst, (i8 Increment)))],
                    IIC_UNARY_MEM>, LOCK;

    def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
                     mnemonic # "{w}\t$dst",
                     [(set EFLAGS,
                           (X86lock_add addr:$dst, (i16 Increment)))],
                     IIC_UNARY_MEM>, OpSize16, LOCK;

    def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
                     mnemonic # "{l}\t$dst",
                     [(set EFLAGS,
                           (X86lock_add addr:$dst, (i32 Increment)))],
                     IIC_UNARY_MEM>, OpSize32, LOCK;

    def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
                      mnemonic # "{q}\t$dst",
                      [(set EFLAGS,
                            (X86lock_add addr:$dst, (i64 Increment)))],
                      IIC_UNARY_MEM>, LOCK;
  }
}

Ahmed Bougacha

2016-02-29 19:28:07 +0000

[diff] [blame]

696

defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, 1, "inc">;

697

defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

698

699

// Atomic compare and swap.

700

// Locked compare-and-exchange whose only explicit operand is the memory
// location $ptr. Produces a single isCodeGenOnly def named NAME that matches
// (frag addr:$ptr). Implicit register uses/defs are not set here; they are
// expected to come from the `let` wrapped around each defm.
//   Opc / Form - opcode and instruction Format forwarded to class I.
//   mnemonic   - assembly mnemonic, printed as "<mnemonic>\t$ptr".
//   frag       - pattern operator matched on the address.
//   x86memop   - memory operand type of $ptr.
//   itin       - instruction itinerary class.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
                         SDPatternOperator frag, X86MemOperand x86memop,
                         InstrItinClass itin> {
  let isCodeGenOnly = 1 in
  def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
               mnemonic # "\t$ptr",
               [(frag addr:$ptr)], itin>, TB, LOCK;
}

// Locked compare-and-exchange of a register ($swap) with memory ($ptr), in
// 8/16/32/64-bit widths. Each width both uses and defines the matching
// accumulator (AL / AX / EAX / RAX) and defines EFLAGS.
//   Opc8 / itin8 - opcode and itinerary of the 8-bit def.
//   Opc / itin   - opcode and itinerary shared by the 16/32/64-bit defs.
//   Form         - instruction Format forwarded to the I / RI classes.
//   mnemonic     - assembly mnemonic; a {b}/{w}/{l}/{q} suffix is appended.
//   frag         - pattern operator; its third operand is the constant
//                  1, 2, 4 or 8 (presumably the access size in bytes).
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                          string mnemonic, SDPatternOperator frag,
                          InstrItinClass itin8, InstrItinClass itin> {
  let SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1 in {
    let Uses = [AL], Defs = [AL, EFLAGS] in {
      def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
                     mnemonic # "{b}\t{$swap, $ptr|$ptr, $swap}",
                     [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
    }

    let Uses = [AX], Defs = [AX, EFLAGS] in {
      def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
                      mnemonic # "{w}\t{$swap, $ptr|$ptr, $swap}",
                      [(frag addr:$ptr, GR16:$swap, 2)], itin>,
                    TB, OpSize16, LOCK;
    }

    let Uses = [EAX], Defs = [EAX, EFLAGS] in {
      def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
                      mnemonic # "{l}\t{$swap, $ptr|$ptr, $swap}",
                      [(frag addr:$ptr, GR32:$swap, 4)], itin>,
                    TB, OpSize32, LOCK;
    }

    let Uses = [RAX], Defs = [RAX, EFLAGS] in {
      def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
                       mnemonic # "{q}\t{$swap, $ptr|$ptr, $swap}",
                       [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
    }
  }
}

let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],

734

SchedRW = [WriteALULd, WriteRMW] in {

735

defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",

X86cas8, i64mem,

IIC_CMPX_LOCK_8B>;

}

Quentin Colombet

2016-03-12 02:25:27 +0000

[diff] [blame^]

740

// This pseudo must be used when the frame uses RBX as

741

// the base pointer. Indeed, in such situation RBX is a reserved

742

// register and the register allocator will ignore any use/def of

743

// it. In other words, the register allocator will not fix the clobbering of

744

// RBX that will happen when setting the arguments for the instruction.

745

//

746

// Unlike the actual related instruction, we mark that this one

747

// defines EBX (instead of using EBX).

748

// The rationale is that we will define RBX during the expansion of

749

// the pseudo. The argument feeding EBX is ebx_input.

750

//

751

// The additional argument, $ebx_save, is a temporary register used to

752

// save the value of RBX across the actual instruction.

753

//

754

// To make sure the register assigned to $ebx_save does not interfere with

755

// the definition of the actual instruction, we use a definition $dst which

756

// is tied to $ebx_save. That way, the live-range of $ebx_save spans across

757

// the instruction and we are sure we will have a valid register to restore

758

// the value of RBX.

759

let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],

760

SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,

761

Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {

762

def LCMPXCHG8B_SAVE_EBX :

763

I<0, Pseudo, (outs GR32:$dst),

764

(ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),

765

!strconcat("cmpxchg8b", "\t$ptr"),

766

[(set GR32:$dst, (X86cas8save_ebx addr:$ptr, GR32:$ebx_input,

GR32:$ebx_save))],

IIC_CMPX_LOCK_8B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

772

let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],

773

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {

774

defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",

775

X86cas16, i128mem,

776

IIC_CMPX_LOCK_16B>, REX_W;

777

}

778

Quentin Colombet

cf9732b

2016-03-12 02:25:27 +0000

[diff] [blame^]

779

// Same as LCMPXCHG8B_SAVE_EBX but for the 16-byte variant.

780

let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],

781

Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],

782

isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",

783

usesCustomInserter = 1 in {

784

def LCMPXCHG16B_SAVE_RBX :

785

I<0, Pseudo, (outs GR64:$dst),

786

(ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),

787

!strconcat("cmpxchg16b", "\t$ptr"),

788

[(set GR64:$dst, (X86cas16save_rbx addr:$ptr, GR64:$rbx_input,

GR64:$rbx_save))],

IIC_CMPX_LOCK_16B>;

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

793

defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",

794

X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;

795

796

// Atomic exchange and add

797

// Atomic read-modify-write instructions that return a value in a register,
// in 8/16/32/64-bit widths. The input register $val is tied to the result
// $dst ("$val = $dst"), and every def writes EFLAGS.
//   opc8 / itin8 - opcode and itinerary of the 8-bit def.
//   opc / itin   - opcode and itinerary shared by the 16/32/64-bit defs.
//   mnemonic     - assembly mnemonic; a {b}/{w}/{l}/{q} suffix is appended.
//   frag         - base name of the PatFrag to match; "_8", "_16", "_32" or
//                  "_64" is appended and the result is looked up with !cast.
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
                             string frag,
                             InstrItinClass itin8, InstrItinClass itin> {
  let SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1,
      Defs = [EFLAGS], Constraints = "$val = $dst" in {
    def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
                   (ins GR8:$val, i8mem:$ptr),
                   mnemonic # "{b}\t{$val, $ptr|$ptr, $val}",
                   [(set GR8:$dst,
                         (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
                   itin8>;

    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
                    (ins GR16:$val, i16mem:$ptr),
                    mnemonic # "{w}\t{$val, $ptr|$ptr, $val}",
                    [(set GR16:$dst,
                          (!cast<PatFrag>(frag # "_16") addr:$ptr,
                                                        GR16:$val))],
                    itin>, OpSize16;

    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$val, i32mem:$ptr),
                    mnemonic # "{l}\t{$val, $ptr|$ptr, $val}",
                    [(set GR32:$dst,
                          (!cast<PatFrag>(frag # "_32") addr:$ptr,
                                                        GR32:$val))],
                    itin>, OpSize32;

    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
                     (ins GR64:$val, i64mem:$ptr),
                     mnemonic # "{q}\t{$val, $ptr|$ptr, $val}",
                     [(set GR64:$dst,
                           (!cast<PatFrag>(frag # "_64") addr:$ptr,
                                                         GR64:$val))],
                     itin>;
  }
}

defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",

833

IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,

834

TB, LOCK;

835

836

/* The following multiclass tries to make sure that in code like

837

* x.store (immediate op x.load(acquire), release)

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

838

* and

839

* x.store (register op x.load(acquire), release)

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

840

* an operation directly on memory is generated instead of wasting a register.

841

* It is not automatic as atomic_store/load are only lowered to MOV instructions

842

* extremely late to prevent them from being accidentally reordered in the backend

843

* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)

844

*/

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

845

multiclass RELEASE_BINOP_MI<SDNode op> {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

846

def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

847

"#BINOP "#NAME#"8mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

848

[(atomic_store_8 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

849

(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

850

def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),

851

"#BINOP "#NAME#"8mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

852

[(atomic_store_8 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

853

(atomic_load_8 addr:$dst), GR8:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

854

// NAME#16 is not generated as 16-bit arithmetic instructions are considered

855

// costly and avoided as far as possible by this backend anyway

856

def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

857

"#BINOP "#NAME#"32mi PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

858

[(atomic_store_32 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

859

(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

860

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

861

"#BINOP "#NAME#"32mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

862

[(atomic_store_32 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

863

(atomic_load_32 addr:$dst), GR32:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

864

def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

865

"#BINOP "#NAME#"64mi32 PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

866

[(atomic_store_64 addr:$dst, (op

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

867

(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

868

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

869

"#BINOP "#NAME#"64mr PSEUDO!",

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

870

[(atomic_store_64 addr:$dst, (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

871

(atomic_load_64 addr:$dst), GR64:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

872

}

JF Bastien

986ed68

2015-10-13 00:28:47 +0000

[diff] [blame]

873

let Defs = [EFLAGS] in {

874

defm RELEASE_ADD : RELEASE_BINOP_MI<add>;

875

defm RELEASE_AND : RELEASE_BINOP_MI<and>;

876

defm RELEASE_OR : RELEASE_BINOP_MI<or>;

877

defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;

878

// Note: we don't deal with sub, because subtractions of constants are

879

// optimized into additions before this code can run.

880

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

881

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

882

// Same as above, but for floating-point.

883

// FIXME: imm version.

884

// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.

885

// FIXME: This could also handle SIMD operations with *ps and *pd instructions.

886

let usesCustomInserter = 1 in {

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

887

multiclass RELEASE_FP_BINOP_MI<SDNode op> {

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

888

def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),

889

"#BINOP "#NAME#"32mr PSEUDO!",

890

[(atomic_store_32 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

891

(i32 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

892

(f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),

893

FR32:$src))))]>, Requires<[HasSSE1]>;

894

def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),

895

"#BINOP "#NAME#"64mr PSEUDO!",

896

[(atomic_store_64 addr:$dst,

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

897

(i64 (bitconvert (op

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

898

(f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),

899

FR64:$src))))]>, Requires<[HasSSE2]>;

900

}

JF Bastien

2015-08-05 23:15:37 +0000

[diff] [blame]

901

defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

902

// FIXME: Add fsub, fmul, fdiv, ...

903

}

904

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

905

multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {

906

def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

907

"#UNOP "#NAME#"8m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

908

[(atomic_store_8 addr:$dst, dag8)]>;

909

def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

910

"#UNOP "#NAME#"16m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

911

[(atomic_store_16 addr:$dst, dag16)]>;

912

def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

913

"#UNOP "#NAME#"32m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

914

[(atomic_store_32 addr:$dst, dag32)]>;

915

def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

916

"#UNOP "#NAME#"64m PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

917

[(atomic_store_64 addr:$dst, dag64)]>;

918

}

919

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

920

let Defs = [EFLAGS] in {

921

defm RELEASE_INC : RELEASE_UNOP<

922

(add (atomic_load_8 addr:$dst), (i8 1)),

923

(add (atomic_load_16 addr:$dst), (i16 1)),

924

(add (atomic_load_32 addr:$dst), (i32 1)),

925

(add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;

926

defm RELEASE_DEC : RELEASE_UNOP<

927

(add (atomic_load_8 addr:$dst), (i8 -1)),

928

(add (atomic_load_16 addr:$dst), (i16 -1)),

929

(add (atomic_load_32 addr:$dst), (i32 -1)),

930

(add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;

931

}

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

932

/*

933

TODO: These don't work because the type inference of TableGen fails.

934

TODO: find a way to fix it.

JF Bastien

2cdd5e4

2015-10-15 18:24:52 +0000

[diff] [blame]

935

let Defs = [EFLAGS] in {

936

defm RELEASE_NEG : RELEASE_UNOP<

937

(ineg (atomic_load_8 addr:$dst)),

938

(ineg (atomic_load_16 addr:$dst)),

939

(ineg (atomic_load_32 addr:$dst)),

940

(ineg (atomic_load_64 addr:$dst))>;

941

}

942

// NOT doesn't set flags.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

943

defm RELEASE_NOT : RELEASE_UNOP<

944

(not (atomic_load_8 addr:$dst)),

945

(not (atomic_load_16 addr:$dst)),

946

(not (atomic_load_32 addr:$dst)),

947

(not (atomic_load_64 addr:$dst))>;

948

*/

949

950

def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

951

"#RELEASE_MOV8mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

952

[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;

953

def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

954

"#RELEASE_MOV16mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

955

[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;

956

def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

957

"#RELEASE_MOV32mi PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

958

[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;

959

def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

960

"#RELEASE_MOV64mi32 PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

961

[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;

962

963

def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

964

"#RELEASE_MOV8mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

965

[(atomic_store_8 addr:$dst, GR8 :$src)]>;

966

def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

967

"#RELEASE_MOV16mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

968

[(atomic_store_16 addr:$dst, GR16:$src)]>;

969

def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

970

"#RELEASE_MOV32mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

971

[(atomic_store_32 addr:$dst, GR32:$src)]>;

972

def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

973

"#RELEASE_MOV64mr PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

974

[(atomic_store_64 addr:$dst, GR64:$src)]>;

975

976

def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

977

"#ACQUIRE_MOV8rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

978

[(set GR8:$dst, (atomic_load_8 addr:$src))]>;

979

def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

980

"#ACQUIRE_MOV16rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

981

[(set GR16:$dst, (atomic_load_16 addr:$src))]>;

982

def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

983

"#ACQUIRE_MOV32rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

984

[(set GR32:$dst, (atomic_load_32 addr:$src))]>;

985

def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),

JF Bastien

2015-08-05 21:04:59 +0000

[diff] [blame]

986

"#ACQUIRE_MOV64rm PSEUDO!",

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

987

[(set GR64:$dst, (atomic_load_64 addr:$src))]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

988

989

//===----------------------------------------------------------------------===//

990

// DAG Pattern Matching Rules

991

//===----------------------------------------------------------------------===//

992

993

// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable

994

def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;

995

def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;

996

def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;

997

def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;

998

def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

999

def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1000

def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;

1001

1002

def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),

1003

(ADD32ri GR32:$src1, tconstpool:$src2)>;

1004

def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),

1005

(ADD32ri GR32:$src1, tjumptable:$src2)>;

1006

def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),

1007

(ADD32ri GR32:$src1, tglobaladdr:$src2)>;

1008

def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),

1009

(ADD32ri GR32:$src1, texternalsym:$src2)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1010

def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),

1011

(ADD32ri GR32:$src1, mcsym:$src2)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1012

def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),

1013

(ADD32ri GR32:$src1, tblockaddress:$src2)>;

1014

1015

def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),

1016

(MOV32mi addr:$dst, tglobaladdr:$src)>;

1017

def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),

1018

(MOV32mi addr:$dst, texternalsym:$src)>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1019

def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),

1020

(MOV32mi addr:$dst, mcsym:$src)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1021

def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),

1022

(MOV32mi addr:$dst, tblockaddress:$src)>;

1023

1024

// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small

1025

// code model mode, should use 'movabs'. FIXME: This is really a hack, the

1026

// 'movabs' predicate should handle this sort of thing.

1027

def : Pat<(i64 (X86Wrapper tconstpool :$dst)),

1028

(MOV64ri tconstpool :$dst)>, Requires<[FarData]>;

1029

def : Pat<(i64 (X86Wrapper tjumptable :$dst)),

1030

(MOV64ri tjumptable :$dst)>, Requires<[FarData]>;

1031

def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),

1032

(MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;

1033

def : Pat<(i64 (X86Wrapper texternalsym:$dst)),

1034

(MOV64ri texternalsym:$dst)>, Requires<[FarData]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1035

def : Pat<(i64 (X86Wrapper mcsym:$dst)),

1036

(MOV64ri mcsym:$dst)>, Requires<[FarData]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1037

def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),

1038

(MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;

1039

1040

// In kernel code model, we can get the address of a label

1041

// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of

1042

// the MOV64ri32 should accept these.

1043

def : Pat<(i64 (X86Wrapper tconstpool :$dst)),

1044

(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;

1045

def : Pat<(i64 (X86Wrapper tjumptable :$dst)),

1046

(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;

1047

def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),

1048

(MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;

1049

def : Pat<(i64 (X86Wrapper texternalsym:$dst)),

1050

(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1051

def : Pat<(i64 (X86Wrapper mcsym:$dst)),

1052

(MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1053

def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),

1054

(MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;

1055

1056

// If we have small model and -static mode, it is safe to store global addresses

1057

// directly as immediates. FIXME: This is really a hack, the 'imm' predicate

1058

// for MOV64mi32 should handle this sort of thing.

1059

def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),

1060

(MOV64mi32 addr:$dst, tconstpool:$src)>,

1061

Requires<[NearData, IsStatic]>;

1062

def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),

1063

(MOV64mi32 addr:$dst, tjumptable:$src)>,

1064

Requires<[NearData, IsStatic]>;

1065

def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),

1066

(MOV64mi32 addr:$dst, tglobaladdr:$src)>,

1067

Requires<[NearData, IsStatic]>;

1068

def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),

1069

(MOV64mi32 addr:$dst, texternalsym:$src)>,

1070

Requires<[NearData, IsStatic]>;

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1071

def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),

1072

(MOV64mi32 addr:$dst, mcsym:$src)>,

1073

Requires<[NearData, IsStatic]>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1074

def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),

1075

(MOV64mi32 addr:$dst, tblockaddress:$src)>,

1076

Requires<[NearData, IsStatic]>;

1077

Rafael Espindola

2015-06-22 17:46:53 +0000

[diff] [blame]

1078

def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;

1079

def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

// Calls

// tls has some funny stuff here...

1084

// This corresponds to movabs $foo@tpoff, %rax

1085

def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),

1086

(MOV64ri32 tglobaltlsaddr :$dst)>;

1087

// This corresponds to add $foo@tpoff, %rax

1088

def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),

1089

(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;

1090

1091

1092

// Direct PC relative function call for small code model. 32-bit displacement

1093

// sign extended to 64-bit.

1094

def : Pat<(X86call (i64 tglobaladdr:$dst)),

1095

(CALL64pcrel32 tglobaladdr:$dst)>;

1096

def : Pat<(X86call (i64 texternalsym:$dst)),

1097

(CALL64pcrel32 texternalsym:$dst)>;

1098

1099

// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they

1100

// can never use callee-saved registers. That is the purpose of the GR64_TC

1101

// register classes.

1102

//

1103

// The only volatile register that is never used by the calling convention is

1104

// %r11. This happens when calling a vararg function with 6 arguments.

1105

//

1106

// Match an X86tcret only when at most 6 of its operands, counted from operand
// index 3 onward, are register nodes. The predicate rejects the node as soon
// as a seventh register operand is seen.
def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
                             (X86tcret node:$ptr, node:$off), [{
  // X86tcret args: (*chain, ptr, imm, regs..., glue)
  unsigned RegCount = 0;
  for (unsigned Idx = 3, End = N->getNumOperands(); Idx != End; ++Idx) {
    // Operands that are not registers do not count toward the limit.
    if (!isa<RegisterSDNode>(N->getOperand(Idx)))
      continue;
    if (++RegCount > 6)
      return false;
  }
  return true;
}]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1118

(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,

1119

Requires<[Not64BitMode]>;

1120

1121

// FIXME: This is disabled for 32-bit PIC mode because the global base

1122

// register which is part of the address mode may be assigned a

1123

// callee-saved register.

1124

def : Pat<(X86tcret (load addr:$dst), imm:$off),

1125

(TCRETURNmi addr:$dst, imm:$off)>,

1126

Requires<[Not64BitMode, IsNotPIC]>;

1127

1128

def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),

1129

(TCRETURNdi tglobaladdr:$dst, imm:$off)>,

1130

Requires<[NotLP64]>;

1131

1132

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),

1133

(TCRETURNdi texternalsym:$dst, imm:$off)>,

1134

Requires<[NotLP64]>;

1135

1136

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),

1137

(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,

1138

Requires<[In64BitMode]>;

1139

1140

// Don't fold loads into X86tcret requiring more than 6 regs.

1141

// There wouldn't be enough scratch registers for base+index.

1142

def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),

1143

(TCRETURNmi64 addr:$dst, imm:$off)>,

1144

Requires<[In64BitMode]>;

1145

1146

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),

1147

(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,

1148

Requires<[IsLP64]>;

1149

1150

def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),

1151

(TCRETURNdi64 texternalsym:$dst, imm:$off)>,

1152

Requires<[IsLP64]>;

1153

1154

// Normal calls, with various flavors of addresses.

1155

def : Pat<(X86call (i32 tglobaladdr:$dst)),

1156

(CALLpcrel32 tglobaladdr:$dst)>;

1157

def : Pat<(X86call (i32 texternalsym:$dst)),

1158

(CALLpcrel32 texternalsym:$dst)>;

1159

def : Pat<(X86call (i32 imm:$dst)),

1160

(CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;

// Comparisons.

// TEST R,R is smaller than CMP R,0

1165

def : Pat<(X86cmp GR8:$src1, 0),

1166

(TEST8rr GR8:$src1, GR8:$src1)>;

1167

def : Pat<(X86cmp GR16:$src1, 0),

1168

(TEST16rr GR16:$src1, GR16:$src1)>;

1169

def : Pat<(X86cmp GR32:$src1, 0),

1170

(TEST32rr GR32:$src1, GR32:$src1)>;

1171

def : Pat<(X86cmp GR64:$src1, 0),

1172

(TEST64rr GR64:$src1, GR64:$src1)>;

1173

1174

// Conditional moves with folded loads, with operands swapped and conditions
// inverted. The load appears as the first X86cmov operand in the matched DAG
// and as the memory operand of the selected instruction; each instantiation
// passes a condition code together with the CMOV instructions to select for
// it. All three patterns require the HasCMov predicate.
multiclass CMOVmr<PatLeaf CondInv, Instruction Op16, Instruction Op32,
                  Instruction Op64> {
  def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, CondInv, EFLAGS),
            (Op16 GR16:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
  def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, CondInv, EFLAGS),
            (Op32 GR32:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, CondInv, EFLAGS),
            (Op64 GR64:$src2, addr:$src1)>,
        Requires<[HasCMov]>;
}

defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;

1189

defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;

1190

defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;

1191

defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;

1192

defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;

1193

defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;

1194

defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;

1195

defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;

1196

defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;

1197

defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;

1198

defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;

1199

defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;

1200

defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;

1201

defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;

1202

defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;

1203

defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;

1204

1205

// zextload bool -> zextload byte

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1206

// i1 stored in one byte in zero-extended form.

1207

// Upper bits cleanup should be executed before Store.

1208

def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1209

def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1210

def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1211

def : Pat<(zextloadi64i1 addr:$src),

Elena Demikhovsky

e5bbca6

2016-02-25 07:05:12 +0000

[diff] [blame]

1212

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1213

1214

// extload bool -> extload byte

1215

// When extloading from 16-bit and smaller memory locations into 64-bit

1216

// registers, use zero-extending loads so that the entire 64-bit register is

1217

// defined, avoiding partial-register updates.

1218

1219

def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;

1220

def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;

1221

def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;

1222

def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;

1223

def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;

1224

def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

1225

1226

// For other extloads, use subregs, since the high contents of the register are

1227

// defined after an extload.

1228

def : Pat<(extloadi64i1 addr:$src),

1229

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1230

def : Pat<(extloadi64i8 addr:$src),

1231

(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

1232

def : Pat<(extloadi64i16 addr:$src),

1233

(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;

1234

def : Pat<(extloadi64i32 addr:$src),

1235

(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;

1236

1237

// anyext. Define these to do an explicit zero-extend to

1238

// avoid partial-register updates.

1239

def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG

1240

(MOVZX32rr8 GR8 :$src), sub_16bit)>;

1241

def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;

1242

1243

// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.

1244

def : Pat<(i32 (anyext GR16:$src)),

1245

(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

1246

1247

def : Pat<(i64 (anyext GR8 :$src)),

1248

(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;

1249

def : Pat<(i64 (anyext GR16:$src)),

1250

(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;

1251

def : Pat<(i64 (anyext GR32:$src)),

1252

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1253

1254

1255

// Most operations that define a 32-bit result zero-extend it up to 64 bits.
// def32 matches a 32-bit value unless the node producing it is one of the
// exceptions listed below: a truncate (which can be lowered to
// EXTRACT_SUBREG), an EXTRACT_SUBREG, a CopyFromReg (which may be copying from
// a truncate), an AssertSext, or an x86 cmov (which doesn't do anything if the
// condition is false).
def def32 : PatLeaf<(i32 GR32:$src), [{
  unsigned Opc = N->getOpcode();
  // True for the opcodes that are excluded from the match.
  bool IsException = Opc == ISD::TRUNCATE ||
                     Opc == TargetOpcode::EXTRACT_SUBREG ||
                     Opc == ISD::CopyFromReg ||
                     Opc == ISD::AssertSext ||
                     Opc == X86ISD::CMOV;
  return !IsException;
}]>;

1267

1268

// In the case of a 32-bit def that is known to implicitly zero-extend,

1269

// we can use a SUBREG_TO_REG.

1270

def : Pat<(i64 (zext def32:$src)),

1271

(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

1272

1273

//===----------------------------------------------------------------------===//

1274

// Pattern match OR as ADD

1275

//===----------------------------------------------------------------------===//

1276

1277

// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be

1278

// 3-addressified into an LEA instruction to avoid copies. However, we also

1279

// want to finally emit these instructions as an or at the end of the code

1280

// generator to make the generated code easier to read. To do this, we select

1281

// into "disjoint bits" pseudo ops.

1282

1283

// Treat an 'or' node as an 'add' when its two operands are known to have no
// set bits in common, so that or'ing them yields the same value as adding them.
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Constant right-hand side: every bit set in the constant must be known to
  // be zero in the left-hand side.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
    return CurDAG->MaskedValueIsZero(LHS, C->getAPIntValue());

  // General case: each bit position must be known zero in at least one of the
  // two operands.
  APInt LHSZero, LHSOne;
  CurDAG->computeKnownBits(LHS, LHSZero, LHSOne, 0);
  APInt RHSZero, RHSOne;
  CurDAG->computeKnownBits(RHS, RHSZero, RHSOne, 0);
  return (~LHSZero & ~RHSZero) == 0;
}]>;

// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.

1297

// Try this before selecting to OR.

1298

let AddedComplexity = 5, SchedRW = [WriteALU] in {

1299

1300

let isConvertibleToThreeAddress = 1,

1301

Constraints = "$src1 = $dst", Defs = [EFLAGS] in {

1302

let isCommutable = 1 in {

1303

def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),

1304

"", // orw/addw REG, REG

1305

[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;

1306

def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),

1307

"", // orl/addl REG, REG

1308

[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;

1309

def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),

1310

"", // orq/addq REG, REG

1311

[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;

1312

} // isCommutable

1313

1314

// NOTE: These are order specific, we want the ri8 forms to be listed

1315

// first so that they are slightly preferred to the ri forms.

1316

1317

def ADD16ri8_DB : I<0, Pseudo,

1318

(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),

1319

"", // orw/addw REG, imm8

1320

[(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;

1321

def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),

1322

"", // orw/addw REG, imm

1323

[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;

1324

1325

def ADD32ri8_DB : I<0, Pseudo,

1326

(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),

1327

"", // orl/addl REG, imm8

1328

[(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;

1329

def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),

1330

"", // orl/addl REG, imm

1331

[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;

1332

1333

1334

def ADD64ri8_DB : I<0, Pseudo,

1335

(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),

1336

"", // orq/addq REG, imm8

1337

[(set GR64:$dst, (or_is_add GR64:$src1,

1338

i64immSExt8:$src2))]>;

1339

def ADD64ri32_DB : I<0, Pseudo,

1340

(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),

1341

"", // orq/addq REG, imm

1342

[(set GR64:$dst, (or_is_add GR64:$src1,

1343

i64immSExt32:$src2))]>;

1344

}

1345

} // AddedComplexity, SchedRW

1346

1347

1348

//===----------------------------------------------------------------------===//

1349

// Some peepholes

1350

//===----------------------------------------------------------------------===//

1351

1352

// Odd encoding trick: -128 fits into an 8-bit immediate field while

1353

// +128 doesn't, so in this special case use a sub instead of an add.

1354

def : Pat<(add GR16:$src1, 128),

1355

(SUB16ri8 GR16:$src1, -128)>;

1356

def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),

1357

(SUB16mi8 addr:$dst, -128)>;

1358

1359

def : Pat<(add GR32:$src1, 128),

1360

(SUB32ri8 GR32:$src1, -128)>;

1361

def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),

1362

(SUB32mi8 addr:$dst, -128)>;

1363

1364

def : Pat<(add GR64:$src1, 128),

1365

(SUB64ri8 GR64:$src1, -128)>;

1366

def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),

1367

(SUB64mi8 addr:$dst, -128)>;

1368

1369

// The same trick applies for 32-bit immediate fields in 64-bit

1370

// instructions.

1371

def : Pat<(add GR64:$src1, 0x0000000080000000),

1372

(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;

1373

// Memory (load-add-store) form of the 64-bit immediate trick: adding
// 0x80000000 (+2^31) is selected as a subtract of 0xffffffff80000000 (-2^31).
// The matched constant must be exactly 0x0000000080000000 (16 hex digits), the
// same value the register form of this peephole matches.
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;

1375

1376

// To avoid needing to materialize an immediate in a register, use a 32-bit and

1377

// with implicit zero-extension instead of a 64-bit and if the immediate has at

1378

// least 32 bits of leading zeros. If in addition the last 32 bits can be

1379

// represented with a sign extension of an 8-bit constant, use that.

Craig Topper

3d44178

2015-04-04 02:31:43 +0000

[diff] [blame]

1380

// This can also reduce instruction size by eliminating the need for the REX

1381

// prefix.

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1382

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1383

// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.

1384

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1385

def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri8

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1390

(i32 (GetLo8XForm imm:$imm))),

1391

sub_32bit)>;

1392

1393

def : Pat<(and GR64:$src, i64immZExt32:$imm),

(SUBREG_TO_REG

(i64 0),

(AND32ri

(EXTRACT_SUBREG GR64:$src, sub_32bit),

1398

(i32 (GetLo32XForm imm:$imm))),

1399

sub_32bit)>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1400

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1401

1402

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1403

// AddedComplexity is needed due to the increased complexity on the

1404

// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all

1405

// the MOVZX patterns keeps them together in DAGIsel tables.

1406

let AddedComplexity = 1 in {

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1407

// r & (2^16-1) ==> movz

1408

def : Pat<(and GR32:$src1, 0xffff),

1409

(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;

1410

// r & (2^8-1) ==> movz

1411

def : Pat<(and GR32:$src1, 0xff),

1412

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,

1413

GR32_ABCD)),

1414

sub_8bit))>,

1415

Requires<[Not64BitMode]>;

1416

// r & (2^8-1) ==> movz

1417

def : Pat<(and GR16:$src1, 0xff),

1418

(EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG

1419

(i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),

1420

sub_16bit)>,

1421

Requires<[Not64BitMode]>;

1422

1423

// r & (2^32-1) ==> movz

1424

def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),

1425

(SUBREG_TO_REG (i64 0),

1426

(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),

1427

sub_32bit)>;

1428

// r & (2^16-1) ==> movz

1429

def : Pat<(and GR64:$src, 0xffff),

1430

(SUBREG_TO_REG (i64 0),

1431

(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),

1432

sub_32bit)>;

1433

// r & (2^8-1) ==> movz

1434

def : Pat<(and GR64:$src, 0xff),

1435

(SUBREG_TO_REG (i64 0),

1436

(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),

1437

sub_32bit)>;

1438

// r & (2^8-1) ==> movz

1439

def : Pat<(and GR32:$src1, 0xff),

1440

(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,

1441

Requires<[In64BitMode]>;

1442

// r & (2^8-1) ==> movz

1443

def : Pat<(and GR16:$src1, 0xff),

1444

(EXTRACT_SUBREG (MOVZX32rr8 (i8

1445

(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,

1446

Requires<[In64BitMode]>;

Craig Topper

2015-04-04 04:22:12 +0000

[diff] [blame]

1447

} // AddedComplexity = 1

Michael Kuperstein

2015-02-01 16:15:07 +0000

[diff] [blame]

1448

1449

1450

// sext_inreg patterns

1451

def : Pat<(sext_inreg GR32:$src, i16),

1452

(MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;

1453

def : Pat<(sext_inreg GR32:$src, i8),

1454

(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1455

GR32_ABCD)),

1456

sub_8bit))>,

1457

Requires<[Not64BitMode]>;

1458

1459

// 16-bit sext_inreg from i8 outside 64-bit mode: copy the source into the
// GR16_ABCD register class, sign-extend its sub_8bit subregister with
// MOVSX32rr8, and take the sub_16bit subregister of the 32-bit result.
// The register-class copy is typed i16, matching $src (a GR16 value) and the
// other GR16_ABCD copies in this file.
def : Pat<(sext_inreg GR16:$src, i8),
          (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
           (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
             sub_16bit)>,
      Requires<[Not64BitMode]>;

1464

1465

def : Pat<(sext_inreg GR64:$src, i32),

1466

(MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;

1467

def : Pat<(sext_inreg GR64:$src, i16),

1468

(MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;

1469

def : Pat<(sext_inreg GR64:$src, i8),

1470

(MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;

1471

def : Pat<(sext_inreg GR32:$src, i8),

1472

(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,

1473

Requires<[In64BitMode]>;

1474

def : Pat<(sext_inreg GR16:$src, i8),

1475

(EXTRACT_SUBREG (MOVSX32rr8

1476

(EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,

1477

Requires<[In64BitMode]>;

1478

1479

// sext, sext_load, zext, zext_load

1480

def: Pat<(i16 (sext GR8:$src)),

1481

(EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;

1482

def: Pat<(sextloadi16i8 addr:$src),

1483

(EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;

1484

def: Pat<(i16 (zext GR8:$src)),

1485

(EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;

1486

def: Pat<(zextloadi16i8 addr:$src),

1487

(EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;

1488

1489

// trunc patterns

1490

def : Pat<(i16 (trunc GR32:$src)),

1491

(EXTRACT_SUBREG GR32:$src, sub_16bit)>;

1492

def : Pat<(i8 (trunc GR32:$src)),

1493

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1494

sub_8bit)>,

1495

Requires<[Not64BitMode]>;

1496

def : Pat<(i8 (trunc GR16:$src)),

1497

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1498

sub_8bit)>,

1499

Requires<[Not64BitMode]>;

1500

def : Pat<(i32 (trunc GR64:$src)),

1501

(EXTRACT_SUBREG GR64:$src, sub_32bit)>;

1502

def : Pat<(i16 (trunc GR64:$src)),

1503

(EXTRACT_SUBREG GR64:$src, sub_16bit)>;

1504

def : Pat<(i8 (trunc GR64:$src)),

1505

(EXTRACT_SUBREG GR64:$src, sub_8bit)>;

1506

def : Pat<(i8 (trunc GR32:$src)),

1507

(EXTRACT_SUBREG GR32:$src, sub_8bit)>,

1508

Requires<[In64BitMode]>;

1509

def : Pat<(i8 (trunc GR16:$src)),

1510

(EXTRACT_SUBREG GR16:$src, sub_8bit)>,

1511

Requires<[In64BitMode]>;

1512

1513

// h-register tricks

1514

def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),

1515

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1516

sub_8bit_hi)>,

1517

Requires<[Not64BitMode]>;

1518

def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),

1519

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1520

sub_8bit_hi)>,

1521

Requires<[Not64BitMode]>;

1522

def : Pat<(srl GR16:$src, (i8 8)),

1523

(EXTRACT_SUBREG

1524

(MOVZX32rr8

1525

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1526

sub_8bit_hi)),

1527

sub_16bit)>,

1528

Requires<[Not64BitMode]>;

1529

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1530

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1531

GR16_ABCD)),

1532

sub_8bit_hi))>,

1533

Requires<[Not64BitMode]>;

1534

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1535

(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,

1536

GR16_ABCD)),

1537

sub_8bit_hi))>,

1538

Requires<[Not64BitMode]>;

1539

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1540

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1541

GR32_ABCD)),

1542

sub_8bit_hi))>,

1543

Requires<[Not64BitMode]>;

1544

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1545

(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1546

GR32_ABCD)),

1547

sub_8bit_hi))>,

1548

Requires<[Not64BitMode]>;

1549

1550

// h-register tricks.

1551

// For now, be conservative on x86-64 and use an h-register extract only if the

1552

// value is immediately zero-extended or stored, which are somewhat common

1553

// cases. This uses a bunch of code to prevent a register requiring a REX prefix

1554

// from being allocated in the same instruction as the h register, as there's

1555

// currently no way to describe this requirement to the register allocator.

1556

1557

// h-register extract and zero-extend.

1558

def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1563

sub_8bit_hi)),

1564

sub_32bit)>;

1565

def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),

1566

(MOVZX32_NOREXrr8

1567

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1568

sub_8bit_hi))>,

1569

Requires<[In64BitMode]>;

1570

def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),

1571

(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,

1572

GR32_ABCD)),

1573

sub_8bit_hi))>,

1574

Requires<[In64BitMode]>;

1575

def : Pat<(srl GR16:$src, (i8 8)),

1576

(EXTRACT_SUBREG

1577

(MOVZX32_NOREXrr8

1578

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1579

sub_8bit_hi)),

1580

sub_16bit)>,

1581

Requires<[In64BitMode]>;

1582

def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),

1583

(MOVZX32_NOREXrr8

1584

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1585

sub_8bit_hi))>,

1586

Requires<[In64BitMode]>;

1587

def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),

1588

(MOVZX32_NOREXrr8

1589

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1590

sub_8bit_hi))>,

1591

Requires<[In64BitMode]>;

1592

def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1597

sub_8bit_hi)),

1598

sub_32bit)>;

1599

def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),

(SUBREG_TO_REG

(i64 0),

(MOVZX32_NOREXrr8

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

sub_8bit_hi)),

sub_32bit)>;

// h-register extract and store.

1608

def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),

1609

(MOV8mr_NOREX

1610

addr:$dst,

1611

(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),

1612

sub_8bit_hi))>;

1613

def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),

1614

(MOV8mr_NOREX

1615

addr:$dst,

1616

(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),

1617

sub_8bit_hi))>,

1618

Requires<[In64BitMode]>;

1619

def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),

1620

(MOV8mr_NOREX

1621

addr:$dst,

1622

(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),

1623

sub_8bit_hi))>,

1624

Requires<[In64BitMode]>;

1625

1626

1627

// (shl x, 1) ==> (add x, x)

1628

// Note that if x is undef (immediate or otherwise), we could theoretically

1629

// end up with the two uses of x getting different values, producing a result

1630

// where the least significant bit is not 0. However, the probability of this

1631

// happening is considered low enough that this is officially not a

1632

// "real problem".

1633

def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;

1634

def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;

1635

def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;

1636

def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

1637

1638

// Helper imms that check if a mask doesn't change significant shift bits.

Benjamin Kramer

5f6a907

2015-02-12 15:35:40 +0000

[diff] [blame]

1639

// Matches an i8 immediate whose five low-order bits are all ones.
def immShift32 : ImmLeaf<i8, [{
  return (Imm & 0x1f) == 0x1f;
}]>;

1642

// Matches an i8 immediate whose six low-order bits are all ones.
def immShift64 : ImmLeaf<i8, [{
  return (Imm & 0x3f) == 0x3f;
}]>;

Michael Kuperstein