; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC

; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not two. More precisely, it makes sure that
; the immediate is not first uselessly copied into a register.

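; For instance (an illustrative sketch, not one of the FileCheck patterns,
; assuming the SysV x86-64 ABI so that %p arrives in %rdi), the store in
; @store_atomic_imm_32 below should become the single instruction
;   movl $42, (%rdi)
; rather than the two-instruction sequence
;   movl $42, %eax
;   movl %eax, (%rdi)
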
; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back into that variable is done as a single
; instruction. For example,
;   x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register,
; doing the add, and storing the result back.
; The binary operations currently supported are add, and, or, xor.
; sub is not supported because it is translated into an addition of the
; negated immediate.
; Finally, we also check the same kind of pattern for inc/dec.

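; For instance (again an illustrative sketch under the same ABI assumption),
; @add_32 below should compile to the single read-modify-write instruction
;   addl $2, (%rdi)
; instead of
;   movl (%rdi), %eax
;   addl $2, %eax
;   movl %eax, (%rdi)
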
; seq_cst stores are left as (implicitly locked) xchgl, but we try to exercise
; every other memory ordering at least once.

; Please note that these operations do not require the lock prefix: only
; sequentially consistent stores need that kind of protection on X86.
; And even for seq_cst operations, LLVM uses the xchg instruction, which has
; an implicit lock prefix, so making it explicit is not required.

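; For instance (sketch only, assuming %p in %rdi), the seq_cst store in
; @store_atomic_imm_32_seq_cst below is expected to lower to something like
;   movl $42, %eax
;   xchgl %eax, (%rdi)
; where the memory-operand xchg is implicitly locked.
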
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8
; X64: movb
; X64-NOT: movb
; X32-LABEL: store_atomic_imm_8
; X32: movb
; X32-NOT: movb
  store atomic i8 42, i8* %p release, align 1
  ret void
}

define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16
; X64: movw
; X64-NOT: movw
; X32-LABEL: store_atomic_imm_16
; X32: movw
; X32-NOT: movw
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}

define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32
; X64: movl
; X64-NOT: movl
; On 32 bits, there is an extra movl in each of these functions, needed to
; load the pointer argument from the stack.
; X32-LABEL: store_atomic_imm_32
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
  store atomic i32 42, i32* %p release, align 4
  ret void
}

define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32-bit architectures, and thus
; cannot be optimized in the same way as the others.
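; (Illustrative sketch only, not a FileCheck pattern, with register choices
; that are merely an assumption: the 32-bit lowering is roughly a loop such as
;   movl (%esi), %eax
;   movl 4(%esi), %edx
; .retry:
;   lock cmpxchg8b (%esi)      # new value (42) held in %ecx:%ebx
;   jne .retry
; which retries until the 8-byte compare-and-swap succeeds.)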
; X32-LABEL: store_atomic_imm_64
; X32: cmpxchg8b
  store atomic i64 42, i64* %p release, align 8
  ret void
}

; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov:
; even on X64, one must use movabsq, which can only target a register.
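; For example (illustrative sketch, assuming %p in %rdi and %rax as the
; scratch register):
;   movabsq $100000000000, %rax
;   movq %rax, (%rdi)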
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big
; X64: movabsq
; X64: movq
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}

; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst
; X64: xchgl
; X32-LABEL: store_atomic_imm_32_seq_cst
; X32: xchgl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}

; ----- ADD -----

define void @add_8(i8* %p) {
; X64-LABEL: add_8
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16
; X64-NOT: addw
; X32-LABEL: add_16
; X32-NOT: addw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_32(i32* %p) {
; X64-LABEL: add_32
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @add_64(i64* %p) {
; X64-LABEL: add_64
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_32_seq_cst(i32* %p) {
; X64-LABEL: add_32_seq_cst
; X64: xchgl
; X32-LABEL: add_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- AND -----

define void @and_8(i8* %p) {
; X64-LABEL: and_8
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16
; X64-NOT: andw
; X32-LABEL: and_16
; X32-NOT: andw
  %1 = load atomic i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_32(i32* %p) {
; X64-LABEL: and_32
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_64(i64* %p) {
; X64-LABEL: and_64
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_32_seq_cst(i32* %p) {
; X64-LABEL: and_32_seq_cst
; X64: xchgl
; X32-LABEL: and_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- OR -----

define void @or_8(i8* %p) {
; X64-LABEL: or_8
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_16(i16* %p) {
; X64-LABEL: or_16
; X64-NOT: orw
; X32-LABEL: or_16
; X32-NOT: orw
  %1 = load atomic i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_32(i32* %p) {
; X64-LABEL: or_32
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_64(i64* %p) {
; X64-LABEL: or_64
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_32_seq_cst(i32* %p) {
; X64-LABEL: or_32_seq_cst
; X64: xchgl
; X32-LABEL: or_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- XOR -----

define void @xor_8(i8* %p) {
; X64-LABEL: xor_8
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_16(i16* %p) {
; X64-LABEL: xor_16
; X64-NOT: xorw
; X32-LABEL: xor_16
; X32-NOT: xorw
  %1 = load atomic i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_32(i32* %p) {
; X64-LABEL: xor_32
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_64(i64* %p) {
; X64-LABEL: xor_64
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_32_seq_cst(i32* %p) {
; X64-LABEL: xor_32_seq_cst
; X64: xchgl
; X32-LABEL: xor_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- INC -----

define void @inc_8(i8* %p) {
; X64-LABEL: inc_8
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
; X32-LABEL: inc_8
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
; SLOW_INC-LABEL: inc_8
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: inc_16
; X64-NOT: incw
; X32-LABEL: inc_16
; X32-NOT: incw
; SLOW_INC-LABEL: inc_16
; SLOW_INC-NOT: incw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @inc_32(i32* %p) {
; X64-LABEL: inc_32
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
; X32-LABEL: inc_32
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
; SLOW_INC-LABEL: inc_32
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @inc_64(i64* %p) {
; X64-LABEL: inc_64
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
; X32-LABEL: inc_64
; SLOW_INC-LABEL: inc_64
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @inc_32_seq_cst(i32* %p) {
; X64-LABEL: inc_32_seq_cst
; X64: xchgl
; X32-LABEL: inc_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- DEC -----

define void @dec_8(i8* %p) {
; X64-LABEL: dec_8
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
; X32-LABEL: dec_8
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
; SLOW_INC-LABEL: dec_8
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: dec_16
; X64-NOT: decw
; X32-LABEL: dec_16
; X32-NOT: decw
; SLOW_INC-LABEL: dec_16
; SLOW_INC-NOT: decw
  %1 = load atomic i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @dec_32(i32* %p) {
; X64-LABEL: dec_32
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
; X32-LABEL: dec_32
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
; SLOW_INC-LABEL: dec_32
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @dec_64(i64* %p) {
; X64-LABEL: dec_64
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
; X32-LABEL: dec_64
; SLOW_INC-LABEL: dec_64
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
  %1 = load atomic i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @dec_32_seq_cst(i32* %p) {
; X64-LABEL: dec_32_seq_cst
; X64: xchgl
; X32-LABEL: dec_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}