; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC

; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not 2. More precisely, it makes sure that the
; immediate is not first copied uselessly into a register.

; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back in that variable is done as a single instruction.
; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register, doing
; an add and storing the result back.
; The binary operations currently supported are add, and, or, and xor.
; sub is not supported because it is translated into an addition of the
; negated immediate (a sketch of this follows the comment block below).
;
; We also check the same patterns:
; - For inc/dec.
; - For register instead of immediate operands.
; - For floating point operations.

; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.

; Note that these operations do not require the lock prefix: only
; sequentially consistent stores require this kind of protection on X86.
; Even for seq_cst operations, LLVM uses the xchg instruction, which has
; an implicit lock prefix, so making it explicit is not required.

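; A minimal sketch of the sub-to-add canonicalization mentioned above. This
; function is illustrative only: it is not exercised by any CHECK lines, and
; the lowering named in the comments is the expected outcome under that
; canonicalization, not a verified check.
define void @sub_32i_sketch(i32* %p) {
  ; 'sub i32 %1, 5' is canonicalized to 'add i32 %1, -5', so the add-folding
  ; patterns tested below should ideally yield a single 'addl $-5, (%mem)'.
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, 5
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}
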
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8:
; X64: movb
; X64-NOT: movb
; X32-LABEL: store_atomic_imm_8:
; X32: movb
; X32-NOT: movb
  store atomic i8 42, i8* %p release, align 1
  ret void
}

define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16:
; X64: movw
; X64-NOT: movw
; X32-LABEL: store_atomic_imm_16:
; X32: movw
; X32-NOT: movw
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}

define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32:
; X64: movl
; X64-NOT: movl
; On 32-bit targets, there is an extra movl in each of these functions,
; which loads the pointer argument from the stack.
; X32-LABEL: store_atomic_imm_32:
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
  store atomic i32 42, i32* %p release, align 4
  ret void
}

define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64:
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32-bit architectures, and thus
; cannot be optimized in the same way as the others.
; X32-LABEL: store_atomic_imm_64:
; X32: cmpxchg8b
  store atomic i64 42, i64* %p release, align 8
  ret void
}

; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
; even on X64 one must use movabsq, which can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big:
; X64: movabsq
; X64: movq
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}

; It would be incorrect to replace a (lock) xchgl with a movl: a plain movl
; would drop the store-load barrier that seq_cst requires.
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64: xchgl
; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32: xchgl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}

; ----- ADD -----

define void @add_8i(i8* %p) {
; X64-LABEL: add_8i:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8i:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_8r(i8* %p, i8 %v) {
; X64-LABEL: add_8r:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8r:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16i:
; X64-NOT: addw
; X32-LABEL: add_16i:
; X32-NOT: addw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_16r(i16* %p, i16 %v) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16r:
; X64-NOT: addw
; X32-LABEL: add_16r:
; X32-NOT: addw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_32i(i32* %p) {
; X64-LABEL: add_32i:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32i:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @add_32r(i32* %p, i32 %v) {
; X64-LABEL: add_32r:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32r:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load is added to itself. The pattern
; matching should not fold this. We only test with 32-bit add, but the same
; applies to other sizes and operations.
define void @add_32r_self(i32* %p) {
; X64-LABEL: add_32r_self:
; X64-NOT: lock
; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
; X64: addl %[[R]], %[[R]]
; X64: movl %[[R]], (%[[M]])
; X32-LABEL: add_32r_self:
; X32-NOT: lock
; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
; X32: addl %[[R]], %[[R]]
; X32: movl %[[R]], (%[[M]])
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

; The following is a corner case where the load's result is returned. The
; optimizer isn't allowed to duplicate the load because it's atomic.
define i32 @add_32r_ret_load(i32* %p, i32 %v) {
; X64-LABEL: add_32r_ret_load:
; X64-NOT: lock
; X64: movl (%rdi), %eax
; X64-NEXT: addl %eax, %esi
; X64-NEXT: movl %esi, (%rdi)
; X64-NEXT: retq
; X32-LABEL: add_32r_ret_load:
; X32-NOT: lock
; X32: movl 4(%esp), %[[P:[a-z]+]]
; X32-NEXT: movl (%[[P]]),
; X32-NOT: %[[P]]
; More code here, we just don't want it to load from P.
; X32: movl %{{.*}}, (%[[P]])
; X32-NEXT: retl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p monotonic, align 4
  ret i32 %1
}

define void @add_64i(i64* %p) {
; X64-LABEL: add_64i:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_64r(i64* %p, i64 %v) {
; X64-LABEL: add_64r:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_32i_seq_cst(i32* %p) {
; X64-LABEL: add_32i_seq_cst:
; X64: xchgl
; X32-LABEL: add_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @add_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: add_32r_seq_cst:
; X64: xchgl
; X32-LABEL: add_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- AND -----

define void @and_8i(i8* %p) {
; X64-LABEL: and_8i:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8i:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_8r(i8* %p, i8 %v) {
; X64-LABEL: and_8r:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8r:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p monotonic, align 1
  %2 = and i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16i:
; X64-NOT: andw
; X32-LABEL: and_16i:
; X32-NOT: andw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_16r(i16* %p, i16 %v) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16r:
; X64-NOT: andw
; X32-LABEL: and_16r:
; X32-NOT: andw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = and i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_32i(i32* %p) {
; X64-LABEL: and_32i:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32i:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_32r(i32* %p, i32 %v) {
; X64-LABEL: and_32r:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32r:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_64i(i64* %p) {
; X64-LABEL: and_64i:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_64r(i64* %p, i64 %v) {
; X64-LABEL: and_64r:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = and i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_32i_seq_cst(i32* %p) {
; X64-LABEL: and_32i_seq_cst:
; X64: xchgl
; X32-LABEL: and_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @and_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: and_32r_seq_cst:
; X64: xchgl
; X32-LABEL: and_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = and i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- OR -----

define void @or_8i(i8* %p) {
; X64-LABEL: or_8i:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8i:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_8r(i8* %p, i8 %v) {
; X64-LABEL: or_8r:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8r:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = or i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_16i(i16* %p) {
; X64-LABEL: or_16i:
; X64-NOT: orw
; X32-LABEL: or_16i:
; X32-NOT: orw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_16r(i16* %p, i16 %v) {
; X64-LABEL: or_16r:
; X64-NOT: orw
; X32-LABEL: or_16r:
; X32-NOT: orw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = or i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_32i(i32* %p) {
; X64-LABEL: or_32i:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32i:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_32r(i32* %p, i32 %v) {
; X64-LABEL: or_32r:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32r:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_64i(i64* %p) {
; X64-LABEL: or_64i:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_64r(i64* %p, i64 %v) {
; X64-LABEL: or_64r:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = or i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_32i_seq_cst(i32* %p) {
; X64-LABEL: or_32i_seq_cst:
; X64: xchgl
; X32-LABEL: or_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @or_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: or_32r_seq_cst:
; X64: xchgl
; X32-LABEL: or_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = or i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- XOR -----

define void @xor_8i(i8* %p) {
; X64-LABEL: xor_8i:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8i:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_8r(i8* %p, i8 %v) {
; X64-LABEL: xor_8r:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8r:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8, i8* %p acquire, align 1
  %2 = xor i8 %1, %v
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_16i(i16* %p) {
; X64-LABEL: xor_16i:
; X64-NOT: xorw
; X32-LABEL: xor_16i:
; X32-NOT: xorw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_16r(i16* %p, i16 %v) {
; X64-LABEL: xor_16r:
; X64-NOT: xorw
; X32-LABEL: xor_16r:
; X32-NOT: xorw [.*], (
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = xor i16 %1, %v
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_32i(i32* %p) {
; X64-LABEL: xor_32i:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32i:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_32r(i32* %p, i32 %v) {
; X64-LABEL: xor_32r:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32r:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_64i(i64* %p) {
; X64-LABEL: xor_64i:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64i:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_64r(i64* %p, i64 %v) {
; X64-LABEL: xor_64r:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64r:
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = xor i64 %1, %v
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_32i_seq_cst(i32* %p) {
; X64-LABEL: xor_32i_seq_cst:
; X64: xchgl
; X32-LABEL: xor_32i_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

define void @xor_32r_seq_cst(i32* %p, i32 %v) {
; X64-LABEL: xor_32r_seq_cst:
; X64: xchgl
; X32-LABEL: xor_32r_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = xor i32 %1, %v
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- INC -----

define void @inc_8(i8* %p) {
; X64-LABEL: inc_8:
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
; X32-LABEL: inc_8:
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
; SLOW_INC-LABEL: inc_8:
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: inc_16:
; X64-NOT: incw
; X32-LABEL: inc_16:
; X32-NOT: incw
; SLOW_INC-LABEL: inc_16:
; SLOW_INC-NOT: incw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @inc_32(i32* %p) {
; X64-LABEL: inc_32:
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
; X32-LABEL: inc_32:
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
; SLOW_INC-LABEL: inc_32:
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @inc_64(i64* %p) {
; X64-LABEL: inc_64:
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
; X32-LABEL: inc_64:
; SLOW_INC-LABEL: inc_64:
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @inc_32_seq_cst(i32* %p) {
; X64-LABEL: inc_32_seq_cst:
; X64: xchgl
; X32-LABEL: inc_32_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- DEC -----

define void @dec_8(i8* %p) {
; X64-LABEL: dec_8:
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
; X32-LABEL: dec_8:
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
; SLOW_INC-LABEL: dec_8:
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
  %1 = load atomic i8, i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: dec_16:
; X64-NOT: decw
; X32-LABEL: dec_16:
; X32-NOT: decw
; SLOW_INC-LABEL: dec_16:
; SLOW_INC-NOT: decw
  %1 = load atomic i16, i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @dec_32(i32* %p) {
; X64-LABEL: dec_32:
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
; X32-LABEL: dec_32:
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
; SLOW_INC-LABEL: dec_32:
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
  %1 = load atomic i32, i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @dec_64(i64* %p) {
; X64-LABEL: dec_64:
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
; X32-LABEL: dec_64:
; SLOW_INC-LABEL: dec_64:
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
  %1 = load atomic i64, i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @dec_32_seq_cst(i32* %p) {
; X64-LABEL: dec_32_seq_cst:
; X64: xchgl
; X32-LABEL: dec_32_seq_cst:
; X32: xchgl
  %1 = load atomic i32, i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- FADD -----

define void @fadd_32r(float* %loc, float %val) {
; X64-LABEL: fadd_32r:
; X64-NOT: lock
; X64-NOT: mov
; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
; X64-NEXT: movss %[[XMM]], (%[[M]])
; X32-LABEL: fadd_32r:
; Don't check x86-32.
; LLVM's SSE handling is conservative on x86-32 even without using atomics.
  %floc = bitcast float* %loc to i32*
  %1 = load atomic i32, i32* %floc seq_cst, align 4
  %2 = bitcast i32 %1 to float
  %add = fadd float %2, %val
  %3 = bitcast float %add to i32
  store atomic i32 %3, i32* %floc release, align 4
  ret void
}

define void @fadd_64r(double* %loc, double %val) {
; X64-LABEL: fadd_64r:
; X64-NOT: lock
; X64-NOT: mov
; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
; X64-NEXT: movsd %[[XMM]], (%[[M]])
; X32-LABEL: fadd_64r:
; Don't check x86-32 (see comment above).
  %floc = bitcast double* %loc to i64*
  %1 = load atomic i64, i64* %floc seq_cst, align 8
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, i64* %floc release, align 8
  ret void
}

@glob32 = global float 0.000000e+00, align 4
@glob64 = global double 0.000000e+00, align 8

; Floating-point add to a global using an immediate.
define void @fadd_32g() {
; X64-LABEL: fadd_32g:
; X64-NOT: lock
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss glob32(%rip), %[[XMM]]
; X64-NEXT: movss %[[XMM]], glob32(%rip)
; X32-LABEL: fadd_32g:
; Don't check x86-32 (see comment above).
  %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
  ret void
}

define void @fadd_64g() {
; X64-LABEL: fadd_64g:
; X64-NOT: lock
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd glob64(%rip), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], glob64(%rip)
; X32-LABEL: fadd_64g:
; Don't check x86-32 (see comment above).
  %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a hard-coded constant address, using an immediate operand.
define void @fadd_32imm() {
; X64-LABEL: fadd_32imm:
; X64-NOT: lock
; X64: movl $3735928559, %e[[M:[a-z]+]]
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss (%r[[M]]), %[[XMM]]
; X64-NEXT: movss %[[XMM]], (%r[[M]])
; X32-LABEL: fadd_32imm:
; Don't check x86-32 (see comment above).
  %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
  ret void
}

define void @fadd_64imm() {
; X64-LABEL: fadd_64imm:
; X64-NOT: lock
; X64: movl $3735928559, %e[[M:[a-z]+]]
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd (%r[[M]]), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], (%r[[M]])
; X32-LABEL: fadd_64imm:
; Don't check x86-32 (see comment above).
  %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
  ret void
}

; Floating-point add to a stack location.
define void @fadd_32stack() {
; X64-LABEL: fadd_32stack:
; X64-NOT: lock
; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addss [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
; X64-NEXT: movss %[[XMM]], [[STACKOFF]](%rsp)
; X32-LABEL: fadd_32stack:
; Don't check x86-32 (see comment above).
  %ptr = alloca i32, align 4
  %bc3 = bitcast i32* %ptr to float*
  %load = load atomic i32, i32* %ptr acquire, align 4
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, i32* %ptr release, align 4
  ret void
}

define void @fadd_64stack() {
; X64-LABEL: fadd_64stack:
; X64-NOT: lock
; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
; X64-NEXT: addsd [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
; X64-NEXT: movsd %[[XMM]], [[STACKOFF]](%rsp)
; X32-LABEL: fadd_64stack:
; Don't check x86-32 (see comment above).
  %ptr = alloca i64, align 8
  %bc3 = bitcast i64* %ptr to double*
  %load = load atomic i64, i64* %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, i64* %ptr release, align 8
  ret void
}

define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
; X64-LABEL: fadd_array:
; X64-NOT: lock
; X64: addsd ([[ADDR:%r..,%r..,8]]), %[[XMM:xmm[0-9]+]]
; X64-NEXT: movsd %[[XMM]], ([[ADDR]])
; X32-LABEL: fadd_array:
; Don't check x86-32 (see comment above).
bb:
  %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
  %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8
  ret void
}