; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC

; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not two. More precisely, it makes sure that
; the immediate is not first uselessly copied into a register.

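; For instance (an illustrative sketch, not one of the FileCheck patterns,
; assuming the SysV x86-64 ABI so that %p arrives in %rdi), the store in
; @store_atomic_imm_32 below should become the single instruction
;   movl $42, (%rdi)
; rather than the two-instruction sequence
;   movl $42, %eax
;   movl %eax, (%rdi)
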
; Similarly, it checks that a binary operation of an immediate with an atomic
; variable that is stored back into that variable is done as a single
; instruction. For example,
;   x.store(42 + x.load(memory_order_acquire), memory_order_release)
; should be just an add instruction, instead of loading x into a register,
; doing the add, and storing the result back.
; The binary operations currently supported are add, and, or, xor.
; sub is not supported because it is translated into an addition of the
; negated immediate.
; Finally, we also check the same kind of pattern for inc/dec.

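; For instance (again an illustrative sketch under the same ABI assumption),
; @add_32 below should compile to the single read-modify-write instruction
;   addl $2, (%rdi)
; instead of
;   movl (%rdi), %eax
;   addl $2, %eax
;   movl %eax, (%rdi)
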
; seq_cst stores are left as (implicitly locked) xchgl, but we try to exercise
; every other memory ordering at least once.

; Please note that these operations do not require the lock prefix: only
; sequentially consistent stores need that kind of protection on X86.
; And even for seq_cst operations, LLVM uses the xchg instruction, which has
; an implicit lock prefix, so making it explicit is not required.

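; For instance (sketch only, assuming %p in %rdi), the seq_cst store in
; @store_atomic_imm_32_seq_cst below is expected to lower to something like
;   movl $42, %eax
;   xchgl %eax, (%rdi)
; where the memory-operand xchg is implicitly locked.
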
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8
; X64: movb
; X64-NOT: movb
; X32-LABEL: store_atomic_imm_8
; X32: movb
; X32-NOT: movb
  store atomic i8 42, i8* %p release, align 1
  ret void
}

define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16
; X64: movw
; X64-NOT: movw
; X32-LABEL: store_atomic_imm_16
; X32: movw
; X32-NOT: movw
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}

define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32
; X64: movl
; X64-NOT: movl
; On 32 bits, there is an extra movl in each of these functions, needed to
; load the pointer argument from the stack.
; X32-LABEL: store_atomic_imm_32
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
  store atomic i32 42, i32* %p release, align 4
  ret void
}

define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32-bit architectures, and thus
; cannot be optimized in the same way as the others.
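; (Illustrative sketch only, not a FileCheck pattern, with register choices
; that are merely an assumption: the 32-bit lowering is roughly a loop such as
;   movl (%esi), %eax
;   movl 4(%esi), %edx
; .retry:
;   lock cmpxchg8b (%esi)      # new value (42) held in %ecx:%ebx
;   jne .retry
; which retries until the 8-byte compare-and-swap succeeds.)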
; X32-LABEL: store_atomic_imm_64
; X32: cmpxchg8b
  store atomic i64 42, i64* %p release, align 8
  ret void
}

; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov:
; even on X64, one must use movabsq, which can only target a register.
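; For example (illustrative sketch, assuming %p in %rdi and %rax as the
; scratch register):
;   movabsq $100000000000, %rax
;   movq %rax, (%rdi)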
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big
; X64: movabsq
; X64: movq
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}

; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst
; X64: xchgl
; X32-LABEL: store_atomic_imm_32_seq_cst
; X32: xchgl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}

; ----- ADD -----

define void @add_8(i8* %p) {
; X64-LABEL: add_8
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @add_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16
; X64-NOT: addw
; X32-LABEL: add_16
; X32-NOT: addw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @add_32(i32* %p) {
; X64-LABEL: add_32
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @add_64(i64* %p) {
; X64-LABEL: add_64
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @add_32_seq_cst(i32* %p) {
; X64-LABEL: add_32_seq_cst
; X64: xchgl
; X32-LABEL: add_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- AND -----

define void @and_8(i8* %p) {
; X64-LABEL: and_8
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @and_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16
; X64-NOT: andw
; X32-LABEL: and_16
; X32-NOT: andw
  %1 = load atomic i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @and_32(i32* %p) {
; X64-LABEL: and_32
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @and_64(i64* %p) {
; X64-LABEL: and_64
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @and_32_seq_cst(i32* %p) {
; X64-LABEL: and_32_seq_cst
; X64: xchgl
; X32-LABEL: and_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- OR -----

define void @or_8(i8* %p) {
; X64-LABEL: or_8
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @or_16(i16* %p) {
; X64-LABEL: or_16
; X64-NOT: orw
; X32-LABEL: or_16
; X32-NOT: orw
  %1 = load atomic i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @or_32(i32* %p) {
; X64-LABEL: or_32
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @or_64(i64* %p) {
; X64-LABEL: or_64
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @or_32_seq_cst(i32* %p) {
; X64-LABEL: or_32_seq_cst
; X64: xchgl
; X32-LABEL: or_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- XOR -----

define void @xor_8(i8* %p) {
; X64-LABEL: xor_8
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @xor_16(i16* %p) {
; X64-LABEL: xor_16
; X64-NOT: xorw
; X32-LABEL: xor_16
; X32-NOT: xorw
  %1 = load atomic i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @xor_32(i32* %p) {
; X64-LABEL: xor_32
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}

define void @xor_64(i64* %p) {
; X64-LABEL: xor_64
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @xor_32_seq_cst(i32* %p) {
; X64-LABEL: xor_32_seq_cst
; X64: xchgl
; X32-LABEL: xor_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- INC -----

define void @inc_8(i8* %p) {
; X64-LABEL: inc_8
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
; X32-LABEL: inc_8
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
; SLOW_INC-LABEL: inc_8
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: inc_16
; X64-NOT: incw
; X32-LABEL: inc_16
; X32-NOT: incw
; SLOW_INC-LABEL: inc_16
; SLOW_INC-NOT: incw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @inc_32(i32* %p) {
; X64-LABEL: inc_32
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
; X32-LABEL: inc_32
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
; SLOW_INC-LABEL: inc_32
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @inc_64(i64* %p) {
; X64-LABEL: inc_64
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
; X32-LABEL: inc_64
; SLOW_INC-LABEL: inc_64
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @inc_32_seq_cst(i32* %p) {
; X64-LABEL: inc_32_seq_cst
; X64: xchgl
; X32-LABEL: inc_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}

; ----- DEC -----

define void @dec_8(i8* %p) {
; X64-LABEL: dec_8
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
; X32-LABEL: dec_8
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
; SLOW_INC-LABEL: dec_8
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}

define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: dec_16
; X64-NOT: decw
; X32-LABEL: dec_16
; X32-NOT: decw
; SLOW_INC-LABEL: dec_16
; SLOW_INC-NOT: decw
  %1 = load atomic i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}

define void @dec_32(i32* %p) {
; X64-LABEL: dec_32
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
; X32-LABEL: dec_32
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
; SLOW_INC-LABEL: dec_32
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}

define void @dec_64(i64* %p) {
; X64-LABEL: dec_64
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
; X32-LABEL: dec_64
; SLOW_INC-LABEL: dec_64
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
  %1 = load atomic i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}

define void @dec_32_seq_cst(i32* %p) {
; X64-LABEL: dec_32_seq_cst
; X64: xchgl
; X32-LABEL: dec_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}