Blame - llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll - toolchain/llvm-project

blob: 41240344493ab35cd086148da91aaefc99ebe031 [file] [log] [blame]

Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7LESS %s
Valery Pykhtin	e1c338e	2019-02-11 16:28:42 +0000	[diff] [blame]	2	; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
				3	; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	4
				5	declare i32 @llvm.amdgcn.workitem.id.x()
				6	declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1)
				7	declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1)
				8
				9	; Show what the atomic optimization pass will do for raw buffers.
				10
				11	; GCN-LABEL: add_i32_constant:
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	12	; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	13	; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
				14	; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
				15	; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
				16	; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
				17	; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]], 5
				18	; GCN: buffer_atomic_add v[[value]]
				19	define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic add whose data operand is a literal constant
				20	entry:
				21	%old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 5, <4 x i32> %inout, i32 0, i32 0, i1 0) ; data operand is the literal 5 (the same 5 the expected multiply above uses); remaining scalar operands are all zero
				22	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				23	ret void
				24	}
				25
				26	; GCN-LABEL: add_i32_uniform:
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	27	; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	28	; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
				29	; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
				30	; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
				31	; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
				32	; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
				33	; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
				34	; GCN: buffer_atomic_add v[[value]]
				35	define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %inout, i32 %additive) { ; test kernel, buffer atomic add whose data operand is the kernel argument %additive
				36	entry:
				37	%old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %additive, <4 x i32> %inout, i32 0, i32 0, i1 0) ; data operand comes straight from the kernel argument (the 'uniform' case in the test name); remaining scalar operands are all zero
				38	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				39	ret void
				40	}
				41
				42	; GCN-LABEL: add_i32_varying_vdata:
				43	; GFX7LESS-NOT: v_mbcnt_lo_u32_b32
				44	; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
				45	; GFX7LESS-NOT: s_bcnt1_i32_b64
				46	; GFX7LESS: buffer_atomic_add v{{[0-9]+}}
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	47	; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
				48	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
				49	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	50	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:4 row_mask:0xf bank_mask:0xe
				51	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
				52	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
				53	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	54	; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
				55	; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
				56	; GFX8MORE: buffer_atomic_add v[[value]]
				57	define amdgpu_kernel void @add_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic add whose data operand is the workitem id
				58	entry:
				59	%lane = call i32 @llvm.amdgcn.workitem.id.x() ; %lane is the result of the workitem.id.x intrinsic
				60	%old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %lane, <4 x i32> %inout, i32 0, i32 0, i1 0) ; %lane is passed as the data operand (the 'varying vdata' case in the test name); remaining scalar operands are all zero
				61	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				62	ret void
				63	}
				64
				65	; GCN-LABEL: add_i32_varying_offset:
				66	; GCN-NOT: v_mbcnt_lo_u32_b32
				67	; GCN-NOT: v_mbcnt_hi_u32_b32
				68	; GCN-NOT: s_bcnt1_i32_b64
				69	; GCN: buffer_atomic_add v{{[0-9]+}}
				70	define amdgpu_kernel void @add_i32_varying_offset(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic add with a constant data operand and a workitem-id-dependent addressing operand
				71	entry:
				72	%lane = call i32 @llvm.amdgcn.workitem.id.x() ; %lane is the result of the workitem.id.x intrinsic
				73	%old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 1, <4 x i32> %inout, i32 %lane, i32 0, i1 0) ; data operand is the literal 1; %lane is passed as the third operand. NOTE(review) the test name calls this the 'offset' - confirm which intrinsic operand that position is
				74	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				75	ret void
				76	}
				77
				78	; GCN-LABEL: sub_i32_constant:
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	79	; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	80	; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
				81	; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
				82	; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
				83	; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
				84	; GCN: v_mul_u32_u24{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[popcount]], 5
				85	; GCN: buffer_atomic_sub v[[value]]
				86	define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic sub whose data operand is a literal constant
				87	entry:
				88	%old = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 5, <4 x i32> %inout, i32 0, i32 0, i1 0) ; data operand is the literal 5 (the same 5 the expected multiply above uses); remaining scalar operands are all zero
				89	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				90	ret void
				91	}
				92
				93	; GCN-LABEL: sub_i32_uniform:
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	94	; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	95	; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
				96	; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
				97	; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
				98	; GCN: s_bcnt1_i32_b64 s[[popcount:[0-9]+]], s{{\[}}[[exec_lo]]:[[exec_hi]]{{\]}}
				99	; GCN: s_mul_i32 s[[scalar_value:[0-9]+]], s{{[0-9]+}}, s[[popcount]]
				100	; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
				101	; GCN: buffer_atomic_sub v[[value]]
				102	define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %inout, i32 %subitive) { ; test kernel, buffer atomic sub whose data operand is the kernel argument %subitive
				103	entry:
				104	%old = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %subitive, <4 x i32> %inout, i32 0, i32 0, i1 0) ; data operand comes straight from the kernel argument (the 'uniform' case in the test name); remaining scalar operands are all zero
				105	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				106	ret void
				107	}
				108
				109	; GCN-LABEL: sub_i32_varying_vdata:
				110	; GFX7LESS-NOT: v_mbcnt_lo_u32_b32
				111	; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
				112	; GFX7LESS-NOT: s_bcnt1_i32_b64
				113	; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
Neil Henning	0a30f33	2019-04-01 15:19:52 +0000	[diff] [blame]	114	; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	115	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
				116	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
Neil Henning	8c10fa1	2019-02-11 14:44:14 +0000	[diff] [blame]	117	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:4 row_mask:0xf bank_mask:0xe
				118	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
				119	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
				120	; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
Neil Henning	0a30f33	2019-04-01 15:19:52 +0000	[diff] [blame]	121	; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
Neil Henning	6641657	2018-10-08 15:49:19 +0000	[diff] [blame]	122	; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
				123	; GFX8MORE: buffer_atomic_sub v[[value]]
				124	define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic sub whose data operand is the workitem id
				125	entry:
				126	%lane = call i32 @llvm.amdgcn.workitem.id.x() ; %lane is the result of the workitem.id.x intrinsic
				127	%old = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %lane, <4 x i32> %inout, i32 0, i32 0, i1 0) ; %lane is passed as the data operand (the 'varying vdata' case in the test name); remaining scalar operands are all zero
				128	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				129	ret void
				130	}
				131
				132	; GCN-LABEL: sub_i32_varying_offset:
				133	; GCN-NOT: v_mbcnt_lo_u32_b32
				134	; GCN-NOT: v_mbcnt_hi_u32_b32
				135	; GCN-NOT: s_bcnt1_i32_b64
				136	; GCN: buffer_atomic_sub v{{[0-9]+}}
				137	define amdgpu_kernel void @sub_i32_varying_offset(i32 addrspace(1)* %out, <4 x i32> %inout) { ; test kernel, buffer atomic sub with a constant data operand and a workitem-id-dependent addressing operand
				138	entry:
				139	%lane = call i32 @llvm.amdgcn.workitem.id.x() ; %lane is the result of the workitem.id.x intrinsic
				140	%old = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 1, <4 x i32> %inout, i32 %lane, i32 0, i1 0) ; data operand is the literal 1; %lane is passed as the third operand. NOTE(review) the test name calls this the 'offset' - confirm which intrinsic operand that position is
				141	store i32 %old, i32 addrspace(1)* %out ; write the value returned by the atomic call to %out
				142	ret void
				143	}