Blame - llvm/test/CodeGen/AMDGPU/add3.ll - toolchain/llvm-project

blob: 35055190b348e628033c1489105a425b31a20afb [file] [log] [blame]

Nicolai Haehnle	ca4a329	2018-12-06 14:33:40 +0000	[diff] [blame]	1	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				2	; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
				3	; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
				4
				5	; ===================================================================================
				6	; V_ADD3_U32
				7	; ===================================================================================
				8
				9	define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) { ; Baseline: (a + b) + c; the checks expect all three operands in v0-v2.
				10	; VI-LABEL: add3:
				11	; VI: ; %bb.0:
				12	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				13	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
				14	; VI-NEXT: ; return to shader part epilog
				15	;
				16	; GFX9-LABEL: add3:
				17	; GFX9: ; %bb.0:
				18	; GFX9-NEXT: v_add3_u32 v0, v0, v1, v2
				19	; GFX9-NEXT: ; return to shader part epilog
				20	%x = add i32 %a, %b ; inner add; its only user is %result
				21	%result = add i32 %x, %c ; outer add; the GFX9 checks expect both adds merged into one v_add3_u32
				22	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				23	ret float %bc
				24	}
				25
				26	; The three-operand variant (v_add3_u32) is not used here due to constant bus limitations.
				27	; TODO: With reassociation it is possible to replace a v_add_u32_e32 with an s_add_i32.
				28	define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) { ; %a and %c are inreg (s2/s3 in the checks); only %b arrives in v0.
				29	; VI-LABEL: add3_vgpr_b:
				30	; VI: ; %bb.0:
				31	; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
				32	; VI-NEXT: v_add_u32_e32 v0, vcc, s3, v0
				33	; VI-NEXT: ; return to shader part epilog
				34	;
				35	; GFX9-LABEL: add3_vgpr_b:
				36	; GFX9: ; %bb.0:
				37	; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
				38	; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
				39	; GFX9-NEXT: ; return to shader part epilog
				40	%x = add i32 %a, %b ; inner add: inreg operand + non-inreg operand
				41	%result = add i32 %x, %c ; outer add brings in the second inreg operand; both targets are expected to keep two separate adds
				42	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				43	ret float %bc
				44	}
				45
				46	define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) { ; Same three operands as @add3, but associated as a + (b + c).
				47	; VI-LABEL: add3_vgpr_all2:
				48	; VI: ; %bb.0:
				49	; VI-NEXT: v_add_u32_e32 v1, vcc, v1, v2
				50	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				51	; VI-NEXT: ; return to shader part epilog
				52	;
				53	; GFX9-LABEL: add3_vgpr_all2:
				54	; GFX9: ; %bb.0:
				55	; GFX9-NEXT: v_add3_u32 v0, v1, v2, v0
				56	; GFX9-NEXT: ; return to shader part epilog
				57	%x = add i32 %b, %c ; inner add is formed from the last two arguments
				58	%result = add i32 %a, %x ; inner add is the right-hand operand here; the GFX9 checks still expect one v_add3_u32 (sources v1, v2, v0)
				59	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				60	ret float %bc
				61	}
				62
				63	define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) { ; Only %a is inreg (s2 in the checks); %b and %c arrive in v0/v1.
				64	; VI-LABEL: add3_vgpr_bc:
				65	; VI: ; %bb.0:
				66	; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
				67	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				68	; VI-NEXT: ; return to shader part epilog
				69	;
				70	; GFX9-LABEL: add3_vgpr_bc:
				71	; GFX9: ; %bb.0:
				72	; GFX9-NEXT: v_add3_u32 v0, s2, v0, v1
				73	; GFX9-NEXT: ; return to shader part epilog
				74	%x = add i32 %a, %b ; inner add: the single inreg operand + %b
				75	%result = add i32 %x, %c ; outer add; the GFX9 checks expect one v_add3_u32 with s2 as its first source
				76	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				77	ret float %bc
				78	}
				79
				80	define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) { ; (a + b) + 16: the third addend is an immediate rather than an argument.
				81	; VI-LABEL: add3_vgpr_const:
				82	; VI: ; %bb.0:
				83	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				84	; VI-NEXT: v_add_u32_e32 v0, vcc, 16, v0
				85	; VI-NEXT: ; return to shader part epilog
				86	;
				87	; GFX9-LABEL: add3_vgpr_const:
				88	; GFX9: ; %bb.0:
				89	; GFX9-NEXT: v_add3_u32 v0, v0, v1, 16
				90	; GFX9-NEXT: ; return to shader part epilog
				91	%x = add i32 %a, %b ; inner add of the two arguments
				92	%result = add i32 %x, 16 ; outer add with a constant; the GFX9 checks expect 16 as the last v_add3_u32 source
				93	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				94	ret float %bc
				95	}
				96
				97	define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) { ; The outer add has two users (the mul and result element 0); the inner add has one.
				98	; VI-LABEL: add3_multiuse_outer:
				99	; VI: ; %bb.0:
				100	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				101	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
				102	; VI-NEXT: v_mul_lo_i32 v1, v0, v3
				103	; VI-NEXT: ; return to shader part epilog
				104	;
				105	; GFX9-LABEL: add3_multiuse_outer:
				106	; GFX9: ; %bb.0:
				107	; GFX9-NEXT: v_add3_u32 v0, v0, v1, v2
				108	; GFX9-NEXT: v_mul_lo_i32 v1, v0, v3
				109	; GFX9-NEXT: ; return to shader part epilog
				110	%inner = add i32 %a, %b ; used only by %outer
				111	%outer = add i32 %inner, %c ; used twice below; the GFX9 checks still expect one v_add3_u32
				112	%x1 = mul i32 %outer, %x ; first use of %outer
				113	%r1 = insertelement <2 x i32> undef, i32 %outer, i32 0 ; second use of %outer: result element 0
				114	%r0 = insertelement <2 x i32> %r1, i32 %x1, i32 1 ; result element 1 is the product
				115	%bc = bitcast <2 x i32> %r0 to <2 x float> ; reinterpret both lanes as float to match the return type
				116	ret <2 x float> %bc
				117	}
				118
				119	define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) { ; The inner add has two users (%outer and result element 0).
				120	; VI-LABEL: add3_multiuse_inner:
				121	; VI: ; %bb.0:
				122	; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
				123	; VI-NEXT: v_add_u32_e32 v1, vcc, v0, v2
				124	; VI-NEXT: ; return to shader part epilog
				125	;
				126	; GFX9-LABEL: add3_multiuse_inner:
				127	; GFX9: ; %bb.0:
				128	; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
				129	; GFX9-NEXT: v_add_u32_e32 v1, v0, v2
				130	; GFX9-NEXT: ; return to shader part epilog
				131	%inner = add i32 %a, %b ; returned in element 0 and also fed to %outer
				132	%outer = add i32 %inner, %c ; the checks expect two separate adds on both targets, i.e. no v_add3_u32
				133	%r1 = insertelement <2 x i32> undef, i32 %inner, i32 0 ; result element 0 is the intermediate sum
				134	%r0 = insertelement <2 x i32> %r1, i32 %outer, i32 1 ; result element 1 is the full sum
				135	%bc = bitcast <2 x i32> %r0 to <2 x float> ; reinterpret both lanes as float to match the return type
				136	ret <2 x float> %bc
				137	}
				138
				139	; A case where uniform values end up in VGPRs -- we could use v_add3_u32 here,
				140	; but we don't.
				141	define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) { ; All inputs are inreg floats (s2-s4 in the checks); the fadd results are expected in v0-v2.
				142	; VI-LABEL: add3_uniform_vgpr:
				143	; VI: ; %bb.0:
				144	; VI-NEXT: v_mov_b32_e32 v2, 0x40400000
				145	; VI-NEXT: v_add_f32_e64 v0, s2, 1.0
				146	; VI-NEXT: v_add_f32_e64 v1, s3, 2.0
				147	; VI-NEXT: v_add_f32_e32 v2, s4, v2
				148	; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
				149	; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0
				150	; VI-NEXT: ; return to shader part epilog
				151	;
				152	; GFX9-LABEL: add3_uniform_vgpr:
				153	; GFX9: ; %bb.0:
				154	; GFX9-NEXT: v_mov_b32_e32 v2, 0x40400000
				155	; GFX9-NEXT: v_add_f32_e64 v0, s2, 1.0
				156	; GFX9-NEXT: v_add_f32_e64 v1, s3, 2.0
				157	; GFX9-NEXT: v_add_f32_e32 v2, s4, v2
				158	; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
				159	; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
				160	; GFX9-NEXT: ; return to shader part epilog
				161	%a1 = fadd float %a, 1.0 ; the float adds are what place each value in a v-register in the checks
				162	%b2 = fadd float %b, 2.0
				163	%c3 = fadd float %c, 3.0 ; the checks expect 3.0 to be moved into v2 first as 0x40400000
				164	%bc.a = bitcast float %a1 to i32 ; reinterpret the float results as i32 so they can feed integer adds
				165	%bc.b = bitcast float %b2 to i32
				166	%bc.c = bitcast float %c3 to i32
				167	%x = add i32 %bc.a, %bc.b ; inner integer add
				168	%result = add i32 %x, %bc.c ; outer integer add; the GFX9 checks expect two v_add_u32_e32 here, not v_add3_u32
				169	%bc = bitcast i32 %result to float ; reinterpret the i32 sum as float to match the return type
				170	ret float %bc
				171	}