; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=X32 --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=X64 --check-prefix=CHECK
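;; The blendvb_fallback tests below check that selects with variable <N x i1>
;; masks are lowered to vblendvps on this AVX target.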
define <4 x i32> @blendvb_fallback_v4i32(<4 x i1> %mask, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @blendvb_fallback_v4i32
; CHECK: vblendvps
; CHECK: ret
  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %ret
}

define <8 x i32> @blendvb_fallback_v8i32(<8 x i1> %mask, <8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: @blendvb_fallback_v8i32
; CHECK: vblendvps
; CHECK: ret
  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %ret
}

define <8 x float> @blendvb_fallback_v8f32(<8 x i1> %mask, <8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @blendvb_fallback_v8f32
; CHECK: vblendvps
; CHECK: ret
  %ret = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %ret
}

declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
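;; For reference in the checks below: the insertps immediate encodes the
;; source element in bits [7:6], the destination element in bits [5:4], and a
;; zero mask in bits [3:0]. So $48 (0x30) inserts source element 0 into
;; destination element 3, $96 (0x60) element 1 into element 2, and $192 (0xC0)
;; element 3 into element 0.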
define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; CHECK-LABEL: insertps_from_vector_load:
; On X32, account for the argument's move to registers
; X32: movl 4(%esp), %eax
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
  %1 = load <4 x float>* %pb, align 16
  %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
  ret <4 x float> %2
}

;; Use a non-zero CountS for insertps
define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; CHECK-LABEL: insertps_from_vector_load_offset:
; On X32, account for the argument's move to registers
; X32: movl 4(%esp), %eax
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
; CHECK: insertps $96, 4(%{{...}}), %
; CHECK-NEXT: ret
  %1 = load <4 x float>* %pb, align 16
  %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
  ret <4 x float> %2
}

define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
; CHECK-LABEL: insertps_from_vector_load_offset_2:
; On X32, account for the argument's move to registers
; X32: movl 4(%esp), %eax
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
  %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
  %2 = load <4 x float>* %1, align 16
  %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
  ret <4 x float> %3
}

define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
; CHECK-LABEL: insertps_from_broadcast_loadf32:
; On X32, account for the arguments' move to registers
; X32: movl 8(%esp), %eax
; X32: movl 4(%esp), %ecx
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
  %1 = getelementptr inbounds float* %fb, i64 %index
  %2 = load float* %1, align 4
  %3 = insertelement <4 x float> undef, float %2, i32 0
  %4 = insertelement <4 x float> %3, float %2, i32 1
  %5 = insertelement <4 x float> %4, float %2, i32 2
  %6 = insertelement <4 x float> %5, float %2, i32 3
  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
  ret <4 x float> %7
}

define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
; CHECK-LABEL: insertps_from_broadcast_loadv4f32:
; On X32, account for the arguments' move to registers
; X32: movl 4(%esp), %{{...}}
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
  %1 = load <4 x float>* %b, align 4
  %2 = extractelement <4 x float> %1, i32 0
  %3 = insertelement <4 x float> undef, float %2, i32 0
  %4 = insertelement <4 x float> %3, float %2, i32 1
  %5 = insertelement <4 x float> %4, float %2, i32 2
  %6 = insertelement <4 x float> %5, float %2, i32 3
  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
  ret <4 x float> %7
}

;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
; CHECK-LABEL: insertps_from_broadcast_multiple_use:
; On X32, account for the arguments' move to registers
; X32: movl 8(%esp), %eax
; X32: movl 4(%esp), %ecx
; CHECK: vbroadcastss
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK: insertps $48
; CHECK: insertps $48
; CHECK: insertps $48
; CHECK: vaddps
; CHECK: vaddps
; CHECK: vaddps
; CHECK-NEXT: ret
  %1 = getelementptr inbounds float* %fb, i64 %index
  %2 = load float* %1, align 4
  %3 = insertelement <4 x float> undef, float %2, i32 0
  %4 = insertelement <4 x float> %3, float %2, i32 1
  %5 = insertelement <4 x float> %4, float %2, i32 2
  %6 = insertelement <4 x float> %5, float %2, i32 3
  %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
  %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48)
  %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48)
  %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48)
  %11 = fadd <4 x float> %7, %8
  %12 = fadd <4 x float> %9, %10
  %13 = fadd <4 x float> %11, %12
  ret <4 x float> %13
}