Blame - llvm/test/Analysis/CostModel/X86/arith-fp.ll - toolchain/llvm-project

blob: 6a68a7f330f87ffff91bb9e9db3f069ee7c12109 [file] [log] [blame]

Simon Pilgrim	3cd61a0	2016-08-21 18:34:47 +0000	[diff] [blame]	1	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
				2	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
Simon Pilgrim	07d7a21	2016-08-21 19:06:25 +0000	[diff] [blame^]	3	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
				4	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
Simon Pilgrim	3cd61a0	2016-08-21 18:34:47 +0000	[diff] [blame]	5	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
				6	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
				7
				8	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
				9	target triple = "x86_64-apple-macosx10.8.0"
				10
				11	; CHECK-LABEL: 'fadd'
					; Cost-model expectations for vector fadd. Every instruction below is
					; preceded by one check line per prefix stating the cost expected for
					; that configuration; the AVX512 prefix is shared by both avx512
					; configurations declared at the top of the file.
				12	define i32 @fadd(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				13	; SSSE3: cost of 2 {{.*}} %A = fadd
				14	; SSE42: cost of 2 {{.*}} %A = fadd
				15	; AVX: cost of 2 {{.*}} %A = fadd
				16	; AVX2: cost of 2 {{.*}} %A = fadd
				17	; AVX512: cost of 2 {{.*}} %A = fadd
				18	%A = fadd <4 x float> undef, undef
				19	; SSSE3: cost of 4 {{.*}} %B = fadd
				20	; SSE42: cost of 4 {{.*}} %B = fadd
				21	; AVX: cost of 2 {{.*}} %B = fadd
				22	; AVX2: cost of 2 {{.*}} %B = fadd
				23	; AVX512: cost of 2 {{.*}} %B = fadd
				24	%B = fadd <8 x float> undef, undef
				25	; SSSE3: cost of 8 {{.*}} %C = fadd
				26	; SSE42: cost of 8 {{.*}} %C = fadd
				27	; AVX: cost of 4 {{.*}} %C = fadd
				28	; AVX2: cost of 4 {{.*}} %C = fadd
				29	; AVX512: cost of 2 {{.*}} %C = fadd
				30	%C = fadd <16 x float> undef, undef
				31
					; Double-precision operands: 2, 4 and 8 lanes.
				32	; SSSE3: cost of 2 {{.*}} %D = fadd
				33	; SSE42: cost of 2 {{.*}} %D = fadd
				34	; AVX: cost of 2 {{.*}} %D = fadd
				35	; AVX2: cost of 2 {{.*}} %D = fadd
				36	; AVX512: cost of 2 {{.*}} %D = fadd
				37	%D = fadd <2 x double> undef, undef
				38	; SSSE3: cost of 4 {{.*}} %E = fadd
				39	; SSE42: cost of 4 {{.*}} %E = fadd
				40	; AVX: cost of 2 {{.*}} %E = fadd
				41	; AVX2: cost of 2 {{.*}} %E = fadd
				42	; AVX512: cost of 2 {{.*}} %E = fadd
				43	%E = fadd <4 x double> undef, undef
				44	; SSSE3: cost of 8 {{.*}} %F = fadd
				45	; SSE42: cost of 8 {{.*}} %F = fadd
				46	; AVX: cost of 4 {{.*}} %F = fadd
				47	; AVX2: cost of 4 {{.*}} %F = fadd
				48	; AVX512: cost of 2 {{.*}} %F = fadd
				49	%F = fadd <8 x double> undef, undef
				50
				51	ret i32 undef
				52	}
				53
				54	; CHECK-LABEL: 'fsub'
					; Cost-model expectations for vector fsub. Every instruction below is
					; preceded by one check line per prefix stating the cost expected for
					; that configuration.
				55	define i32 @fsub(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				56	; SSSE3: cost of 2 {{.*}} %A = fsub
				57	; SSE42: cost of 2 {{.*}} %A = fsub
				58	; AVX: cost of 2 {{.*}} %A = fsub
				59	; AVX2: cost of 2 {{.*}} %A = fsub
				60	; AVX512: cost of 2 {{.*}} %A = fsub
				61	%A = fsub <4 x float> undef, undef
				62	; SSSE3: cost of 4 {{.*}} %B = fsub
				63	; SSE42: cost of 4 {{.*}} %B = fsub
				64	; AVX: cost of 2 {{.*}} %B = fsub
				65	; AVX2: cost of 2 {{.*}} %B = fsub
				66	; AVX512: cost of 2 {{.*}} %B = fsub
				67	%B = fsub <8 x float> undef, undef
				68	; SSSE3: cost of 8 {{.*}} %C = fsub
				69	; SSE42: cost of 8 {{.*}} %C = fsub
				70	; AVX: cost of 4 {{.*}} %C = fsub
				71	; AVX2: cost of 4 {{.*}} %C = fsub
				72	; AVX512: cost of 2 {{.*}} %C = fsub
				73	%C = fsub <16 x float> undef, undef
				74
					; Double-precision operands: 2, 4 and 8 lanes.
				75	; SSSE3: cost of 2 {{.*}} %D = fsub
				76	; SSE42: cost of 2 {{.*}} %D = fsub
				77	; AVX: cost of 2 {{.*}} %D = fsub
				78	; AVX2: cost of 2 {{.*}} %D = fsub
				79	; AVX512: cost of 2 {{.*}} %D = fsub
				80	%D = fsub <2 x double> undef, undef
				81	; SSSE3: cost of 4 {{.*}} %E = fsub
				82	; SSE42: cost of 4 {{.*}} %E = fsub
				83	; AVX: cost of 2 {{.*}} %E = fsub
				84	; AVX2: cost of 2 {{.*}} %E = fsub
				85	; AVX512: cost of 2 {{.*}} %E = fsub
				86	%E = fsub <4 x double> undef, undef
				87	; SSSE3: cost of 8 {{.*}} %F = fsub
				88	; SSE42: cost of 8 {{.*}} %F = fsub
				89	; AVX: cost of 4 {{.*}} %F = fsub
				90	; AVX2: cost of 4 {{.*}} %F = fsub
				91	; AVX512: cost of 2 {{.*}} %F = fsub
				92	%F = fsub <8 x double> undef, undef
				93
				94	ret i32 undef
				95	}
				96
				97	; CHECK-LABEL: 'fmul'
					; Cost-model expectations for vector fmul. Every instruction below is
					; preceded by one check line per prefix stating the cost expected for
					; that configuration.
				98	define i32 @fmul(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				99	; SSSE3: cost of 2 {{.*}} %A = fmul
				100	; SSE42: cost of 2 {{.*}} %A = fmul
				101	; AVX: cost of 2 {{.*}} %A = fmul
				102	; AVX2: cost of 2 {{.*}} %A = fmul
				103	; AVX512: cost of 2 {{.*}} %A = fmul
				104	%A = fmul <4 x float> undef, undef
				105	; SSSE3: cost of 4 {{.*}} %B = fmul
				106	; SSE42: cost of 4 {{.*}} %B = fmul
				107	; AVX: cost of 2 {{.*}} %B = fmul
				108	; AVX2: cost of 2 {{.*}} %B = fmul
				109	; AVX512: cost of 2 {{.*}} %B = fmul
				110	%B = fmul <8 x float> undef, undef
				111	; SSSE3: cost of 8 {{.*}} %C = fmul
				112	; SSE42: cost of 8 {{.*}} %C = fmul
				113	; AVX: cost of 4 {{.*}} %C = fmul
				114	; AVX2: cost of 4 {{.*}} %C = fmul
				115	; AVX512: cost of 2 {{.*}} %C = fmul
				116	%C = fmul <16 x float> undef, undef
				117
					; Double-precision operands: 2, 4 and 8 lanes.
				118	; SSSE3: cost of 2 {{.*}} %D = fmul
				119	; SSE42: cost of 2 {{.*}} %D = fmul
				120	; AVX: cost of 2 {{.*}} %D = fmul
				121	; AVX2: cost of 2 {{.*}} %D = fmul
				122	; AVX512: cost of 2 {{.*}} %D = fmul
				123	%D = fmul <2 x double> undef, undef
				124	; SSSE3: cost of 4 {{.*}} %E = fmul
				125	; SSE42: cost of 4 {{.*}} %E = fmul
				126	; AVX: cost of 2 {{.*}} %E = fmul
				127	; AVX2: cost of 2 {{.*}} %E = fmul
				128	; AVX512: cost of 2 {{.*}} %E = fmul
				129	%E = fmul <4 x double> undef, undef
				130	; SSSE3: cost of 8 {{.*}} %F = fmul
				131	; SSE42: cost of 8 {{.*}} %F = fmul
				132	; AVX: cost of 4 {{.*}} %F = fmul
				133	; AVX2: cost of 4 {{.*}} %F = fmul
				134	; AVX512: cost of 2 {{.*}} %F = fmul
				135	%F = fmul <8 x double> undef, undef
				136
				137	ret i32 undef
				138	}
				139
				140	; CHECK-LABEL: 'fdiv'
					; Cost-model expectations for vector fdiv. Every instruction below is
					; preceded by one check line per prefix stating the cost expected for
					; that configuration.
				141	define i32 @fdiv(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				142	; SSSE3: cost of 2 {{.*}} %A = fdiv
				143	; SSE42: cost of 2 {{.*}} %A = fdiv
				144	; AVX: cost of 2 {{.*}} %A = fdiv
				145	; AVX2: cost of 2 {{.*}} %A = fdiv
				146	; AVX512: cost of 2 {{.*}} %A = fdiv
				147	%A = fdiv <4 x float> undef, undef
				148	; SSSE3: cost of 4 {{.*}} %B = fdiv
				149	; SSE42: cost of 4 {{.*}} %B = fdiv
				150	; AVX: cost of 2 {{.*}} %B = fdiv
				151	; AVX2: cost of 2 {{.*}} %B = fdiv
				152	; AVX512: cost of 2 {{.*}} %B = fdiv
				153	%B = fdiv <8 x float> undef, undef
				154	; SSSE3: cost of 8 {{.*}} %C = fdiv
				155	; SSE42: cost of 8 {{.*}} %C = fdiv
				156	; AVX: cost of 4 {{.*}} %C = fdiv
				157	; AVX2: cost of 4 {{.*}} %C = fdiv
				158	; AVX512: cost of 2 {{.*}} %C = fdiv
				159	%C = fdiv <16 x float> undef, undef
				160
					; Double-precision operands: 2, 4 and 8 lanes.
				161	; SSSE3: cost of 2 {{.*}} %D = fdiv
				162	; SSE42: cost of 2 {{.*}} %D = fdiv
				163	; AVX: cost of 2 {{.*}} %D = fdiv
				164	; AVX2: cost of 2 {{.*}} %D = fdiv
				165	; AVX512: cost of 2 {{.*}} %D = fdiv
				166	%D = fdiv <2 x double> undef, undef
				167	; SSSE3: cost of 4 {{.*}} %E = fdiv
				168	; SSE42: cost of 4 {{.*}} %E = fdiv
				169	; AVX: cost of 2 {{.*}} %E = fdiv
				170	; AVX2: cost of 2 {{.*}} %E = fdiv
				171	; AVX512: cost of 2 {{.*}} %E = fdiv
				172	%E = fdiv <4 x double> undef, undef
				173	; SSSE3: cost of 8 {{.*}} %F = fdiv
				174	; SSE42: cost of 8 {{.*}} %F = fdiv
				175	; AVX: cost of 4 {{.*}} %F = fdiv
				176	; AVX2: cost of 4 {{.*}} %F = fdiv
				177	; AVX512: cost of 2 {{.*}} %F = fdiv
				178	%F = fdiv <8 x double> undef, undef
				179
				180	ret i32 undef
				181	}
				182
				183	; CHECK-LABEL: 'frem'
					; Cost-model expectations for vector frem. Every instruction below is
					; preceded by one check line per prefix stating the cost expected for
					; that configuration.
					; NOTE(review): the expected costs here are much larger than for the
					; other arithmetic ops in this file, presumably because frem has no
					; native vector lowering and is scalarized; confirm against the X86
					; cost model before relying on that explanation.
				184	define i32 @frem(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				185	; SSSE3: cost of 14 {{.*}} %A = frem
				186	; SSE42: cost of 14 {{.*}} %A = frem
				187	; AVX: cost of 14 {{.*}} %A = frem
				188	; AVX2: cost of 14 {{.*}} %A = frem
				189	; AVX512: cost of 14 {{.*}} %A = frem
				190	%A = frem <4 x float> undef, undef
				191	; SSSE3: cost of 28 {{.*}} %B = frem
				192	; SSE42: cost of 28 {{.*}} %B = frem
				193	; AVX: cost of 30 {{.*}} %B = frem
				194	; AVX2: cost of 30 {{.*}} %B = frem
				195	; AVX512: cost of 30 {{.*}} %B = frem
				196	%B = frem <8 x float> undef, undef
				197	; SSSE3: cost of 56 {{.*}} %C = frem
				198	; SSE42: cost of 56 {{.*}} %C = frem
				199	; AVX: cost of 60 {{.*}} %C = frem
				200	; AVX2: cost of 60 {{.*}} %C = frem
				201	; AVX512: cost of 62 {{.*}} %C = frem
				202	%C = frem <16 x float> undef, undef
				203
					; Double-precision operands: 2, 4 and 8 lanes.
				204	; SSSE3: cost of 6 {{.*}} %D = frem
				205	; SSE42: cost of 6 {{.*}} %D = frem
				206	; AVX: cost of 6 {{.*}} %D = frem
				207	; AVX2: cost of 6 {{.*}} %D = frem
				208	; AVX512: cost of 6 {{.*}} %D = frem
				209	%D = frem <2 x double> undef, undef
				210	; SSSE3: cost of 12 {{.*}} %E = frem
				211	; SSE42: cost of 12 {{.*}} %E = frem
				212	; AVX: cost of 14 {{.*}} %E = frem
				213	; AVX2: cost of 14 {{.*}} %E = frem
				214	; AVX512: cost of 14 {{.*}} %E = frem
				215	%E = frem <4 x double> undef, undef
				216	; SSSE3: cost of 24 {{.*}} %F = frem
				217	; SSE42: cost of 24 {{.*}} %F = frem
				218	; AVX: cost of 28 {{.*}} %F = frem
				219	; AVX2: cost of 28 {{.*}} %F = frem
				220	; AVX512: cost of 30 {{.*}} %F = frem
				221	%F = frem <8 x double> undef, undef
				222
				223	ret i32 undef
				224	}
Simon Pilgrim	07d7a21	2016-08-21 19:06:25 +0000	[diff] [blame^]	225
				226	; CHECK-LABEL: 'fsqrt'
					; Cost-model expectations for calls to the llvm.sqrt vector intrinsics.
					; Every call below is preceded by one check line per prefix stating the
					; cost expected for that configuration; the patterns also pin the
					; intrinsic name and return type of the call being costed.
				227	define i32 @fsqrt(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				228	; SSSE3: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
				229	; SSE42: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
				230	; AVX: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
				231	; AVX2: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
				232	; AVX512: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
				233	%A = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
				234	; SSSE3: cost of 4 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
				235	; SSE42: cost of 4 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
				236	; AVX: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
				237	; AVX2: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
				238	; AVX512: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
				239	%B = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
				240	; SSSE3: cost of 8 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
				241	; SSE42: cost of 8 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
				242	; AVX: cost of 4 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
				243	; AVX2: cost of 4 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
				244	; AVX512: cost of 1 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
				245	%C = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
				246
					; Double-precision operands: 2, 4 and 8 lanes.
				247	; SSSE3: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
				248	; SSE42: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
				249	; AVX: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
				250	; AVX2: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
				251	; AVX512: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
				252	%D = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
				253	; SSSE3: cost of 4 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
				254	; SSE42: cost of 4 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
				255	; AVX: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
				256	; AVX2: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
				257	; AVX512: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
				258	%E = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
				259	; SSSE3: cost of 8 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
				260	; SSE42: cost of 8 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
				261	; AVX: cost of 4 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
				262	; AVX2: cost of 4 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
				263	; AVX512: cost of 1 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
				264	%F = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
				265
				266	ret i32 undef
				267	}
				268
				269	; CHECK-LABEL: 'fma'
					; Cost-model expectations for calls to the llvm.fma vector intrinsics.
					; Every call below is preceded by one check line per prefix stating the
					; cost expected for that configuration. The avx and avx2 configurations
					; at the top of the file add +fma to their attribute list; the ssse3
					; and sse4.2 configurations do not, and their expected costs are far
					; larger.
				270	define i32 @fma(i32 %arg) {
					; Single-precision operands: 4, 8 and 16 lanes.
				271	; SSSE3: cost of 52 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
				272	; SSE42: cost of 52 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
				273	; AVX: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
				274	; AVX2: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
				275	; AVX512: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
				276	%A = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
				277	; SSSE3: cost of 104 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
				278	; SSE42: cost of 104 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
				279	; AVX: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
				280	; AVX2: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
				281	; AVX512: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
				282	%B = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
				283	; SSSE3: cost of 208 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
				284	; SSE42: cost of 208 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
				285	; AVX: cost of 4 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
				286	; AVX2: cost of 4 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
				287	; AVX512: cost of 1 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
				288	%C = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
				289
					; Double-precision operands: 2, 4 and 8 lanes.
				290	; SSSE3: cost of 24 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
				291	; SSE42: cost of 24 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
				292	; AVX: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
				293	; AVX2: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
				294	; AVX512: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
				295	%D = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
				296	; SSSE3: cost of 48 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
				297	; SSE42: cost of 48 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
				298	; AVX: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
				299	; AVX2: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
				300	; AVX512: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
				301	%E = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
				302	; SSSE3: cost of 96 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
				303	; SSE42: cost of 96 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
				304	; AVX: cost of 4 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
				305	; AVX2: cost of 4 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
				306	; AVX512: cost of 1 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
				307	%F = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
				308
				309	ret i32 undef
				310	}
				311
				312	declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) ; sqrt intrinsics on float vectors, called from @fsqrt
				313	declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
				314	declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
				315
				316	declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) ; sqrt intrinsics on double vectors, called from @fsqrt
				317	declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
				318	declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
				319
				320	declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) ; fma intrinsics on float vectors, called from @fma
				321	declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
				322	declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
				323
				324	declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) ; fma intrinsics on double vectors, called from @fma
				325	declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
				326	declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)