; Cost-model regression test: x86 floating-point arithmetic (fadd/fsub/fmul/
; fdiv/frem) and FP intrinsics (sqrt/fabs/fma) across SSE2..AVX-512 levels.
; Each RUN line re-checks the same IR under a different -mattr feature set.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; CHECK-LABEL: 'fadd'
; Scalar and 128/256/512-bit vector fadd costs per ISA level.
define i32 @fadd(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fadd
  ; SSE42: cost of 2 {{.*}} %F32 = fadd
  ; AVX: cost of 2 {{.*}} %F32 = fadd
  ; AVX2: cost of 2 {{.*}} %F32 = fadd
  ; AVX512: cost of 2 {{.*}} %F32 = fadd
  %F32 = fadd float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fadd
  ; SSE42: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX2: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V4F32 = fadd
  %V4F32 = fadd <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fadd
  ; SSE42: cost of 4 {{.*}} %V8F32 = fadd
  ; AVX: cost of 2 {{.*}} %V8F32 = fadd
  ; AVX2: cost of 2 {{.*}} %V8F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V8F32 = fadd
  %V8F32 = fadd <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fadd
  ; SSE42: cost of 8 {{.*}} %V16F32 = fadd
  ; AVX: cost of 4 {{.*}} %V16F32 = fadd
  ; AVX2: cost of 4 {{.*}} %V16F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V16F32 = fadd
  %V16F32 = fadd <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fadd
  ; SSE42: cost of 2 {{.*}} %F64 = fadd
  ; AVX: cost of 2 {{.*}} %F64 = fadd
  ; AVX2: cost of 2 {{.*}} %F64 = fadd
  ; AVX512: cost of 2 {{.*}} %F64 = fadd
  %F64 = fadd double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fadd
  ; SSE42: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX2: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V2F64 = fadd
  %V2F64 = fadd <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fadd
  ; SSE42: cost of 4 {{.*}} %V4F64 = fadd
  ; AVX: cost of 2 {{.*}} %V4F64 = fadd
  ; AVX2: cost of 2 {{.*}} %V4F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V4F64 = fadd
  %V4F64 = fadd <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fadd
  ; SSE42: cost of 8 {{.*}} %V8F64 = fadd
  ; AVX: cost of 4 {{.*}} %V8F64 = fadd
  ; AVX2: cost of 4 {{.*}} %V8F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V8F64 = fadd
  %V8F64 = fadd <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fsub'
; Scalar and 128/256/512-bit vector fsub costs per ISA level.
define i32 @fsub(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fsub
  ; SSE42: cost of 2 {{.*}} %F32 = fsub
  ; AVX: cost of 2 {{.*}} %F32 = fsub
  ; AVX2: cost of 2 {{.*}} %F32 = fsub
  ; AVX512: cost of 2 {{.*}} %F32 = fsub
  %F32 = fsub float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fsub
  ; SSE42: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX2: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V4F32 = fsub
  %V4F32 = fsub <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fsub
  ; SSE42: cost of 4 {{.*}} %V8F32 = fsub
  ; AVX: cost of 2 {{.*}} %V8F32 = fsub
  ; AVX2: cost of 2 {{.*}} %V8F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V8F32 = fsub
  %V8F32 = fsub <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fsub
  ; SSE42: cost of 8 {{.*}} %V16F32 = fsub
  ; AVX: cost of 4 {{.*}} %V16F32 = fsub
  ; AVX2: cost of 4 {{.*}} %V16F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V16F32 = fsub
  %V16F32 = fsub <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fsub
  ; SSE42: cost of 2 {{.*}} %F64 = fsub
  ; AVX: cost of 2 {{.*}} %F64 = fsub
  ; AVX2: cost of 2 {{.*}} %F64 = fsub
  ; AVX512: cost of 2 {{.*}} %F64 = fsub
  %F64 = fsub double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fsub
  ; SSE42: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX2: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V2F64 = fsub
  %V2F64 = fsub <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fsub
  ; SSE42: cost of 4 {{.*}} %V4F64 = fsub
  ; AVX: cost of 2 {{.*}} %V4F64 = fsub
  ; AVX2: cost of 2 {{.*}} %V4F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V4F64 = fsub
  %V4F64 = fsub <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fsub
  ; SSE42: cost of 8 {{.*}} %V8F64 = fsub
  ; AVX: cost of 4 {{.*}} %V8F64 = fsub
  ; AVX2: cost of 4 {{.*}} %V8F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V8F64 = fsub
  %V8F64 = fsub <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fmul'
; Scalar and 128/256/512-bit vector fmul costs per ISA level.
define i32 @fmul(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fmul
  ; SSE42: cost of 2 {{.*}} %F32 = fmul
  ; AVX: cost of 2 {{.*}} %F32 = fmul
  ; AVX2: cost of 2 {{.*}} %F32 = fmul
  ; AVX512: cost of 2 {{.*}} %F32 = fmul
  %F32 = fmul float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fmul
  ; SSE42: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX2: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V4F32 = fmul
  %V4F32 = fmul <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fmul
  ; SSE42: cost of 4 {{.*}} %V8F32 = fmul
  ; AVX: cost of 2 {{.*}} %V8F32 = fmul
  ; AVX2: cost of 2 {{.*}} %V8F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V8F32 = fmul
  %V8F32 = fmul <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fmul
  ; SSE42: cost of 8 {{.*}} %V16F32 = fmul
  ; AVX: cost of 4 {{.*}} %V16F32 = fmul
  ; AVX2: cost of 4 {{.*}} %V16F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V16F32 = fmul
  %V16F32 = fmul <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fmul
  ; SSE42: cost of 2 {{.*}} %F64 = fmul
  ; AVX: cost of 2 {{.*}} %F64 = fmul
  ; AVX2: cost of 2 {{.*}} %F64 = fmul
  ; AVX512: cost of 2 {{.*}} %F64 = fmul
  %F64 = fmul double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fmul
  ; SSE42: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX2: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V2F64 = fmul
  %V2F64 = fmul <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fmul
  ; SSE42: cost of 4 {{.*}} %V4F64 = fmul
  ; AVX: cost of 2 {{.*}} %V4F64 = fmul
  ; AVX2: cost of 2 {{.*}} %V4F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V4F64 = fmul
  %V4F64 = fmul <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fmul
  ; SSE42: cost of 8 {{.*}} %V8F64 = fmul
  ; AVX: cost of 4 {{.*}} %V8F64 = fmul
  ; AVX2: cost of 4 {{.*}} %V8F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V8F64 = fmul
  %V8F64 = fmul <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fdiv'
; Scalar and 128/256/512-bit vector fdiv costs per ISA level.
define i32 @fdiv(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fdiv
  ; SSE42: cost of 2 {{.*}} %F32 = fdiv
  ; AVX: cost of 2 {{.*}} %F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %F32 = fdiv
  %F32 = fdiv float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fdiv
  ; SSE42: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V4F32 = fdiv
  %V4F32 = fdiv <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fdiv
  ; SSE42: cost of 4 {{.*}} %V8F32 = fdiv
  ; AVX: cost of 2 {{.*}} %V8F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %V8F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V8F32 = fdiv
  %V8F32 = fdiv <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fdiv
  ; SSE42: cost of 8 {{.*}} %V16F32 = fdiv
  ; AVX: cost of 4 {{.*}} %V16F32 = fdiv
  ; AVX2: cost of 4 {{.*}} %V16F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V16F32 = fdiv
  %V16F32 = fdiv <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fdiv
  ; SSE42: cost of 2 {{.*}} %F64 = fdiv
  ; AVX: cost of 2 {{.*}} %F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %F64 = fdiv
  %F64 = fdiv double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fdiv
  ; SSE42: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V2F64 = fdiv
  %V2F64 = fdiv <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fdiv
  ; SSE42: cost of 4 {{.*}} %V4F64 = fdiv
  ; AVX: cost of 2 {{.*}} %V4F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %V4F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V4F64 = fdiv
  %V4F64 = fdiv <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fdiv
  ; SSE42: cost of 8 {{.*}} %V8F64 = fdiv
  ; AVX: cost of 4 {{.*}} %V8F64 = fdiv
  ; AVX2: cost of 4 {{.*}} %V8F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V8F64 = fdiv
  %V8F64 = fdiv <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'frem'
; frem has no hardware instruction; vector forms are scalarized, so costs
; grow with element count and include extract/insert overhead.
define i32 @frem(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = frem
  ; SSE42: cost of 2 {{.*}} %F32 = frem
  ; AVX: cost of 2 {{.*}} %F32 = frem
  ; AVX2: cost of 2 {{.*}} %F32 = frem
  ; AVX512: cost of 2 {{.*}} %F32 = frem
  %F32 = frem float undef, undef
  ; SSE2: cost of 14 {{.*}} %V4F32 = frem
  ; SSE42: cost of 14 {{.*}} %V4F32 = frem
  ; AVX: cost of 14 {{.*}} %V4F32 = frem
  ; AVX2: cost of 14 {{.*}} %V4F32 = frem
  ; AVX512: cost of 14 {{.*}} %V4F32 = frem
  %V4F32 = frem <4 x float> undef, undef
  ; SSE2: cost of 28 {{.*}} %V8F32 = frem
  ; SSE42: cost of 28 {{.*}} %V8F32 = frem
  ; AVX: cost of 30 {{.*}} %V8F32 = frem
  ; AVX2: cost of 30 {{.*}} %V8F32 = frem
  ; AVX512: cost of 30 {{.*}} %V8F32 = frem
  %V8F32 = frem <8 x float> undef, undef
  ; SSE2: cost of 56 {{.*}} %V16F32 = frem
  ; SSE42: cost of 56 {{.*}} %V16F32 = frem
  ; AVX: cost of 60 {{.*}} %V16F32 = frem
  ; AVX2: cost of 60 {{.*}} %V16F32 = frem
  ; AVX512: cost of 62 {{.*}} %V16F32 = frem
  %V16F32 = frem <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = frem
  ; SSE42: cost of 2 {{.*}} %F64 = frem
  ; AVX: cost of 2 {{.*}} %F64 = frem
  ; AVX2: cost of 2 {{.*}} %F64 = frem
  ; AVX512: cost of 2 {{.*}} %F64 = frem
  %F64 = frem double undef, undef
  ; SSE2: cost of 6 {{.*}} %V2F64 = frem
  ; SSE42: cost of 6 {{.*}} %V2F64 = frem
  ; AVX: cost of 6 {{.*}} %V2F64 = frem
  ; AVX2: cost of 6 {{.*}} %V2F64 = frem
  ; AVX512: cost of 6 {{.*}} %V2F64 = frem
  %V2F64 = frem <2 x double> undef, undef
  ; SSE2: cost of 12 {{.*}} %V4F64 = frem
  ; SSE42: cost of 12 {{.*}} %V4F64 = frem
  ; AVX: cost of 14 {{.*}} %V4F64 = frem
  ; AVX2: cost of 14 {{.*}} %V4F64 = frem
  ; AVX512: cost of 14 {{.*}} %V4F64 = frem
  %V4F64 = frem <4 x double> undef, undef
  ; SSE2: cost of 24 {{.*}} %V8F64 = frem
  ; SSE42: cost of 24 {{.*}} %V8F64 = frem
  ; AVX: cost of 28 {{.*}} %V8F64 = frem
  ; AVX2: cost of 28 {{.*}} %V8F64 = frem
  ; AVX512: cost of 30 {{.*}} %V8F64 = frem
  %V8F64 = frem <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fsqrt'
; llvm.sqrt intrinsic costs; wider-than-native vectors split (cost scales
; with the number of legal-width pieces).
define i32 @fsqrt(i32 %arg) {
  ; SSE2: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; SSE42: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  %F32 = call float @llvm.sqrt.f32(float undef)
  ; SSE2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; SSE42: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
  ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
  ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)

  ; SSE2: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; SSE42: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  %F64 = call double @llvm.sqrt.f64(double undef)
  ; SSE2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; SSE42: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
  ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
  ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)

  ret i32 undef
}

; CHECK-LABEL: 'fabs'
; llvm.fabs intrinsic costs per ISA level.
define i32 @fabs(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  %F32 = call float @llvm.fabs.f32(float undef)
  ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
  ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
  ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)

  ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  %F64 = call double @llvm.fabs.f64(double undef)
  ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
  ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
  ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)

  ret i32 undef
}

; CHECK-LABEL: 'fma'
; llvm.fma intrinsic costs: cheap with the +fma/AVX-512 feature, expensive
; (scalarized libcall-style expansion) on SSE-only targets.
define i32 @fma(i32 %arg) {
  ; SSE2: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
  ; SSE42: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
  ; SSE2: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; SSE42: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
  ; SSE2: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; SSE42: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
  ; SSE2: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; SSE42: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)

  ; SSE2: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
  ; SSE42: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
  ; SSE2: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; SSE42: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
  ; SSE2: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; SSE42: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
  ; SSE2: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; SSE42: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)

  ret i32 undef
}

; Intrinsic declarations referenced by the cost checks above.
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)

declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)

declare float @llvm.fabs.f32(float)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
declare <16 x float> @llvm.fabs.v16f32(<16 x float>)

declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
declare <8 x double> @llvm.fabs.v8f64(<8 x double>)

declare float @llvm.fma.f32(float, float, float)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)

declare double @llvm.fma.f64(double, double, double)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)