Blame - llvm/test/Analysis/CostModel/X86/reduce-add.ll - toolchain/llvm-project

blob: cb5d0fa7ea03de2b88e5786d1a1887834606da20 [file] [log] [blame]

Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	1	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
Craig Topper	381b4fb	2018-12-05 07:56:50 +0000	[diff] [blame]	2	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
				3	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
				4	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
				5	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
				6	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
				7	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
				8	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
				9	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	10
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	11	; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
				12
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	13	define i32 @reduce_i64(i32 %arg) {
					; Cost-model test for llvm.vector.reduce.add on undef vectors of 1, 2, 4, 8 and 16 x i64.
					; The call results are unused and the function returns undef; what is checked is the
					; estimated cost reported for each instruction, once per FileCheck prefix used below
					; (SSE, AVX1, AVX2, AVX512, SLM). The prefixes are selected by the run lines at the
					; top of the file.
					; The check lines are autogenerated (see the note on the first line of the file), so
					; regenerate them with utils/update_analyze_test_checks.py rather than editing by hand.
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	14	; SSE-LABEL: 'reduce_i64'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	15	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				16	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				17	; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				18	; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				19	; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	20	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	21	;
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	22	; AVX1-LABEL: 'reduce_i64'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	23	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				24	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				25	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				26	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				27	; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	28	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				29	;
				30	; AVX2-LABEL: 'reduce_i64'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	31	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				32	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				33	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				34	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				35	; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	36	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	37	;
				38	; AVX512-LABEL: 'reduce_i64'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	39	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				40	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				41	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				42	; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				43	; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	44	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				45	;
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	46	; SLM-LABEL: 'reduce_i64'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	47	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				48	; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				49	; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				50	; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				51	; SLM-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	52	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				53	;
					; Instructions under test: one reduction per vector width, each on an undef operand.
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	54	%V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
				55	%V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
				56	%V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
				57	%V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
				58	%V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	59	ret i32 undef
				60	}
				61
				62	define i32 @reduce_i32(i32 %arg) {
					; Cost-model test for llvm.vector.reduce.add on undef vectors of 2, 4, 8, 16 and 32 x i32.
					; The call results are unused and the function returns undef; what is checked is the
					; estimated cost reported for each instruction, once per FileCheck prefix used below
					; (SSE, AVX1, AVX2, AVX512, SLM).
					; The check lines are autogenerated (see the note on the first line of the file), so
					; regenerate them with utils/update_analyze_test_checks.py rather than editing by hand.
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	63	; SSE-LABEL: 'reduce_i32'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	64	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				65	; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				66	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				67	; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				68	; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	69	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	70	;
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	71	; AVX1-LABEL: 'reduce_i32'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	72	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				73	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				74	; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				75	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				76	; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	77	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				78	;
				79	; AVX2-LABEL: 'reduce_i32'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	80	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				81	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				82	; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				83	; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				84	; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	85	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	86	;
				87	; AVX512-LABEL: 'reduce_i32'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	88	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				89	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				90	; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				91	; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				92	; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	93	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				94	;
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	95	; SLM-LABEL: 'reduce_i32'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	96	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				97	; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				98	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				99	; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				100	; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	101	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				102	;
					; Instructions under test: one reduction per vector width, each on an undef operand.
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	103	%V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
				104	%V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
				105	%V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
				106	%V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
				107	%V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	108	ret i32 undef
				109	}
				110
				111	define i32 @reduce_i16(i32 %arg) {
					; Cost-model test for llvm.vector.reduce.add on undef vectors of 2 through 64 x i16.
					; The call results are unused and the function returns undef; what is checked is the
					; estimated cost reported for each instruction, once per FileCheck prefix used below.
					; Here the AVX-512 configurations are checked separately (AVX512F, AVX512BW, AVX512DQ)
					; instead of under the shared AVX512 prefix, because the expected %V64 cost differs:
					; 13 for AVX512F and AVX512DQ, 12 for AVX512BW.
					; The check lines are autogenerated (see the note on the first line of the file), so
					; regenerate them with utils/update_analyze_test_checks.py rather than editing by hand.
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	112	; SSE-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	113	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				114	; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				115	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				116	; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				117	; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				118	; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	119	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	120	;
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	121	; AVX1-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	122	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				123	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				124	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				125	; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				126	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				127	; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	128	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				129	;
				130	; AVX2-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	131	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				132	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				133	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				134	; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				135	; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				136	; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	137	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	138	;
				139	; AVX512F-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	140	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				141	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				142	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				143	; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				144	; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				145	; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	146	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				147	;
				148	; AVX512BW-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	149	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				150	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				151	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				152	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				153	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				154	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	155	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				156	;
				157	; AVX512DQ-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	158	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				159	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				160	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				161	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				162	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				163	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	164	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				165	;
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	166	; SLM-LABEL: 'reduce_i16'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	167	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				168	; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				169	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				170	; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				171	; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				172	; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	173	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				174	;
					; Instructions under test: one reduction per vector width, each on an undef operand.
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	175	%V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
				176	%V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
				177	%V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
				178	%V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
				179	%V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
				180	%V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	181	ret i32 undef
				182	}
				183
				184	define i32 @reduce_i8(i32 %arg) {
					; Cost-model test for llvm.vector.reduce.add on undef vectors of 2 through 128 x i8.
					; The call results are unused and the function returns undef; what is checked is the
					; estimated cost reported for each instruction, once per FileCheck prefix used below.
					; Here the AVX-512 configurations are checked separately (AVX512F, AVX512BW, AVX512DQ)
					; instead of under the shared AVX512 prefix, because the expected %V128 cost differs:
					; 15 for AVX512F and AVX512DQ, 14 for AVX512BW.
					; The check lines are autogenerated (see the note on the first line of the file), so
					; regenerate them with utils/update_analyze_test_checks.py rather than editing by hand.
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	185	; SSE-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	186	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				187	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				188	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				189	; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				190	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				191	; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				192	; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	1b59a16	2019-10-12 13:21:50 +0000	[diff] [blame]	193	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	194	;
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	195	; AVX1-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	196	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				197	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				198	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				199	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				200	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				201	; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				202	; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	203	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				204	;
				205	; AVX2-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	206	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				207	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				208	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				209	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				210	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				211	; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				212	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Craig Topper	f4c67df	2020-03-22 11:09:55 -0700	[diff] [blame]	213	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	214	;
				215	; AVX512F-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	216	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				217	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				218	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				219	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				220	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				221	; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				222	; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	223	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				224	;
				225	; AVX512BW-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	226	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				227	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				228	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				229	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				230	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				231	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				232	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	233	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				234	;
				235	; AVX512DQ-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	236	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				237	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				238	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				239	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				240	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				241	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				242	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	243	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				244	;
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	245	; SLM-LABEL: 'reduce_i8'
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	246	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				247	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				248	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				249	; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				250	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				251	; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				252	; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	1b986b4	2019-11-06 17:04:07 +0000	[diff] [blame]	253	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				254	;
					; Instructions under test: one reduction per vector width, each on an undef operand.
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	255	%V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
				256	%V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
				257	%V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
				258	%V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
				259	%V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
				260	%V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
				261	%V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	262	ret i32 undef
				263	}
				264
					; Declarations of the llvm.vector.reduce.add overloads called by the test functions
					; in this file, grouped by element type: i64, i32, i16, then i8. Each takes one
					; vector operand and returns a scalar of the vector's element type.
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	265	declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
				266	declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
				267	declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
				268	declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
				269	declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	270
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	271	declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
				272	declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
				273	declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
				274	declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
				275	declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	276
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	277	declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
				278	declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
				279	declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
				280	declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
				281	declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
				282	declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame]	283
Amara Emerson	322d0af	2020-10-02 18:30:53 -0700	[diff] [blame^]	284	declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
				285	declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
				286	declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
				287	declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
				288	declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
				289	declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
				290	declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)