Blame - llvm/test/Analysis/CostModel/X86/reduce-add.ll - toolchain/llvm-project

blob: 046aaf04e338d3bc9f03bc07e76de58235d8037f [file] [log] [blame]

Simon Pilgrim	e612ab0	2018-10-20 14:29:59 +0000	[diff] [blame^]	1	; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
				2	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
				3	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
				4	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
				5	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
				6	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
				7	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
				8	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
				9	; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
				10
				11	define i32 @reduce_i64(i32 %arg) {
					; Cost-model test for integer add reductions over i64 elements. The body
					; calls llvm.experimental.vector.reduce.add on undef vectors of 1, 2, 4, 8
					; and 16 lanes; the call results, %arg and the returned value are never
					; used. The assertions below pin the cost printed for each call under every
					; RUN configuration. The SSE2, SSSE3 and SSE42 runs each have their own
					; block, while the shared AVX and AVX512 prefixes cover the remaining runs.
				12	; SSE2-LABEL: 'reduce_i64'
				13	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				14	; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				15	; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				16	; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				17	; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				18	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				19	;
				20	; SSSE3-LABEL: 'reduce_i64'
				21	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				22	; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				23	; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				24	; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				25	; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				26	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				27	;
				28	; SSE42-LABEL: 'reduce_i64'
				29	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				30	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				31	; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				32	; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				33	; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				34	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				35	;
				36	; AVX-LABEL: 'reduce_i64'
				37	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				38	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				39	; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				40	; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				41	; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				42	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				43	;
				44	; AVX512-LABEL: 'reduce_i64'
				45	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				46	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				47	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				48	; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				49	; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				50	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				51	;
				52	%V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
				53	%V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
				54	%V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
				55	%V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
				56	%V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
				57	ret i32 undef
				58	}
				59
				60	define i32 @reduce_i32(i32 %arg) {
					; Cost-model test for integer add reductions over i32 elements. The body
					; calls llvm.experimental.vector.reduce.add on undef vectors of 2, 4, 8, 16
					; and 32 lanes; the call results, %arg and the returned value are never
					; used. The assertions below pin the cost printed for each call under every
					; RUN configuration. The SSE2, SSSE3 and SSE42 runs each have their own
					; block, while the shared AVX and AVX512 prefixes cover the remaining runs.
				61	; SSE2-LABEL: 'reduce_i32'
				62	; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				63	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				64	; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				65	; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				66	; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				67	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				68	;
				69	; SSSE3-LABEL: 'reduce_i32'
				70	; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				71	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				72	; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				73	; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				74	; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				75	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				76	;
				77	; SSE42-LABEL: 'reduce_i32'
				78	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				79	; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				80	; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				81	; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				82	; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				83	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				84	;
				85	; AVX-LABEL: 'reduce_i32'
				86	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				87	; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				88	; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				89	; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				90	; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				91	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				92	;
				93	; AVX512-LABEL: 'reduce_i32'
				94	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				95	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				96	; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				97	; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				98	; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				99	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				100	;
				101	%V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
				102	%V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
				103	%V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
				104	%V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
				105	%V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
				106	ret i32 undef
				107	}
				108
				109	define i32 @reduce_i16(i32 %arg) {
					; Cost-model test for integer add reductions over i16 elements. The body
					; calls llvm.experimental.vector.reduce.add on undef vectors of 4, 8, 16, 32
					; and 64 lanes; the call results, %arg and the returned value are never
					; used. Unlike the i64/i32 tests, every AVX and AVX512 run has its own
					; assertion block here because the expected costs diverge between them
					; (v16i16 is 36 with AVX1 but 24 with AVX2; v32i16 is 42 with AVX512BW but
					; 27 with AVX512F and AVX512DQ).
				110	; SSE2-LABEL: 'reduce_i16'
				111	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				112	; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				113	; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				114	; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				115	; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				116	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				117	;
				118	; SSSE3-LABEL: 'reduce_i16'
				119	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				120	; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				121	; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				122	; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				123	; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				124	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				125	;
				126	; SSE42-LABEL: 'reduce_i16'
				127	; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				128	; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				129	; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				130	; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				131	; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				132	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				133	;
				134	; AVX1-LABEL: 'reduce_i16'
				135	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				136	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				137	; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				138	; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				139	; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				140	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				141	;
				142	; AVX2-LABEL: 'reduce_i16'
				143	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				144	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				145	; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				146	; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				147	; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				148	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				149	;
				150	; AVX512F-LABEL: 'reduce_i16'
				151	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				152	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				153	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				154	; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				155	; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				156	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				157	;
				158	; AVX512BW-LABEL: 'reduce_i16'
				159	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				160	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				161	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				162	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				163	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				164	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				165	;
				166	; AVX512DQ-LABEL: 'reduce_i16'
				167	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				168	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				169	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				170	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				171	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				172	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				173	;
				174	%V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
				175	%V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
				176	%V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
				177	%V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
				178	%V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
				179	ret i32 undef
				180	}
				181
				182	define i32 @reduce_i8(i32 %arg) {
					; Cost-model test for integer add reductions over i8 elements. The body
					; calls llvm.experimental.vector.reduce.add on undef vectors of 8, 16, 32,
					; 64 and 128 lanes; the call results, %arg and the returned value are never
					; used. As in the i16 test, every AVX and AVX512 run has its own assertion
					; block because the expected costs diverge between them (v32i8 is 57 with
					; AVX1 but 42 with AVX2; v64i8 is 76 with AVX512BW but 45 with AVX512F and
					; AVX512DQ).
				183	; SSE2-LABEL: 'reduce_i8'
				184	; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				185	; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				186	; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				187	; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				188	; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				189	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				190	;
				191	; SSSE3-LABEL: 'reduce_i8'
				192	; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				193	; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				194	; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				195	; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				196	; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				197	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				198	;
				199	; SSE42-LABEL: 'reduce_i8'
				200	; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				201	; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				202	; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				203	; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				204	; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				205	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				206	;
				207	; AVX1-LABEL: 'reduce_i8'
				208	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				209	; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				210	; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				211	; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				212	; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				213	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				214	;
				215	; AVX2-LABEL: 'reduce_i8'
				216	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				217	; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				218	; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				219	; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				220	; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				221	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				222	;
				223	; AVX512F-LABEL: 'reduce_i8'
				224	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				225	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				226	; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				227	; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				228	; AVX512F-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				229	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				230	;
				231	; AVX512BW-LABEL: 'reduce_i8'
				232	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				233	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				234	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				235	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				236	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				237	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				238	;
				239	; AVX512DQ-LABEL: 'reduce_i8'
				240	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				241	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				242	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				243	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				244	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				245	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				246	;
				247	%V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
				248	%V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
				249	%V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
				250	%V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
				251	%V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
				252	ret i32 undef
				253	}
				254
				255	declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>) ; i64 add-reduction intrinsics called by @reduce_i64, one per vector width
				256	declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
				257	declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
				258	declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
				259	declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
				260
				261	declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>) ; i32 add-reduction intrinsics called by @reduce_i32, one per vector width
				262	declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
				263	declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
				264	declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
				265	declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
				266
				267	declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>) ; i16 add-reduction intrinsics called by @reduce_i16, one per vector width
				268	declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
				269	declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
				270	declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
				271	declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
				272
				273	declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>) ; i8 add-reduction intrinsics called by @reduce_i8, one per vector width
				274	declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
				275	declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
				276	declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
				277	declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)