Blame - clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c - toolchain/llvm-project

blob: 58d911d3ff7062d41090960d9cb48144498eb1b1 [file] [log] [blame]

Abderrazek Zaafrani	b5ac56f	2018-03-23 00:08:40 +0000	[diff] [blame]	1	// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
				2	// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \
				3	// RUN: | opt -S -mem2reg \
				4	// RUN: | FileCheck %s
				5
				6	// REQUIRES: arm-registered-target
				7
				8	#include <arm_neon.h>
				9
				10	// CHECK-LABEL: test_vabs_f16
				11	// CHECK: [[ABS:%.*]] = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
				12	// CHECK: ret <4 x half> [[ABS]]
				13	float16x4_t test_vabs_f16(float16x4_t a) {
				14	return vabs_f16(a);
				15	}
				16
				17	// CHECK-LABEL: test_vabsq_f16
				18	// CHECK: [[ABS:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
				19	// CHECK: ret <8 x half> [[ABS]]
				20	float16x8_t test_vabsq_f16(float16x8_t a) {
				21	return vabsq_f16(a);
				22	}
				23
				24	// CHECK-LABEL: test_vceqz_f16
				25	// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, zeroinitializer
				26	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				27	// CHECK: ret <4 x i16> [[TMP2]]
				28	uint16x4_t test_vceqz_f16(float16x4_t a) {
				29	return vceqz_f16(a);
				30	}
				31
				32	// CHECK-LABEL: test_vceqzq_f16
				33	// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, zeroinitializer
				34	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				35	// CHECK: ret <8 x i16> [[TMP2]]
				36	uint16x8_t test_vceqzq_f16(float16x8_t a) {
				37	return vceqzq_f16(a);
				38	}
				39
				40	// CHECK-LABEL: test_vcgez_f16
				41	// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, zeroinitializer
				42	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				43	// CHECK: ret <4 x i16> [[TMP2]]
				44	uint16x4_t test_vcgez_f16(float16x4_t a) {
				45	return vcgez_f16(a);
				46	}
				47
				48	// CHECK-LABEL: test_vcgezq_f16
				49	// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, zeroinitializer
				50	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				51	// CHECK: ret <8 x i16> [[TMP2]]
				52	uint16x8_t test_vcgezq_f16(float16x8_t a) {
				53	return vcgezq_f16(a);
				54	}
				55
				56	// CHECK-LABEL: test_vcgtz_f16
				57	// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, zeroinitializer
				58	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				59	// CHECK: ret <4 x i16> [[TMP2]]
				60	uint16x4_t test_vcgtz_f16(float16x4_t a) {
				61	return vcgtz_f16(a);
				62	}
				63
				64	// CHECK-LABEL: test_vcgtzq_f16
				65	// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, zeroinitializer
				66	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				67	// CHECK: ret <8 x i16> [[TMP2]]
				68	uint16x8_t test_vcgtzq_f16(float16x8_t a) {
				69	return vcgtzq_f16(a);
				70	}
				71
				72	// CHECK-LABEL: test_vclez_f16
				73	// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, zeroinitializer
				74	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				75	// CHECK: ret <4 x i16> [[TMP2]]
				76	uint16x4_t test_vclez_f16(float16x4_t a) {
				77	return vclez_f16(a);
				78	}
				79
				80	// CHECK-LABEL: test_vclezq_f16
				81	// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, zeroinitializer
				82	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				83	// CHECK: ret <8 x i16> [[TMP2]]
				84	uint16x8_t test_vclezq_f16(float16x8_t a) {
				85	return vclezq_f16(a);
				86	}
				87
				88	// CHECK-LABEL: test_vcltz_f16
				89	// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, zeroinitializer
				90	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				91	// CHECK: ret <4 x i16> [[TMP2]]
				92	uint16x4_t test_vcltz_f16(float16x4_t a) {
				93	return vcltz_f16(a);
				94	}
				95
				96	// CHECK-LABEL: test_vcltzq_f16
				97	// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, zeroinitializer
				98	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				99	// CHECK: ret <8 x i16> [[TMP2]]
				100	uint16x8_t test_vcltzq_f16(float16x8_t a) {
				101	return vcltzq_f16(a);
				102	}
				103
				104	// CHECK-LABEL: test_vcvt_f16_s16
				105	// CHECK: [[VCVT:%.*]] = sitofp <4 x i16> %a to <4 x half>
				106	// CHECK: ret <4 x half> [[VCVT]]
				107	float16x4_t test_vcvt_f16_s16 (int16x4_t a) {
				108	return vcvt_f16_s16(a);
				109	}
				110
				111	// CHECK-LABEL: test_vcvtq_f16_s16
				112	// CHECK: [[VCVT:%.*]] = sitofp <8 x i16> %a to <8 x half>
				113	// CHECK: ret <8 x half> [[VCVT]]
				114	float16x8_t test_vcvtq_f16_s16 (int16x8_t a) {
				115	return vcvtq_f16_s16(a);
				116	}
				117
				118	// CHECK-LABEL: test_vcvt_f16_u16
				119	// CHECK: [[VCVT:%.*]] = uitofp <4 x i16> %a to <4 x half>
				120	// CHECK: ret <4 x half> [[VCVT]]
				121	float16x4_t test_vcvt_f16_u16 (uint16x4_t a) {
				122	return vcvt_f16_u16(a);
				123	}
				124
				125	// CHECK-LABEL: test_vcvtq_f16_u16
				126	// CHECK: [[VCVT:%.*]] = uitofp <8 x i16> %a to <8 x half>
				127	// CHECK: ret <8 x half> [[VCVT]]
				128	float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
				129	return vcvtq_f16_u16(a);
				130	}
				131
				132	// CHECK-LABEL: test_vcvt_s16_f16
				133	// CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
				134	// CHECK: ret <4 x i16> [[VCVT]]
				135	int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
				136	return vcvt_s16_f16(a);
				137	}
				138
				139	// CHECK-LABEL: test_vcvtq_s16_f16
				140	// CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
				141	// CHECK: ret <8 x i16> [[VCVT]]
				142	int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
				143	return vcvtq_s16_f16(a);
				144	}
				145
				146	// CHECK-LABEL: test_vcvt_u16_f16
				147	// CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
				148	// CHECK: ret <4 x i16> [[VCVT]]
				149	int16x4_t test_vcvt_u16_f16 (float16x4_t a) {
				150	return vcvt_u16_f16(a);
				151	}
				152
				153	// CHECK-LABEL: test_vcvtq_u16_f16
				154	// CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
				155	// CHECK: ret <8 x i16> [[VCVT]]
				156	int16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
				157	return vcvtq_u16_f16(a);
				158	}
				159
				160	// CHECK-LABEL: test_vcvta_s16_f16
				161	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
				162	// CHECK: ret <4 x i16> [[VCVT]]
				163	int16x4_t test_vcvta_s16_f16 (float16x4_t a) {
				164	return vcvta_s16_f16(a);
				165	}
				166
Luke Geeson	da2b2e8	2018-06-15 10:10:45 +0000	[diff] [blame]	167	// CHECK-LABEL: test_vcvta_u16_f16
				168	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
				169	// CHECK: ret <4 x i16> [[VCVT]]
				170	int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
				171	return vcvta_u16_f16(a);
				172	}
				173
Abderrazek Zaafrani	b5ac56f	2018-03-23 00:08:40 +0000	[diff] [blame]	174	// CHECK-LABEL: test_vcvtaq_s16_f16
				175	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
				176	// CHECK: ret <8 x i16> [[VCVT]]
				177	int16x8_t test_vcvtaq_s16_f16 (float16x8_t a) {
				178	return vcvtaq_s16_f16(a);
				179	}
				180
				181	// CHECK-LABEL: test_vcvtm_s16_f16
				182	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
				183	// CHECK: ret <4 x i16> [[VCVT]]
				184	int16x4_t test_vcvtm_s16_f16 (float16x4_t a) {
				185	return vcvtm_s16_f16(a);
				186	}
				187
				188	// CHECK-LABEL: test_vcvtmq_s16_f16
				189	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
				190	// CHECK: ret <8 x i16> [[VCVT]]
				191	int16x8_t test_vcvtmq_s16_f16 (float16x8_t a) {
				192	return vcvtmq_s16_f16(a);
				193	}
				194
				195	// CHECK-LABEL: test_vcvtm_u16_f16
				196	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
				197	// CHECK: ret <4 x i16> [[VCVT]]
				198	uint16x4_t test_vcvtm_u16_f16 (float16x4_t a) {
				199	return vcvtm_u16_f16(a);
				200	}
				201
				202	// CHECK-LABEL: test_vcvtmq_u16_f16
				203	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
				204	// CHECK: ret <8 x i16> [[VCVT]]
				205	uint16x8_t test_vcvtmq_u16_f16 (float16x8_t a) {
				206	return vcvtmq_u16_f16(a);
				207	}
				208
				209	// CHECK-LABEL: test_vcvtn_s16_f16
				210	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
				211	// CHECK: ret <4 x i16> [[VCVT]]
				212	int16x4_t test_vcvtn_s16_f16 (float16x4_t a) {
				213	return vcvtn_s16_f16(a);
				214	}
				215
				216	// CHECK-LABEL: test_vcvtnq_s16_f16
				217	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
				218	// CHECK: ret <8 x i16> [[VCVT]]
				219	int16x8_t test_vcvtnq_s16_f16 (float16x8_t a) {
				220	return vcvtnq_s16_f16(a);
				221	}
				222
				223	// CHECK-LABEL: test_vcvtn_u16_f16
				224	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
				225	// CHECK: ret <4 x i16> [[VCVT]]
				226	uint16x4_t test_vcvtn_u16_f16 (float16x4_t a) {
				227	return vcvtn_u16_f16(a);
				228	}
				229
				230	// CHECK-LABEL: test_vcvtnq_u16_f16
				231	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
				232	// CHECK: ret <8 x i16> [[VCVT]]
				233	uint16x8_t test_vcvtnq_u16_f16 (float16x8_t a) {
				234	return vcvtnq_u16_f16(a);
				235	}
				236
				237	// CHECK-LABEL: test_vcvtp_s16_f16
				238	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
				239	// CHECK: ret <4 x i16> [[VCVT]]
				240	int16x4_t test_vcvtp_s16_f16 (float16x4_t a) {
				241	return vcvtp_s16_f16(a);
				242	}
				243
				244	// CHECK-LABEL: test_vcvtpq_s16_f16
				245	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
				246	// CHECK: ret <8 x i16> [[VCVT]]
				247	int16x8_t test_vcvtpq_s16_f16 (float16x8_t a) {
				248	return vcvtpq_s16_f16(a);
				249	}
				250
				251	// CHECK-LABEL: test_vcvtp_u16_f16
				252	// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
				253	// CHECK: ret <4 x i16> [[VCVT]]
				254	uint16x4_t test_vcvtp_u16_f16 (float16x4_t a) {
				255	return vcvtp_u16_f16(a);
				256	}
				257
				258	// CHECK-LABEL: test_vcvtpq_u16_f16
				259	// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
				260	// CHECK: ret <8 x i16> [[VCVT]]
				261	uint16x8_t test_vcvtpq_u16_f16 (float16x8_t a) {
				262	return vcvtpq_u16_f16(a);
				263	}
				264
				265	// FIXME: Fix the zero constant when fp16 non-storage-only type becomes available.
				266	// CHECK-LABEL: test_vneg_f16
				267	// CHECK: [[NEG:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
				268	// CHECK: ret <4 x half> [[NEG]]
				269	float16x4_t test_vneg_f16(float16x4_t a) {
				270	return vneg_f16(a);
				271	}
				272
				273	// CHECK-LABEL: test_vnegq_f16
				274	// CHECK: [[NEG:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
				275	// CHECK: ret <8 x half> [[NEG]]
				276	float16x8_t test_vnegq_f16(float16x8_t a) {
				277	return vnegq_f16(a);
				278	}
				279
				280	// CHECK-LABEL: test_vrecpe_f16
				281	// CHECK: [[RCP:%.*]] = call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
				282	// CHECK: ret <4 x half> [[RCP]]
				283	float16x4_t test_vrecpe_f16(float16x4_t a) {
				284	return vrecpe_f16(a);
				285	}
				286
				287	// CHECK-LABEL: test_vrecpeq_f16
				288	// CHECK: [[RCP:%.*]] = call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
				289	// CHECK: ret <8 x half> [[RCP]]
				290	float16x8_t test_vrecpeq_f16(float16x8_t a) {
				291	return vrecpeq_f16(a);
				292	}
				293
				294	// CHECK-LABEL: test_vrnd_f16
				295	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
				296	// CHECK: ret <4 x half> [[RND]]
				297	float16x4_t test_vrnd_f16(float16x4_t a) {
				298	return vrnd_f16(a);
				299	}
				300
				301	// CHECK-LABEL: test_vrndq_f16
				302	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
				303	// CHECK: ret <8 x half> [[RND]]
				304	float16x8_t test_vrndq_f16(float16x8_t a) {
				305	return vrndq_f16(a);
				306	}
				307
				308	// CHECK-LABEL: test_vrnda_f16
				309	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
				310	// CHECK: ret <4 x half> [[RND]]
				311	float16x4_t test_vrnda_f16(float16x4_t a) {
				312	return vrnda_f16(a);
				313	}
				314
				315	// CHECK-LABEL: test_vrndaq_f16
				316	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
				317	// CHECK: ret <8 x half> [[RND]]
				318	float16x8_t test_vrndaq_f16(float16x8_t a) {
				319	return vrndaq_f16(a);
				320	}
				321
				322	// CHECK-LABEL: test_vrndm_f16
				323	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
				324	// CHECK: ret <4 x half> [[RND]]
				325	float16x4_t test_vrndm_f16(float16x4_t a) {
				326	return vrndm_f16(a);
				327	}
				328
				329	// CHECK-LABEL: test_vrndmq_f16
				330	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
				331	// CHECK: ret <8 x half> [[RND]]
				332	float16x8_t test_vrndmq_f16(float16x8_t a) {
				333	return vrndmq_f16(a);
				334	}
				335
				336	// CHECK-LABEL: test_vrndn_f16
				337	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
				338	// CHECK: ret <4 x half> [[RND]]
				339	float16x4_t test_vrndn_f16(float16x4_t a) {
				340	return vrndn_f16(a);
				341	}
				342
				343	// CHECK-LABEL: test_vrndnq_f16
				344	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
				345	// CHECK: ret <8 x half> [[RND]]
				346	float16x8_t test_vrndnq_f16(float16x8_t a) {
				347	return vrndnq_f16(a);
				348	}
				349
				350	// CHECK-LABEL: test_vrndp_f16
				351	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
				352	// CHECK: ret <4 x half> [[RND]]
				353	float16x4_t test_vrndp_f16(float16x4_t a) {
				354	return vrndp_f16(a);
				355	}
				356
				357	// CHECK-LABEL: test_vrndpq_f16
				358	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
				359	// CHECK: ret <8 x half> [[RND]]
				360	float16x8_t test_vrndpq_f16(float16x8_t a) {
				361	return vrndpq_f16(a);
				362	}
				363
				364	// CHECK-LABEL: test_vrndx_f16
				365	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
				366	// CHECK: ret <4 x half> [[RND]]
				367	float16x4_t test_vrndx_f16(float16x4_t a) {
				368	return vrndx_f16(a);
				369	}
				370
				371	// CHECK-LABEL: test_vrndxq_f16
				372	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
				373	// CHECK: ret <8 x half> [[RND]]
				374	float16x8_t test_vrndxq_f16(float16x8_t a) {
				375	return vrndxq_f16(a);
				376	}
				377
				378	// CHECK-LABEL: test_vrsqrte_f16
				379	// CHECK: [[RND:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
				380	// CHECK: ret <4 x half> [[RND]]
				381	float16x4_t test_vrsqrte_f16(float16x4_t a) {
				382	return vrsqrte_f16(a);
				383	}
				384
				385	// CHECK-LABEL: test_vrsqrteq_f16
				386	// CHECK: [[RND:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
				387	// CHECK: ret <8 x half> [[RND]]
				388	float16x8_t test_vrsqrteq_f16(float16x8_t a) {
				389	return vrsqrteq_f16(a);
				390	}
				391
				392	// CHECK-LABEL: test_vadd_f16
				393	// CHECK: [[ADD:%.*]] = fadd <4 x half> %a, %b
				394	// CHECK: ret <4 x half> [[ADD]]
				395	float16x4_t test_vadd_f16(float16x4_t a, float16x4_t b) {
				396	return vadd_f16(a, b);
				397	}
				398
				399	// CHECK-LABEL: test_vaddq_f16
				400	// CHECK: [[ADD:%.*]] = fadd <8 x half> %a, %b
				401	// CHECK: ret <8 x half> [[ADD]]
				402	float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b) {
				403	return vaddq_f16(a, b);
				404	}
				405
				406	// CHECK-LABEL: test_vabd_f16
				407	// CHECK: [[ABD:%.*]] = call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
				408	// CHECK: ret <4 x half> [[ABD]]
				409	float16x4_t test_vabd_f16(float16x4_t a, float16x4_t b) {
				410	return vabd_f16(a, b);
				411	}
				412
				413	// CHECK-LABEL: test_vabdq_f16
				414	// CHECK: [[ABD:%.*]] = call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
				415	// CHECK: ret <8 x half> [[ABD]]
				416	float16x8_t test_vabdq_f16(float16x8_t a, float16x8_t b) {
				417	return vabdq_f16(a, b);
				418	}
				419
				420	// CHECK-LABEL: test_vcage_f16
				421	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
				422	// CHECK: ret <4 x i16> [[ABS]]
				423	uint16x4_t test_vcage_f16(float16x4_t a, float16x4_t b) {
				424	return vcage_f16(a, b);
				425	}
				426
				427	// CHECK-LABEL: test_vcageq_f16
				428	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
				429	// CHECK: ret <8 x i16> [[ABS]]
				430	uint16x8_t test_vcageq_f16(float16x8_t a, float16x8_t b) {
				431	return vcageq_f16(a, b);
				432	}
				433
				434	// CHECK-LABEL: test_vcagt_f16
				435	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
				436	// CHECK: ret <4 x i16> [[ABS]]
				437	uint16x4_t test_vcagt_f16(float16x4_t a, float16x4_t b) {
				438	return vcagt_f16(a, b);
				439	}
				440
				441	// CHECK-LABEL: test_vcagtq_f16
				442	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
				443	// CHECK: ret <8 x i16> [[ABS]]
				444	uint16x8_t test_vcagtq_f16(float16x8_t a, float16x8_t b) {
				445	return vcagtq_f16(a, b);
				446	}
				447
				448	// CHECK-LABEL: test_vcale_f16
				449	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
				450	// CHECK: ret <4 x i16> [[ABS]]
				451	uint16x4_t test_vcale_f16(float16x4_t a, float16x4_t b) {
				452	return vcale_f16(a, b);
				453	}
				454
				455	// CHECK-LABEL: test_vcaleq_f16
				456	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
				457	// CHECK: ret <8 x i16> [[ABS]]
				458	uint16x8_t test_vcaleq_f16(float16x8_t a, float16x8_t b) {
				459	return vcaleq_f16(a, b);
				460	}
				461
				462	// CHECK-LABEL: test_vcalt_f16
				463	// CHECK: [[ABS:%.*]] = call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
				464	// CHECK: ret <4 x i16> [[ABS]]
				465	uint16x4_t test_vcalt_f16(float16x4_t a, float16x4_t b) {
				466	return vcalt_f16(a, b);
				467	}
				468
				469	// CHECK-LABEL: test_vcaltq_f16
				470	// CHECK: [[ABS:%.*]] = call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
				471	// CHECK: ret <8 x i16> [[ABS]]
				472	uint16x8_t test_vcaltq_f16(float16x8_t a, float16x8_t b) {
				473	return vcaltq_f16(a, b);
				474	}
				475
				476	// CHECK-LABEL: test_vceq_f16
				477	// CHECK: [[TMP1:%.*]] = fcmp oeq <4 x half> %a, %b
				478	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				479	// CHECK: ret <4 x i16> [[TMP2]]
				480	uint16x4_t test_vceq_f16(float16x4_t a, float16x4_t b) {
				481	return vceq_f16(a, b);
				482	}
				483
				484	// CHECK-LABEL: test_vceqq_f16
				485	// CHECK: [[TMP1:%.*]] = fcmp oeq <8 x half> %a, %b
				486	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				487	// CHECK: ret <8 x i16> [[TMP2]]
				488	uint16x8_t test_vceqq_f16(float16x8_t a, float16x8_t b) {
				489	return vceqq_f16(a, b);
				490	}
				491
				492	// CHECK-LABEL: test_vcge_f16
				493	// CHECK: [[TMP1:%.*]] = fcmp oge <4 x half> %a, %b
				494	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				495	// CHECK: ret <4 x i16> [[TMP2]]
				496	uint16x4_t test_vcge_f16(float16x4_t a, float16x4_t b) {
				497	return vcge_f16(a, b);
				498	}
				499
				500	// CHECK-LABEL: test_vcgeq_f16
				501	// CHECK: [[TMP1:%.*]] = fcmp oge <8 x half> %a, %b
				502	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				503	// CHECK: ret <8 x i16> [[TMP2]]
				504	uint16x8_t test_vcgeq_f16(float16x8_t a, float16x8_t b) {
				505	return vcgeq_f16(a, b);
				506	}
				507
				508	// CHECK-LABEL: test_vcgt_f16
				509	// CHECK: [[TMP1:%.*]] = fcmp ogt <4 x half> %a, %b
				510	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				511	// CHECK: ret <4 x i16> [[TMP2]]
				512	uint16x4_t test_vcgt_f16(float16x4_t a, float16x4_t b) {
				513	return vcgt_f16(a, b);
				514	}
				515
				516	// CHECK-LABEL: test_vcgtq_f16
				517	// CHECK: [[TMP1:%.*]] = fcmp ogt <8 x half> %a, %b
				518	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				519	// CHECK: ret <8 x i16> [[TMP2]]
				520	uint16x8_t test_vcgtq_f16(float16x8_t a, float16x8_t b) {
				521	return vcgtq_f16(a, b);
				522	}
				523
				524	// CHECK-LABEL: test_vcle_f16
				525	// CHECK: [[TMP1:%.*]] = fcmp ole <4 x half> %a, %b
				526	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				527	// CHECK: ret <4 x i16> [[TMP2]]
				528	uint16x4_t test_vcle_f16(float16x4_t a, float16x4_t b) {
				529	return vcle_f16(a, b);
				530	}
				531
				532	// CHECK-LABEL: test_vcleq_f16
				533	// CHECK: [[TMP1:%.*]] = fcmp ole <8 x half> %a, %b
				534	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				535	// CHECK: ret <8 x i16> [[TMP2]]
				536	uint16x8_t test_vcleq_f16(float16x8_t a, float16x8_t b) {
				537	return vcleq_f16(a, b);
				538	}
				539
				540	// CHECK-LABEL: test_vclt_f16
				541	// CHECK: [[TMP1:%.*]] = fcmp olt <4 x half> %a, %b
				542	// CHECK: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i16>
				543	// CHECK: ret <4 x i16> [[TMP2]]
				544	uint16x4_t test_vclt_f16(float16x4_t a, float16x4_t b) {
				545	return vclt_f16(a, b);
				546	}
				547
				548	// CHECK-LABEL: test_vcltq_f16
				549	// CHECK: [[TMP1:%.*]] = fcmp olt <8 x half> %a, %b
				550	// CHECK: [[TMP2:%.*]] = sext <8 x i1> [[TMP1:%.*]] to <8 x i16>
				551	// CHECK: ret <8 x i16> [[TMP2]]
				552	uint16x8_t test_vcltq_f16(float16x8_t a, float16x8_t b) {
				553	return vcltq_f16(a, b);
				554	}
				555
				556	// CHECK-LABEL: test_vcvt_n_f16_s16
				557	// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
				558	// CHECK: ret <4 x half> [[CVT]]
				559	float16x4_t test_vcvt_n_f16_s16(int16x4_t a) {
				560	return vcvt_n_f16_s16(a, 2);
				561	}
				562
				563	// CHECK-LABEL: test_vcvtq_n_f16_s16
				564	// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
				565	// CHECK: ret <8 x half> [[CVT]]
				566	float16x8_t test_vcvtq_n_f16_s16(int16x8_t a) {
				567	return vcvtq_n_f16_s16(a, 2);
				568	}
				569
				570	// CHECK-LABEL: test_vcvt_n_f16_u16
				571	// CHECK: [[CVT:%.*]] = call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %vcvt_n, i32 2)
				572	// CHECK: ret <4 x half> [[CVT]]
				573	float16x4_t test_vcvt_n_f16_u16(uint16x4_t a) {
				574	return vcvt_n_f16_u16(a, 2);
				575	}
				576
				577	// CHECK-LABEL: test_vcvtq_n_f16_u16
				578	// CHECK: [[CVT:%.*]] = call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %vcvt_n, i32 2)
				579	// CHECK: ret <8 x half> [[CVT]]
				580	float16x8_t test_vcvtq_n_f16_u16(uint16x8_t a) {
				581	return vcvtq_n_f16_u16(a, 2);
				582	}
				583
				584	// CHECK-LABEL: test_vcvt_n_s16_f16
				585	// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
				586	// CHECK: ret <4 x i16> [[CVT]]
				587	int16x4_t test_vcvt_n_s16_f16(float16x4_t a) {
				588	return vcvt_n_s16_f16(a, 2);
				589	}
				590
				591	// CHECK-LABEL: test_vcvtq_n_s16_f16
				592	// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
				593	// CHECK: ret <8 x i16> [[CVT]]
				594	int16x8_t test_vcvtq_n_s16_f16(float16x8_t a) {
				595	return vcvtq_n_s16_f16(a, 2);
				596	}
				597
				598	// CHECK-LABEL: test_vcvt_n_u16_f16
				599	// CHECK: [[CVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %vcvt_n, i32 2)
				600	// CHECK: ret <4 x i16> [[CVT]]
				601	uint16x4_t test_vcvt_n_u16_f16(float16x4_t a) {
				602	return vcvt_n_u16_f16(a, 2);
				603	}
				604
				605	// CHECK-LABEL: test_vcvtq_n_u16_f16
				606	// CHECK: [[CVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %vcvt_n, i32 2)
				607	// CHECK: ret <8 x i16> [[CVT]]
				608	uint16x8_t test_vcvtq_n_u16_f16(float16x8_t a) {
				609	return vcvtq_n_u16_f16(a, 2);
				610	}
				611
				612	// CHECK-LABEL: test_vmax_f16
				613	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
				614	// CHECK: ret <4 x half> [[MAX]]
				615	float16x4_t test_vmax_f16(float16x4_t a, float16x4_t b) {
				616	return vmax_f16(a, b);
				617	}
				618
				619	// CHECK-LABEL: test_vmaxq_f16
				620	// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
				621	// CHECK: ret <8 x half> [[MAX]]
				622	float16x8_t test_vmaxq_f16(float16x8_t a, float16x8_t b) {
				623	return vmaxq_f16(a, b);
				624	}
				625
				626	// CHECK-LABEL: test_vmaxnm_f16
				627	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
				628	// CHECK: ret <4 x half> [[MAX]]
				629	float16x4_t test_vmaxnm_f16(float16x4_t a, float16x4_t b) {
				630	return vmaxnm_f16(a, b);
				631	}
				632
				633	// CHECK-LABEL: test_vmaxnmq_f16
				634	// CHECK: [[MAX:%.*]] = call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
				635	// CHECK: ret <8 x half> [[MAX]]
				636	float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) {
				637	return vmaxnmq_f16(a, b);
				638	}
				639
				640	// CHECK-LABEL: test_vmin_f16
				641	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
				642	// CHECK: ret <4 x half> [[MIN]]
				643	float16x4_t test_vmin_f16(float16x4_t a, float16x4_t b) {
				644	return vmin_f16(a, b);
				645	}
				646
				647	// CHECK-LABEL: test_vminq_f16
				648	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
				649	// CHECK: ret <8 x half> [[MIN]]
				650	float16x8_t test_vminq_f16(float16x8_t a, float16x8_t b) {
				651	return vminq_f16(a, b);
				652	}
				653
				654	// CHECK-LABEL: test_vminnm_f16
				655	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
				656	// CHECK: ret <4 x half> [[MIN]]
				657	float16x4_t test_vminnm_f16(float16x4_t a, float16x4_t b) {
				658	return vminnm_f16(a, b);
				659	}
				660
				661	// CHECK-LABEL: test_vminnmq_f16
				662	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
				663	// CHECK: ret <8 x half> [[MIN]]
				664	float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) {
				665	return vminnmq_f16(a, b);
				666	}
				667
				668	// CHECK-LABEL: test_vmul_f16
				669	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, %b
				670	// CHECK: ret <4 x half> [[MUL]]
				671	float16x4_t test_vmul_f16(float16x4_t a, float16x4_t b) {
				672	return vmul_f16(a, b);
				673	}
				674
				675	// CHECK-LABEL: test_vmulq_f16
				676	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, %b
				677	// CHECK: ret <8 x half> [[MUL]]
				678	float16x8_t test_vmulq_f16(float16x8_t a, float16x8_t b) {
				679	return vmulq_f16(a, b);
				680	}
				681
				682	// CHECK-LABEL: test_vpadd_f16
				683	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
				684	// CHECK: ret <4 x half> [[ADD]]
				685	float16x4_t test_vpadd_f16(float16x4_t a, float16x4_t b) {
				686	return vpadd_f16(a, b);
				687	}
				688
				689	// CHECK-LABEL: test_vpmax_f16
				690	// CHECK: [[MAX:%.*]] = call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
				691	// CHECK: ret <4 x half> [[MAX]]
				692	float16x4_t test_vpmax_f16(float16x4_t a, float16x4_t b) {
				693	return vpmax_f16(a, b);
				694	}
				695
				696	// CHECK-LABEL: test_vpmin_f16
				697	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
				698	// CHECK: ret <4 x half> [[MIN]]
				699	float16x4_t test_vpmin_f16(float16x4_t a, float16x4_t b) {
				700	return vpmin_f16(a, b);
				701	}
				702
				703	// CHECK-LABEL: test_vrecps_f16
				704	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
				705	// CHECK: ret <4 x half> [[MIN]]
				706	float16x4_t test_vrecps_f16(float16x4_t a, float16x4_t b) {
				707	return vrecps_f16(a, b);
				708	}
				709
				710	// CHECK-LABEL: test_vrecpsq_f16
				711	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
				712	// CHECK: ret <8 x half> [[MIN]]
				713	float16x8_t test_vrecpsq_f16(float16x8_t a, float16x8_t b) {
				714	return vrecpsq_f16(a, b);
				715	}
				716
				717	// CHECK-LABEL: test_vrsqrts_f16
				718	// CHECK: [[MIN:%.*]] = call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
				719	// CHECK: ret <4 x half> [[MIN]]
				720	float16x4_t test_vrsqrts_f16(float16x4_t a, float16x4_t b) {
				721	return vrsqrts_f16(a, b);
				722	}
				723
				724	// CHECK-LABEL: test_vrsqrtsq_f16
				725	// CHECK: [[MIN:%.*]] = call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
				726	// CHECK: ret <8 x half> [[MIN]]
				727	float16x8_t test_vrsqrtsq_f16(float16x8_t a, float16x8_t b) {
				728	return vrsqrtsq_f16(a, b);
				729	}
				730
				731	// CHECK-LABEL: test_vsub_f16
				732	// CHECK: [[ADD:%.*]] = fsub <4 x half> %a, %b
				733	// CHECK: ret <4 x half> [[ADD]]
				734	float16x4_t test_vsub_f16(float16x4_t a, float16x4_t b) {
				735	return vsub_f16(a, b);
				736	}
				737
				738	// CHECK-LABEL: test_vsubq_f16
				739	// CHECK: [[ADD:%.*]] = fsub <8 x half> %a, %b
				740	// CHECK: ret <8 x half> [[ADD]]
				741	float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b) {
				742	return vsubq_f16(a, b);
				743	}
				744
				745	// CHECK-LABEL: test_vfma_f16
				746	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
				747	// CHECK: ret <4 x half> [[ADD]]
				748	float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
				749	return vfma_f16(a, b, c);
				750	}
				751
				752	// CHECK-LABEL: test_vfmaq_f16
				753	// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
				754	// CHECK: ret <8 x half> [[ADD]]
				755	float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
				756	return vfmaq_f16(a, b, c);
				757	}
				758
				759	// CHECK-LABEL: test_vfms_f16
				760	// CHECK: [[SUB:%.*]] = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
				761	// CHECK: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
				762	// CHECK: ret <4 x half> [[ADD]]
				763	float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
				764	return vfms_f16(a, b, c);
				765	}
				766
				767	// CHECK-LABEL: test_vfmsq_f16
				768	// CHECK: [[SUB:%.*]] = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
				769	// CHECK: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
				770	// CHECK: ret <8 x half> [[ADD]]
				771	float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
				772	return vfmsq_f16(a, b, c); // expected IR: b is negated (fsub from 0xH8000 lanes, i.e. -0.0), then llvm.fma.v8f16(-b, c, a)
				773	}
				774
				775	// CHECK-LABEL: test_vmul_lane_f16
				776	// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
				777	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP0]]
				778	// CHECK: ret <4 x half> [[MUL]]
				779	float16x4_t test_vmul_lane_f16(float16x4_t a, float16x4_t b) {
				780	return vmul_lane_f16(a, b, 3); // expected IR: lane 3 of b is splatted by a shufflevector, then fmul with a
				781	}
				782
				783	// CHECK-LABEL: test_vmulq_lane_f16
				784	// CHECK: [[TMP0:%.*]] = shufflevector <4 x half> %b, <4 x half> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
				785	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP0]]
				786	// CHECK: ret <8 x half> [[MUL]]
				787	float16x8_t test_vmulq_lane_f16(float16x8_t a, float16x4_t b) {
				788	return vmulq_lane_f16(a, b, 3); // b has only four lanes, so the lane index must be in 0..3; it is splatted to eight lanes and multiplied into a
				789	}
				790
				791	// CHECK-LABEL: test_vmul_n_f16
				792	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[b:%.*]], i32 0
				793	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[b]], i32 1
				794	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[b]], i32 2
				795	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[b]], i32 3
				796	// CHECK: [[MUL:%.*]] = fmul <4 x half> %a, [[TMP3]]
				797	// CHECK: ret <4 x half> [[MUL]]
				798	float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) {
				799	return vmul_n_f16(a, b); // expected IR: scalar b is inserted into all four lanes (insertelement chain), then fmul with a
				800	}
				801
				802	// CHECK-LABEL: test_vmulq_n_f16
				803	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[b:%.*]], i32 0
				804	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[b]], i32 1
				805	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[b]], i32 2
				806	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[b]], i32 3
				807	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[b]], i32 4
				808	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[b]], i32 5
				809	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[b]], i32 6
				810	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[b]], i32 7
				811	// CHECK: [[MUL:%.*]] = fmul <8 x half> %a, [[TMP7]]
				812	// CHECK: ret <8 x half> [[MUL]]
				813	float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
				814	return vmulq_n_f16(a, b); // expected IR: scalar b is inserted into all eight lanes (insertelement chain), then fmul with a
				815	}
				816
				817	// CHECK-LABEL: test_vbsl_f16
				818	// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
				819	// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
				820	// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
				821	// CHECK: [[VBSL:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
				822	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL]] to <4 x half>
				823	// CHECK: ret <4 x half> [[TMP3]]
				824	float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
				825	return vbsl_f16(a, b, c); // expected IR: operands bitcast to <8 x i8>, llvm.arm.neon.vbsl.v8i8, result bitcast back to <4 x half>
				826	}
				827
				828	// CHECK-LABEL: test_vbslq_f16
				829	// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
				830	// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
				831	// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
				832	// CHECK: [[VBSL:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
				833	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL]] to <8 x half>
				834	// CHECK: ret <8 x half> [[TMP3]]
				835	float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
				836	return vbslq_f16(a, b, c); // expected IR: operands bitcast to <16 x i8>, llvm.arm.neon.vbsl.v16i8, result bitcast back to <8 x half>
				837	}
				838
				839	// CHECK-LABEL: test_vzip_f16
				840	// CHECK: [[VZIP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
				841	// CHECK: store <4 x half> [[VZIP0]], <4 x half>* [[addr1:%.*]]
				842	// CHECK: [[VZIP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
				843	// CHECK: store <4 x half> [[VZIP1]], <4 x half>* [[addr2:%.*]]
				844	float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
				845	return vzip_f16(a, b); // expected IR: two shuffles of a and b with masks (0,4,1,5) and (2,6,3,7), each followed by a store
				846	}
				847
				848	// CHECK-LABEL: test_vzipq_f16
				849	// CHECK: [[VZIP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
				850	// CHECK: store <8 x half> [[VZIP0]], <8 x half>* [[addr1:%.*]]
				851	// CHECK: [[VZIP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
				852	// CHECK: store <8 x half> [[VZIP1]], <8 x half>* [[addr2:%.*]]
				853	float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
				854	return vzipq_f16(a, b); // expected IR: two shuffles of a and b with masks (0,8,1,9,2,10,3,11) and (4,12,5,13,6,14,7,15), each followed by a store
				855	}
				856
				857	// CHECK-LABEL: test_vuzp_f16
				858	// CHECK: [[VUZP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				859	// CHECK: store <4 x half> [[VUZP0]], <4 x half>* [[addr1:%.*]]
				860	// CHECK: [[VUZP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
				861	// CHECK: store <4 x half> [[VUZP1]], <4 x half>* [[addr2:%.*]]
				862	float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
				863	return vuzp_f16(a, b); // expected IR: two shuffles of a and b taking the even indices (0,2,4,6) and the odd indices (1,3,5,7), each followed by a store
				864	}
				865
				866	// CHECK-LABEL: test_vuzpq_f16
				867	// CHECK: [[VUZP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
				868	// CHECK: store <8 x half> [[VUZP0]], <8 x half>* [[addr1:%.*]]
				869	// CHECK: [[VUZP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
				870	// CHECK: store <8 x half> [[VUZP1]], <8 x half>* [[addr2:%.*]]
				871	float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
				872	return vuzpq_f16(a, b); // expected IR: two shuffles of a and b taking the even indices (0,2,...,14) and the odd indices (1,3,...,15), each followed by a store
				873	}
				874
				875	// CHECK-LABEL: test_vtrn_f16
				876	// CHECK: [[VTRN0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
				877	// CHECK: store <4 x half> [[VTRN0]], <4 x half>* [[addr1:%.*]]
				878	// CHECK: [[VTRN1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
				879	// CHECK: store <4 x half> [[VTRN1]], <4 x half>* [[addr2:%.*]]
				880	float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
				881	return vtrn_f16(a, b); // expected IR: two shuffles of a and b with masks (0,4,2,6) and (1,5,3,7), each followed by a store
				882	}
				883
				884	// CHECK-LABEL: test_vtrnq_f16
				885	// CHECK: [[VTRN0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
				886	// CHECK: store <8 x half> [[VTRN0]], <8 x half>* [[addr1:%.*]]
				887	// CHECK: [[VTRN1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
				888	// CHECK: store <8 x half> [[VTRN1]], <8 x half>* [[addr2:%.*]]
				889	float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
				890	return vtrnq_f16(a, b); // expected IR: two shuffles of a and b with masks (0,8,2,10,4,12,6,14) and (1,9,3,11,5,13,7,15), each followed by a store
				891	}
				892
				893	// CHECK-LABEL: test_vmov_n_f16
				894	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
				895	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
				896	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
				897	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
				898	// CHECK: ret <4 x half> [[TMP3]]
				899	float16x4_t test_vmov_n_f16(float16_t a) {
				900	return vmov_n_f16(a); // expected IR: a is written into each of the four lanes by an insertelement chain
				901	}
				902
				903	// CHECK-LABEL: test_vmovq_n_f16
				904	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
				905	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
				906	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
				907	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
				908	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
				909	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
				910	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
				911	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
				912	// CHECK: ret <8 x half> [[TMP7]]
				913	float16x8_t test_vmovq_n_f16(float16_t a) {
				914	return vmovq_n_f16(a); // expected IR: a is written into each of the eight lanes by an insertelement chain
				915	}
				916
				917	// CHECK-LABEL: test_vdup_n_f16
				918	// CHECK: [[TMP0:%.*]] = insertelement <4 x half> undef, half [[ARG:%.*]], i32 0
				919	// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1
				920	// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2
				921	// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3
				922	// CHECK: ret <4 x half> [[TMP3]]
				923	float16x4_t test_vdup_n_f16(float16_t a) {
				924	return vdup_n_f16(a); // expected IR: a is written into each of the four lanes by an insertelement chain
				925	}
				926
				927	// CHECK-LABEL: test_vdupq_n_f16
				928	// CHECK: [[TMP0:%.*]] = insertelement <8 x half> undef, half [[ARG:%.*]], i32 0
				929	// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1
				930	// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2
				931	// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3
				932	// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4
				933	// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5
				934	// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6
				935	// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7
				936	// CHECK: ret <8 x half> [[TMP7]]
				937	float16x8_t test_vdupq_n_f16(float16_t a) {
				938	return vdupq_n_f16(a); // expected IR: a is written into each of the eight lanes by an insertelement chain
				939	}
				940
				941	// CHECK-LABEL: test_vdup_lane_f16
				942	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
				943	// CHECK: ret <4 x half> [[SHFL]]
				944	float16x4_t test_vdup_lane_f16(float16x4_t a) {
				945	return vdup_lane_f16(a, 3); // expected IR: lane 3 of a is broadcast to four lanes by one shufflevector
				946	}
				947
				948	// CHECK-LABEL: test_vdupq_lane_f16
				949	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
				950	// CHECK: ret <8 x half> [[SHFL]]
				951	float16x8_t test_vdupq_lane_f16(float16x4_t a) {
				952	return vdupq_lane_f16(a, 3); // a has only four lanes, so the lane index must be in 0..3; it is broadcast to eight lanes
				953	}
				954
				955	// CHECK-LABEL: @test_vext_f16(
				956	// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
				957	// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
				958	// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
				959	// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
				960	// CHECK: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
				961	// CHECK: ret <4 x half> [[VEXT]]
				962	float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
				963	return vext_f16(a, b, 2); // offset 2: the expected shuffle mask is (2,3,4,5) over a followed by b
				964	}
				965
				966	// CHECK-LABEL: @test_vextq_f16(
				967	// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
				968	// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
				969	// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
				970	// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
				971	// CHECK: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
				972	// CHECK: ret <8 x half> [[VEXT]]
				973	float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
				974	return vextq_f16(a, b, 5); // offset 5: the expected shuffle mask is (5,6,...,12) over a followed by b
				975	}
				976
				977	// CHECK-LABEL: @test_vrev64_f16(
				978	// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
				979	// CHECK: ret <4 x half> [[SHFL]]
				980	float16x4_t test_vrev64_f16(float16x4_t a) {
				981	return vrev64_f16(a); // expected IR: one shuffle reversing the four lanes, mask (3,2,1,0)
				982	}
				983
				984	// CHECK-LABEL: @test_vrev64q_f16(
				985	// CHECK: [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
				986	// CHECK: ret <8 x half> [[SHFL]]
				987	float16x8_t test_vrev64q_f16(float16x8_t a) {
				988	return vrev64q_f16(a); // expected IR: one shuffle reversing lanes within each group of four, mask (3,2,1,0,7,6,5,4)
				989	}