Blame - stm-arm-neon-ref.h - platform/external/arm-neon-tests

blob: e9bbe3cfa6ddd99d1c92ec1236ad37a13c8dc0e4 [file] [log] [blame]

Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	1	/*
				2
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	3	Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	4	Written by Christophe Lyon
				5
				6	Permission is hereby granted, free of charge, to any person obtaining a copy
				7	of this software and associated documentation files (the "Software"), to deal
				8	in the Software without restriction, including without limitation the rights
				9	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				10	copies of the Software, and to permit persons to whom the Software is
				11	furnished to do so, subject to the following conditions:
				12
				13	The above copyright notice and this permission notice shall be included in
				14	all copies or substantial portions of the Software.
				15
				16	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				22	THE SOFTWARE.
				23
				24	*/
				25
				26	#ifndef _STM_ARM_NEON_REF_H_
				27	#define _STM_ARM_NEON_REF_H_
				28
Christophe Lyon	6f4d36f	2011-07-19 16:18:19 +0200	[diff] [blame]	29	#if defined(__cplusplus)
				30	#include <cstdio>
				31	#include <cinttypes>
				32	#include <cstring>
				33	#else
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	34	#include <stdio.h>
Christophe Lyon	f3c80a5	2011-07-19 16:32:02 +0200	[diff] [blame]	35	#if defined(_MSC_VER)
				36	#include "msinttypes.h"
Christophe Lyon	836da4a	2011-10-03 18:18:49 +0200	[diff] [blame]	37	#include <float.h> /* for isnan() ... */
Christophe Lyon	164a959	2011-10-17 15:54:52 +0200	[diff] [blame]	38	static int32_t _ptrNan[]={0x7fc00000L};
				39	#define NAN (*(float*)_ptrNan)
				40	static int32_t _ptrInf[]={0x7f800000L};
				41	#define INFINITY (*(float*)_ptrInf)
				42	#define HUGE_VALF INFINITY
Christophe Lyon	f3c80a5	2011-07-19 16:32:02 +0200	[diff] [blame]	43	#else
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	44	#include <inttypes.h>
Christophe Lyon	f3c80a5	2011-07-19 16:32:02 +0200	[diff] [blame]	45	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	46	#include <string.h>
Christophe Lyon	6f4d36f	2011-07-19 16:18:19 +0200	[diff] [blame]	47	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	48
				49	#define xSTR(X) #X
				50	#define STR(X) xSTR(X)
				51
				52	#define xNAME1(V,T) V ## _ ## T
				53	#define xNAME(V,T) xNAME1(V,T)
				54
				55	#define VAR(V,T,W) xNAME(V,T##W)
				56	#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
				57
				58	#define VECT_NAME(T, W, N) T##W##x##N
				59	#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
				60	#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
				61	#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
				62
				63	#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
				64	#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	65
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	66	/* This one is used for padding between input buffers. */
				67	#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
				68
				69	/* Array declarations. */
				70	#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
				71	#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]
				72
				73	/* Arrays of vectors. */
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	74	#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	75	#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	76
				77	static int result_idx = 0;
				78	#define DUMP(MSG,T,W,N,FMT) \
				79	fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
				80	STR(VECT_VAR(result, T, W, N))); \
				81	for(i=0; i<N ; i++) \
				82	{ \
				83	fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
				84	} \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	85	fprintf(ref_file, " }\n"); \
				86	DUMP4GCC(MSG,T,W,N,FMT);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	87
Christophe Lyon	d9ab3e3	2014-07-11 16:44:32 +0200	[diff] [blame]	88	/* Use casts to remove sign bits */
				89	#define DUMP_POLY(MSG,T,W,N,FMT) \
				90	fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
				91	STR(VECT_VAR(result, T, W, N))); \
				92	for(i=0; i<N ; i++) \
				93	{ \
				94	fprintf(ref_file, "%" FMT ", ", \
				95	(uint##W##_t)VECT_VAR(result, T, W, N)[i]); \
				96	} \
				97	fprintf(ref_file, " }\n"); \
				98	DUMP4GCC(MSG,T,W,N,FMT);
				99
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	100	#define DUMP_FP(MSG,T,W,N,FMT) \
				101	fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
				102	STR(VECT_VAR(result, T, W, N))); \
				103	for(i=0; i<N ; i++) \
				104	{ \
				105	union fp_operand { \
				106	uint##W##_t i; \
				107	float##W##_t f; \
				108	} tmp; \
				109	tmp.f = VECT_VAR(result, T, W, N)[i]; \
Christophe Lyon	4de1f51	2014-07-11 15:02:12 +0200	[diff] [blame]	110	fprintf(ref_file, "%" FMT ", ", tmp.i); \
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	111	} \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	112	fprintf(ref_file, " }\n"); \
				113	DUMP4GCC_FP(MSG,T,W,N,FMT);
				114
				115	#define DUMP4GCC(MSG,T,W,N,FMT) \
				116	fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
				117	STR(T), W, N); \
				118	for(i=0; i<(N-1) ; i++) \
				119	{ \
				120	if (W < 32) { \
				121	uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
				122	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
				123	} else { \
				124	fprintf(gcc_tests_file, "0x%" FMT ", ", VECT_VAR(result, T, W, N)[i]); \
				125	} \
				126	} \
				127	if (W < 32) { \
				128	uint32_t tmp = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
				129	fprintf(gcc_tests_file, "0x%" FMT, tmp); \
				130	} else { \
				131	fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \
				132	} \
				133	fprintf(gcc_tests_file, " };\n");
				134
				135	#define DUMP4GCC_FP(MSG,T,W,N,FMT) \
				136	{ \
				137	union fp_operand { \
				138	uint##W##_t i; \
				139	float##W##_t f; \
				140	} tmp; \
				141	fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
				142	"hfloat", W, N); \
				143	for(i=0; i<(N-1) ; i++) \
				144	{ \
				145	tmp.f = VECT_VAR(result, T, W, N)[i]; \
				146	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp.i); \
				147	} \
				148	tmp.f = VECT_VAR(result, T, W, N)[i]; \
				149	fprintf(gcc_tests_file, "0x%" FMT, tmp.i); \
				150	fprintf(gcc_tests_file, " };\n"); \
				151	}
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	152
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	153	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	154	#define float16_t __fp16
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	155
				156	#define DUMP_FP16(MSG,T,W,N,FMT) \
				157	fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
				158	STR(VECT_VAR(result, T, W, N))); \
				159	for(i=0; i<N ; i++) \
				160	{ \
				161	uint##W##_t tmp; \
Christophe Lyon	f91ba87	2014-07-11 13:05:38 +0200	[diff] [blame]	162	tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
				163	fprintf(ref_file, "%" FMT ", ", tmp); \
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	164	} \
Christophe Lyon	f91ba87	2014-07-11 13:05:38 +0200	[diff] [blame]	165	fprintf(ref_file, " }\n"); \
				166	DUMP4GCC_FP16(MSG,T,W,N,FMT);
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	167
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	168	#define DUMP4GCC_FP16(MSG,T,W,N,FMT) \
Christophe Lyon	f91ba87	2014-07-11 13:05:38 +0200	[diff] [blame]	169	{ \
				170	uint##W##_t tmp; \
				171	fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ", \
				172	"hfloat", W, N); \
				173	for(i=0; i<(N-1) ; i++) \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	174	{ \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	175	tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
Christophe Lyon	f91ba87	2014-07-11 13:05:38 +0200	[diff] [blame]	176	fprintf(gcc_tests_file, "0x%" FMT ", ", tmp); \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	177	} \
Christophe Lyon	f91ba87	2014-07-11 13:05:38 +0200	[diff] [blame]	178	tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
				179	fprintf(gcc_tests_file, "0x%" FMT, tmp); \
				180	fprintf(gcc_tests_file, " };\n"); \
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	181	}
Christophe Lyon	94f99bc	2014-09-02 16:51:20 +0200	[diff] [blame]	182	#endif
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	183
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	184	#define CLEAN_PATTERN_8 0x33
				185	#define CLEAN_PATTERN_16 0x3333
				186	#define CLEAN_PATTERN_32 0x33333333
				187	#define CLEAN_PATTERN_64 0x3333333333333333
				188
				189	#define CLEAN(VAR,T,W,N) \
				190	memset(VECT_VAR(VAR, T, W, N), \
				191	CLEAN_PATTERN_8, \
				192	sizeof(VECT_VAR(VAR, T, W, N)));
				193
				194	#define CHECK_INIT(VAR,Q,T1,T2,W,N) \
				195	{ \
				196	ARRAY(check_result, T1, W, N); \
				197	int i; \
				198	\
				199	vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N), \
				200	VECT_VAR(VAR, T1, W, N)); \
				201	for(i=0; i<N ; i++) \
				202	{ \
				203	/*if (VECT_VAR(check_result, T1, W, N)[i] == CLEAN_PATTERN_##W)*/ { \
				204	fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n", \
				205	__FUNCTION__, __LINE__, \
				206	STR(VECT_VAR(VAR, T1, W, N)), i, \
				207	VECT_VAR(check_result, T1, W, N)[i]); \
				208	} \
				209	} \
				210	}
				211
				212	/* Generic declarations: */
				213	extern FILE* log_file;
				214	extern FILE* ref_file;
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	215	extern FILE* gcc_tests_file;
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	216
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	217	/* Input buffers, one of each size */
				218	extern ARRAY(buffer, int, 8, 8);
				219	extern ARRAY(buffer, int, 16, 4);
				220	extern ARRAY(buffer, int, 32, 2);
				221	extern ARRAY(buffer, int, 64, 1);
				222	extern ARRAY(buffer, uint, 8, 8);
				223	extern ARRAY(buffer, uint, 16, 4);
				224	extern ARRAY(buffer, uint, 32, 2);
				225	extern ARRAY(buffer, uint, 64, 1);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	226	extern ARRAY(buffer, poly, 8, 8);
				227	extern ARRAY(buffer, poly, 16, 4);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	228	extern ARRAY(buffer, float, 32, 2);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	229	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	230	extern ARRAY(buffer, float, 16, 4);
				231	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	232	extern ARRAY(buffer, int, 8, 16);
				233	extern ARRAY(buffer, int, 16, 8);
				234	extern ARRAY(buffer, int, 32, 4);
				235	extern ARRAY(buffer, int, 64, 2);
				236	extern ARRAY(buffer, uint, 8, 16);
				237	extern ARRAY(buffer, uint, 16, 8);
				238	extern ARRAY(buffer, uint, 32, 4);
				239	extern ARRAY(buffer, uint, 64, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	240	extern ARRAY(buffer, poly, 8, 16);
				241	extern ARRAY(buffer, poly, 16, 8);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	242	extern ARRAY(buffer, float, 32, 4);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	243	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	244	extern ARRAY(buffer, float, 16, 8);
				245	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	246
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	247	/* The tests for vld1_dup and vdup expect at least 4 entries in the
				248	input buffer, so force 1- and 2-elements initializers to have 4
				249	entries. */
				250	extern ARRAY(buffer_dup, int, 8, 8);
				251	extern ARRAY(buffer_dup, int, 16, 4);
				252	extern ARRAY4(buffer_dup, int, 32, 2);
				253	extern ARRAY4(buffer_dup, int, 64, 1);
				254	extern ARRAY(buffer_dup, uint, 8, 8);
				255	extern ARRAY(buffer_dup, uint, 16, 4);
				256	extern ARRAY4(buffer_dup, uint, 32, 2);
				257	extern ARRAY4(buffer_dup, uint, 64, 1);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	258	extern ARRAY(buffer_dup, poly, 8, 8);
				259	extern ARRAY(buffer_dup, poly, 16, 4);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	260	extern ARRAY4(buffer_dup, float, 32, 2);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	261	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	262	extern ARRAY4(buffer_dup, float, 16, 4);
				263	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	264	extern ARRAY(buffer_dup, int, 8, 16);
				265	extern ARRAY(buffer_dup, int, 16, 8);
				266	extern ARRAY(buffer_dup, int, 32, 4);
				267	extern ARRAY4(buffer_dup, int, 64, 2);
				268	extern ARRAY(buffer_dup, uint, 8, 16);
				269	extern ARRAY(buffer_dup, uint, 16, 8);
				270	extern ARRAY(buffer_dup, uint, 32, 4);
				271	extern ARRAY4(buffer_dup, uint, 64, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	272	extern ARRAY(buffer_dup, poly, 8, 16);
				273	extern ARRAY(buffer_dup, poly, 16, 8);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	274	extern ARRAY(buffer_dup, float, 32, 4);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	275	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	276	extern ARRAY(buffer_dup, float, 16, 8);
				277	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	278
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	279	/* Input buffers for vld2, one of each size */
				280	extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
				281	extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
				282	extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
				283	extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
				284	extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
				285	extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
				286	extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
				287	extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	288	extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
				289	extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	290	extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	291	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	292	extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
				293	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	294	extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
				295	extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
				296	extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
				297	extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
				298	extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
				299	extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
				300	extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
				301	extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	302	extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
				303	extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	304	extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	305	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	306	extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
				307	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	308
				309	/* Input buffers for vld3, one of each size */
				310	extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
				311	extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
				312	extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
				313	extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
				314	extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
				315	extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
				316	extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
				317	extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	318	extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
				319	extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	320	extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	321	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	322	extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
				323	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	324	extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
				325	extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
				326	extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
				327	extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
				328	extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
				329	extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
				330	extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
				331	extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	332	extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
				333	extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	334	extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	335	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	336	extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
				337	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	338
				339	/* Input buffers for vld4, one of each size */
				340	extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
				341	extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
				342	extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
				343	extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
				344	extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
				345	extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
				346	extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
				347	extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	348	extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
				349	extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	350	extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	351	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	352	extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
				353	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	354	extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
				355	extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
				356	extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
				357	extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
				358	extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
				359	extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
				360	extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
				361	extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	362	extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
				363	extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	364	extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	365	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	366	extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
				367	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	368
				369	/* Input buffers for vld2_lane */
				370	extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
				371	extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
				372	extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
				373	extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
				374	extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
				375	extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
				376	extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
				377	extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	378	extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
				379	extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	380	extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	381	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	382	extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
				383	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	384
				385	/* Input buffers for vld3_lane */
				386	extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
				387	extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
				388	extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
				389	extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
				390	extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
				391	extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
				392	extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
				393	extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	394	extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
				395	extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	396	extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	397	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	398	extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
				399	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	400
				401	/* Input buffers for vld4_lane */
				402	extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
				403	extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
				404	extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
				405	extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
				406	extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
				407	extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
				408	extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
				409	extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	410	extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
				411	extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	412	extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	413	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	414	extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
				415	#endif
Christophe Lyon	01af0a5	2013-01-17 17:23:11 +0100	[diff] [blame]	416
				417	/* Output buffers, one of each size */
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	418	static ARRAY(result, int, 8, 8);
				419	static ARRAY(result, int, 16, 4);
				420	static ARRAY(result, int, 32, 2);
				421	static ARRAY(result, int, 64, 1);
				422	static ARRAY(result, uint, 8, 8);
				423	static ARRAY(result, uint, 16, 4);
				424	static ARRAY(result, uint, 32, 2);
				425	static ARRAY(result, uint, 64, 1);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	426	static ARRAY(result, poly, 8, 8);
				427	static ARRAY(result, poly, 16, 4);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	428	static ARRAY(result, float, 32, 2);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	429	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	430	static ARRAY(result, float, 16, 4);
				431	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	432	static ARRAY(result, int, 8, 16);
				433	static ARRAY(result, int, 16, 8);
				434	static ARRAY(result, int, 32, 4);
				435	static ARRAY(result, int, 64, 2);
				436	static ARRAY(result, uint, 8, 16);
				437	static ARRAY(result, uint, 16, 8);
				438	static ARRAY(result, uint, 32, 4);
				439	static ARRAY(result, uint, 64, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	440	static ARRAY(result, poly, 8, 16);
				441	static ARRAY(result, poly, 16, 8);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	442	static ARRAY(result, float, 32, 4);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	443	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	444	static ARRAY(result, float, 16, 8);
				445	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	446
				447	/* Dump results (generic function): write a "<test_name> output:" header to ref_file and gcc_tests_file, then dump every 'result' buffer. Signed lanes use PRId*, unsigned and poly lanes use PRIu*, float32 lanes go through DUMP_FP with PRIx32, and FP16 lanes go through DUMP_FP16 when the FP16 feature test holds. test_name is only read. */
				448	static void dump_results (const char *test_name)
				449	{
				450	int i; /* loop index used inside the DUMP* macro expansions */
				451
				452	fprintf(ref_file, "\n%s output:\n", test_name);
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	453	fprintf(gcc_tests_file, "\n%s output:\n", test_name);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	454
				455	DUMP(test_name, int, 8, 8, PRId8);
				456	DUMP(test_name, int, 16, 4, PRId16);
				457	DUMP(test_name, int, 32, 2, PRId32);
				458	DUMP(test_name, int, 64, 1, PRId64);
				459	DUMP(test_name, uint, 8, 8, PRIu8);
				460	DUMP(test_name, uint, 16, 4, PRIu16);
				461	DUMP(test_name, uint, 32, 2, PRIu32);
				462	DUMP(test_name, uint, 64, 1, PRIu64);
Christophe Lyon	d9ab3e3	2014-07-11 16:44:32 +0200	[diff] [blame]	463	DUMP_POLY(test_name, poly, 8, 8, PRIu8);
				464	DUMP_POLY(test_name, poly, 16, 4, PRIu16);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	465	DUMP_FP(test_name, float, 32, 2, PRIx32);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	466	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	467	DUMP_FP16(test_name, float, 16, 4, PRIu16);
				468	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	469
				470	DUMP(test_name, int, 8, 16, PRId8);
				471	DUMP(test_name, int, 16, 8, PRId16);
				472	DUMP(test_name, int, 32, 4, PRId32);
				473	DUMP(test_name, int, 64, 2, PRId64);
				474	DUMP(test_name, uint, 8, 16, PRIu8);
				475	DUMP(test_name, uint, 16, 8, PRIu16);
				476	DUMP(test_name, uint, 32, 4, PRIu32);
				477	DUMP(test_name, uint, 64, 2, PRIu64);
Christophe Lyon	d9ab3e3	2014-07-11 16:44:32 +0200	[diff] [blame]	478	DUMP_POLY(test_name, poly, 8, 16, PRIu8);
				479	DUMP_POLY(test_name, poly, 16, 8, PRIu16);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	480	DUMP_FP(test_name, float, 32, 4, PRIx32);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	481	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	482	DUMP_FP16(test_name, float, 16, 8, PRIu16);
				483	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	484	}
				485
				486	/* Dump results in hex (generic function): write a "<test_name><comment> output:" header to ref_file and gcc_tests_file, then dump every 'result' buffer using the hexadecimal PRIx* formats. test_name and comment are NUL-terminated strings (both are printed with %s) and are only read. */
Christophe Lyon	6f4d36f	2011-07-19 16:18:19 +0200	[diff] [blame]	487	static void dump_results_hex2 (const char *test_name, const char *comment)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	488	{
				489	int i; /* loop index used inside the DUMP* macro expansions */
				490
				491	fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
Christophe Lyon	fad316a	2014-05-16 17:12:21 +0200	[diff] [blame]	492	fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	493
				494	DUMP(test_name, int, 8, 8, PRIx8);
				495	DUMP(test_name, int, 16, 4, PRIx16);
				496	DUMP(test_name, int, 32, 2, PRIx32);
				497	DUMP(test_name, int, 64, 1, PRIx64);
				498	DUMP(test_name, uint, 8, 8, PRIx8);
				499	DUMP(test_name, uint, 16, 4, PRIx16);
				500	DUMP(test_name, uint, 32, 2, PRIx32);
				501	DUMP(test_name, uint, 64, 1, PRIx64);
Christophe Lyon	d9ab3e3	2014-07-11 16:44:32 +0200	[diff] [blame]	502	DUMP_POLY(test_name, poly, 8, 8, PRIx8);
				503	DUMP_POLY(test_name, poly, 16, 4, PRIx16);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	504	DUMP_FP(test_name, float, 32, 2, PRIx32);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	505	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	506	DUMP_FP16(test_name, float, 16, 4, PRIx16);
				507	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	508
				509	DUMP(test_name, int, 8, 16, PRIx8);
				510	DUMP(test_name, int, 16, 8, PRIx16);
				511	DUMP(test_name, int, 32, 4, PRIx32);
				512	DUMP(test_name, int, 64, 2, PRIx64);
				513	DUMP(test_name, uint, 8, 16, PRIx8);
				514	DUMP(test_name, uint, 16, 8, PRIx16);
				515	DUMP(test_name, uint, 32, 4, PRIx32);
				516	DUMP(test_name, uint, 64, 2, PRIx64);
Christophe Lyon	d9ab3e3	2014-07-11 16:44:32 +0200	[diff] [blame]	517	DUMP_POLY(test_name, poly, 8, 16, PRIx8);
				518	DUMP_POLY(test_name, poly, 16, 8, PRIx16);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	519	DUMP_FP(test_name, float, 32, 4, PRIx32);
Christophe Lyon	d98beba	2016-08-24 18:02:41 +0200	[diff] [blame]	520	#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon	34adaf6	2013-04-11 15:05:18 +0200	[diff] [blame]	521	DUMP_FP16(test_name, float, 16, 8, PRIx16);
				522	#endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	523	}
				524
/* Dump results in hex without any extra header text: forwards to
   dump_results_hex2 with an empty comment string.

   test_name: name of the test, passed through unchanged.  */
static void dump_results_hex (const char *test_name)
{
  dump_results_hex2(test_name, "");
}
				529
				530	#ifndef STM_ARM_NEON_MODELS
				531
/* This hack is to cope with various compilers/libc which may not
   provide endian.h or cross-compilers such as llvm which includes the
   host's endian.h.  */
#ifndef __arm__
#include <endian.h>
#define THIS_ENDIAN __BYTE_ORDER
#else /* __arm__ */
/* endian.h is not included on this path, so the byte-order constants
   may be undefined.  An undefined identifier evaluates to 0 inside #if,
   which would make "THIS_ENDIAN == __LITTLE_ENDIAN" true for both
   settings.  Provide distinct fallback values when nothing else has
   defined them.  */
#ifndef __LITTLE_ENDIAN
#define __LITTLE_ENDIAN 1234
#endif
#ifndef __BIG_ENDIAN
#define __BIG_ENDIAN 4321
#endif
#ifdef __ARMEL__
#define THIS_ENDIAN __LITTLE_ENDIAN
#else /* __ARMEL__ */
#define THIS_ENDIAN __BIG_ENDIAN
#endif
#endif /* __arm__ */
				545
				546	#if THIS_ENDIAN == __LITTLE_ENDIAN
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	547
				548	typedef union {
				549	struct {
				550	int _xxx:27;
Christophe Lyon	eb8034b	2012-05-09 17:06:10 +0200	[diff] [blame]	551	unsigned int QC:1;
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	552	int V:1;
				553	int C:1;
				554	int Z:1;
				555	int N:1;
				556	} b;
				557	unsigned int word;
				558	} _ARM_FPSCR;
				559
				560	#else /* __BIG_ENDIAN */
				561
				562	typedef union {
				563	struct {
				564	int N:1;
				565	int Z:1;
				566	int C:1;
				567	int V:1;
Christophe Lyon	eb8034b	2012-05-09 17:06:10 +0200	[diff] [blame]	568	unsigned int QC:1;
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	569	int _dnm:27;
				570	} b;
				571	unsigned int word;
				572	} _ARM_FPSCR;
				573
				574	#endif /* __BIG_ENDIAN */
				575
				576	#ifdef __ARMCC_VERSION
				577	register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
Christophe Lyon	1a3b239	2014-07-10 13:47:40 +0200	[diff] [blame]	578	# define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
Christophe Lyon	c1cc782	2015-01-20 16:04:24 +0100	[diff] [blame]	579	# define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);}
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	580	#else
Christophe Lyon	eb8034b	2012-05-09 17:06:10 +0200	[diff] [blame]	581	/* GCC/ARM does not know this register */
Christophe Lyon	1a3b239	2014-07-10 13:47:40 +0200	[diff] [blame]	582	# define Neon_Cumulative_Sat __read_neon_cumulative_sat()
Christophe Lyon	c1cc782	2015-01-20 16:04:24 +0100	[diff] [blame]	583	/* We need a fake dependency to ensure correct ordering of asm
				584	statements to preset the QC flag value, and Neon operators writing
				585	to QC. */
				586	#define Set_Neon_Cumulative_Sat(x, depend) \
				587	__set_neon_cumulative_sat((x), (depend))
Christophe Lyon	1a3b239	2014-07-10 13:47:40 +0200	[diff] [blame]	588
				589	# if defined(__aarch64__)
				590	static volatile int __read_neon_cumulative_sat (void) {
				591	_ARM_FPSCR _afpscr_for_qc;
				592	asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
				593	return _afpscr_for_qc.b.QC;
Christophe Lyon	eb8034b	2012-05-09 17:06:10 +0200	[diff] [blame]	594	}
Christophe Lyon	c1cc782	2015-01-20 16:04:24 +0100	[diff] [blame]	595
				596	#define __set_neon_cumulative_sat(x, depend) { \
				597	_ARM_FPSCR _afpscr_for_qc; \
				598	asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \
				599	_afpscr_for_qc.b.QC = x; \
				600	asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
				601	}
				602
Christophe Lyon	1a3b239	2014-07-10 13:47:40 +0200	[diff] [blame]	603	# else
				604	static volatile int __read_neon_cumulative_sat (void) {
				605	_ARM_FPSCR _afpscr_for_qc;
				606	asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
				607	return _afpscr_for_qc.b.QC;
Christophe Lyon	eb8034b	2012-05-09 17:06:10 +0200	[diff] [blame]	608	}
				609
Christophe Lyon	c1cc782	2015-01-20 16:04:24 +0100	[diff] [blame]	610	#define __set_neon_cumulative_sat(x, depend) { \
				611	_ARM_FPSCR _afpscr_for_qc; \
				612	asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \
				613	_afpscr_for_qc.b.QC = x; \
				614	asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
				615	}
				616
Christophe Lyon	1a3b239	2014-07-10 13:47:40 +0200	[diff] [blame]	617	# endif
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	618	#endif
				619
				620	#endif /* STM_ARM_NEON_MODELS */
				621
Christophe Lyon	4a6e5cc	2014-06-03 22:47:52 +0200	[diff] [blame]	622	static void dump_neon_cumulative_sat(const char* msg, const char *name,
				623	const char* t1, int w, int n)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	624	{
Christophe Lyon	4a6e5cc	2014-06-03 22:47:52 +0200	[diff] [blame]	625	fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
				626	name, Neon_Cumulative_Sat);
				627	fprintf(gcc_tests_file,
				628	"int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
				629	t1, w, n, Neon_Cumulative_Sat);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	630	}
				631
				632	/* Clean output buffers before execution */
				633	static void clean_results (void)
				634	{
				635	result_idx = 0;
				636	CLEAN(result, int, 8, 8);
				637	CLEAN(result, int, 16, 4);
				638	CLEAN(result, int, 32, 2);
				639	CLEAN(result, int, 64, 1);
				640	CLEAN(result, uint, 8, 8);
				641	CLEAN(result, uint, 16, 4);
				642	CLEAN(result, uint, 32, 2);
				643	CLEAN(result, uint, 64, 1);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	644	CLEAN(result, poly, 8, 8);
				645	CLEAN(result, poly, 16, 4);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	646	CLEAN(result, float, 32, 2);
				647
				648	CLEAN(result, int, 8, 16);
				649	CLEAN(result, int, 16, 8);
				650	CLEAN(result, int, 32, 4);
				651	CLEAN(result, int, 64, 2);
				652	CLEAN(result, uint, 8, 16);
				653	CLEAN(result, uint, 16, 8);
				654	CLEAN(result, uint, 32, 4);
				655	CLEAN(result, uint, 64, 2);
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	656	CLEAN(result, poly, 8, 16);
				657	CLEAN(result, poly, 16, 8);
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	658	CLEAN(result, float, 32, 4);
				659	}
				660
				661
				662	/* Helpers to declare variables of various types */
				663	#define DECL_VARIABLE(VAR, T1, W, N) \
Victor Khimenko	3de3e4a	2016-10-19 18:16:39 +0200	[diff] [blame]	664	volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	665
				666	#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
				667	DECL_VARIABLE(VAR, int, 8, 8); \
				668	DECL_VARIABLE(VAR, int, 16, 4); \
				669	DECL_VARIABLE(VAR, int, 32, 2); \
				670	DECL_VARIABLE(VAR, int, 64, 1)
				671
				672	#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \
				673	DECL_VARIABLE(VAR, uint, 8, 8); \
				674	DECL_VARIABLE(VAR, uint, 16, 4); \
				675	DECL_VARIABLE(VAR, uint, 32, 2); \
				676	DECL_VARIABLE(VAR, uint, 64, 1)
				677
				678	#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \
				679	DECL_VARIABLE(VAR, int, 8, 16); \
				680	DECL_VARIABLE(VAR, int, 16, 8); \
				681	DECL_VARIABLE(VAR, int, 32, 4); \
				682	DECL_VARIABLE(VAR, int, 64, 2)
				683
				684	#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \
				685	DECL_VARIABLE(VAR, uint, 8, 16); \
				686	DECL_VARIABLE(VAR, uint, 16, 8); \
				687	DECL_VARIABLE(VAR, uint, 32, 4); \
				688	DECL_VARIABLE(VAR, uint, 64, 2)
				689
				690	#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
				691	DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
				692	DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	693	DECL_VARIABLE(VAR, poly, 8, 8); \
				694	DECL_VARIABLE(VAR, poly, 16, 4); \
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	695	DECL_VARIABLE(VAR, float, 32, 2)
				696
				697	#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
				698	DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
				699	DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	700	DECL_VARIABLE(VAR, poly, 8, 16); \
				701	DECL_VARIABLE(VAR, poly, 16, 8); \
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	702	DECL_VARIABLE(VAR, float, 32, 4)
				703
				704	#define DECL_VARIABLE_ALL_VARIANTS(VAR) \
				705	DECL_VARIABLE_64BITS_VARIANTS(VAR); \
				706	DECL_VARIABLE_128BITS_VARIANTS(VAR)
				707
				708	#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \
				709	DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
				710	DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
				711
				712	#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \
				713	DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
				714	DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
				715
				716	/* Helpers to initialize vectors */
Christophe Lyon	f205367	2014-12-16 10:26:00 +0100	[diff] [blame]	717	#define VDUP(VAR, Q, T1, T2, W, N, V) \
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	718	VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
				719
				720	#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
				721	VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \
				722	VECT_VAR(VAR, T1, W, N), \
				723	L)
				724
				725	/* We need to load initial values first, so rely on VLD1 */
Christophe Lyon	f205367	2014-12-16 10:26:00 +0100	[diff] [blame]	726	#define VLOAD(VAR, BUF, Q, T1, T2, W, N) \
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	727	VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
				728
				729	/* Helpers for macros with 1 constant and 5 variable arguments */
				730	#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
				731	MACRO(VAR, , int, s, 8, 8); \
				732	MACRO(VAR, , int, s, 16, 4); \
				733	MACRO(VAR, , int, s, 32, 2); \
				734	MACRO(VAR, , int, s, 64, 1)
				735
				736	#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
				737	MACRO(VAR, , uint, u, 8, 8); \
				738	MACRO(VAR, , uint, u, 16, 4); \
				739	MACRO(VAR, , uint, u, 32, 2); \
				740	MACRO(VAR, , uint, u, 64, 1)
				741
				742	#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
				743	MACRO(VAR, q, int, s, 8, 16); \
				744	MACRO(VAR, q, int, s, 16, 8); \
				745	MACRO(VAR, q, int, s, 32, 4); \
				746	MACRO(VAR, q, int, s, 64, 2)
				747
				748	#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \
				749	MACRO(VAR, q, uint, u, 8, 16); \
				750	MACRO(VAR, q, uint, u, 16, 8); \
				751	MACRO(VAR, q, uint, u, 32, 4); \
				752	MACRO(VAR, q, uint, u, 64, 2)
				753
				754	#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \
				755	TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
				756	TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
				757
				758	#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \
				759	TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
				760	TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
				761
				762	#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
				763	TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \
				764	TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
				765
				766	#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \
				767	TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
				768	TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
				769
				770	/* Helpers for macros with 2 constant and 5 variable arguments */
				771	#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				772	MACRO(VAR1, VAR2, , int, s, 8, 8); \
				773	MACRO(VAR1, VAR2, , int, s, 16, 4); \
				774	MACRO(VAR1, VAR2, , int, s, 32, 2); \
				775	MACRO(VAR1, VAR2 , , int, s, 64, 1)
				776
				777	#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				778	MACRO(VAR1, VAR2, , uint, u, 8, 8); \
				779	MACRO(VAR1, VAR2, , uint, u, 16, 4); \
				780	MACRO(VAR1, VAR2, , uint, u, 32, 2); \
				781	MACRO(VAR1, VAR2, , uint, u, 64, 1)
				782
				783	#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				784	MACRO(VAR1, VAR2, q, int, s, 8, 16); \
				785	MACRO(VAR1, VAR2, q, int, s, 16, 8); \
				786	MACRO(VAR1, VAR2, q, int, s, 32, 4); \
				787	MACRO(VAR1, VAR2, q, int, s, 64, 2)
				788
				789	#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				790	MACRO(VAR1, VAR2, q, uint, u, 8, 16); \
				791	MACRO(VAR1, VAR2, q, uint, u, 16, 8); \
				792	MACRO(VAR1, VAR2, q, uint, u, 32, 4); \
				793	MACRO(VAR1, VAR2, q, uint, u, 64, 2)
				794
				795	#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				796	TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	797	TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
				798	MACRO(VAR1, VAR2, , poly, p, 8, 8); \
				799	MACRO(VAR1, VAR2, , poly, p, 16, 4)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	800
				801	#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				802	TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
Christophe Lyon	80902f6	2013-03-29 16:26:42 +0100	[diff] [blame]	803	TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
				804	MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
				805	MACRO(VAR1, VAR2, q, poly, p, 16, 8)
Christophe Lyon	073831a	2011-01-24 17:37:40 +0100	[diff] [blame]	806
				807	#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				808	TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
				809	TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
				810
				811	#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
				812	TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
				813	TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
				814
				815	#endif /* _STM_ARM_NEON_REF_H_ */