blob: e9bbe3cfa6ddd99d1c92ec1236ad37a13c8dc0e4 [file] [log] [blame]
Christophe Lyon073831a2011-01-24 17:37:40 +01001/*
2
Christophe Lyon80902f62013-03-29 16:26:42 +01003Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics
Christophe Lyon073831a2011-01-24 17:37:40 +01004Written by Christophe Lyon
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in
14all copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22THE SOFTWARE.
23
24*/
25
26#ifndef _STM_ARM_NEON_REF_H_
27#define _STM_ARM_NEON_REF_H_
28
Christophe Lyon6f4d36f2011-07-19 16:18:19 +020029#if defined(__cplusplus)
30#include <cstdio>
31#include <cinttypes>
32#include <cstring>
33#else
Christophe Lyon073831a2011-01-24 17:37:40 +010034#include <stdio.h>
Christophe Lyonf3c80a52011-07-19 16:32:02 +020035#if defined(_MSC_VER)
36#include "msinttypes.h"
Christophe Lyon836da4a2011-10-03 18:18:49 +020037#include <float.h> /* for isnan() ... */
Christophe Lyon164a9592011-10-17 15:54:52 +020038static int32_t _ptrNan[]={0x7fc00000L};
39#define NAN (*(float*)_ptrNan)
40static int32_t _ptrInf[]={0x7f800000L};
41#define INFINITY (*(float*)_ptrInf)
42#define HUGE_VALF INFINITY
Christophe Lyonf3c80a52011-07-19 16:32:02 +020043#else
Christophe Lyon073831a2011-01-24 17:37:40 +010044#include <inttypes.h>
Christophe Lyonf3c80a52011-07-19 16:32:02 +020045#endif
Christophe Lyon073831a2011-01-24 17:37:40 +010046#include <string.h>
Christophe Lyon6f4d36f2011-07-19 16:18:19 +020047#endif
Christophe Lyon073831a2011-01-24 17:37:40 +010048
/* Stringification.  STR() macro-expands its argument before turning it
   into a string literal; xSTR() stringifies it verbatim. */
#define xSTR(TOKENS) #TOKENS
#define STR(TOKENS) xSTR(TOKENS)

/* Glue two identifiers with an underscore: xNAME(a, b) -> a_b.  The
   two-level form lets both arguments be macro-expanded first. */
#define xNAME1(HEAD, TAIL) HEAD ## _ ## TAIL
#define xNAME(HEAD, TAIL) xNAME1(HEAD, TAIL)

/* Scalar variables: VAR(v, int, 32) -> v_int32 and
   VAR_DECL(v, int, 32) -> int32_t v_int32. */
#define VAR(NAME, TYPE, WIDTH) xNAME(NAME, TYPE##WIDTH)
#define VAR_DECL(NAME, TYPE, WIDTH) TYPE##WIDTH##_t VAR(NAME, TYPE, WIDTH)

/* Vector type names: VECT_NAME(int, 8, 8) -> int8x8,
   VECT_TYPE(int, 8, 8) -> int8x8_t,
   VECT_ARRAY_TYPE(int, 8, 8, 2) -> int8x8x2_t. */
#define VECT_NAME(TYPE, WIDTH, LANES) TYPE##WIDTH##x##LANES
#define VECT_ARRAY_NAME(TYPE, WIDTH, LANES, COUNT) TYPE##WIDTH##x##LANES##x##COUNT
#define VECT_TYPE(TYPE, WIDTH, LANES) xNAME(VECT_NAME(TYPE, WIDTH, LANES), t)
#define VECT_ARRAY_TYPE(TYPE, WIDTH, LANES, COUNT) \
  xNAME(VECT_ARRAY_NAME(TYPE, WIDTH, LANES, COUNT), t)

/* Vector variables: VECT_VAR(result, int, 8, 8) -> result_int8x8.
   VECT_VAR_DECL() prefixes that name with the scalar element type. */
#define VECT_VAR(NAME, TYPE, WIDTH, LANES) xNAME(NAME, VECT_NAME(TYPE, WIDTH, LANES))
#define VECT_VAR_DECL(NAME, TYPE, WIDTH, LANES) \
  TYPE##WIDTH##_t VECT_VAR(NAME, TYPE, WIDTH, LANES)

/* One-byte variable (initialised to 42) used as padding between input
   buffers.  The expansion already ends with a semicolon. */
#define PAD(NAME, TYPE, WIDTH, LANES) char VECT_VAR(NAME, TYPE, WIDTH, LANES)=42;

/* Array declarations: ARRAY() has one element per lane, ARRAY4() always
   has exactly four elements. */
#define ARRAY(NAME, TYPE, WIDTH, LANES) VECT_VAR_DECL(NAME, TYPE, WIDTH, LANES)[LANES]
#define ARRAY4(NAME, TYPE, WIDTH, LANES) VECT_VAR_DECL(NAME, TYPE, WIDTH, LANES)[4]

/* Arrays of vectors: COUNT consecutive vectors of LANES elements each,
   stored flat in a single array of LANES*COUNT scalars. */
#define VECT_ARRAY_VAR(NAME, TYPE, WIDTH, LANES, COUNT) \
  xNAME(NAME, VECT_ARRAY_NAME(TYPE, WIDTH, LANES, COUNT))
#define VECT_ARRAY(NAME, TYPE, WIDTH, LANES, COUNT) \
  TYPE##WIDTH##_t VECT_ARRAY_VAR(NAME, TYPE, WIDTH, LANES, COUNT)[LANES*COUNT]
Christophe Lyon073831a2011-01-24 17:37:40 +010076
/* Running index of the next line written to ref_file; it is bumped by
   every DUMP*() expansion. */
static int result_idx = 0;

/* DUMP(MSG, T, W, N, FMT): write the N lanes of result_<T><W>x<N> to
   ref_file, one "%FMT, " item per lane (FMT is a PRI* macro, without
   the leading '%'), then emit the matching line to gcc_tests_file
   through DUMP4GCC().
   This expands to several statements and relies on an `i' loop
   variable declared by the caller. */
#define DUMP(MSG,T,W,N,FMT)                                             \
  fprintf(ref_file, "%s:%d:%s [] = { ",                                 \
          MSG, result_idx++, STR(VECT_VAR(result, T, W, N)));           \
  for(i=0; i<N ; i++)                                                   \
    fprintf(ref_file, "%" FMT ", ", VECT_VAR(result, T, W, N)[i]);      \
  fprintf(ref_file, " }\n");                                            \
  DUMP4GCC(MSG,T,W,N,FMT);
Christophe Lyon073831a2011-01-24 17:37:40 +010087
/* Same as DUMP(), but each lane is cast to uint<W>_t before printing so
   that no sign bits show up in the output. */
#define DUMP_POLY(MSG,T,W,N,FMT)                                        \
  fprintf(ref_file, "%s:%d:%s [] = { ",                                 \
          MSG, result_idx++, STR(VECT_VAR(result, T, W, N)));           \
  for(i=0; i<N ; i++)                                                   \
    fprintf(ref_file, "%" FMT ", ",                                     \
            (uint##W##_t)VECT_VAR(result, T, W, N)[i]);                 \
  fprintf(ref_file, " }\n");                                            \
  DUMP4GCC(MSG,T,W,N,FMT);
99
/* Floating-point flavour of DUMP(): each lane is stored into the float
   member of a union and read back through the same-width unsigned
   integer member, so what gets printed is the lane's bit pattern.  The
   gcc_tests_file line is produced by DUMP4GCC_FP(). */
#define DUMP_FP(MSG,T,W,N,FMT)                                          \
  fprintf(ref_file, "%s:%d:%s [] = { ",                                 \
          MSG, result_idx++, STR(VECT_VAR(result, T, W, N)));           \
  for(i=0; i<N ; i++)                                                   \
    {                                                                   \
      union {                                                           \
        uint##W##_t i;                                                  \
        float##W##_t f;                                                 \
      } bits;                                                           \
      bits.f = VECT_VAR(result, T, W, N)[i];                            \
      fprintf(ref_file, "%" FMT ", ", bits.i);                          \
    }                                                                   \
  fprintf(ref_file, " }\n");                                            \
  DUMP4GCC_FP(MSG,T,W,N,FMT);
114
/* Emit result_<T><W>x<N> to gcc_tests_file as a
   "VECT_VAR_DECL(expected,T,W,N) [] = { 0x.., 0x.. };" line.  Lanes
   narrower than 32 bits are first cast to uint<W>_t and widened to
   uint32_t; wider lanes are passed to fprintf() unchanged.  The last
   lane is handled after the loop so that no trailing comma is written.
   Relies on the caller's `i', which is left equal to N-1. */
#define DUMP4GCC(MSG,T,W,N,FMT)                                         \
  fprintf(gcc_tests_file, "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ",   \
          STR(T), W, N);                                                \
  for(i=0; i<(N-1) ; i++)                                               \
    {                                                                   \
      if (W >= 32)                                                      \
        {                                                               \
          fprintf(gcc_tests_file, "0x%" FMT ", ",                       \
                  VECT_VAR(result, T, W, N)[i]);                        \
        }                                                               \
      else                                                              \
        {                                                               \
          uint32_t widened = (uint##W##_t) VECT_VAR(result, T, W, N)[i]; \
          fprintf(gcc_tests_file, "0x%" FMT ", ", widened);             \
        }                                                               \
    }                                                                   \
  if (W >= 32)                                                          \
    {                                                                   \
      fprintf(gcc_tests_file, "0x%" FMT, VECT_VAR(result, T, W, N)[i]); \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      uint32_t widened = (uint##W##_t) VECT_VAR(result, T, W, N)[i];    \
      fprintf(gcc_tests_file, "0x%" FMT, widened);                      \
    }                                                                   \
  fprintf(gcc_tests_file, " };\n");
134
/* Floating-point flavour of DUMP4GCC(): the element type written to
   gcc_tests_file is always "hfloat" and every lane is printed as the
   bit pattern read back through a float/unsigned union.  Relies on the
   caller's `i', which is left equal to N-1. */
#define DUMP4GCC_FP(MSG,T,W,N,FMT)                                      \
  {                                                                     \
    union {                                                             \
      uint##W##_t i;                                                    \
      float##W##_t f;                                                   \
    } bits;                                                             \
    fprintf(gcc_tests_file,                                             \
            "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ",                 \
            "hfloat", W, N);                                            \
    for(i=0; i<(N-1) ; i++)                                             \
      {                                                                 \
        bits.f = VECT_VAR(result, T, W, N)[i];                          \
        fprintf(gcc_tests_file, "0x%" FMT ", ", bits.i);                \
      }                                                                 \
    /* Last lane: no trailing comma. */                                 \
    bits.f = VECT_VAR(result, T, W, N)[i];                              \
    fprintf(gcc_tests_file, "0x%" FMT, bits.i);                         \
    fprintf(gcc_tests_file, " };\n");                                   \
  }
Christophe Lyon073831a2011-01-24 17:37:40 +0100152
Christophe Lyond98beba2016-08-24 18:02:41 +0200153#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200154#define float16_t __fp16
Christophe Lyon34adaf62013-04-11 15:05:18 +0200155
/* Half-precision flavour of DUMP_FP(): print the bit pattern of each
   lane of result_<T><W>x<N> to ref_file, then emit the gcc_tests_file
   line through DUMP4GCC_FP16().
   The lane is reinterpreted through a union, exactly as DUMP_FP() does.
   A plain (uint<W>_t) cast would convert the floating-point *value* to
   an integer, which loses the encoding and is undefined for negative,
   NaN or out-of-range lanes.
   Relies on an `i' loop variable declared by the caller. */
#define DUMP_FP16(MSG,T,W,N,FMT)                                        \
  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++,              \
          STR(VECT_VAR(result, T, W, N)));                              \
  for(i=0; i<N ; i++)                                                   \
    {                                                                   \
      union {                                                           \
        uint##W##_t i;                                                  \
        float##W##_t f;                                                 \
      } bits;                                                           \
      bits.f = VECT_VAR(result, T, W, N)[i];                            \
      fprintf(ref_file, "%" FMT ", ", bits.i);                          \
    }                                                                   \
  fprintf(ref_file, " }\n");                                            \
  DUMP4GCC_FP16(MSG,T,W,N,FMT);

/* Half-precision flavour of DUMP4GCC_FP(): write the lanes' bit
   patterns to gcc_tests_file as an "hfloat" expected-value array.  The
   last lane is written after the loop so that no trailing comma is
   emitted; the caller's `i' is left equal to N-1. */
#define DUMP4GCC_FP16(MSG,T,W,N,FMT)                                    \
  {                                                                     \
    union {                                                             \
      uint##W##_t i;                                                    \
      float##W##_t f;                                                   \
    } bits;                                                             \
    fprintf(gcc_tests_file,                                             \
            "VECT_VAR_DECL(expected,%s,%d,%d) [] = { ",                 \
            "hfloat", W, N);                                            \
    for(i=0; i<(N-1) ; i++)                                             \
      {                                                                 \
        bits.f = VECT_VAR(result, T, W, N)[i];                          \
        fprintf(gcc_tests_file, "0x%" FMT ", ", bits.i);                \
      }                                                                 \
    bits.f = VECT_VAR(result, T, W, N)[i];                              \
    fprintf(gcc_tests_file, "0x%" FMT, bits.i);                         \
    fprintf(gcc_tests_file, " };\n");                                   \
  }
Christophe Lyon94f99bc2014-09-02 16:51:20 +0200182#endif
Christophe Lyonfad316a2014-05-16 17:12:21 +0200183
/* Fill patterns for "clean" buffers, one constant per element width:
   the byte 0x33 repeated over the whole element. */
#define CLEAN_PATTERN_8  0x33
#define CLEAN_PATTERN_16 0x3333
#define CLEAN_PATTERN_32 0x33333333
#define CLEAN_PATTERN_64 0x3333333333333333

/* Overwrite every byte of the array <VAR>_<T><W>x<N> with
   CLEAN_PATTERN_8.  The expansion already ends with a semicolon. */
#define CLEAN(VAR,T,W,N)                                                \
  memset(VECT_VAR(VAR, T, W, N), CLEAN_PATTERN_8,                       \
         sizeof(VECT_VAR(VAR, T, W, N)));
193
/* Debugging aid: store vector <VAR>_<T1><W>x<N> into a scratch array
   with vst1<Q>_<T2><W>() and report every lane on stdout.
   NOTE(review): the comparison against CLEAN_PATTERN_<W> that used to
   guard the message is disabled, so the "unintialized!" line is printed
   for all N lanes whatever they contain.  The lane is printed with %#x
   regardless of W -- confirm this is never used with 64-bit lanes. */
#define CHECK_INIT(VAR,Q,T1,T2,W,N)                                     \
  {                                                                     \
    ARRAY(check_result, T1, W, N);                                      \
    int lane;                                                           \
                                                                        \
    vst1##Q##_##T2##W(VECT_VAR(check_result, T1, W, N),                 \
                      VECT_VAR(VAR, T1, W, N));                         \
    for(lane=0; lane<N ; lane++)                                        \
      fprintf(stdout, "%s:%d: %s[%d] unintialized! %#x\n",              \
              __FUNCTION__, __LINE__,                                   \
              STR(VECT_VAR(VAR, T1, W, N)), lane,                       \
              VECT_VAR(check_result, T1, W, N)[lane]);                  \
  }
211
212/* Generic declarations: */
213extern FILE* log_file;
214extern FILE* ref_file;
Christophe Lyonfad316a2014-05-16 17:12:21 +0200215extern FILE* gcc_tests_file;
Christophe Lyon073831a2011-01-24 17:37:40 +0100216
Christophe Lyon01af0a52013-01-17 17:23:11 +0100217/* Input buffers, one of each size */
218extern ARRAY(buffer, int, 8, 8);
219extern ARRAY(buffer, int, 16, 4);
220extern ARRAY(buffer, int, 32, 2);
221extern ARRAY(buffer, int, 64, 1);
222extern ARRAY(buffer, uint, 8, 8);
223extern ARRAY(buffer, uint, 16, 4);
224extern ARRAY(buffer, uint, 32, 2);
225extern ARRAY(buffer, uint, 64, 1);
Christophe Lyon80902f62013-03-29 16:26:42 +0100226extern ARRAY(buffer, poly, 8, 8);
227extern ARRAY(buffer, poly, 16, 4);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100228extern ARRAY(buffer, float, 32, 2);
Christophe Lyond98beba2016-08-24 18:02:41 +0200229#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200230extern ARRAY(buffer, float, 16, 4);
231#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100232extern ARRAY(buffer, int, 8, 16);
233extern ARRAY(buffer, int, 16, 8);
234extern ARRAY(buffer, int, 32, 4);
235extern ARRAY(buffer, int, 64, 2);
236extern ARRAY(buffer, uint, 8, 16);
237extern ARRAY(buffer, uint, 16, 8);
238extern ARRAY(buffer, uint, 32, 4);
239extern ARRAY(buffer, uint, 64, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100240extern ARRAY(buffer, poly, 8, 16);
241extern ARRAY(buffer, poly, 16, 8);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100242extern ARRAY(buffer, float, 32, 4);
Christophe Lyond98beba2016-08-24 18:02:41 +0200243#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200244extern ARRAY(buffer, float, 16, 8);
245#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100246
Christophe Lyon01af0a52013-01-17 17:23:11 +0100247/* The tests for vld1_dup and vdup expect at least 4 entries in the
248 input buffer, so force 1- and 2-elements initializers to have 4
249 entries. */
250extern ARRAY(buffer_dup, int, 8, 8);
251extern ARRAY(buffer_dup, int, 16, 4);
252extern ARRAY4(buffer_dup, int, 32, 2);
253extern ARRAY4(buffer_dup, int, 64, 1);
254extern ARRAY(buffer_dup, uint, 8, 8);
255extern ARRAY(buffer_dup, uint, 16, 4);
256extern ARRAY4(buffer_dup, uint, 32, 2);
257extern ARRAY4(buffer_dup, uint, 64, 1);
Christophe Lyon80902f62013-03-29 16:26:42 +0100258extern ARRAY(buffer_dup, poly, 8, 8);
259extern ARRAY(buffer_dup, poly, 16, 4);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100260extern ARRAY4(buffer_dup, float, 32, 2);
Christophe Lyond98beba2016-08-24 18:02:41 +0200261#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200262extern ARRAY4(buffer_dup, float, 16, 4);
263#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100264extern ARRAY(buffer_dup, int, 8, 16);
265extern ARRAY(buffer_dup, int, 16, 8);
266extern ARRAY(buffer_dup, int, 32, 4);
267extern ARRAY4(buffer_dup, int, 64, 2);
268extern ARRAY(buffer_dup, uint, 8, 16);
269extern ARRAY(buffer_dup, uint, 16, 8);
270extern ARRAY(buffer_dup, uint, 32, 4);
271extern ARRAY4(buffer_dup, uint, 64, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100272extern ARRAY(buffer_dup, poly, 8, 16);
273extern ARRAY(buffer_dup, poly, 16, 8);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100274extern ARRAY(buffer_dup, float, 32, 4);
Christophe Lyond98beba2016-08-24 18:02:41 +0200275#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200276extern ARRAY(buffer_dup, float, 16, 8);
277#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100278
Christophe Lyon01af0a52013-01-17 17:23:11 +0100279/* Input buffers for vld2, one of each size */
280extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
281extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
282extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
283extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
284extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
285extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
286extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
287extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100288extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
289extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100290extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
Christophe Lyond98beba2016-08-24 18:02:41 +0200291#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200292extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
293#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100294extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
295extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
296extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
297extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
298extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
299extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
300extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
301extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100302extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
303extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100304extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
Christophe Lyond98beba2016-08-24 18:02:41 +0200305#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200306extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
307#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100308
309/* Input buffers for vld3, one of each size */
310extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
311extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
312extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
313extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
314extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
315extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
316extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
317extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
Christophe Lyon80902f62013-03-29 16:26:42 +0100318extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
319extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100320extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
Christophe Lyond98beba2016-08-24 18:02:41 +0200321#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200322extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
323#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100324extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
325extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
326extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
327extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
328extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
329extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
330extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
331extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
Christophe Lyon80902f62013-03-29 16:26:42 +0100332extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
333extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100334extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
Christophe Lyond98beba2016-08-24 18:02:41 +0200335#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200336extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
337#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100338
339/* Input buffers for vld4, one of each size */
340extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
341extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
342extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
343extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
344extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
345extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
346extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
347extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
Christophe Lyon80902f62013-03-29 16:26:42 +0100348extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
349extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100350extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
Christophe Lyond98beba2016-08-24 18:02:41 +0200351#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200352extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
353#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100354extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
355extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
356extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
357extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
358extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
359extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
360extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
361extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
Christophe Lyon80902f62013-03-29 16:26:42 +0100362extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
363extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
Christophe Lyon01af0a52013-01-17 17:23:11 +0100364extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
Christophe Lyond98beba2016-08-24 18:02:41 +0200365#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200366extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
367#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100368
369/* Input buffers for vld2_lane */
370extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
371extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
372extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
373extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
374extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
375extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
376extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
377extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
Christophe Lyon80902f62013-03-29 16:26:42 +0100378extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
379extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
Christophe Lyon01af0a52013-01-17 17:23:11 +0100380extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
Christophe Lyond98beba2016-08-24 18:02:41 +0200381#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200382extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
383#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100384
385/* Input buffers for vld3_lane */
386extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
387extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
388extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
389extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
390extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
391extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
392extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
393extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
Christophe Lyon80902f62013-03-29 16:26:42 +0100394extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
395extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
Christophe Lyon01af0a52013-01-17 17:23:11 +0100396extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
Christophe Lyond98beba2016-08-24 18:02:41 +0200397#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200398extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
399#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100400
401/* Input buffers for vld4_lane */
402extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
403extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
404extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
405extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
406extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
407extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
408extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
409extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
Christophe Lyon80902f62013-03-29 16:26:42 +0100410extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
411extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
Christophe Lyon01af0a52013-01-17 17:23:11 +0100412extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
Christophe Lyond98beba2016-08-24 18:02:41 +0200413#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200414extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
415#endif
Christophe Lyon01af0a52013-01-17 17:23:11 +0100416
417/* Output buffers, one of each size */
Christophe Lyon073831a2011-01-24 17:37:40 +0100418static ARRAY(result, int, 8, 8);
419static ARRAY(result, int, 16, 4);
420static ARRAY(result, int, 32, 2);
421static ARRAY(result, int, 64, 1);
422static ARRAY(result, uint, 8, 8);
423static ARRAY(result, uint, 16, 4);
424static ARRAY(result, uint, 32, 2);
425static ARRAY(result, uint, 64, 1);
Christophe Lyon80902f62013-03-29 16:26:42 +0100426static ARRAY(result, poly, 8, 8);
427static ARRAY(result, poly, 16, 4);
Christophe Lyon073831a2011-01-24 17:37:40 +0100428static ARRAY(result, float, 32, 2);
Christophe Lyond98beba2016-08-24 18:02:41 +0200429#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200430static ARRAY(result, float, 16, 4);
431#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100432static ARRAY(result, int, 8, 16);
433static ARRAY(result, int, 16, 8);
434static ARRAY(result, int, 32, 4);
435static ARRAY(result, int, 64, 2);
436static ARRAY(result, uint, 8, 16);
437static ARRAY(result, uint, 16, 8);
438static ARRAY(result, uint, 32, 4);
439static ARRAY(result, uint, 64, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100440static ARRAY(result, poly, 8, 16);
441static ARRAY(result, poly, 16, 8);
Christophe Lyon073831a2011-01-24 17:37:40 +0100442static ARRAY(result, float, 32, 4);
Christophe Lyond98beba2016-08-24 18:02:41 +0200443#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200444static ARRAY(result, float, 16, 8);
445#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100446
447/* Dump results (generic function) */
448static void dump_results (char *test_name)
449{
450 int i;
451
452 fprintf(ref_file, "\n%s output:\n", test_name);
Christophe Lyonfad316a2014-05-16 17:12:21 +0200453 fprintf(gcc_tests_file, "\n%s output:\n", test_name);
Christophe Lyon073831a2011-01-24 17:37:40 +0100454
455 DUMP(test_name, int, 8, 8, PRId8);
456 DUMP(test_name, int, 16, 4, PRId16);
457 DUMP(test_name, int, 32, 2, PRId32);
458 DUMP(test_name, int, 64, 1, PRId64);
459 DUMP(test_name, uint, 8, 8, PRIu8);
460 DUMP(test_name, uint, 16, 4, PRIu16);
461 DUMP(test_name, uint, 32, 2, PRIu32);
462 DUMP(test_name, uint, 64, 1, PRIu64);
Christophe Lyond9ab3e32014-07-11 16:44:32 +0200463 DUMP_POLY(test_name, poly, 8, 8, PRIu8);
464 DUMP_POLY(test_name, poly, 16, 4, PRIu16);
Christophe Lyon073831a2011-01-24 17:37:40 +0100465 DUMP_FP(test_name, float, 32, 2, PRIx32);
Christophe Lyond98beba2016-08-24 18:02:41 +0200466#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200467 DUMP_FP16(test_name, float, 16, 4, PRIu16);
468#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100469
470 DUMP(test_name, int, 8, 16, PRId8);
471 DUMP(test_name, int, 16, 8, PRId16);
472 DUMP(test_name, int, 32, 4, PRId32);
473 DUMP(test_name, int, 64, 2, PRId64);
474 DUMP(test_name, uint, 8, 16, PRIu8);
475 DUMP(test_name, uint, 16, 8, PRIu16);
476 DUMP(test_name, uint, 32, 4, PRIu32);
477 DUMP(test_name, uint, 64, 2, PRIu64);
Christophe Lyond9ab3e32014-07-11 16:44:32 +0200478 DUMP_POLY(test_name, poly, 8, 16, PRIu8);
479 DUMP_POLY(test_name, poly, 16, 8, PRIu16);
Christophe Lyon073831a2011-01-24 17:37:40 +0100480 DUMP_FP(test_name, float, 32, 4, PRIx32);
Christophe Lyond98beba2016-08-24 18:02:41 +0200481#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200482 DUMP_FP16(test_name, float, 16, 8, PRIu16);
483#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100484}
485
486/* Dump results in hex (generic function) */
Christophe Lyon6f4d36f2011-07-19 16:18:19 +0200487static void dump_results_hex2 (const char *test_name, const char* comment)
Christophe Lyon073831a2011-01-24 17:37:40 +0100488{
489 int i;
490
491 fprintf(ref_file, "\n%s%s output:\n", test_name, comment);
Christophe Lyonfad316a2014-05-16 17:12:21 +0200492 fprintf(gcc_tests_file, "\n%s%s output:\n", test_name, comment);
Christophe Lyon073831a2011-01-24 17:37:40 +0100493
494 DUMP(test_name, int, 8, 8, PRIx8);
495 DUMP(test_name, int, 16, 4, PRIx16);
496 DUMP(test_name, int, 32, 2, PRIx32);
497 DUMP(test_name, int, 64, 1, PRIx64);
498 DUMP(test_name, uint, 8, 8, PRIx8);
499 DUMP(test_name, uint, 16, 4, PRIx16);
500 DUMP(test_name, uint, 32, 2, PRIx32);
501 DUMP(test_name, uint, 64, 1, PRIx64);
Christophe Lyond9ab3e32014-07-11 16:44:32 +0200502 DUMP_POLY(test_name, poly, 8, 8, PRIx8);
503 DUMP_POLY(test_name, poly, 16, 4, PRIx16);
Christophe Lyon073831a2011-01-24 17:37:40 +0100504 DUMP_FP(test_name, float, 32, 2, PRIx32);
Christophe Lyond98beba2016-08-24 18:02:41 +0200505#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200506 DUMP_FP16(test_name, float, 16, 4, PRIx16);
507#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100508
509 DUMP(test_name, int, 8, 16, PRIx8);
510 DUMP(test_name, int, 16, 8, PRIx16);
511 DUMP(test_name, int, 32, 4, PRIx32);
512 DUMP(test_name, int, 64, 2, PRIx64);
513 DUMP(test_name, uint, 8, 16, PRIx8);
514 DUMP(test_name, uint, 16, 8, PRIx16);
515 DUMP(test_name, uint, 32, 4, PRIx32);
516 DUMP(test_name, uint, 64, 2, PRIx64);
Christophe Lyond9ab3e32014-07-11 16:44:32 +0200517 DUMP_POLY(test_name, poly, 8, 16, PRIx8);
518 DUMP_POLY(test_name, poly, 16, 8, PRIx16);
Christophe Lyon073831a2011-01-24 17:37:40 +0100519 DUMP_FP(test_name, float, 32, 4, PRIx32);
Christophe Lyond98beba2016-08-24 18:02:41 +0200520#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) )
Christophe Lyon34adaf62013-04-11 15:05:18 +0200521 DUMP_FP16(test_name, float, 16, 8, PRIx16);
522#endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100523}
524
/* Hexadecimal dump of every result buffer with no extra comment in the
   header line: shorthand for dump_results_hex2 (test_name, ""). */
static void dump_results_hex (const char *test_name)
{
  static const char no_comment[] = "";

  dump_results_hex2(test_name, no_comment);
}
529
530#ifndef STM_ARM_NEON_MODELS
531
Christophe Lyon87b607e2012-08-31 10:42:41 +0200532/* This hack is to cope with various compilers/libc which may not
533 provide endian.h or cross-compilers such as llvm which includes the
534 host's endian.h. */
535#ifndef __arm__
536#include <endian.h>
537#define THIS_ENDIAN __BYTE_ORDER
538#else /* __arm__ */
539#ifdef __ARMEL__
540#define THIS_ENDIAN __LITTLE_ENDIAN
541#else /* __ARMEL__ */
542#define THIS_ENDIAN __BIG_ENDIAN
543#endif
544#endif /* __arm__ */
545
/* Overlay of the floating-point status word: `word' gives access to
   the whole 32-bit value, `b' to the individual flags.  The bit-fields
   are listed in opposite order for the two byte orders so that, with
   the compiler's bit-field allocation, QC sits above the 27 low-order
   bits and V, C, Z, N follow it.  QC is the only flag declared
   unsigned, so it reads back as 0 or 1. */
#if THIS_ENDIAN == __LITTLE_ENDIAN

typedef union {
  struct {
    int _xxx:27;           /* low-order bits, not used here */
    unsigned int QC:1;     /* cumulative saturation flag */
    int V:1;               /* overflow */
    int C:1;               /* carry */
    int Z:1;               /* zero */
    int N:1;               /* negative */
  } b;
  unsigned int word;
} _ARM_FPSCR;

#else /* __BIG_ENDIAN */

typedef union {
  struct {
    int N:1;               /* negative */
    int Z:1;               /* zero */
    int C:1;               /* carry */
    int V:1;               /* overflow */
    unsigned int QC:1;     /* cumulative saturation flag */
    int _dnm:27;           /* low-order bits, not used here */
  } b;
  unsigned int word;
} _ARM_FPSCR;

#endif /* __BIG_ENDIAN */
575
576#ifdef __ARMCC_VERSION
577register _ARM_FPSCR _afpscr_for_qc __asm("fpscr");
Christophe Lyon1a3b2392014-07-10 13:47:40 +0200578# define Neon_Cumulative_Sat _afpscr_for_qc.b.QC
Christophe Lyonc1cc7822015-01-20 16:04:24 +0100579# define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);}
Christophe Lyon073831a2011-01-24 17:37:40 +0100580#else
Christophe Lyoneb8034b2012-05-09 17:06:10 +0200581/* GCC/ARM does not know this register */
Christophe Lyon1a3b2392014-07-10 13:47:40 +0200582# define Neon_Cumulative_Sat __read_neon_cumulative_sat()
Christophe Lyonc1cc7822015-01-20 16:04:24 +0100583/* We need a fake dependency to ensure correct ordering of asm
584 statements to preset the QC flag value, and Neon operators writing
585 to QC. */
586#define Set_Neon_Cumulative_Sat(x, depend) \
587 __set_neon_cumulative_sat((x), (depend))
Christophe Lyon1a3b2392014-07-10 13:47:40 +0200588
589# if defined(__aarch64__)
590static volatile int __read_neon_cumulative_sat (void) {
591 _ARM_FPSCR _afpscr_for_qc;
592 asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
593 return _afpscr_for_qc.b.QC;
Christophe Lyoneb8034b2012-05-09 17:06:10 +0200594}
Christophe Lyonc1cc7822015-01-20 16:04:24 +0100595
596#define __set_neon_cumulative_sat(x, depend) { \
597 _ARM_FPSCR _afpscr_for_qc; \
598 asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \
599 _afpscr_for_qc.b.QC = x; \
600 asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
601 }
602
Christophe Lyon1a3b2392014-07-10 13:47:40 +0200603# else
604static volatile int __read_neon_cumulative_sat (void) {
605 _ARM_FPSCR _afpscr_for_qc;
606 asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
607 return _afpscr_for_qc.b.QC;
Christophe Lyoneb8034b2012-05-09 17:06:10 +0200608}
609
Christophe Lyonc1cc7822015-01-20 16:04:24 +0100610#define __set_neon_cumulative_sat(x, depend) { \
611 _ARM_FPSCR _afpscr_for_qc; \
612 asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \
613 _afpscr_for_qc.b.QC = x; \
614 asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
615 }
616
Christophe Lyon1a3b2392014-07-10 13:47:40 +0200617# endif
Christophe Lyon073831a2011-01-24 17:37:40 +0100618#endif
619
620#endif /* STM_ARM_NEON_MODELS */
621
Christophe Lyon4a6e5cc2014-06-03 22:47:52 +0200622static void dump_neon_cumulative_sat(const char* msg, const char *name,
623 const char* t1, int w, int n)
Christophe Lyon073831a2011-01-24 17:37:40 +0100624{
Christophe Lyon4a6e5cc2014-06-03 22:47:52 +0200625 fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++,
626 name, Neon_Cumulative_Sat);
627 fprintf(gcc_tests_file,
628 "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n",
629 t1, w, n, Neon_Cumulative_Sat);
Christophe Lyon073831a2011-01-24 17:37:40 +0100630}
631
632/* Clean output buffers before execution */
633static void clean_results (void)
634{
635 result_idx = 0;
636 CLEAN(result, int, 8, 8);
637 CLEAN(result, int, 16, 4);
638 CLEAN(result, int, 32, 2);
639 CLEAN(result, int, 64, 1);
640 CLEAN(result, uint, 8, 8);
641 CLEAN(result, uint, 16, 4);
642 CLEAN(result, uint, 32, 2);
643 CLEAN(result, uint, 64, 1);
Christophe Lyon80902f62013-03-29 16:26:42 +0100644 CLEAN(result, poly, 8, 8);
645 CLEAN(result, poly, 16, 4);
Christophe Lyon073831a2011-01-24 17:37:40 +0100646 CLEAN(result, float, 32, 2);
647
648 CLEAN(result, int, 8, 16);
649 CLEAN(result, int, 16, 8);
650 CLEAN(result, int, 32, 4);
651 CLEAN(result, int, 64, 2);
652 CLEAN(result, uint, 8, 16);
653 CLEAN(result, uint, 16, 8);
654 CLEAN(result, uint, 32, 4);
655 CLEAN(result, uint, 64, 2);
Christophe Lyon80902f62013-03-29 16:26:42 +0100656 CLEAN(result, poly, 8, 16);
657 CLEAN(result, poly, 16, 8);
Christophe Lyon073831a2011-01-24 17:37:40 +0100658 CLEAN(result, float, 32, 4);
659}
660
661
/* Helpers to declare variables of various types.

   DECL_VARIABLE declares one volatile variable whose type is
   VECT_TYPE(T1, W, N) and whose name is VECT_VAR(VAR, T1, W, N).
   The *_VARIANTS macros chain several such declarations, separated by
   ';'; the last declaration is left unterminated so that the caller
   supplies the final ';'.  */
#define DECL_VARIABLE(VAR, T1, W, N)                            \
  volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)

/* Signed integer variants, 64-bit vectors.  */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)       \
  DECL_VARIABLE(VAR, int, 8, 8);                        \
  DECL_VARIABLE(VAR, int, 16, 4);                       \
  DECL_VARIABLE(VAR, int, 32, 2);                       \
  DECL_VARIABLE(VAR, int, 64, 1)

/* Unsigned integer variants, 64-bit vectors.  */
#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)     \
  DECL_VARIABLE(VAR, uint, 8, 8);                       \
  DECL_VARIABLE(VAR, uint, 16, 4);                      \
  DECL_VARIABLE(VAR, uint, 32, 2);                      \
  DECL_VARIABLE(VAR, uint, 64, 1)

/* Signed integer variants, 128-bit vectors.  */
#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)      \
  DECL_VARIABLE(VAR, int, 8, 16);                       \
  DECL_VARIABLE(VAR, int, 16, 8);                       \
  DECL_VARIABLE(VAR, int, 32, 4);                       \
  DECL_VARIABLE(VAR, int, 64, 2)

/* Unsigned integer variants, 128-bit vectors.  */
#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)    \
  DECL_VARIABLE(VAR, uint, 8, 16);                      \
  DECL_VARIABLE(VAR, uint, 16, 8);                      \
  DECL_VARIABLE(VAR, uint, 32, 4);                      \
  DECL_VARIABLE(VAR, uint, 64, 2)

/* Every 64-bit variant: signed, unsigned, poly and float.  */
#define DECL_VARIABLE_64BITS_VARIANTS(VAR)              \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);            \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);          \
  DECL_VARIABLE(VAR, poly, 8, 8);                       \
  DECL_VARIABLE(VAR, poly, 16, 4);                      \
  DECL_VARIABLE(VAR, float, 32, 2)

/* Every 128-bit variant: signed, unsigned, poly and float.  */
#define DECL_VARIABLE_128BITS_VARIANTS(VAR)             \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);           \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);         \
  DECL_VARIABLE(VAR, poly, 8, 16);                      \
  DECL_VARIABLE(VAR, poly, 16, 8);                      \
  DECL_VARIABLE(VAR, float, 32, 4)

/* Every 64-bit and 128-bit variant.  */
#define DECL_VARIABLE_ALL_VARIANTS(VAR)                 \
  DECL_VARIABLE_64BITS_VARIANTS(VAR);                   \
  DECL_VARIABLE_128BITS_VARIANTS(VAR)

/* Signed integer variants, both vector sizes.  */
#define DECL_VARIABLE_SIGNED_VARIANTS(VAR)              \
  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);            \
  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)

/* Unsigned integer variants, both vector sizes.  */
#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR)            \
  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);          \
  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)

/* Helpers to initialize vectors */

/* Assign to VECT_VAR(VAR, T1, W, N) the result of
   vdup<Q>_n_<T2><W>(V).  */
#define VDUP(VAR, Q, T1, T2, W, N, V)                   \
  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)

/* Update VECT_VAR(VAR, T1, W, N) in place with the result of
   vset<Q>_lane_<T2><W>(V, <same vector>, L).  */
#define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V)              \
  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,           \
                                                   VECT_VAR(VAR, T1, W, N), \
                                                   L)

/* We need to load initial values first, so rely on VLD1:
   VECT_VAR(VAR, T1, W, N) receives
   vld1<Q>_<T2><W>(VECT_VAR(BUF, T1, W, N)).  */
#define VLOAD(VAR, BUF, Q, T1, T2, W, N)                                \
  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))

/* Helpers for macros with 1 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR, Q, T1, T2, W, N) once per listed
   vector type: Q is empty for 64-bit vectors and 'q' for 128-bit
   ones, T1/T2 are the long and short type names (int/s, uint/u), W
   the element width and N the number of lanes.  Invocations are
   separated by ';'; the caller supplies the final ';'.  */

/* Signed integer variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)       \
  MACRO(VAR, , int, s, 8, 8);                                   \
  MACRO(VAR, , int, s, 16, 4);                                  \
  MACRO(VAR, , int, s, 32, 2);                                  \
  MACRO(VAR, , int, s, 64, 1)

/* Unsigned integer variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)     \
  MACRO(VAR, , uint, u, 8, 8);                                  \
  MACRO(VAR, , uint, u, 16, 4);                                 \
  MACRO(VAR, , uint, u, 32, 2);                                 \
  MACRO(VAR, , uint, u, 64, 1)

/* Signed integer variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
  MACRO(VAR, q, int, s, 8, 16);                                 \
  MACRO(VAR, q, int, s, 16, 8);                                 \
  MACRO(VAR, q, int, s, 32, 4);                                 \
  MACRO(VAR, q, int, s, 64, 2)

/* Unsigned integer variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)     \
  MACRO(VAR, q, uint, u, 8, 16);                                \
  MACRO(VAR, q, uint, u, 16, 8);                                \
  MACRO(VAR, q, uint, u, 32, 4);                                \
  MACRO(VAR, q, uint, u, 64, 2)

/* Signed then unsigned integer variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)      \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);    \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* Signed then unsigned integer variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)     \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);   \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)

/* All integer variants, both vector sizes.  */
#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);   \
  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)

/* Signed integer variants, both vector sizes.  */
#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)      \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);    \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)

/* Helpers for macros with 2 constant and 5 variable arguments.

   Each helper invokes MACRO(VAR1, VAR2, Q, T1, T2, W, N) once per
   listed vector type: Q is empty for 64-bit vectors and 'q' for
   128-bit ones, T1/T2 are the long and short type names (int/s,
   uint/u, poly/p), W the element width and N the number of lanes.
   Invocations are separated by ';'; the caller supplies the final
   ';'.  */

/* Signed integer variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)        \
  MACRO(VAR1, VAR2, , int, s, 8, 8);                                    \
  MACRO(VAR1, VAR2, , int, s, 16, 4);                                   \
  MACRO(VAR1, VAR2, , int, s, 32, 2);                                   \
  MACRO(VAR1, VAR2, , int, s, 64, 1)

/* Unsigned integer variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
  MACRO(VAR1, VAR2, , uint, u, 8, 8);                                   \
  MACRO(VAR1, VAR2, , uint, u, 16, 4);                                  \
  MACRO(VAR1, VAR2, , uint, u, 32, 2);                                  \
  MACRO(VAR1, VAR2, , uint, u, 64, 1)

/* Signed integer variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  MACRO(VAR1, VAR2, q, int, s, 8, 16);                                  \
  MACRO(VAR1, VAR2, q, int, s, 16, 8);                                  \
  MACRO(VAR1, VAR2, q, int, s, 32, 4);                                  \
  MACRO(VAR1, VAR2, q, int, s, 64, 2)

/* Unsigned integer variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)     \
  MACRO(VAR1, VAR2, q, uint, u, 8, 16);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 16, 8);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 32, 4);                                 \
  MACRO(VAR1, VAR2, q, uint, u, 64, 2)

/* Signed, unsigned and poly variants, 64-bit vectors.  */
#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);     \
  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);   \
  MACRO(VAR1, VAR2, , poly, p, 8, 8);                           \
  MACRO(VAR1, VAR2, , poly, p, 16, 4)

/* Signed, unsigned and poly variants, 128-bit vectors.  */
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)      \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);  \
  MACRO(VAR1, VAR2, q, poly, p, 8, 16);                         \
  MACRO(VAR1, VAR2, q, poly, p, 16, 8)

/* Signed, unsigned and poly variants, both vector sizes.  */
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)  \
  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);    \
  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)

/* Signed integer variants, both vector sizes.  */
#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)       \
  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);     \
  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)

#endif /* _STM_ARM_NEON_REF_H_ */