blob: c297518e258832e03a68f661a4ba5937f68a79bc [file] [log] [blame]
/*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __ARM_NEON_H
25#define __ARM_NEON_H
26
27#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)
28#error "NEON support not enabled"
29#endif
30
31#include <stdint.h>
32
33typedef float float32_t;
34typedef __fp16 float16_t;
35#ifdef __aarch64__
36typedef double float64_t;
37#endif
38
39#ifdef __aarch64__
40typedef uint8_t poly8_t;
41typedef uint16_t poly16_t;
42typedef uint64_t poly64_t;
43#else
44typedef int8_t poly8_t;
45typedef int16_t poly16_t;
46#endif
47typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
48typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
49typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
50typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
51typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
52typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
53typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
54typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
55typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
56typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
57typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
58typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
59typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
60typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
61typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
62typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
63typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
64typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
65typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
66typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
67#ifdef __aarch64__
68typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
69typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
70#endif
71typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
72typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
73typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
74typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
75#ifdef __aarch64__
76typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t;
77typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t;
78#endif
79
80typedef struct int8x8x2_t {
81 int8x8_t val[2];
82} int8x8x2_t;
83
84typedef struct int8x16x2_t {
85 int8x16_t val[2];
86} int8x16x2_t;
87
88typedef struct int16x4x2_t {
89 int16x4_t val[2];
90} int16x4x2_t;
91
92typedef struct int16x8x2_t {
93 int16x8_t val[2];
94} int16x8x2_t;
95
96typedef struct int32x2x2_t {
97 int32x2_t val[2];
98} int32x2x2_t;
99
100typedef struct int32x4x2_t {
101 int32x4_t val[2];
102} int32x4x2_t;
103
104typedef struct int64x1x2_t {
105 int64x1_t val[2];
106} int64x1x2_t;
107
108typedef struct int64x2x2_t {
109 int64x2_t val[2];
110} int64x2x2_t;
111
112typedef struct uint8x8x2_t {
113 uint8x8_t val[2];
114} uint8x8x2_t;
115
116typedef struct uint8x16x2_t {
117 uint8x16_t val[2];
118} uint8x16x2_t;
119
120typedef struct uint16x4x2_t {
121 uint16x4_t val[2];
122} uint16x4x2_t;
123
124typedef struct uint16x8x2_t {
125 uint16x8_t val[2];
126} uint16x8x2_t;
127
128typedef struct uint32x2x2_t {
129 uint32x2_t val[2];
130} uint32x2x2_t;
131
132typedef struct uint32x4x2_t {
133 uint32x4_t val[2];
134} uint32x4x2_t;
135
136typedef struct uint64x1x2_t {
137 uint64x1_t val[2];
138} uint64x1x2_t;
139
140typedef struct uint64x2x2_t {
141 uint64x2_t val[2];
142} uint64x2x2_t;
143
144typedef struct float16x4x2_t {
145 float16x4_t val[2];
146} float16x4x2_t;
147
148typedef struct float16x8x2_t {
149 float16x8_t val[2];
150} float16x8x2_t;
151
152typedef struct float32x2x2_t {
153 float32x2_t val[2];
154} float32x2x2_t;
155
156typedef struct float32x4x2_t {
157 float32x4_t val[2];
158} float32x4x2_t;
159
160#ifdef __aarch64__
161typedef struct float64x1x2_t {
162 float64x1_t val[2];
163} float64x1x2_t;
164
165typedef struct float64x2x2_t {
166 float64x2_t val[2];
167} float64x2x2_t;
168
169#endif
170typedef struct poly8x8x2_t {
171 poly8x8_t val[2];
172} poly8x8x2_t;
173
174typedef struct poly8x16x2_t {
175 poly8x16_t val[2];
176} poly8x16x2_t;
177
178typedef struct poly16x4x2_t {
179 poly16x4_t val[2];
180} poly16x4x2_t;
181
182typedef struct poly16x8x2_t {
183 poly16x8_t val[2];
184} poly16x8x2_t;
185
186#ifdef __aarch64__
187typedef struct poly64x1x2_t {
188 poly64x1_t val[2];
189} poly64x1x2_t;
190
191typedef struct poly64x2x2_t {
192 poly64x2_t val[2];
193} poly64x2x2_t;
194
195#endif
196typedef struct int8x8x3_t {
197 int8x8_t val[3];
198} int8x8x3_t;
199
200typedef struct int8x16x3_t {
201 int8x16_t val[3];
202} int8x16x3_t;
203
204typedef struct int16x4x3_t {
205 int16x4_t val[3];
206} int16x4x3_t;
207
208typedef struct int16x8x3_t {
209 int16x8_t val[3];
210} int16x8x3_t;
211
212typedef struct int32x2x3_t {
213 int32x2_t val[3];
214} int32x2x3_t;
215
216typedef struct int32x4x3_t {
217 int32x4_t val[3];
218} int32x4x3_t;
219
220typedef struct int64x1x3_t {
221 int64x1_t val[3];
222} int64x1x3_t;
223
224typedef struct int64x2x3_t {
225 int64x2_t val[3];
226} int64x2x3_t;
227
228typedef struct uint8x8x3_t {
229 uint8x8_t val[3];
230} uint8x8x3_t;
231
232typedef struct uint8x16x3_t {
233 uint8x16_t val[3];
234} uint8x16x3_t;
235
236typedef struct uint16x4x3_t {
237 uint16x4_t val[3];
238} uint16x4x3_t;
239
240typedef struct uint16x8x3_t {
241 uint16x8_t val[3];
242} uint16x8x3_t;
243
244typedef struct uint32x2x3_t {
245 uint32x2_t val[3];
246} uint32x2x3_t;
247
248typedef struct uint32x4x3_t {
249 uint32x4_t val[3];
250} uint32x4x3_t;
251
252typedef struct uint64x1x3_t {
253 uint64x1_t val[3];
254} uint64x1x3_t;
255
256typedef struct uint64x2x3_t {
257 uint64x2_t val[3];
258} uint64x2x3_t;
259
260typedef struct float16x4x3_t {
261 float16x4_t val[3];
262} float16x4x3_t;
263
264typedef struct float16x8x3_t {
265 float16x8_t val[3];
266} float16x8x3_t;
267
268typedef struct float32x2x3_t {
269 float32x2_t val[3];
270} float32x2x3_t;
271
272typedef struct float32x4x3_t {
273 float32x4_t val[3];
274} float32x4x3_t;
275
276#ifdef __aarch64__
277typedef struct float64x1x3_t {
278 float64x1_t val[3];
279} float64x1x3_t;
280
281typedef struct float64x2x3_t {
282 float64x2_t val[3];
283} float64x2x3_t;
284
285#endif
286typedef struct poly8x8x3_t {
287 poly8x8_t val[3];
288} poly8x8x3_t;
289
290typedef struct poly8x16x3_t {
291 poly8x16_t val[3];
292} poly8x16x3_t;
293
294typedef struct poly16x4x3_t {
295 poly16x4_t val[3];
296} poly16x4x3_t;
297
298typedef struct poly16x8x3_t {
299 poly16x8_t val[3];
300} poly16x8x3_t;
301
302#ifdef __aarch64__
303typedef struct poly64x1x3_t {
304 poly64x1_t val[3];
305} poly64x1x3_t;
306
307typedef struct poly64x2x3_t {
308 poly64x2_t val[3];
309} poly64x2x3_t;
310
311#endif
312typedef struct int8x8x4_t {
313 int8x8_t val[4];
314} int8x8x4_t;
315
316typedef struct int8x16x4_t {
317 int8x16_t val[4];
318} int8x16x4_t;
319
320typedef struct int16x4x4_t {
321 int16x4_t val[4];
322} int16x4x4_t;
323
324typedef struct int16x8x4_t {
325 int16x8_t val[4];
326} int16x8x4_t;
327
328typedef struct int32x2x4_t {
329 int32x2_t val[4];
330} int32x2x4_t;
331
332typedef struct int32x4x4_t {
333 int32x4_t val[4];
334} int32x4x4_t;
335
336typedef struct int64x1x4_t {
337 int64x1_t val[4];
338} int64x1x4_t;
339
340typedef struct int64x2x4_t {
341 int64x2_t val[4];
342} int64x2x4_t;
343
344typedef struct uint8x8x4_t {
345 uint8x8_t val[4];
346} uint8x8x4_t;
347
348typedef struct uint8x16x4_t {
349 uint8x16_t val[4];
350} uint8x16x4_t;
351
352typedef struct uint16x4x4_t {
353 uint16x4_t val[4];
354} uint16x4x4_t;
355
356typedef struct uint16x8x4_t {
357 uint16x8_t val[4];
358} uint16x8x4_t;
359
360typedef struct uint32x2x4_t {
361 uint32x2_t val[4];
362} uint32x2x4_t;
363
364typedef struct uint32x4x4_t {
365 uint32x4_t val[4];
366} uint32x4x4_t;
367
368typedef struct uint64x1x4_t {
369 uint64x1_t val[4];
370} uint64x1x4_t;
371
372typedef struct uint64x2x4_t {
373 uint64x2_t val[4];
374} uint64x2x4_t;
375
376typedef struct float16x4x4_t {
377 float16x4_t val[4];
378} float16x4x4_t;
379
380typedef struct float16x8x4_t {
381 float16x8_t val[4];
382} float16x8x4_t;
383
384typedef struct float32x2x4_t {
385 float32x2_t val[4];
386} float32x2x4_t;
387
388typedef struct float32x4x4_t {
389 float32x4_t val[4];
390} float32x4x4_t;
391
392#ifdef __aarch64__
393typedef struct float64x1x4_t {
394 float64x1_t val[4];
395} float64x1x4_t;
396
397typedef struct float64x2x4_t {
398 float64x2_t val[4];
399} float64x2x4_t;
400
401#endif
402typedef struct poly8x8x4_t {
403 poly8x8_t val[4];
404} poly8x8x4_t;
405
406typedef struct poly8x16x4_t {
407 poly8x16_t val[4];
408} poly8x16x4_t;
409
410typedef struct poly16x4x4_t {
411 poly16x4_t val[4];
412} poly16x4x4_t;
413
414typedef struct poly16x8x4_t {
415 poly16x8_t val[4];
416} poly16x8x4_t;
417
418#ifdef __aarch64__
419typedef struct poly64x1x4_t {
420 poly64x1_t val[4];
421} poly64x1x4_t;
422
423typedef struct poly64x2x4_t {
424 poly64x2_t val[4];
425} poly64x2x4_t;
426
427#endif
428
/* Prefix applied to every intrinsic wrapper in this header: internal linkage,
 * inline, forced inlining, and the __nodebug__ attribute. */
#define __ai static inline __attribute__((__nodebug__, __always_inline__))
430
431__ai int16x8_t vmovl_s8(int8x8_t __a) {
432 return (int16x8_t)__builtin_neon_vmovl_v(__a, 33); }
433__ai int32x4_t vmovl_s16(int16x4_t __a) {
434 return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 34); }
435__ai int64x2_t vmovl_s32(int32x2_t __a) {
436 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 35); }
437__ai uint16x8_t vmovl_u8(uint8x8_t __a) {
438 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 49); }
439__ai uint32x4_t vmovl_u16(uint16x4_t __a) {
440 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 50); }
441__ai uint64x2_t vmovl_u32(uint32x2_t __a) {
442 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 51); }
443
444__ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) {
445 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 33); }
446__ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) {
447 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
448__ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) {
449 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
450__ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) {
451 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); }
452__ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) {
453 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); }
454__ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) {
455 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); }
456__ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) {
457 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); }
458
459__ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) {
460 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); }
461__ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) {
462 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
463__ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) {
464 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
465__ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) {
466 return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
467__ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) {
468 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
469__ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) {
470 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
471__ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) {
472 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 8); }
473__ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) {
474 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 32); }
475__ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) {
476 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
477__ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) {
478 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
479__ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) {
480 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
481__ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) {
482 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
483__ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) {
484 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
485__ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) {
486 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
487
488__ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) {
489 return (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__a, __b)); }
490__ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) {
491 return (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__a, __b)); }
492__ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) {
493 return (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__a, __b)); }
494__ai uint16x8_t vabdl_u8(uint8x8_t __a, uint8x8_t __b) {
495 return vmovl_u8(vabd_u8(__a, __b)); }
496__ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) {
497 return vmovl_u16(vabd_u16(__a, __b)); }
498__ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) {
499 return vmovl_u32(vabd_u32(__a, __b)); }
500
501__ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
502 return __a + vabd_s8(__b, __c); }
503__ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
504 return __a + vabd_s16(__b, __c); }
505__ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
506 return __a + vabd_s32(__b, __c); }
507__ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
508 return __a + vabd_u8(__b, __c); }
509__ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
510 return __a + vabd_u16(__b, __c); }
511__ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
512 return __a + vabd_u32(__b, __c); }
513__ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
514 return __a + vabdq_s8(__b, __c); }
515__ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
516 return __a + vabdq_s16(__b, __c); }
517__ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
518 return __a + vabdq_s32(__b, __c); }
519__ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
520 return __a + vabdq_u8(__b, __c); }
521__ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
522 return __a + vabdq_u16(__b, __c); }
523__ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
524 return __a + vabdq_u32(__b, __c); }
525
526__ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
527 return __a + vabdl_s8(__b, __c); }
528__ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
529 return __a + vabdl_s16(__b, __c); }
530__ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
531 return __a + vabdl_s32(__b, __c); }
532__ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
533 return __a + vabdl_u8(__b, __c); }
534__ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
535 return __a + vabdl_u16(__b, __c); }
536__ai uint64x2_t vabal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
537 return __a + vabdl_u32(__b, __c); }
538
539
540__ai int8x8_t vabs_s8(int8x8_t __a) {
541 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); }
542__ai int16x4_t vabs_s16(int16x4_t __a) {
543 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); }
544__ai int32x2_t vabs_s32(int32x2_t __a) {
545 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); }
546__ai float32x2_t vabs_f32(float32x2_t __a) {
547 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 8); }
548__ai int8x16_t vabsq_s8(int8x16_t __a) {
549 return (int8x16_t)__builtin_neon_vabsq_v(__a, 32); }
550__ai int16x8_t vabsq_s16(int16x8_t __a) {
551 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 33); }
552__ai int32x4_t vabsq_s32(int32x4_t __a) {
553 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 34); }
554__ai float32x4_t vabsq_f32(float32x4_t __a) {
555 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 40); }
556
557__ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) {
558 return __a + __b; }
559__ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) {
560 return __a + __b; }
561__ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) {
562 return __a + __b; }
563__ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) {
564 return __a + __b; }
565__ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) {
566 return __a + __b; }
567__ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) {
568 return __a + __b; }
569__ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) {
570 return __a + __b; }
571__ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) {
572 return __a + __b; }
573__ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) {
574 return __a + __b; }
575__ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) {
576 return __a + __b; }
577__ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) {
578 return __a + __b; }
579__ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) {
580 return __a + __b; }
581__ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) {
582 return __a + __b; }
583__ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) {
584 return __a + __b; }
585__ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) {
586 return __a + __b; }
587__ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) {
588 return __a + __b; }
589__ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) {
590 return __a + __b; }
591__ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) {
592 return __a + __b; }
593
594__ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) {
595 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
596__ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) {
597 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
598__ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) {
599 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
600__ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) {
601 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
602__ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) {
603 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
604__ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) {
605 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
606
607__ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) {
608 return vmovl_s8(__a) + vmovl_s8(__b); }
609__ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) {
610 return vmovl_s16(__a) + vmovl_s16(__b); }
611__ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) {
612 return vmovl_s32(__a) + vmovl_s32(__b); }
613__ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) {
614 return vmovl_u8(__a) + vmovl_u8(__b); }
615__ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) {
616 return vmovl_u16(__a) + vmovl_u16(__b); }
617__ai uint64x2_t vaddl_u32(uint32x2_t __a, uint32x2_t __b) {
618 return vmovl_u32(__a) + vmovl_u32(__b); }
619
620__ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) {
621 return __a + vmovl_s8(__b); }
622__ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) {
623 return __a + vmovl_s16(__b); }
624__ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) {
625 return __a + vmovl_s32(__b); }
626__ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) {
627 return __a + vmovl_u8(__b); }
628__ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) {
629 return __a + vmovl_u16(__b); }
630__ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) {
631 return __a + vmovl_u32(__b); }
632
633__ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) {
634 return __a & __b; }
635__ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) {
636 return __a & __b; }
637__ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) {
638 return __a & __b; }
639__ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) {
640 return __a & __b; }
641__ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) {
642 return __a & __b; }
643__ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) {
644 return __a & __b; }
645__ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) {
646 return __a & __b; }
647__ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) {
648 return __a & __b; }
649__ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) {
650 return __a & __b; }
651__ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) {
652 return __a & __b; }
653__ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) {
654 return __a & __b; }
655__ai int64x2_t vandq_s64(int64x2_t __a, int64x2_t __b) {
656 return __a & __b; }
657__ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) {
658 return __a & __b; }
659__ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) {
660 return __a & __b; }
661__ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) {
662 return __a & __b; }
663__ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) {
664 return __a & __b; }
665
666__ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) {
667 return __a & ~__b; }
668__ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) {
669 return __a & ~__b; }
670__ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) {
671 return __a & ~__b; }
672__ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) {
673 return __a & ~__b; }
674__ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) {
675 return __a & ~__b; }
676__ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) {
677 return __a & ~__b; }
678__ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) {
679 return __a & ~__b; }
680__ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) {
681 return __a & ~__b; }
682__ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) {
683 return __a & ~__b; }
684__ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) {
685 return __a & ~__b; }
686__ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) {
687 return __a & ~__b; }
688__ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) {
689 return __a & ~__b; }
690__ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) {
691 return __a & ~__b; }
692__ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) {
693 return __a & ~__b; }
694__ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) {
695 return __a & ~__b; }
696__ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) {
697 return __a & ~__b; }
698
699__ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) {
700 return (int8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, __b, __c, 0); }
701__ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t __c) {
702 return (int16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 1); }
703__ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) {
704 return (int32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 2); }
705__ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) {
706 return (int64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 3); }
707__ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
708 return (uint8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); }
709__ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
710 return (uint16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 17); }
711__ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
712 return (uint32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); }
713__ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) {
714 return (uint64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); }
715__ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) {
716 return (float32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 8); }
717__ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) {
718 return (poly8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); }
719__ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) {
720 return (poly16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); }
721__ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) {
722 return (int8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, __b, __c, 32); }
723__ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) {
724 return (int16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); }
725__ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) {
726 return (int32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); }
727__ai int64x2_t vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) {
728 return (int64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); }
729__ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
730 return (uint8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); }
731__ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
732 return (uint16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); }
733__ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
734 return (uint32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
735__ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) {
736 return (uint64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 51); }
737__ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) {
738 return (float32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); }
739__ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) {
740 return (poly8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); }
741__ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) {
742 return (poly16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 37); }
743
744__ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) {
745 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); }
746__ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) {
747 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
748
749__ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) {
750 return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
751__ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) {
752 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
753
754__ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) {
755 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); }
756__ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) {
757 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
758
759__ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) {
760 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
761__ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) {
762 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
763
764__ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) {
765 return (uint8x8_t)(__a == __b); }
766__ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) {
767 return (uint16x4_t)(__a == __b); }
768__ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) {
769 return (uint32x2_t)(__a == __b); }
770__ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) {
771 return (uint32x2_t)(__a == __b); }
772__ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) {
773 return (uint8x8_t)(__a == __b); }
774__ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) {
775 return (uint16x4_t)(__a == __b); }
776__ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) {
777 return (uint32x2_t)(__a == __b); }
778__ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) {
779 return (uint8x8_t)(__a == __b); }
780__ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) {
781 return (uint8x16_t)(__a == __b); }
782__ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) {
783 return (uint16x8_t)(__a == __b); }
784__ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) {
785 return (uint32x4_t)(__a == __b); }
786__ai uint32x4_t vceqq_f32(float32x4_t __a, float32x4_t __b) {
787 return (uint32x4_t)(__a == __b); }
788__ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) {
789 return (uint8x16_t)(__a == __b); }
790__ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) {
791 return (uint16x8_t)(__a == __b); }
792__ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) {
793 return (uint32x4_t)(__a == __b); }
794__ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) {
795 return (uint8x16_t)(__a == __b); }
796
797__ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) {
798 return (uint8x8_t)(__a >= __b); }
799__ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) {
800 return (uint16x4_t)(__a >= __b); }
801__ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) {
802 return (uint32x2_t)(__a >= __b); }
803__ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) {
804 return (uint32x2_t)(__a >= __b); }
805__ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) {
806 return (uint8x8_t)(__a >= __b); }
807__ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) {
808 return (uint16x4_t)(__a >= __b); }
809__ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) {
810 return (uint32x2_t)(__a >= __b); }
811__ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) {
812 return (uint8x16_t)(__a >= __b); }
813__ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) {
814 return (uint16x8_t)(__a >= __b); }
815__ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) {
816 return (uint32x4_t)(__a >= __b); }
817__ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) {
818 return (uint32x4_t)(__a >= __b); }
819__ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) {
820 return (uint8x16_t)(__a >= __b); }
821__ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) {
822 return (uint16x8_t)(__a >= __b); }
823__ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) {
824 return (uint32x4_t)(__a >= __b); }
825
826__ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) {
827 return (uint8x8_t)(__a > __b); }
828__ai uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) {
829 return (uint16x4_t)(__a > __b); }
830__ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) {
831 return (uint32x2_t)(__a > __b); }
832__ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) {
833 return (uint32x2_t)(__a > __b); }
834__ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) {
835 return (uint8x8_t)(__a > __b); }
836__ai uint16x4_t vcgt_u16(uint16x4_t __a, uint16x4_t __b) {
837 return (uint16x4_t)(__a > __b); }
838__ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) {
839 return (uint32x2_t)(__a > __b); }
840__ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) {
841 return (uint8x16_t)(__a > __b); }
842__ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) {
843 return (uint16x8_t)(__a > __b); }
844__ai uint32x4_t vcgtq_s32(int32x4_t __a, int32x4_t __b) {
845 return (uint32x4_t)(__a > __b); }
846__ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) {
847 return (uint32x4_t)(__a > __b); }
848__ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) {
849 return (uint8x16_t)(__a > __b); }
850__ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) {
851 return (uint16x8_t)(__a > __b); }
852__ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) {
853 return (uint32x4_t)(__a > __b); }
854
855__ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) {
856 return (uint8x8_t)(__a <= __b); }
857__ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) {
858 return (uint16x4_t)(__a <= __b); }
859__ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) {
860 return (uint32x2_t)(__a <= __b); }
861__ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) {
862 return (uint32x2_t)(__a <= __b); }
863__ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) {
864 return (uint8x8_t)(__a <= __b); }
865__ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) {
866 return (uint16x4_t)(__a <= __b); }
867__ai uint32x2_t vcle_u32(uint32x2_t __a, uint32x2_t __b) {
868 return (uint32x2_t)(__a <= __b); }
869__ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) {
870 return (uint8x16_t)(__a <= __b); }
871__ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) {
872 return (uint16x8_t)(__a <= __b); }
873__ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) {
874 return (uint32x4_t)(__a <= __b); }
875__ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) {
876 return (uint32x4_t)(__a <= __b); }
877__ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) {
878 return (uint8x16_t)(__a <= __b); }
879__ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) {
880 return (uint16x8_t)(__a <= __b); }
881__ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) {
882 return (uint32x4_t)(__a <= __b); }
883
884__ai int8x8_t vcls_s8(int8x8_t __a) {
885 return (int8x8_t)__builtin_neon_vcls_v(__a, 0); }
886__ai int16x4_t vcls_s16(int16x4_t __a) {
887 return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); }
888__ai int32x2_t vcls_s32(int32x2_t __a) {
889 return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); }
890__ai int8x16_t vclsq_s8(int8x16_t __a) {
891 return (int8x16_t)__builtin_neon_vclsq_v(__a, 32); }
892__ai int16x8_t vclsq_s16(int16x8_t __a) {
893 return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 33); }
894__ai int32x4_t vclsq_s32(int32x4_t __a) {
895 return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 34); }
896
897__ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) {
898 return (uint8x8_t)(__a < __b); }
899__ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) {
900 return (uint16x4_t)(__a < __b); }
901__ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) {
902 return (uint32x2_t)(__a < __b); }
903__ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) {
904 return (uint32x2_t)(__a < __b); }
905__ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) {
906 return (uint8x8_t)(__a < __b); }
907__ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) {
908 return (uint16x4_t)(__a < __b); }
909__ai uint32x2_t vclt_u32(uint32x2_t __a, uint32x2_t __b) {
910 return (uint32x2_t)(__a < __b); }
911__ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) {
912 return (uint8x16_t)(__a < __b); }
913__ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) {
914 return (uint16x8_t)(__a < __b); }
915__ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) {
916 return (uint32x4_t)(__a < __b); }
917__ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) {
918 return (uint32x4_t)(__a < __b); }
919__ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) {
920 return (uint8x16_t)(__a < __b); }
921__ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) {
922 return (uint16x8_t)(__a < __b); }
923__ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) {
924 return (uint32x4_t)(__a < __b); }
925
926__ai int8x8_t vclz_s8(int8x8_t __a) {
927 return (int8x8_t)__builtin_neon_vclz_v(__a, 0); }
928__ai int16x4_t vclz_s16(int16x4_t __a) {
929 return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); }
930__ai int32x2_t vclz_s32(int32x2_t __a) {
931 return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); }
932__ai uint8x8_t vclz_u8(uint8x8_t __a) {
933 return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 16); }
934__ai uint16x4_t vclz_u16(uint16x4_t __a) {
935 return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 17); }
936__ai uint32x2_t vclz_u32(uint32x2_t __a) {
937 return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 18); }
938__ai int8x16_t vclzq_s8(int8x16_t __a) {
939 return (int8x16_t)__builtin_neon_vclzq_v(__a, 32); }
940__ai int16x8_t vclzq_s16(int16x8_t __a) {
941 return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 33); }
942__ai int32x4_t vclzq_s32(int32x4_t __a) {
943 return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 34); }
944__ai uint8x16_t vclzq_u8(uint8x16_t __a) {
945 return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 48); }
946__ai uint16x8_t vclzq_u16(uint16x8_t __a) {
947 return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 49); }
948__ai uint32x4_t vclzq_u32(uint32x4_t __a) {
949 return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 50); }
950
951__ai uint8x8_t vcnt_u8(uint8x8_t __a) {
952 return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 16); }
953__ai int8x8_t vcnt_s8(int8x8_t __a) {
954 return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); }
955__ai poly8x8_t vcnt_p8(poly8x8_t __a) {
956 return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 4); }
957__ai uint8x16_t vcntq_u8(uint8x16_t __a) {
958 return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 48); }
959__ai int8x16_t vcntq_s8(int8x16_t __a) {
960 return (int8x16_t)__builtin_neon_vcntq_v(__a, 32); }
961__ai poly8x16_t vcntq_p8(poly8x16_t __a) {
962 return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 36); }
963
964__ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) {
965 return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
966__ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) {
967 return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
968__ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) {
969 return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
970__ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) {
971 return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
972__ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) {
973 return (float16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
974__ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) {
975 return (float32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
976__ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) {
977 return (uint8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
978__ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) {
979 return (uint16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
980__ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) {
981 return (uint32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
982__ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) {
983 return (uint64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
984__ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) {
985 return (poly8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
986__ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) {
987 return (poly16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
988
989__ai int8x8_t vcreate_s8(uint64_t __a) {
990 return (int8x8_t)__a; }
991__ai int16x4_t vcreate_s16(uint64_t __a) {
992 return (int16x4_t)__a; }
993__ai int32x2_t vcreate_s32(uint64_t __a) {
994 return (int32x2_t)__a; }
995__ai float16x4_t vcreate_f16(uint64_t __a) {
996 return (float16x4_t)__a; }
997__ai float32x2_t vcreate_f32(uint64_t __a) {
998 return (float32x2_t)__a; }
999__ai uint8x8_t vcreate_u8(uint64_t __a) {
1000 return (uint8x8_t)__a; }
1001__ai uint16x4_t vcreate_u16(uint64_t __a) {
1002 return (uint16x4_t)__a; }
1003__ai uint32x2_t vcreate_u32(uint64_t __a) {
1004 return (uint32x2_t)__a; }
1005__ai uint64x1_t vcreate_u64(uint64_t __a) {
1006 return (uint64x1_t)__a; }
1007__ai poly8x8_t vcreate_p8(uint64_t __a) {
1008 return (poly8x8_t)__a; }
1009__ai poly16x4_t vcreate_p16(uint64_t __a) {
1010 return (poly16x4_t)__a; }
1011__ai int64x1_t vcreate_s64(uint64_t __a) {
1012 return (int64x1_t)__a; }
1013
1014__ai float16x4_t vcvt_f16_f32(float32x4_t __a) {
1015 return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 7); }
1016
1017__ai float32x2_t vcvt_f32_s32(int32x2_t __a) {
1018 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); }
1019__ai float32x2_t vcvt_f32_u32(uint32x2_t __a) {
1020 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 18); }
1021__ai float32x4_t vcvtq_f32_s32(int32x4_t __a) {
1022 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 34); }
1023__ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) {
1024 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 50); }
1025
1026__ai float32x4_t vcvt_f32_f16(float16x4_t __a) {
1027 return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 7); }
1028
/* vcvt_n_f32_* / vcvtq_n_f32_*: statement-expression macros.
 * Each one evaluates `a` once into a typed local `__a`, reinterprets it as
 * int8x8_t / int8x16_t, and calls __builtin_neon_vcvt_n_f32_v /
 * __builtin_neon_vcvtq_n_f32_v with `__b` and a per-source-type constant
 * (2 / 18 / 34 / 50). The value of the macro is the builtin result cast to
 * float32x2_t / float32x4_t.
 * `__b` is substituted textually into the call, not copied into a local.
 * NOTE(review): `__b` is presumably the number of fraction bits and expected
 * to be a compile-time constant -- confirm against the NEON intrinsics
 * reference. */
1029#define vcvt_n_f32_s32(a, __b) __extension__ ({ \
1030 int32x2_t __a = (a); \
1031 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); })
1032#define vcvt_n_f32_u32(a, __b) __extension__ ({ \
1033 uint32x2_t __a = (a); \
1034 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 18); })
1035#define vcvtq_n_f32_s32(a, __b) __extension__ ({ \
1036 int32x4_t __a = (a); \
1037 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); })
1038#define vcvtq_n_f32_u32(a, __b) __extension__ ({ \
1039 uint32x4_t __a = (a); \
1040 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); })
1041
/* vcvt_n_s32_f32 / vcvtq_n_s32_f32: statement-expression macros.
 * `a` is evaluated once into a float vector local, reinterpreted as
 * int8x8_t / int8x16_t, and passed with `__b` and constant 2 / 34 to
 * __builtin_neon_vcvt_n_s32_v / __builtin_neon_vcvtq_n_s32_v. The value of
 * the macro is the builtin result cast to int32x2_t / int32x4_t.
 * NOTE(review): `__b` is presumably the fraction-bit count and expected to be
 * a compile-time constant -- confirm. */
1042#define vcvt_n_s32_f32(a, __b) __extension__ ({ \
1043 float32x2_t __a = (a); \
1044 (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); })
1045#define vcvtq_n_s32_f32(a, __b) __extension__ ({ \
1046 float32x4_t __a = (a); \
1047 (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); })
1048
/* vcvt_n_u32_f32 / vcvtq_n_u32_f32: statement-expression macros.
 * `a` is evaluated once into a float vector local, reinterpreted as
 * int8x8_t / int8x16_t, and passed with `__b` and constant 18 / 50 to
 * __builtin_neon_vcvt_n_u32_v / __builtin_neon_vcvtq_n_u32_v. The value of
 * the macro is the builtin result cast to uint32x2_t / uint32x4_t.
 * NOTE(review): `__b` is presumably the fraction-bit count and expected to be
 * a compile-time constant -- confirm. */
1049#define vcvt_n_u32_f32(a, __b) __extension__ ({ \
1050 float32x2_t __a = (a); \
1051 (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 18); })
1052#define vcvtq_n_u32_f32(a, __b) __extension__ ({ \
1053 float32x4_t __a = (a); \
1054 (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); })
1055
1056__ai int32x2_t vcvt_s32_f32(float32x2_t __a) {
1057 return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); }
1058__ai int32x4_t vcvtq_s32_f32(float32x4_t __a) {
1059 return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 34); }
1060
1061__ai uint32x2_t vcvt_u32_f32(float32x2_t __a) {
1062 return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 18); }
1063__ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) {
1064 return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 50); }
1065
/* vdup_lane_* / vdupq_lane_*: statement-expression macros.
 * Each one evaluates `a` once into a typed local `__a` and yields
 * __builtin_shufflevector(__a, __a, __b, __b, ...), i.e. a shuffle of the
 * vector with itself in which every index is `__b`.
 * The vdup_lane_* forms list as many indices as the source has elements; the
 * vdupq_lane_* forms take the same source type but list twice as many.
 * `__b` is substituted textually into every index position, so it is not
 * copied into a local.
 * NOTE(review): the shuffle result is not cast, so the macro's type is
 * whatever __builtin_shufflevector returns -- keep that in mind before adding
 * a typed temporary here. */
1066#define vdup_lane_u8(a, __b) __extension__ ({ \
1067 uint8x8_t __a = (a); \
1068 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1069#define vdup_lane_u16(a, __b) __extension__ ({ \
1070 uint16x4_t __a = (a); \
1071 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
1072#define vdup_lane_u32(a, __b) __extension__ ({ \
1073 uint32x2_t __a = (a); \
1074 __builtin_shufflevector(__a, __a, __b, __b); })
1075#define vdup_lane_s8(a, __b) __extension__ ({ \
1076 int8x8_t __a = (a); \
1077 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1078#define vdup_lane_s16(a, __b) __extension__ ({ \
1079 int16x4_t __a = (a); \
1080 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
1081#define vdup_lane_s32(a, __b) __extension__ ({ \
1082 int32x2_t __a = (a); \
1083 __builtin_shufflevector(__a, __a, __b, __b); })
1084#define vdup_lane_p8(a, __b) __extension__ ({ \
1085 poly8x8_t __a = (a); \
1086 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1087#define vdup_lane_p16(a, __b) __extension__ ({ \
1088 poly16x4_t __a = (a); \
1089 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
1090#define vdup_lane_f32(a, __b) __extension__ ({ \
1091 float32x2_t __a = (a); \
1092 __builtin_shufflevector(__a, __a, __b, __b); })
/* q forms: same source types as above, twice as many indices in the shuffle. */
1093#define vdupq_lane_u8(a, __b) __extension__ ({ \
1094 uint8x8_t __a = (a); \
1095 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
1096#define vdupq_lane_u16(a, __b) __extension__ ({ \
1097 uint16x4_t __a = (a); \
1098 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1099#define vdupq_lane_u32(a, __b) __extension__ ({ \
1100 uint32x2_t __a = (a); \
1101 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
1102#define vdupq_lane_s8(a, __b) __extension__ ({ \
1103 int8x8_t __a = (a); \
1104 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
1105#define vdupq_lane_s16(a, __b) __extension__ ({ \
1106 int16x4_t __a = (a); \
1107 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1108#define vdupq_lane_s32(a, __b) __extension__ ({ \
1109 int32x2_t __a = (a); \
1110 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
1111#define vdupq_lane_p8(a, __b) __extension__ ({ \
1112 poly8x8_t __a = (a); \
1113 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
1114#define vdupq_lane_p16(a, __b) __extension__ ({ \
1115 poly16x4_t __a = (a); \
1116 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
1117#define vdupq_lane_f32(a, __b) __extension__ ({ \
1118 float32x2_t __a = (a); \
1119 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
/* 64-bit element forms: one index for vdup_lane_*, two for vdupq_lane_*. */
1120#define vdup_lane_s64(a, __b) __extension__ ({ \
1121 int64x1_t __a = (a); \
1122 __builtin_shufflevector(__a, __a, __b); })
1123#define vdup_lane_u64(a, __b) __extension__ ({ \
1124 uint64x1_t __a = (a); \
1125 __builtin_shufflevector(__a, __a, __b); })
1126#define vdupq_lane_s64(a, __b) __extension__ ({ \
1127 int64x1_t __a = (a); \
1128 __builtin_shufflevector(__a, __a, __b, __b); })
1129#define vdupq_lane_u64(a, __b) __extension__ ({ \
1130 uint64x1_t __a = (a); \
1131 __builtin_shufflevector(__a, __a, __b, __b); })
1132
1133__ai uint8x8_t vdup_n_u8(uint8_t __a) {
1134 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1135__ai uint16x4_t vdup_n_u16(uint16_t __a) {
1136 return (uint16x4_t){ __a, __a, __a, __a }; }
1137__ai uint32x2_t vdup_n_u32(uint32_t __a) {
1138 return (uint32x2_t){ __a, __a }; }
1139__ai int8x8_t vdup_n_s8(int8_t __a) {
1140 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1141__ai int16x4_t vdup_n_s16(int16_t __a) {
1142 return (int16x4_t){ __a, __a, __a, __a }; }
1143__ai int32x2_t vdup_n_s32(int32_t __a) {
1144 return (int32x2_t){ __a, __a }; }
1145__ai poly8x8_t vdup_n_p8(poly8_t __a) {
1146 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1147__ai poly16x4_t vdup_n_p16(poly16_t __a) {
1148 return (poly16x4_t){ __a, __a, __a, __a }; }
1149__ai float32x2_t vdup_n_f32(float32_t __a) {
1150 return (float32x2_t){ __a, __a }; }
1151__ai uint8x16_t vdupq_n_u8(uint8_t __a) {
1152 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
1153__ai uint16x8_t vdupq_n_u16(uint16_t __a) {
1154 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1155__ai uint32x4_t vdupq_n_u32(uint32_t __a) {
1156 return (uint32x4_t){ __a, __a, __a, __a }; }
1157__ai int8x16_t vdupq_n_s8(int8_t __a) {
1158 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
1159__ai int16x8_t vdupq_n_s16(int16_t __a) {
1160 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1161__ai int32x4_t vdupq_n_s32(int32_t __a) {
1162 return (int32x4_t){ __a, __a, __a, __a }; }
1163__ai poly8x16_t vdupq_n_p8(poly8_t __a) {
1164 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
1165__ai poly16x8_t vdupq_n_p16(poly16_t __a) {
1166 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1167__ai float32x4_t vdupq_n_f32(float32_t __a) {
1168 return (float32x4_t){ __a, __a, __a, __a }; }
1169__ai int64x1_t vdup_n_s64(int64_t __a) {
1170 return (int64x1_t){ __a }; }
1171__ai uint64x1_t vdup_n_u64(uint64_t __a) {
1172 return (uint64x1_t){ __a }; }
1173__ai int64x2_t vdupq_n_s64(int64_t __a) {
1174 return (int64x2_t){ __a, __a }; }
1175__ai uint64x2_t vdupq_n_u64(uint64_t __a) {
1176 return (uint64x2_t){ __a, __a }; }
1177
1178__ai int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) {
1179 return __a ^ __b; }
1180__ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) {
1181 return __a ^ __b; }
1182__ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) {
1183 return __a ^ __b; }
1184__ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) {
1185 return __a ^ __b; }
1186__ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) {
1187 return __a ^ __b; }
1188__ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) {
1189 return __a ^ __b; }
1190__ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) {
1191 return __a ^ __b; }
1192__ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) {
1193 return __a ^ __b; }
1194__ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) {
1195 return __a ^ __b; }
1196__ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) {
1197 return __a ^ __b; }
1198__ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) {
1199 return __a ^ __b; }
1200__ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) {
1201 return __a ^ __b; }
1202__ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) {
1203 return __a ^ __b; }
1204__ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) {
1205 return __a ^ __b; }
1206__ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) {
1207 return __a ^ __b; }
1208__ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) {
1209 return __a ^ __b; }
1210
/* vext_* / vextq_*: statement-expression macros.
 * Each one evaluates `a` and `b` once into typed locals `__a` and `__b`,
 * reinterprets them as int8x8_t / int8x16_t (the s8 forms already have that
 * type and pass the locals unchanged), and calls __builtin_neon_vext_v /
 * __builtin_neon_vextq_v with `__c` and a per-element-type constant. The value
 * of the macro is the builtin result cast back to the operand type.
 * `__c` is substituted textually into the call, not copied into a local.
 * NOTE(review): `__c` is presumably the element offset of the extraction and
 * expected to be a compile-time constant -- confirm against the NEON
 * intrinsics reference. */
1211#define vext_s8(a, b, __c) __extension__ ({ \
1212 int8x8_t __a = (a); int8x8_t __b = (b); \
1213 (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); })
1214#define vext_u8(a, b, __c) __extension__ ({ \
1215 uint8x8_t __a = (a); uint8x8_t __b = (b); \
1216 (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
1217#define vext_p8(a, b, __c) __extension__ ({ \
1218 poly8x8_t __a = (a); poly8x8_t __b = (b); \
1219 (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
1220#define vext_s16(a, b, __c) __extension__ ({ \
1221 int16x4_t __a = (a); int16x4_t __b = (b); \
1222 (int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
1223#define vext_u16(a, b, __c) __extension__ ({ \
1224 uint16x4_t __a = (a); uint16x4_t __b = (b); \
1225 (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
1226#define vext_p16(a, b, __c) __extension__ ({ \
1227 poly16x4_t __a = (a); poly16x4_t __b = (b); \
1228 (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
1229#define vext_s32(a, b, __c) __extension__ ({ \
1230 int32x2_t __a = (a); int32x2_t __b = (b); \
1231 (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
1232#define vext_u32(a, b, __c) __extension__ ({ \
1233 uint32x2_t __a = (a); uint32x2_t __b = (b); \
1234 (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
1235#define vext_s64(a, b, __c) __extension__ ({ \
1236 int64x1_t __a = (a); int64x1_t __b = (b); \
1237 (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
1238#define vext_u64(a, b, __c) __extension__ ({ \
1239 uint64x1_t __a = (a); uint64x1_t __b = (b); \
1240 (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
1241#define vext_f32(a, b, __c) __extension__ ({ \
1242 float32x2_t __a = (a); float32x2_t __b = (b); \
1243 (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
/* q forms: same pattern via __builtin_neon_vextq_v and int8x16_t. */
1244#define vextq_s8(a, b, __c) __extension__ ({ \
1245 int8x16_t __a = (a); int8x16_t __b = (b); \
1246 (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 32); })
1247#define vextq_u8(a, b, __c) __extension__ ({ \
1248 uint8x16_t __a = (a); uint8x16_t __b = (b); \
1249 (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
1250#define vextq_p8(a, b, __c) __extension__ ({ \
1251 poly8x16_t __a = (a); poly8x16_t __b = (b); \
1252 (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
1253#define vextq_s16(a, b, __c) __extension__ ({ \
1254 int16x8_t __a = (a); int16x8_t __b = (b); \
1255 (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
1256#define vextq_u16(a, b, __c) __extension__ ({ \
1257 uint16x8_t __a = (a); uint16x8_t __b = (b); \
1258 (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
1259#define vextq_p16(a, b, __c) __extension__ ({ \
1260 poly16x8_t __a = (a); poly16x8_t __b = (b); \
1261 (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
1262#define vextq_s32(a, b, __c) __extension__ ({ \
1263 int32x4_t __a = (a); int32x4_t __b = (b); \
1264 (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
1265#define vextq_u32(a, b, __c) __extension__ ({ \
1266 uint32x4_t __a = (a); uint32x4_t __b = (b); \
1267 (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
1268#define vextq_s64(a, b, __c) __extension__ ({ \
1269 int64x2_t __a = (a); int64x2_t __b = (b); \
1270 (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
1271#define vextq_u64(a, b, __c) __extension__ ({ \
1272 uint64x2_t __a = (a); uint64x2_t __b = (b); \
1273 (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
1274#define vextq_f32(a, b, __c) __extension__ ({ \
1275 float32x4_t __a = (a); float32x4_t __b = (b); \
1276 (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 40); })
1277
1278__ai float32x2_t vfma_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
1279 return (float32x2_t)__builtin_neon_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 8); }
1280__ai float32x4_t vfmaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
1281 return (float32x4_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); }
1282
1283__ai int8x8_t vget_high_s8(int8x16_t __a) {
1284 return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
1285__ai int16x4_t vget_high_s16(int16x8_t __a) {
1286 return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
1287__ai int32x2_t vget_high_s32(int32x4_t __a) {
1288 return __builtin_shufflevector(__a, __a, 2, 3); }
1289__ai int64x1_t vget_high_s64(int64x2_t __a) {
1290 return __builtin_shufflevector(__a, __a, 1); }
1291__ai float16x4_t vget_high_f16(float16x8_t __a) {
1292 return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
1293__ai float32x2_t vget_high_f32(float32x4_t __a) {
1294 return __builtin_shufflevector(__a, __a, 2, 3); }
1295__ai uint8x8_t vget_high_u8(uint8x16_t __a) {
1296 return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
1297__ai uint16x4_t vget_high_u16(uint16x8_t __a) {
1298 return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
1299__ai uint32x2_t vget_high_u32(uint32x4_t __a) {
1300 return __builtin_shufflevector(__a, __a, 2, 3); }
1301__ai uint64x1_t vget_high_u64(uint64x2_t __a) {
1302 return __builtin_shufflevector(__a, __a, 1); }
1303__ai poly8x8_t vget_high_p8(poly8x16_t __a) {
1304 return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
1305__ai poly16x4_t vget_high_p16(poly16x8_t __a) {
1306 return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
1307
/* vget_lane_* / vgetq_lane_*: statement-expression macros.
 * Each one evaluates `a` once into a typed local `__a` and calls the
 * width-specific builtin (__builtin_neon_vget_lane_{i8,i16,i32,i64,f32} or the
 * vgetq_ equivalent) with `__b`. The value of the macro is the builtin result
 * cast to the scalar element type.
 * The unsigned and poly forms first cast the vector to the signed vector type
 * of the same shape, because they share the signed builtins.
 * `__b` is substituted textually into the call, not copied into a local.
 * NOTE(review): `__b` is presumably the lane index and expected to be a
 * compile-time constant -- confirm against the NEON intrinsics reference. */
1308#define vget_lane_u8(a, __b) __extension__ ({ \
1309 uint8x8_t __a = (a); \
1310 (uint8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); })
1311#define vget_lane_u16(a, __b) __extension__ ({ \
1312 uint16x4_t __a = (a); \
1313 (uint16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); })
1314#define vget_lane_u32(a, __b) __extension__ ({ \
1315 uint32x2_t __a = (a); \
1316 (uint32_t)__builtin_neon_vget_lane_i32((int32x2_t)__a, __b); })
1317#define vget_lane_s8(a, __b) __extension__ ({ \
1318 int8x8_t __a = (a); \
1319 (int8_t)__builtin_neon_vget_lane_i8(__a, __b); })
1320#define vget_lane_s16(a, __b) __extension__ ({ \
1321 int16x4_t __a = (a); \
1322 (int16_t)__builtin_neon_vget_lane_i16(__a, __b); })
1323#define vget_lane_s32(a, __b) __extension__ ({ \
1324 int32x2_t __a = (a); \
1325 (int32_t)__builtin_neon_vget_lane_i32(__a, __b); })
1326#define vget_lane_p8(a, __b) __extension__ ({ \
1327 poly8x8_t __a = (a); \
1328 (poly8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); })
1329#define vget_lane_p16(a, __b) __extension__ ({ \
1330 poly16x4_t __a = (a); \
1331 (poly16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); })
1332#define vget_lane_f32(a, __b) __extension__ ({ \
1333 float32x2_t __a = (a); \
1334 (float32_t)__builtin_neon_vget_lane_f32(__a, __b); })
/* q forms: same pattern via the __builtin_neon_vgetq_lane_* builtins. */
1335#define vgetq_lane_u8(a, __b) __extension__ ({ \
1336 uint8x16_t __a = (a); \
1337 (uint8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); })
1338#define vgetq_lane_u16(a, __b) __extension__ ({ \
1339 uint16x8_t __a = (a); \
1340 (uint16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); })
1341#define vgetq_lane_u32(a, __b) __extension__ ({ \
1342 uint32x4_t __a = (a); \
1343 (uint32_t)__builtin_neon_vgetq_lane_i32((int32x4_t)__a, __b); })
1344#define vgetq_lane_s8(a, __b) __extension__ ({ \
1345 int8x16_t __a = (a); \
1346 (int8_t)__builtin_neon_vgetq_lane_i8(__a, __b); })
1347#define vgetq_lane_s16(a, __b) __extension__ ({ \
1348 int16x8_t __a = (a); \
1349 (int16_t)__builtin_neon_vgetq_lane_i16(__a, __b); })
1350#define vgetq_lane_s32(a, __b) __extension__ ({ \
1351 int32x4_t __a = (a); \
1352 (int32_t)__builtin_neon_vgetq_lane_i32(__a, __b); })
1353#define vgetq_lane_p8(a, __b) __extension__ ({ \
1354 poly8x16_t __a = (a); \
1355 (poly8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); })
1356#define vgetq_lane_p16(a, __b) __extension__ ({ \
1357 poly16x8_t __a = (a); \
1358 (poly16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); })
1359#define vgetq_lane_f32(a, __b) __extension__ ({ \
1360 float32x4_t __a = (a); \
1361 (float32_t)__builtin_neon_vgetq_lane_f32(__a, __b); })
/* 64-bit element forms. */
1362#define vget_lane_s64(a, __b) __extension__ ({ \
1363 int64x1_t __a = (a); \
1364 (int64_t)__builtin_neon_vget_lane_i64(__a, __b); })
1365#define vget_lane_u64(a, __b) __extension__ ({ \
1366 uint64x1_t __a = (a); \
1367 (uint64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); })
1368#define vgetq_lane_s64(a, __b) __extension__ ({ \
1369 int64x2_t __a = (a); \
1370 (int64_t)__builtin_neon_vgetq_lane_i64(__a, __b); })
1371#define vgetq_lane_u64(a, __b) __extension__ ({ \
1372 uint64x2_t __a = (a); \
1373 (uint64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); })
1374
1375__ai int8x8_t vget_low_s8(int8x16_t __a) {
1376 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); }
1377__ai int16x4_t vget_low_s16(int16x8_t __a) {
1378 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); }
1379__ai int32x2_t vget_low_s32(int32x4_t __a) {
1380 return __builtin_shufflevector(__a, __a, 0, 1); }
1381__ai int64x1_t vget_low_s64(int64x2_t __a) {
1382 return __builtin_shufflevector(__a, __a, 0); }
1383__ai float16x4_t vget_low_f16(float16x8_t __a) {
1384 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); }
1385__ai float32x2_t vget_low_f32(float32x4_t __a) {
1386 return __builtin_shufflevector(__a, __a, 0, 1); }
1387__ai uint8x8_t vget_low_u8(uint8x16_t __a) {
1388 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); }
1389__ai uint16x4_t vget_low_u16(uint16x8_t __a) {
1390 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); }
1391__ai uint32x2_t vget_low_u32(uint32x4_t __a) {
1392 return __builtin_shufflevector(__a, __a, 0, 1); }
1393__ai uint64x1_t vget_low_u64(uint64x2_t __a) {
1394 return __builtin_shufflevector(__a, __a, 0); }
1395__ai poly8x8_t vget_low_p8(poly8x16_t __a) {
1396 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); }
1397__ai poly16x4_t vget_low_p16(poly16x8_t __a) {
1398 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); }
1399
1400__ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) {
1401 return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); }
1402__ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) {
1403 return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1404__ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) {
1405 return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1406__ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) {
1407 return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1408__ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) {
1409 return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1410__ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) {
1411 return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1412__ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) {
1413 return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 32); }
1414__ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) {
1415 return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1416__ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) {
1417 return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1418__ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) {
1419 return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1420__ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) {
1421 return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1422__ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) {
1423 return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1424
1425__ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) {
1426 return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); }
1427__ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) {
1428 return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1429__ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) {
1430 return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1431__ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) {
1432 return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1433__ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) {
1434 return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1435__ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) {
1436 return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1437__ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) {
1438 return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 32); }
1439__ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) {
1440 return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1441__ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) {
1442 return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1443__ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) {
1444 return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1445__ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) {
1446 return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1447__ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) {
1448 return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1449
/*
 * vld1q_<type>(ptr): 128-bit vld1 macros.
 * Each macro passes its pointer argument to the generic
 * __builtin_neon_vld1q_v together with a per-type integer constant and casts
 * the value the builtin returns to the matching q-register vector type.
 * They are macros wrapping a GNU statement expression, not functions.
 * NOTE(review): presumably a whole-vector load from ptr -- confirm against
 * the ARM intrinsics reference.
 */
#define vld1q_u8(__a) __extension__ ({ \
  uint8x16_t __ret = (uint8x16_t)__builtin_neon_vld1q_v(__a, 48); \
  __ret; })
#define vld1q_u16(__a) __extension__ ({ \
  uint16x8_t __ret = (uint16x8_t)__builtin_neon_vld1q_v(__a, 49); \
  __ret; })
#define vld1q_u32(__a) __extension__ ({ \
  uint32x4_t __ret = (uint32x4_t)__builtin_neon_vld1q_v(__a, 50); \
  __ret; })
#define vld1q_u64(__a) __extension__ ({ \
  uint64x2_t __ret = (uint64x2_t)__builtin_neon_vld1q_v(__a, 51); \
  __ret; })
#define vld1q_s8(__a) __extension__ ({ \
  int8x16_t __ret = (int8x16_t)__builtin_neon_vld1q_v(__a, 32); \
  __ret; })
#define vld1q_s16(__a) __extension__ ({ \
  int16x8_t __ret = (int16x8_t)__builtin_neon_vld1q_v(__a, 33); \
  __ret; })
#define vld1q_s32(__a) __extension__ ({ \
  int32x4_t __ret = (int32x4_t)__builtin_neon_vld1q_v(__a, 34); \
  __ret; })
#define vld1q_s64(__a) __extension__ ({ \
  int64x2_t __ret = (int64x2_t)__builtin_neon_vld1q_v(__a, 35); \
  __ret; })
#define vld1q_f16(__a) __extension__ ({ \
  float16x8_t __ret = (float16x8_t)__builtin_neon_vld1q_v(__a, 39); \
  __ret; })
#define vld1q_f32(__a) __extension__ ({ \
  float32x4_t __ret = (float32x4_t)__builtin_neon_vld1q_v(__a, 40); \
  __ret; })
#define vld1q_p8(__a) __extension__ ({ \
  poly8x16_t __ret = (poly8x16_t)__builtin_neon_vld1q_v(__a, 36); \
  __ret; })
#define vld1q_p16(__a) __extension__ ({ \
  poly16x8_t __ret = (poly16x8_t)__builtin_neon_vld1q_v(__a, 37); \
  __ret; })
/*
 * vld1_<type>(ptr): 64-bit vld1 macros.
 * Each macro passes its pointer argument to the generic
 * __builtin_neon_vld1_v together with a per-type integer constant and casts
 * the value the builtin returns to the matching d-register vector type.
 * They are macros wrapping a GNU statement expression, not functions.
 * NOTE(review): presumably a whole-vector load from ptr -- confirm against
 * the ARM intrinsics reference.
 */
#define vld1_u8(__a) __extension__ ({ \
  uint8x8_t __ret = (uint8x8_t)__builtin_neon_vld1_v(__a, 16); \
  __ret; })
#define vld1_u16(__a) __extension__ ({ \
  uint16x4_t __ret = (uint16x4_t)__builtin_neon_vld1_v(__a, 17); \
  __ret; })
#define vld1_u32(__a) __extension__ ({ \
  uint32x2_t __ret = (uint32x2_t)__builtin_neon_vld1_v(__a, 18); \
  __ret; })
#define vld1_u64(__a) __extension__ ({ \
  uint64x1_t __ret = (uint64x1_t)__builtin_neon_vld1_v(__a, 19); \
  __ret; })
#define vld1_s8(__a) __extension__ ({ \
  int8x8_t __ret = (int8x8_t)__builtin_neon_vld1_v(__a, 0); \
  __ret; })
#define vld1_s16(__a) __extension__ ({ \
  int16x4_t __ret = (int16x4_t)__builtin_neon_vld1_v(__a, 1); \
  __ret; })
#define vld1_s32(__a) __extension__ ({ \
  int32x2_t __ret = (int32x2_t)__builtin_neon_vld1_v(__a, 2); \
  __ret; })
#define vld1_s64(__a) __extension__ ({ \
  int64x1_t __ret = (int64x1_t)__builtin_neon_vld1_v(__a, 3); \
  __ret; })
#define vld1_f16(__a) __extension__ ({ \
  float16x4_t __ret = (float16x4_t)__builtin_neon_vld1_v(__a, 7); \
  __ret; })
#define vld1_f32(__a) __extension__ ({ \
  float32x2_t __ret = (float32x2_t)__builtin_neon_vld1_v(__a, 8); \
  __ret; })
#define vld1_p8(__a) __extension__ ({ \
  poly8x8_t __ret = (poly8x8_t)__builtin_neon_vld1_v(__a, 4); \
  __ret; })
#define vld1_p16(__a) __extension__ ({ \
  poly16x4_t __ret = (poly16x4_t)__builtin_neon_vld1_v(__a, 5); \
  __ret; })
1498
/*
 * vld1q_dup_<type>(ptr): 128-bit vld1 "dup" macros.
 * Each macro passes its pointer argument to the generic
 * __builtin_neon_vld1q_dup_v together with a per-type integer constant and
 * casts the returned value to the matching q-register vector type.
 * NOTE(review): presumably loads one element and replicates it to every
 * lane -- confirm against the ARM intrinsics reference.
 */
#define vld1q_dup_u8(__a) __extension__ ({ \
  uint8x16_t __ret = (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 48); \
  __ret; })
#define vld1q_dup_u16(__a) __extension__ ({ \
  uint16x8_t __ret = (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 49); \
  __ret; })
#define vld1q_dup_u32(__a) __extension__ ({ \
  uint32x4_t __ret = (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 50); \
  __ret; })
#define vld1q_dup_u64(__a) __extension__ ({ \
  uint64x2_t __ret = (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 51); \
  __ret; })
#define vld1q_dup_s8(__a) __extension__ ({ \
  int8x16_t __ret = (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 32); \
  __ret; })
#define vld1q_dup_s16(__a) __extension__ ({ \
  int16x8_t __ret = (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 33); \
  __ret; })
#define vld1q_dup_s32(__a) __extension__ ({ \
  int32x4_t __ret = (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 34); \
  __ret; })
#define vld1q_dup_s64(__a) __extension__ ({ \
  int64x2_t __ret = (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 35); \
  __ret; })
#define vld1q_dup_f16(__a) __extension__ ({ \
  float16x8_t __ret = (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 39); \
  __ret; })
#define vld1q_dup_f32(__a) __extension__ ({ \
  float32x4_t __ret = (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 40); \
  __ret; })
#define vld1q_dup_p8(__a) __extension__ ({ \
  poly8x16_t __ret = (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 36); \
  __ret; })
#define vld1q_dup_p16(__a) __extension__ ({ \
  poly16x8_t __ret = (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 37); \
  __ret; })
/*
 * vld1_dup_<type>(ptr): 64-bit vld1 "dup" macros.
 * Each macro passes its pointer argument to the generic
 * __builtin_neon_vld1_dup_v together with a per-type integer constant and
 * casts the returned value to the matching d-register vector type.
 * NOTE(review): presumably loads one element and replicates it to every
 * lane -- confirm against the ARM intrinsics reference.
 */
#define vld1_dup_u8(__a) __extension__ ({ \
  uint8x8_t __ret = (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 16); \
  __ret; })
#define vld1_dup_u16(__a) __extension__ ({ \
  uint16x4_t __ret = (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 17); \
  __ret; })
#define vld1_dup_u32(__a) __extension__ ({ \
  uint32x2_t __ret = (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 18); \
  __ret; })
#define vld1_dup_u64(__a) __extension__ ({ \
  uint64x1_t __ret = (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 19); \
  __ret; })
#define vld1_dup_s8(__a) __extension__ ({ \
  int8x8_t __ret = (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); \
  __ret; })
#define vld1_dup_s16(__a) __extension__ ({ \
  int16x4_t __ret = (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); \
  __ret; })
#define vld1_dup_s32(__a) __extension__ ({ \
  int32x2_t __ret = (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); \
  __ret; })
#define vld1_dup_s64(__a) __extension__ ({ \
  int64x1_t __ret = (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); \
  __ret; })
#define vld1_dup_f16(__a) __extension__ ({ \
  float16x4_t __ret = (float16x4_t)__builtin_neon_vld1_dup_v(__a, 7); \
  __ret; })
#define vld1_dup_f32(__a) __extension__ ({ \
  float32x2_t __ret = (float32x2_t)__builtin_neon_vld1_dup_v(__a, 8); \
  __ret; })
#define vld1_dup_p8(__a) __extension__ ({ \
  poly8x8_t __ret = (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 4); \
  __ret; })
#define vld1_dup_p16(__a) __extension__ ({ \
  poly16x4_t __ret = (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 5); \
  __ret; })
1547
/*
 * vld1q_lane_<type>(ptr, vec, lane): 128-bit vld1 "lane" macros.
 * The vector argument is first copied into a typed local (__b), so it is
 * evaluated once and must convert to the expected vector type.  That copy
 * is then reinterpreted as int8x16_t (the s8 form needs no cast) and passed
 * with the pointer, the lane argument and a per-type integer constant to
 * __builtin_neon_vld1q_lane_v; the result is cast back to the vector type.
 * NOTE(review): presumably loads one element from ptr into the given lane
 * and keeps the other lanes of vec -- confirm against the ARM reference.
 */
#define vld1q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16_t __b = (b); \
  uint8x16_t __ret = (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); \
  __ret; })
#define vld1q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8_t __b = (b); \
  uint16x8_t __ret = (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); \
  __ret; })
#define vld1q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4_t __b = (b); \
  uint32x4_t __ret = (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); \
  __ret; })
#define vld1q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2_t __b = (b); \
  uint64x2_t __ret = (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); \
  __ret; })
#define vld1q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16_t __b = (b); \
  int8x16_t __ret = (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 32); \
  __ret; })
#define vld1q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8_t __b = (b); \
  int16x8_t __ret = (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); \
  __ret; })
#define vld1q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4_t __b = (b); \
  int32x4_t __ret = (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); \
  __ret; })
#define vld1q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2_t __b = (b); \
  int64x2_t __ret = (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); \
  __ret; })
#define vld1q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8_t __b = (b); \
  float16x8_t __ret = (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); \
  __ret; })
#define vld1q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4_t __b = (b); \
  float32x4_t __ret = (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 40); \
  __ret; })
#define vld1q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16_t __b = (b); \
  poly8x16_t __ret = (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); \
  __ret; })
#define vld1q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8_t __b = (b); \
  poly16x8_t __ret = (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); \
  __ret; })
/*
 * vld1_lane_<type>(ptr, vec, lane): 64-bit vld1 "lane" macros.
 * The vector argument is first copied into a typed local (__b), so it is
 * evaluated once and must convert to the expected vector type.  That copy
 * is then reinterpreted as int8x8_t (the s8 form needs no cast) and passed
 * with the pointer, the lane argument and a per-type integer constant to
 * __builtin_neon_vld1_lane_v; the result is cast back to the vector type.
 * NOTE(review): presumably loads one element from ptr into the given lane
 * and keeps the other lanes of vec -- confirm against the ARM reference.
 */
#define vld1_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8_t __b = (b); \
  uint8x8_t __ret = (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 16); \
  __ret; })
#define vld1_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4_t __b = (b); \
  uint16x4_t __ret = (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); \
  __ret; })
#define vld1_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2_t __b = (b); \
  uint32x2_t __ret = (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); \
  __ret; })
#define vld1_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1_t __b = (b); \
  uint64x1_t __ret = (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); \
  __ret; })
#define vld1_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8_t __b = (b); \
  int8x8_t __ret = (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); \
  __ret; })
#define vld1_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4_t __b = (b); \
  int16x4_t __ret = (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); \
  __ret; })
#define vld1_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2_t __b = (b); \
  int32x2_t __ret = (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); \
  __ret; })
#define vld1_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1_t __b = (b); \
  int64x1_t __ret = (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); \
  __ret; })
#define vld1_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4_t __b = (b); \
  float16x4_t __ret = (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); \
  __ret; })
#define vld1_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2_t __b = (b); \
  float32x2_t __ret = (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 8); \
  __ret; })
#define vld1_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8_t __b = (b); \
  poly8x8_t __ret = (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); \
  __ret; })
#define vld1_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4_t __b = (b); \
  poly16x4_t __ret = (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); \
  __ret; })
1620
/*
 * vld2q_<type>(ptr): 128-bit vld2 macros returning a two-vector aggregate.
 * Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld2q_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld2q_u8(r).
 * NOTE(review): presumably a 2-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld2q_u8(__a) __extension__ ({ \
  uint8x16x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 48); \
  __ret; })
#define vld2q_u16(__a) __extension__ ({ \
  uint16x8x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 49); \
  __ret; })
#define vld2q_u32(__a) __extension__ ({ \
  uint32x4x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 50); \
  __ret; })
#define vld2q_s8(__a) __extension__ ({ \
  int8x16x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 32); \
  __ret; })
#define vld2q_s16(__a) __extension__ ({ \
  int16x8x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 33); \
  __ret; })
#define vld2q_s32(__a) __extension__ ({ \
  int32x4x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 34); \
  __ret; })
#define vld2q_f16(__a) __extension__ ({ \
  float16x8x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 39); \
  __ret; })
#define vld2q_f32(__a) __extension__ ({ \
  float32x4x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 40); \
  __ret; })
#define vld2q_p8(__a) __extension__ ({ \
  poly8x16x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 36); \
  __ret; })
#define vld2q_p16(__a) __extension__ ({ \
  poly16x8x2_t __ret; \
  __builtin_neon_vld2q_v(&__ret, __a, 37); \
  __ret; })
/*
 * vld2_<type>(ptr): 64-bit vld2 macros returning a two-vector aggregate.
 * Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld2_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld2_u8(r).
 * NOTE(review): presumably a 2-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld2_u8(__a) __extension__ ({ \
  uint8x8x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 16); \
  __ret; })
#define vld2_u16(__a) __extension__ ({ \
  uint16x4x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 17); \
  __ret; })
#define vld2_u32(__a) __extension__ ({ \
  uint32x2x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 18); \
  __ret; })
#define vld2_u64(__a) __extension__ ({ \
  uint64x1x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 19); \
  __ret; })
#define vld2_s8(__a) __extension__ ({ \
  int8x8x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 0); \
  __ret; })
#define vld2_s16(__a) __extension__ ({ \
  int16x4x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 1); \
  __ret; })
#define vld2_s32(__a) __extension__ ({ \
  int32x2x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 2); \
  __ret; })
#define vld2_s64(__a) __extension__ ({ \
  int64x1x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 3); \
  __ret; })
#define vld2_f16(__a) __extension__ ({ \
  float16x4x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 7); \
  __ret; })
#define vld2_f32(__a) __extension__ ({ \
  float32x2x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 8); \
  __ret; })
#define vld2_p8(__a) __extension__ ({ \
  poly8x8x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 4); \
  __ret; })
#define vld2_p16(__a) __extension__ ({ \
  poly16x4x2_t __ret; \
  __builtin_neon_vld2_v(&__ret, __a, 5); \
  __ret; })
1665
/*
 * vld2_dup_<type>(ptr): 64-bit vld2 "dup" macros returning a two-vector
 * aggregate.  Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld2_dup_v fill it through its address (passing the
 * pointer argument and a per-type integer constant), and yields the
 * aggregate as the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld2_dup_u8(r).
 * NOTE(review): presumably loads one 2-element structure and replicates it
 * across all lanes -- confirm against the ARM intrinsics reference.
 */
#define vld2_dup_u8(__a) __extension__ ({ \
  uint8x8x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 16); \
  __ret; })
#define vld2_dup_u16(__a) __extension__ ({ \
  uint16x4x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 17); \
  __ret; })
#define vld2_dup_u32(__a) __extension__ ({ \
  uint32x2x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 18); \
  __ret; })
#define vld2_dup_u64(__a) __extension__ ({ \
  uint64x1x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 19); \
  __ret; })
#define vld2_dup_s8(__a) __extension__ ({ \
  int8x8x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 0); \
  __ret; })
#define vld2_dup_s16(__a) __extension__ ({ \
  int16x4x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 1); \
  __ret; })
#define vld2_dup_s32(__a) __extension__ ({ \
  int32x2x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 2); \
  __ret; })
#define vld2_dup_s64(__a) __extension__ ({ \
  int64x1x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 3); \
  __ret; })
#define vld2_dup_f16(__a) __extension__ ({ \
  float16x4x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 7); \
  __ret; })
#define vld2_dup_f32(__a) __extension__ ({ \
  float32x2x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 8); \
  __ret; })
#define vld2_dup_p8(__a) __extension__ ({ \
  poly8x8x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 4); \
  __ret; })
#define vld2_dup_p16(__a) __extension__ ({ \
  poly16x4x2_t __ret; \
  __builtin_neon_vld2_dup_v(&__ret, __a, 5); \
  __ret; })
1690
/*
 * vld2q_lane_<type>(ptr, vecs, lane): 128-bit vld2 "lane" macros.
 * The two-vector aggregate argument is copied into a typed local (__b), so
 * it is evaluated once.  Its val[0] and val[1] members are reinterpreted
 * as int8x16_t and passed, with the pointer, the lane argument and a
 * per-type integer constant, to __builtin_neon_vld2q_lane_v, which fills
 * the result aggregate through its address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 2-element structure into the given
 * lane of both vectors -- confirm against the ARM intrinsics reference.
 */
#define vld2q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  uint16x8x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); \
  __ret; })
#define vld2q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  uint32x4x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); \
  __ret; })
#define vld2q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x2_t __b = (b); \
  int16x8x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); \
  __ret; })
#define vld2q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x2_t __b = (b); \
  int32x4x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); \
  __ret; })
#define vld2q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x2_t __b = (b); \
  float16x8x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); \
  __ret; })
#define vld2q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x2_t __b = (b); \
  float32x4x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 40); \
  __ret; })
#define vld2q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  poly16x8x2_t __ret; \
  __builtin_neon_vld2q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); \
  __ret; })
/*
 * vld2_lane_<type>(ptr, vecs, lane): 64-bit vld2 "lane" macros.
 * The two-vector aggregate argument is copied into a typed local (__b), so
 * it is evaluated once.  Its val[0] and val[1] members are reinterpreted
 * as int8x8_t (the s8 form needs no cast) and passed, with the pointer,
 * the lane argument and a per-type integer constant, to
 * __builtin_neon_vld2_lane_v, which fills the result aggregate through its
 * address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 2-element structure into the given
 * lane of both vectors -- confirm against the ARM intrinsics reference.
 */
#define vld2_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  uint8x8x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); \
  __ret; })
#define vld2_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  uint16x4x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); \
  __ret; })
#define vld2_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  uint32x2x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); \
  __ret; })
#define vld2_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x2_t __b = (b); \
  int8x8x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      __b.val[0], __b.val[1], __c, 0); \
  __ret; })
#define vld2_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x2_t __b = (b); \
  int16x4x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); \
  __ret; })
#define vld2_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x2_t __b = (b); \
  int32x2x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); \
  __ret; })
#define vld2_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x2_t __b = (b); \
  float16x4x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); \
  __ret; })
#define vld2_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x2_t __b = (b); \
  float32x2x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); \
  __ret; })
#define vld2_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x2_t __b = (b); \
  poly8x8x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); \
  __ret; })
#define vld2_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x2_t __b = (b); \
  poly16x4x2_t __ret; \
  __builtin_neon_vld2_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); \
  __ret; })
1742
/*
 * vld3q_<type>(ptr): 128-bit vld3 macros returning a three-vector
 * aggregate.  Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld3q_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld3q_u8(r).
 * NOTE(review): presumably a 3-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld3q_u8(__a) __extension__ ({ \
  uint8x16x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 48); \
  __ret; })
#define vld3q_u16(__a) __extension__ ({ \
  uint16x8x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 49); \
  __ret; })
#define vld3q_u32(__a) __extension__ ({ \
  uint32x4x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 50); \
  __ret; })
#define vld3q_s8(__a) __extension__ ({ \
  int8x16x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 32); \
  __ret; })
#define vld3q_s16(__a) __extension__ ({ \
  int16x8x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 33); \
  __ret; })
#define vld3q_s32(__a) __extension__ ({ \
  int32x4x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 34); \
  __ret; })
#define vld3q_f16(__a) __extension__ ({ \
  float16x8x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 39); \
  __ret; })
#define vld3q_f32(__a) __extension__ ({ \
  float32x4x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 40); \
  __ret; })
#define vld3q_p8(__a) __extension__ ({ \
  poly8x16x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 36); \
  __ret; })
#define vld3q_p16(__a) __extension__ ({ \
  poly16x8x3_t __ret; \
  __builtin_neon_vld3q_v(&__ret, __a, 37); \
  __ret; })
/*
 * vld3_<type>(ptr): 64-bit vld3 macros returning a three-vector aggregate.
 * Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld3_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld3_u8(r).
 * NOTE(review): presumably a 3-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld3_u8(__a) __extension__ ({ \
  uint8x8x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 16); \
  __ret; })
#define vld3_u16(__a) __extension__ ({ \
  uint16x4x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 17); \
  __ret; })
#define vld3_u32(__a) __extension__ ({ \
  uint32x2x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 18); \
  __ret; })
#define vld3_u64(__a) __extension__ ({ \
  uint64x1x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 19); \
  __ret; })
#define vld3_s8(__a) __extension__ ({ \
  int8x8x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 0); \
  __ret; })
#define vld3_s16(__a) __extension__ ({ \
  int16x4x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 1); \
  __ret; })
#define vld3_s32(__a) __extension__ ({ \
  int32x2x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 2); \
  __ret; })
#define vld3_s64(__a) __extension__ ({ \
  int64x1x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 3); \
  __ret; })
#define vld3_f16(__a) __extension__ ({ \
  float16x4x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 7); \
  __ret; })
#define vld3_f32(__a) __extension__ ({ \
  float32x2x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 8); \
  __ret; })
#define vld3_p8(__a) __extension__ ({ \
  poly8x8x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 4); \
  __ret; })
#define vld3_p16(__a) __extension__ ({ \
  poly16x4x3_t __ret; \
  __builtin_neon_vld3_v(&__ret, __a, 5); \
  __ret; })
1787
/*
 * vld3_dup_<type>(ptr): 64-bit vld3 "dup" macros returning a three-vector
 * aggregate.  Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld3_dup_v fill it through its address (passing the
 * pointer argument and a per-type integer constant), and yields the
 * aggregate as the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld3_dup_u8(r).
 * NOTE(review): presumably loads one 3-element structure and replicates it
 * across all lanes -- confirm against the ARM intrinsics reference.
 */
#define vld3_dup_u8(__a) __extension__ ({ \
  uint8x8x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 16); \
  __ret; })
#define vld3_dup_u16(__a) __extension__ ({ \
  uint16x4x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 17); \
  __ret; })
#define vld3_dup_u32(__a) __extension__ ({ \
  uint32x2x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 18); \
  __ret; })
#define vld3_dup_u64(__a) __extension__ ({ \
  uint64x1x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 19); \
  __ret; })
#define vld3_dup_s8(__a) __extension__ ({ \
  int8x8x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 0); \
  __ret; })
#define vld3_dup_s16(__a) __extension__ ({ \
  int16x4x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 1); \
  __ret; })
#define vld3_dup_s32(__a) __extension__ ({ \
  int32x2x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 2); \
  __ret; })
#define vld3_dup_s64(__a) __extension__ ({ \
  int64x1x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 3); \
  __ret; })
#define vld3_dup_f16(__a) __extension__ ({ \
  float16x4x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 7); \
  __ret; })
#define vld3_dup_f32(__a) __extension__ ({ \
  float32x2x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 8); \
  __ret; })
#define vld3_dup_p8(__a) __extension__ ({ \
  poly8x8x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 4); \
  __ret; })
#define vld3_dup_p16(__a) __extension__ ({ \
  poly16x4x3_t __ret; \
  __builtin_neon_vld3_dup_v(&__ret, __a, 5); \
  __ret; })
1812
/*
 * vld3q_lane_<type>(ptr, vecs, lane): 128-bit vld3 "lane" macros.
 * The three-vector aggregate argument is copied into a typed local (__b),
 * so it is evaluated once.  Its val[0..2] members are reinterpreted as
 * int8x16_t and passed, with the pointer, the lane argument and a per-type
 * integer constant, to __builtin_neon_vld3q_lane_v, which fills the result
 * aggregate through its address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 3-element structure into the given
 * lane of all three vectors -- confirm against the ARM intrinsics reference.
 */
#define vld3q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  uint16x8x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 49); \
  __ret; })
#define vld3q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  uint32x4x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 50); \
  __ret; })
#define vld3q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x3_t __b = (b); \
  int16x8x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 33); \
  __ret; })
#define vld3q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x3_t __b = (b); \
  int32x4x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 34); \
  __ret; })
#define vld3q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x3_t __b = (b); \
  float16x8x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 39); \
  __ret; })
#define vld3q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x3_t __b = (b); \
  float32x4x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 40); \
  __ret; })
#define vld3q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  poly16x8x3_t __ret; \
  __builtin_neon_vld3q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], __c, 37); \
  __ret; })
/*
 * vld3_lane_<type>(ptr, vecs, lane): 64-bit vld3 "lane" macros.
 * The three-vector aggregate argument is copied into a typed local (__b),
 * so it is evaluated once.  Its val[0..2] members are reinterpreted as
 * int8x8_t (the s8 form needs no cast) and passed, with the pointer, the
 * lane argument and a per-type integer constant, to
 * __builtin_neon_vld3_lane_v, which fills the result aggregate through its
 * address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 3-element structure into the given
 * lane of all three vectors -- confirm against the ARM intrinsics reference.
 */
#define vld3_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  uint8x8x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 16); \
  __ret; })
#define vld3_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  uint16x4x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 17); \
  __ret; })
#define vld3_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  uint32x2x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 18); \
  __ret; })
#define vld3_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x3_t __b = (b); \
  int8x8x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      __b.val[0], __b.val[1], \
      __b.val[2], __c, 0); \
  __ret; })
#define vld3_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x3_t __b = (b); \
  int16x4x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 1); \
  __ret; })
#define vld3_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x3_t __b = (b); \
  int32x2x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 2); \
  __ret; })
#define vld3_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x3_t __b = (b); \
  float16x4x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 7); \
  __ret; })
#define vld3_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x3_t __b = (b); \
  float32x2x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 8); \
  __ret; })
#define vld3_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  poly8x8x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 4); \
  __ret; })
#define vld3_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  poly16x4x3_t __ret; \
  __builtin_neon_vld3_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], __c, 5); \
  __ret; })
1864
/*
 * vld4q_<type>(ptr): 128-bit vld4 macros returning a four-vector
 * aggregate.  Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld4q_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld4q_u8(r).
 * NOTE(review): presumably a 4-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld4q_u8(__a) __extension__ ({ \
  uint8x16x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 48); \
  __ret; })
#define vld4q_u16(__a) __extension__ ({ \
  uint16x8x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 49); \
  __ret; })
#define vld4q_u32(__a) __extension__ ({ \
  uint32x4x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 50); \
  __ret; })
#define vld4q_s8(__a) __extension__ ({ \
  int8x16x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 32); \
  __ret; })
#define vld4q_s16(__a) __extension__ ({ \
  int16x8x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 33); \
  __ret; })
#define vld4q_s32(__a) __extension__ ({ \
  int32x4x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 34); \
  __ret; })
#define vld4q_f16(__a) __extension__ ({ \
  float16x8x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 39); \
  __ret; })
#define vld4q_f32(__a) __extension__ ({ \
  float32x4x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 40); \
  __ret; })
#define vld4q_p8(__a) __extension__ ({ \
  poly8x16x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 36); \
  __ret; })
#define vld4q_p16(__a) __extension__ ({ \
  poly16x8x4_t __ret; \
  __builtin_neon_vld4q_v(&__ret, __a, 37); \
  __ret; })
/*
 * vld4_<type>(ptr): 64-bit vld4 macros returning a four-vector aggregate.
 * Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld4_v fill it through its address (passing the pointer
 * argument and a per-type integer constant), and yields the aggregate as
 * the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld4_u8(r).
 * NOTE(review): presumably a 4-way de-interleaving load -- confirm against
 * the ARM intrinsics reference.
 */
#define vld4_u8(__a) __extension__ ({ \
  uint8x8x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 16); \
  __ret; })
#define vld4_u16(__a) __extension__ ({ \
  uint16x4x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 17); \
  __ret; })
#define vld4_u32(__a) __extension__ ({ \
  uint32x2x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 18); \
  __ret; })
#define vld4_u64(__a) __extension__ ({ \
  uint64x1x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 19); \
  __ret; })
#define vld4_s8(__a) __extension__ ({ \
  int8x8x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 0); \
  __ret; })
#define vld4_s16(__a) __extension__ ({ \
  int16x4x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 1); \
  __ret; })
#define vld4_s32(__a) __extension__ ({ \
  int32x2x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 2); \
  __ret; })
#define vld4_s64(__a) __extension__ ({ \
  int64x1x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 3); \
  __ret; })
#define vld4_f16(__a) __extension__ ({ \
  float16x4x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 7); \
  __ret; })
#define vld4_f32(__a) __extension__ ({ \
  float32x2x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 8); \
  __ret; })
#define vld4_p8(__a) __extension__ ({ \
  poly8x8x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 4); \
  __ret; })
#define vld4_p16(__a) __extension__ ({ \
  poly16x4x4_t __ret; \
  __builtin_neon_vld4_v(&__ret, __a, 5); \
  __ret; })
1909
/*
 * vld4_dup_<type>(ptr): 64-bit vld4 "dup" macros returning a four-vector
 * aggregate.  Each macro declares a result aggregate, lets the generic
 * __builtin_neon_vld4_dup_v fill it through its address (passing the
 * pointer argument and a per-type integer constant), and yields the
 * aggregate as the value of the statement expression.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: it is declared before the pointer argument is
 * expanded, so an ordinary name would capture a caller expression that
 * mentions a variable of the same name, e.g. vld4_dup_u8(r).
 * NOTE(review): presumably loads one 4-element structure and replicates it
 * across all lanes -- confirm against the ARM intrinsics reference.
 */
#define vld4_dup_u8(__a) __extension__ ({ \
  uint8x8x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 16); \
  __ret; })
#define vld4_dup_u16(__a) __extension__ ({ \
  uint16x4x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 17); \
  __ret; })
#define vld4_dup_u32(__a) __extension__ ({ \
  uint32x2x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 18); \
  __ret; })
#define vld4_dup_u64(__a) __extension__ ({ \
  uint64x1x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 19); \
  __ret; })
#define vld4_dup_s8(__a) __extension__ ({ \
  int8x8x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 0); \
  __ret; })
#define vld4_dup_s16(__a) __extension__ ({ \
  int16x4x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 1); \
  __ret; })
#define vld4_dup_s32(__a) __extension__ ({ \
  int32x2x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 2); \
  __ret; })
#define vld4_dup_s64(__a) __extension__ ({ \
  int64x1x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 3); \
  __ret; })
#define vld4_dup_f16(__a) __extension__ ({ \
  float16x4x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 7); \
  __ret; })
#define vld4_dup_f32(__a) __extension__ ({ \
  float32x2x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 8); \
  __ret; })
#define vld4_dup_p8(__a) __extension__ ({ \
  poly8x8x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 4); \
  __ret; })
#define vld4_dup_p16(__a) __extension__ ({ \
  poly16x4x4_t __ret; \
  __builtin_neon_vld4_dup_v(&__ret, __a, 5); \
  __ret; })
1934
/*
 * vld4q_lane_<type>(ptr, vecs, lane): 128-bit vld4 "lane" macros.
 * The four-vector aggregate argument is copied into a typed local (__b),
 * so it is evaluated once.  Its val[0..3] members are reinterpreted as
 * int8x16_t and passed, with the pointer, the lane argument and a per-type
 * integer constant, to __builtin_neon_vld4q_lane_v, which fills the result
 * aggregate through its address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 4-element structure into the given
 * lane of all four vectors -- confirm against the ARM intrinsics reference.
 */
#define vld4q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  uint16x8x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); \
  __ret; })
#define vld4q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  uint32x4x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); \
  __ret; })
#define vld4q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x4_t __b = (b); \
  int16x8x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); \
  __ret; })
#define vld4q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x4_t __b = (b); \
  int32x4x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); \
  __ret; })
#define vld4q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x4_t __b = (b); \
  float16x8x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); \
  __ret; })
#define vld4q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x4_t __b = (b); \
  float32x4x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 40); \
  __ret; })
#define vld4q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  poly16x8x4_t __ret; \
  __builtin_neon_vld4q_lane_v(&__ret, __a, \
      (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
      (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); \
  __ret; })
/*
 * vld4_lane_<type>(ptr, vecs, lane): 64-bit vld4 "lane" macros.
 * The four-vector aggregate argument is copied into a typed local (__b),
 * so it is evaluated once.  Its val[0..3] members are reinterpreted as
 * int8x8_t (the s8 form needs no cast) and passed, with the pointer, the
 * lane argument and a per-type integer constant, to
 * __builtin_neon_vld4_lane_v, which fills the result aggregate through its
 * address.
 *
 * The result temporary is named __ret (implementation-reserved) rather
 * than a plain identifier: the pointer argument is expanded after the
 * temporary is declared, so an ordinary name would capture a caller
 * expression that mentions a variable of the same name.
 * NOTE(review): presumably loads one 4-element structure into the given
 * lane of all four vectors -- confirm against the ARM intrinsics reference.
 */
#define vld4_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  uint8x8x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); \
  __ret; })
#define vld4_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  uint16x4x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); \
  __ret; })
#define vld4_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  uint32x2x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); \
  __ret; })
#define vld4_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x4_t __b = (b); \
  int8x8x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      __b.val[0], __b.val[1], \
      __b.val[2], __b.val[3], __c, 0); \
  __ret; })
#define vld4_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x4_t __b = (b); \
  int16x4x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); \
  __ret; })
#define vld4_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x4_t __b = (b); \
  int32x2x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); \
  __ret; })
#define vld4_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x4_t __b = (b); \
  float16x4x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); \
  __ret; })
#define vld4_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x4_t __b = (b); \
  float32x2x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); \
  __ret; })
#define vld4_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  poly8x8x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); \
  __ret; })
#define vld4_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  poly16x4x4_t __ret; \
  __builtin_neon_vld4_lane_v(&__ret, __a, \
      (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
      (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); \
  __ret; })
1986
1987__ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) {
1988 return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); }
1989__ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) {
1990 return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1991__ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) {
1992 return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1993__ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) {
1994 return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1995__ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) {
1996 return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1997__ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) {
1998 return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1999__ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) {
2000 return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 8); }
2001__ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) {
2002 return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 32); }
2003__ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) {
2004 return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2005__ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) {
2006 return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2007__ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) {
2008 return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2009__ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) {
2010 return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2011__ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) {
2012 return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2013__ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) {
2014 return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
2015
2016__ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) {
2017 return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); }
2018__ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) {
2019 return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2020__ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) {
2021 return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2022__ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) {
2023 return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2024__ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) {
2025 return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2026__ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) {
2027 return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2028__ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) {
2029 return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 8); }
2030__ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) {
2031 return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 32); }
2032__ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) {
2033 return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2034__ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) {
2035 return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2036__ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) {
2037 return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2038__ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) {
2039 return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2040__ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) {
2041 return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2042__ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) {
2043 return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
2044
2045__ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
2046 return __a + (__b * __c); }
2047__ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
2048 return __a + (__b * __c); }
2049__ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
2050 return __a + (__b * __c); }
2051__ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
2052 return __a + (__b * __c); }
2053__ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2054 return __a + (__b * __c); }
2055__ai uint16x4_t vmla_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2056 return __a + (__b * __c); }
2057__ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2058 return __a + (__b * __c); }
2059__ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
2060 return __a + (__b * __c); }
2061__ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
2062 return __a + (__b * __c); }
2063__ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
2064 return __a + (__b * __c); }
2065__ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
2066 return __a + (__b * __c); }
2067__ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
2068 return __a + (__b * __c); }
2069__ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
2070 return __a + (__b * __c); }
2071__ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
2072 return __a + (__b * __c); }
2073
2074__ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
2075 return __a + vmull_s8(__b, __c); }
2076__ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2077 return __a + vmull_s16(__b, __c); }
2078__ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2079 return __a + vmull_s32(__b, __c); }
2080__ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2081 return __a + vmull_u8(__b, __c); }
2082__ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2083 return __a + vmull_u16(__b, __c); }
2084__ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2085 return __a + vmull_u32(__b, __c); }
2086
/* vmlal_lane_<type>(a, b, c, __d)
 *
 * Each argument is evaluated once into a typed local.  Lane __d of `c` is
 * replicated into every element with __builtin_shufflevector, the matching
 * vmull_<type> is applied to `b` and that replicated vector, and the
 * product is added to the accumulator `a`. */
#define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __c = (c); \
  int16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int32x4_t __ret = __a + vmull_s16(__b, __lane); \
  __ret; })
#define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __c = (c); \
  int32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  int64x2_t __ret = __a + vmull_s32(__b, __lane); \
  __ret; })
#define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint32x4_t __ret = __a + vmull_u16(__b, __lane); \
  __ret; })
#define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  uint64x2_t __ret = __a + vmull_u32(__b, __lane); \
  __ret; })
2099
2100__ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2101 return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); }
2102__ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2103 return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); }
2104__ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) {
2105 return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); }
2106__ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) {
2107 return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); }
2108
/* vmla_lane_<type>(a, b, c, __d) / vmlaq_lane_<type>(a, b, c, __d)
 *
 * Each argument is evaluated once into a typed local.  Lane __d of `c` is
 * replicated with __builtin_shufflevector into a vector as wide as `b`
 * (for the q forms `c` has half as many elements as `b`), and the result
 * is a + (b * replicated), computed in a single expression. */
#define vmla_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __c = (c); \
  int16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int16x4_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmla_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __c = (c); \
  int32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  int32x2_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmla_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint16x4_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmla_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  uint32x2_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmla_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __c = (c); \
  float32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  float32x2_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  int16x4_t __c = (c); \
  int16x8_t __lane = __builtin_shufflevector(__c, __c, \
      __d, __d, __d, __d, __d, __d, __d, __d); \
  int16x8_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  int32x2_t __c = (c); \
  int32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int32x4_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x8_t __lane = __builtin_shufflevector(__c, __c, \
      __d, __d, __d, __d, __d, __d, __d, __d); \
  uint16x8_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint32x4_t __ret = __a + (__b * __lane); \
  __ret; })
#define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x2_t __c = (c); \
  float32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  float32x4_t __ret = __a + (__b * __lane); \
  __ret; })
2139
2140__ai int16x4_t vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) {
2141 return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); }
2142__ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) {
2143 return __a + (__b * (int32x2_t){ __c, __c }); }
2144__ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) {
2145 return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); }
2146__ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) {
2147 return __a + (__b * (uint32x2_t){ __c, __c }); }
2148__ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
2149 return __a + (__b * (float32x2_t){ __c, __c }); }
2150__ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) {
2151 return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2152__ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) {
2153 return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); }
2154__ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) {
2155 return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2156__ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) {
2157 return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); }
2158__ai float32x4_t vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
2159 return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); }
2160
2161__ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
2162 return __a - (__b * __c); }
2163__ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
2164 return __a - (__b * __c); }
2165__ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
2166 return __a - (__b * __c); }
2167__ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
2168 return __a - (__b * __c); }
2169__ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2170 return __a - (__b * __c); }
2171__ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2172 return __a - (__b * __c); }
2173__ai uint32x2_t vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2174 return __a - (__b * __c); }
2175__ai int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
2176 return __a - (__b * __c); }
2177__ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
2178 return __a - (__b * __c); }
2179__ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
2180 return __a - (__b * __c); }
2181__ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
2182 return __a - (__b * __c); }
2183__ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
2184 return __a - (__b * __c); }
2185__ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
2186 return __a - (__b * __c); }
2187__ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
2188 return __a - (__b * __c); }
2189
2190__ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
2191 return __a - vmull_s8(__b, __c); }
2192__ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2193 return __a - vmull_s16(__b, __c); }
2194__ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2195 return __a - vmull_s32(__b, __c); }
2196__ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2197 return __a - vmull_u8(__b, __c); }
2198__ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2199 return __a - vmull_u16(__b, __c); }
2200__ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2201 return __a - vmull_u32(__b, __c); }
2202
/* vmlsl_lane_<type>(a, b, c, __d)
 *
 * Each argument is evaluated once into a typed local.  Lane __d of `c` is
 * replicated into every element with __builtin_shufflevector, the matching
 * vmull_<type> is applied to `b` and that replicated vector, and the
 * product is subtracted from the accumulator `a`. */
#define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __c = (c); \
  int16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int32x4_t __ret = __a - vmull_s16(__b, __lane); \
  __ret; })
#define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __c = (c); \
  int32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  int64x2_t __ret = __a - vmull_s32(__b, __lane); \
  __ret; })
#define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint32x4_t __ret = __a - vmull_u16(__b, __lane); \
  __ret; })
#define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  uint64x2_t __ret = __a - vmull_u32(__b, __lane); \
  __ret; })
2215
2216__ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2217 return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); }
2218__ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2219 return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); }
2220__ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) {
2221 return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); }
2222__ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) {
2223 return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); }
2224
/* vmls_lane_<type>(a, b, c, __d) / vmlsq_lane_<type>(a, b, c, __d)
 *
 * Each argument is evaluated once into a typed local.  Lane __d of `c` is
 * replicated with __builtin_shufflevector into a vector as wide as `b`
 * (for the q forms `c` has half as many elements as `b`), and the result
 * is a - (b * replicated), computed in a single expression. */
#define vmls_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __c = (c); \
  int16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int16x4_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmls_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __c = (c); \
  int32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  int32x2_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmls_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint16x4_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmls_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  uint32x2_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmls_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __c = (c); \
  float32x2_t __lane = __builtin_shufflevector(__c, __c, __d, __d); \
  float32x2_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmlsq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  int16x4_t __c = (c); \
  int16x8_t __lane = __builtin_shufflevector(__c, __c, \
      __d, __d, __d, __d, __d, __d, __d, __d); \
  int16x8_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  int32x2_t __c = (c); \
  int32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  int32x4_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x4_t __c = (c); \
  uint16x8_t __lane = __builtin_shufflevector(__c, __c, \
      __d, __d, __d, __d, __d, __d, __d, __d); \
  uint16x8_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x2_t __c = (c); \
  uint32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  uint32x4_t __ret = __a - (__b * __lane); \
  __ret; })
#define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x2_t __c = (c); \
  float32x4_t __lane = __builtin_shufflevector(__c, __c, __d, __d, __d, __d); \
  float32x4_t __ret = __a - (__b * __lane); \
  __ret; })
2255
2256__ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) {
2257 return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); }
2258__ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) {
2259 return __a - (__b * (int32x2_t){ __c, __c }); }
2260__ai uint16x4_t vmls_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) {
2261 return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); }
2262__ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) {
2263 return __a - (__b * (uint32x2_t){ __c, __c }); }
2264__ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
2265 return __a - (__b * (float32x2_t){ __c, __c }); }
2266__ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) {
2267 return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2268__ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) {
2269 return __a - (__b * (int32x4_t){ __c, __c, __c, __c }); }
2270__ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) {
2271 return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2272__ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) {
2273 return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); }
2274__ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
2275 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); }
2276
2277__ai int8x8_t vmovn_s16(int16x8_t __a) {
2278 return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); }
2279__ai int16x4_t vmovn_s32(int32x4_t __a) {
2280 return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); }
2281__ai int32x2_t vmovn_s64(int64x2_t __a) {
2282 return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); }
2283__ai uint8x8_t vmovn_u16(uint16x8_t __a) {
2284 return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 16); }
2285__ai uint16x4_t vmovn_u32(uint32x4_t __a) {
2286 return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 17); }
2287__ai uint32x2_t vmovn_u64(uint64x2_t __a) {
2288 return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 18); }
2289
2290__ai uint8x8_t vmov_n_u8(uint8_t __a) {
2291 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2292__ai uint16x4_t vmov_n_u16(uint16_t __a) {
2293 return (uint16x4_t){ __a, __a, __a, __a }; }
2294__ai uint32x2_t vmov_n_u32(uint32_t __a) {
2295 return (uint32x2_t){ __a, __a }; }
2296__ai int8x8_t vmov_n_s8(int8_t __a) {
2297 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2298__ai int16x4_t vmov_n_s16(int16_t __a) {
2299 return (int16x4_t){ __a, __a, __a, __a }; }
2300__ai int32x2_t vmov_n_s32(int32_t __a) {
2301 return (int32x2_t){ __a, __a }; }
2302__ai poly8x8_t vmov_n_p8(poly8_t __a) {
2303 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2304__ai poly16x4_t vmov_n_p16(poly16_t __a) {
2305 return (poly16x4_t){ __a, __a, __a, __a }; }
2306__ai float32x2_t vmov_n_f32(float32_t __a) {
2307 return (float32x2_t){ __a, __a }; }
2308__ai uint8x16_t vmovq_n_u8(uint8_t __a) {
2309 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
2310__ai uint16x8_t vmovq_n_u16(uint16_t __a) {
2311 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2312__ai uint32x4_t vmovq_n_u32(uint32_t __a) {
2313 return (uint32x4_t){ __a, __a, __a, __a }; }
2314__ai int8x16_t vmovq_n_s8(int8_t __a) {
2315 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
2316__ai int16x8_t vmovq_n_s16(int16_t __a) {
2317 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2318__ai int32x4_t vmovq_n_s32(int32_t __a) {
2319 return (int32x4_t){ __a, __a, __a, __a }; }
2320__ai poly8x16_t vmovq_n_p8(poly8_t __a) {
2321 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
2322__ai poly16x8_t vmovq_n_p16(poly16_t __a) {
2323 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2324__ai float32x4_t vmovq_n_f32(float32_t __a) {
2325 return (float32x4_t){ __a, __a, __a, __a }; }
2326__ai int64x1_t vmov_n_s64(int64_t __a) {
2327 return (int64x1_t){ __a }; }
2328__ai uint64x1_t vmov_n_u64(uint64_t __a) {
2329 return (uint64x1_t){ __a }; }
2330__ai int64x2_t vmovq_n_s64(int64_t __a) {
2331 return (int64x2_t){ __a, __a }; }
2332__ai uint64x2_t vmovq_n_u64(uint64_t __a) {
2333 return (uint64x2_t){ __a, __a }; }
2334
2335__ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) {
2336 return __a * __b; }
2337__ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) {
2338 return __a * __b; }
2339__ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) {
2340 return __a * __b; }
2341__ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) {
2342 return __a * __b; }
2343__ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) {
2344 return __a * __b; }
2345__ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) {
2346 return __a * __b; }
2347__ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) {
2348 return __a * __b; }
2349__ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) {
2350 return __a * __b; }
2351__ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) {
2352 return __a * __b; }
2353__ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) {
2354 return __a * __b; }
2355__ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t __b) {
2356 return __a * __b; }
2357__ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) {
2358 return __a * __b; }
2359__ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) {
2360 return __a * __b; }
2361__ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) {
2362 return __a * __b; }
2363
/* vmull_lane_<type>(a, b, __c)
 *
 * Each vector argument is evaluated once into a typed local.  Lane __c of
 * `b` is replicated into every element with __builtin_shufflevector, and
 * the macro's value is the matching vmull_<type> applied to `a` and that
 * replicated vector. */
#define vmull_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  vmull_s16(__a, __lane); })
#define vmull_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __lane = __builtin_shufflevector(__b, __b, __c, __c); \
  vmull_s32(__a, __lane); })
#define vmull_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  vmull_u16(__a, __lane); })
#define vmull_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __lane = __builtin_shufflevector(__b, __b, __c, __c); \
  vmull_u32(__a, __lane); })
2376
2377__ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) {
2378 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2379__ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) {
2380 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2381__ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) {
2382 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 50); }
2383__ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) {
2384 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 51); }
2385
2386__ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) {
2387 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); }
2388__ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) {
2389 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); }
2390
/* vmul_lane_<type>(a, b, __c) / vmulq_lane_<type>(a, b, __c)
 *
 * Each vector argument is evaluated once into a typed local.  Lane __c of
 * `b` is replicated with __builtin_shufflevector into a vector as wide as
 * `a` (for the q forms `b` has half as many elements as `a`), and the
 * macro's value is `a` multiplied by that replicated vector. */
#define vmul_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  int16x4_t __ret = __a * __lane; \
  __ret; })
#define vmul_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __lane = __builtin_shufflevector(__b, __b, __c, __c); \
  int32x2_t __ret = __a * __lane; \
  __ret; })
#define vmul_lane_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __lane = __builtin_shufflevector(__b, __b, __c, __c); \
  float32x2_t __ret = __a * __lane; \
  __ret; })
#define vmul_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  uint16x4_t __ret = __a * __lane; \
  __ret; })
#define vmul_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __lane = __builtin_shufflevector(__b, __b, __c, __c); \
  uint32x2_t __ret = __a * __lane; \
  __ret; })
#define vmulq_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x4_t __b = (b); \
  int16x8_t __lane = __builtin_shufflevector(__b, __b, \
      __c, __c, __c, __c, __c, __c, __c, __c); \
  int16x8_t __ret = __a * __lane; \
  __ret; })
#define vmulq_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x2_t __b = (b); \
  int32x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  int32x4_t __ret = __a * __lane; \
  __ret; })
#define vmulq_lane_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x2_t __b = (b); \
  float32x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  float32x4_t __ret = __a * __lane; \
  __ret; })
#define vmulq_lane_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x8_t __lane = __builtin_shufflevector(__b, __b, \
      __c, __c, __c, __c, __c, __c, __c, __c); \
  uint16x8_t __ret = __a * __lane; \
  __ret; })
#define vmulq_lane_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x4_t __lane = __builtin_shufflevector(__b, __b, __c, __c, __c, __c); \
  uint32x4_t __ret = __a * __lane; \
  __ret; })
2421
2422__ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) {
2423 return __a * (int16x4_t){ __b, __b, __b, __b }; }
2424__ai int32x2_t vmul_n_s32(int32x2_t __a, int32_t __b) {
2425 return __a * (int32x2_t){ __b, __b }; }
2426__ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) {
2427 return __a * (float32x2_t){ __b, __b }; }
2428__ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) {
2429 return __a * (uint16x4_t){ __b, __b, __b, __b }; }
2430__ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) {
2431 return __a * (uint32x2_t){ __b, __b }; }
2432__ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) {
2433 return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; }
2434__ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) {
2435 return __a * (int32x4_t){ __b, __b, __b, __b }; }
2436__ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) {
2437 return __a * (float32x4_t){ __b, __b, __b, __b }; }
2438__ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) {
2439 return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; }
2440__ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) {
2441 return __a * (uint32x4_t){ __b, __b, __b, __b }; }
2442
2443__ai int8x8_t vmvn_s8(int8x8_t __a) {
2444 return ~__a; }
2445__ai int16x4_t vmvn_s16(int16x4_t __a) {
2446 return ~__a; }
2447__ai int32x2_t vmvn_s32(int32x2_t __a) {
2448 return ~__a; }
2449__ai uint8x8_t vmvn_u8(uint8x8_t __a) {
2450 return ~__a; }
2451__ai uint16x4_t vmvn_u16(uint16x4_t __a) {
2452 return ~__a; }
2453__ai uint32x2_t vmvn_u32(uint32x2_t __a) {
2454 return ~__a; }
2455__ai poly8x8_t vmvn_p8(poly8x8_t __a) {
2456 return ~__a; }
2457__ai int8x16_t vmvnq_s8(int8x16_t __a) {
2458 return ~__a; }
2459__ai int16x8_t vmvnq_s16(int16x8_t __a) {
2460 return ~__a; }
2461__ai int32x4_t vmvnq_s32(int32x4_t __a) {
2462 return ~__a; }
2463__ai uint8x16_t vmvnq_u8(uint8x16_t __a) {
2464 return ~__a; }
2465__ai uint16x8_t vmvnq_u16(uint16x8_t __a) {
2466 return ~__a; }
2467__ai uint32x4_t vmvnq_u32(uint32x4_t __a) {
2468 return ~__a; }
2469__ai poly8x16_t vmvnq_p8(poly8x16_t __a) {
2470 return ~__a; }
2471
2472__ai int8x8_t vneg_s8(int8x8_t __a) {
2473 return -__a; }
2474__ai int16x4_t vneg_s16(int16x4_t __a) {
2475 return -__a; }
2476__ai int32x2_t vneg_s32(int32x2_t __a) {
2477 return -__a; }
2478__ai float32x2_t vneg_f32(float32x2_t __a) {
2479 return -__a; }
2480__ai int8x16_t vnegq_s8(int8x16_t __a) {
2481 return -__a; }
2482__ai int16x8_t vnegq_s16(int16x8_t __a) {
2483 return -__a; }
2484__ai int32x4_t vnegq_s32(int32x4_t __a) {
2485 return -__a; }
2486__ai float32x4_t vnegq_f32(float32x4_t __a) {
2487 return -__a; }
2488
2489__ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) {
2490 return __a | ~__b; }
2491__ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) {
2492 return __a | ~__b; }
2493__ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) {
2494 return __a | ~__b; }
2495__ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) {
2496 return __a | ~__b; }
2497__ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) {
2498 return __a | ~__b; }
2499__ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) {
2500 return __a | ~__b; }
2501__ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) {
2502 return __a | ~__b; }
2503__ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) {
2504 return __a | ~__b; }
2505__ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) {
2506 return __a | ~__b; }
2507__ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) {
2508 return __a | ~__b; }
2509__ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) {
2510 return __a | ~__b; }
2511__ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) {
2512 return __a | ~__b; }
2513__ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) {
2514 return __a | ~__b; }
2515__ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) {
2516 return __a | ~__b; }
2517__ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) {
2518 return __a | ~__b; }
2519__ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) {
2520 return __a | ~__b; }
2521
2522__ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) {
2523 return __a | __b; }
2524__ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t __b) {
2525 return __a | __b; }
2526__ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) {
2527 return __a | __b; }
2528__ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) {
2529 return __a | __b; }
2530__ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) {
2531 return __a | __b; }
2532__ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) {
2533 return __a | __b; }
2534__ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) {
2535 return __a | __b; }
2536__ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) {
2537 return __a | __b; }
2538__ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) {
2539 return __a | __b; }
2540__ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) {
2541 return __a | __b; }
2542__ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) {
2543 return __a | __b; }
2544__ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) {
2545 return __a | __b; }
2546__ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) {
2547 return __a | __b; }
2548__ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) {
2549 return __a | __b; }
2550__ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) {
2551 return __a | __b; }
2552__ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) {
2553 return __a | __b; }
2554
2555__ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) {
2556 return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); }
2557__ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) {
2558 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2559__ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) {
2560 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2561__ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) {
2562 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2563__ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) {
2564 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2565__ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) {
2566 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2567__ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) {
2568 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 33); }
2569__ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) {
2570 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2571__ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) {
2572 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2573__ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) {
2574 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2575__ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) {
2576 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2577__ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) {
2578 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2579
2580__ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) {
2581 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); }
2582__ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) {
2583 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2584__ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) {
2585 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2586__ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) {
2587 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2588__ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) {
2589 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2590__ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) {
2591 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2592__ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) {
2593 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 8); }
2594
2595__ai int16x4_t vpaddl_s8(int8x8_t __a) {
2596 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); }
2597__ai int32x2_t vpaddl_s16(int16x4_t __a) {
2598 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); }
2599__ai int64x1_t vpaddl_s32(int32x2_t __a) {
2600 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); }
2601__ai uint16x4_t vpaddl_u8(uint8x8_t __a) {
2602 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 17); }
2603__ai uint32x2_t vpaddl_u16(uint16x4_t __a) {
2604 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 18); }
2605__ai uint64x1_t vpaddl_u32(uint32x2_t __a) {
2606 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 19); }
2607__ai int16x8_t vpaddlq_s8(int8x16_t __a) {
2608 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 33); }
2609__ai int32x4_t vpaddlq_s16(int16x8_t __a) {
2610 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 34); }
2611__ai int64x2_t vpaddlq_s32(int32x4_t __a) {
2612 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 35); }
2613__ai uint16x8_t vpaddlq_u8(uint8x16_t __a) {
2614 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 49); }
2615__ai uint32x4_t vpaddlq_u16(uint16x8_t __a) {
2616 return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 50); }
2617__ai uint64x2_t vpaddlq_u32(uint32x4_t __a) {
2618 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 51); }
2619
2620__ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) {
2621 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); }
2622__ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) {
2623 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2624__ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) {
2625 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2626__ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) {
2627 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2628__ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) {
2629 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2630__ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) {
2631 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2632__ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) {
2633 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 8); }
2634
2635__ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) {
2636 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); }
2637__ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) {
2638 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2639__ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) {
2640 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2641__ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) {
2642 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2643__ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) {
2644 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2645__ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) {
2646 return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2647__ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) {
2648 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 8); }
2649
2650__ai int8x8_t vqabs_s8(int8x8_t __a) {
2651 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); }
2652__ai int16x4_t vqabs_s16(int16x4_t __a) {
2653 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); }
2654__ai int32x2_t vqabs_s32(int32x2_t __a) {
2655 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); }
2656__ai int8x16_t vqabsq_s8(int8x16_t __a) {
2657 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 32); }
2658__ai int16x8_t vqabsq_s16(int16x8_t __a) {
2659 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 33); }
2660__ai int32x4_t vqabsq_s32(int32x4_t __a) {
2661 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 34); }
2662
2663__ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) {
2664 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); }
2665__ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) {
2666 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2667__ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) {
2668 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2669__ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) {
2670 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2671__ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) {
2672 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2673__ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) {
2674 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2675__ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) {
2676 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2677__ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) {
2678 return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2679__ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) {
2680 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 32); }
2681__ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) {
2682 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2683__ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) {
2684 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2685__ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) {
2686 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2687__ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) {
2688 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2689__ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) {
2690 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2691__ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) {
2692 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2693__ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) {
2694 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2695
2696__ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2697 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2698__ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2699 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2700
/* vqdmlal_lane_s16 / vqdmlal_lane_s32: statement-expression macros that
 * forward to vqdmlal_s16 / vqdmlal_s32.  Arguments a, b and c are each
 * copied once into typed locals, so they are evaluated exactly once.  The
 * lane index __d is pasted straight into __builtin_shufflevector, which
 * builds a vector whose every lane is lane __d of __c; because it is a
 * shuffle index it has to be an integer constant expression. */
#define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })

2708__ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2709 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2710__ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2711 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2712
2713__ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2714 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2715__ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2716 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2717
/* vqdmlsl_lane_s16 / vqdmlsl_lane_s32: statement-expression macros that
 * forward to vqdmlsl_s16 / vqdmlsl_s32.  Arguments a, b and c are each
 * copied once into typed locals, so they are evaluated exactly once.  The
 * lane index __d is pasted straight into __builtin_shufflevector, which
 * builds a vector whose every lane is lane __d of __c; because it is a
 * shuffle index it has to be an integer constant expression. */
#define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })

2725__ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2726 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2727__ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2728 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2729
2730__ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) {
2731 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2732__ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) {
2733 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2734__ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) {
2735 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2736__ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) {
2737 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2738
/* vqdmulh_lane_* / vqdmulhq_lane_*: statement-expression macros that forward
 * to vqdmulh_s16/_s32 and vqdmulhq_s16/_s32.  Arguments a and b are copied
 * once into typed locals, so each is evaluated exactly once.  The lane index
 * __c is pasted straight into __builtin_shufflevector, which builds a vector
 * whose every lane is lane __c of __b; because it is a shuffle index it has
 * to be an integer constant expression.  In the q forms __b is still the
 * short (int16x4_t / int32x2_t) vector and the shuffle widens it to the lane
 * count of __a. */
#define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
#define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x4_t __b = (b); \
  vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
#define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x2_t __b = (b); \
  vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })

2752__ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) {
2753 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2754__ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) {
2755 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2756__ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) {
2757 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2758__ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) {
2759 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2760
2761__ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) {
2762 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
2763__ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) {
2764 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
2765
/* vqdmull_lane_s16 / vqdmull_lane_s32: statement-expression macros that
 * forward to vqdmull_s16 / vqdmull_s32.  Arguments a and b are copied once
 * into typed locals, so each is evaluated exactly once.  The lane index __c
 * is pasted straight into __builtin_shufflevector, which builds a vector
 * whose every lane is lane __c of __b; because it is a shuffle index it has
 * to be an integer constant expression. */
#define vqdmull_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vqdmull_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })

2773__ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) {
2774 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2775__ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) {
2776 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2777
2778__ai int8x8_t vqmovn_s16(int16x8_t __a) {
2779 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); }
2780__ai int16x4_t vqmovn_s32(int32x4_t __a) {
2781 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); }
2782__ai int32x2_t vqmovn_s64(int64x2_t __a) {
2783 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); }
2784__ai uint8x8_t vqmovn_u16(uint16x8_t __a) {
2785 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 16); }
2786__ai uint16x4_t vqmovn_u32(uint32x4_t __a) {
2787 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 17); }
2788__ai uint32x2_t vqmovn_u64(uint64x2_t __a) {
2789 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 18); }
2790
2791__ai uint8x8_t vqmovun_s16(int16x8_t __a) {
2792 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 16); }
2793__ai uint16x4_t vqmovun_s32(int32x4_t __a) {
2794 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 17); }
2795__ai uint32x2_t vqmovun_s64(int64x2_t __a) {
2796 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 18); }
2797
2798__ai int8x8_t vqneg_s8(int8x8_t __a) {
2799 return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); }
2800__ai int16x4_t vqneg_s16(int16x4_t __a) {
2801 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); }
2802__ai int32x2_t vqneg_s32(int32x2_t __a) {
2803 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); }
2804__ai int8x16_t vqnegq_s8(int8x16_t __a) {
2805 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 32); }
2806__ai int16x8_t vqnegq_s16(int16x8_t __a) {
2807 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 33); }
2808__ai int32x4_t vqnegq_s32(int32x4_t __a) {
2809 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 34); }
2810
2811__ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) {
2812 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2813__ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) {
2814 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2815__ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) {
2816 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2817__ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) {
2818 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2819
/* vqrdmulh_lane_* / vqrdmulhq_lane_*: statement-expression macros that
 * forward to vqrdmulh_s16/_s32 and vqrdmulhq_s16/_s32.  Arguments a and b
 * are copied once into typed locals, so each is evaluated exactly once.  The
 * lane index __c is pasted straight into __builtin_shufflevector, which
 * builds a vector whose every lane is lane __c of __b; because it is a
 * shuffle index it has to be an integer constant expression.  In the q forms
 * __b is still the short (int16x4_t / int32x2_t) vector and the shuffle
 * widens it to the lane count of __a. */
#define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
#define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x4_t __b = (b); \
  vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
#define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x2_t __b = (b); \
  vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })

2833__ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) {
2834 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2835__ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) {
2836 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2837__ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) {
2838 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2839__ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) {
2840 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2841
2842__ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) {
2843 return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); }
2844__ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) {
2845 return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2846__ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) {
2847 return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2848__ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) {
2849 return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2850__ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) {
2851 return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 16); }
2852__ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) {
2853 return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2854__ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) {
2855 return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2856__ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) {
2857 return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2858__ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) {
2859 return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 32); }
2860__ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) {
2861 return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2862__ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) {
2863 return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2864__ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) {
2865 return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2866__ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) {
2867 return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 48); }
2868__ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) {
2869 return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2870__ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) {
2871 return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2872__ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) {
2873 return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2874
/* vqrshrn_n_*: statement-expression macros around
 * __builtin_neon_vqrshrn_n_v.  The vector argument a is copied once into a
 * typed local (so it is evaluated exactly once) and cast to int8x16_t; the
 * second macro argument __b is pasted unchanged into the builtin call.  The
 * result is cast to a vector with the same lane count and half-width
 * elements.  The final constant follows the narrow result element type
 * (0/1/2 signed, 16/17/18 unsigned).
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating rounding shift right narrow" -- confirm
 * against the ACLE reference. */
#define vqrshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); })
#define vqrshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); })
#define vqrshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); })
#define vqrshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 16); })
#define vqrshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 17); })
#define vqrshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 18); })

/* vqrshrun_n_*: statement-expression macros around
 * __builtin_neon_vqrshrun_n_v.  The signed vector argument a is copied once
 * into a typed local (so it is evaluated exactly once) and cast to
 * int8x16_t; the second macro argument __b is pasted unchanged into the
 * builtin call.  The result is cast to an unsigned vector with the same lane
 * count and half-width elements.  The final constant is 16/17/18, following
 * the unsigned result element type.
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating rounding shift right narrow, signed to
 * unsigned" -- confirm against the ACLE reference. */
#define vqrshrun_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 16); })
#define vqrshrun_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 17); })
#define vqrshrun_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 18); })

2904__ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) {
2905 return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); }
2906__ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) {
2907 return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2908__ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) {
2909 return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2910__ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) {
2911 return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2912__ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) {
2913 return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 16); }
2914__ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) {
2915 return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2916__ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) {
2917 return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2918__ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) {
2919 return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2920__ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) {
2921 return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 32); }
2922__ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) {
2923 return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2924__ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) {
2925 return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2926__ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) {
2927 return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2928__ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) {
2929 return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 48); }
2930__ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) {
2931 return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2932__ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) {
2933 return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2934__ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) {
2935 return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2936
/* vqshlu_n_* / vqshluq_n_*: statement-expression macros around
 * __builtin_neon_vqshlu_n_v and __builtin_neon_vqshluq_n_v.  The signed
 * vector argument a is copied once into a typed local (so it is evaluated
 * exactly once) and cast to int8x8_t / int8x16_t where needed; the second
 * macro argument __b is pasted unchanged into the builtin call.  The result
 * is cast to the unsigned vector type with the same lane count and element
 * width.  The final constant is 16..19 for non-q forms and 48..51 for q
 * forms, following the unsigned result type.
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating shift left, signed to unsigned" -- confirm
 * against the ACLE reference. */
#define vqshlu_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 16); })
#define vqshlu_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 17); })
#define vqshlu_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 18); })
#define vqshlu_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 19); })
#define vqshluq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 48); })
#define vqshluq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 49); })
#define vqshluq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 50); })
#define vqshluq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 51); })

/* vqshl_n_* / vqshlq_n_*: statement-expression macros around
 * __builtin_neon_vqshl_n_v and __builtin_neon_vqshlq_n_v for signed and
 * unsigned 8/16/32/64-bit element vectors.  The vector argument a is copied
 * once into a typed local (so it is evaluated exactly once) and cast to
 * int8x8_t / int8x16_t where needed; the second macro argument __b is pasted
 * unchanged into the builtin call.  The result is cast back to the type of
 * a.  The final constant is 0..3 by element width, +16 for unsigned, +32 for
 * q forms.
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating shift left by immediate" -- confirm
 * against the ACLE reference. */
#define vqshl_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); })
#define vqshl_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); })
#define vqshl_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); })
#define vqshl_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); })
#define vqshl_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 16); })
#define vqshl_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 17); })
#define vqshl_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 18); })
#define vqshl_n_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 19); })
#define vqshlq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 32); })
#define vqshlq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 33); })
#define vqshlq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 34); })
#define vqshlq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 35); })
#define vqshlq_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 48); })
#define vqshlq_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 49); })
#define vqshlq_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 50); })
#define vqshlq_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 51); })

/* vqshrn_n_*: statement-expression macros around __builtin_neon_vqshrn_n_v.
 * The vector argument a is copied once into a typed local (so it is
 * evaluated exactly once) and cast to int8x16_t; the second macro argument
 * __b is pasted unchanged into the builtin call.  The result is cast to a
 * vector with the same lane count and half-width elements.  The final
 * constant follows the narrow result element type (0/1/2 signed, 16/17/18
 * unsigned).
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating shift right narrow" -- confirm against the
 * ACLE reference. */
#define vqshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); })
#define vqshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); })
#define vqshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); })
#define vqshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 16); })
#define vqshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 17); })
#define vqshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 18); })

/* vqshrun_n_*: statement-expression macros around
 * __builtin_neon_vqshrun_n_v.  The signed vector argument a is copied once
 * into a typed local (so it is evaluated exactly once) and cast to
 * int8x16_t; the second macro argument __b is pasted unchanged into the
 * builtin call.  The result is cast to an unsigned vector with the same lane
 * count and half-width elements.  The final constant is 16/17/18, following
 * the unsigned result element type.
 * NOTE(review): __b is presumably a compile-time constant shift amount and
 * the operation ARM's "saturating shift right narrow, signed to unsigned" --
 * confirm against the ACLE reference. */
#define vqshrun_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 16); })
#define vqshrun_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 17); })
#define vqshrun_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 18); })

3040__ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) {
3041 return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); }
3042__ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) {
3043 return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3044__ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) {
3045 return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3046__ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) {
3047 return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3048__ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) {
3049 return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
3050__ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) {
3051 return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3052__ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) {
3053 return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3054__ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) {
3055 return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 19); }
/* vqsubq_<t>: 128-bit vector wrappers around __builtin_neon_vqsubq_v.
 * Operands are cast to int8x16_t (the s8 variant already has that type), a
 * type code is appended, and the result is cast back to the caller's type.
 * Codes used here: 32..35 for s8/s16/s32/s64 and 48..51 for u8/u16/u32/u64.
 * NOTE(review): by NEON naming this is a lane-wise saturating subtract; the
 * arithmetic is inside the builtin and cannot be confirmed from this header. */
3056__ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) {
3057 return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 32); }
3058__ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) {
3059 return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3060__ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) {
3061 return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3062__ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) {
3063 return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3064__ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) {
3065 return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
3066__ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) {
3067 return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3068__ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) {
3069 return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3070__ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) {
3071 return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
3072
/* vraddhn_<t>: wrappers around __builtin_neon_vraddhn_v.
 * Each takes two 128-bit vectors and returns a 64-bit vector with the same
 * lane count and lanes half as wide (e.g. int16x8_t -> int8x8_t). Operands
 * are cast to int8x16_t; the type code passed (0..2 signed, 16..18 unsigned)
 * matches the narrower result type, not the operand type.
 * NOTE(review): by NEON naming this is a rounding add that keeps the high
 * half of each sum; the arithmetic is in the builtin -- confirm there. */
3073__ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) {
3074 return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
3075__ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) {
3076 return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
3077__ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) {
3078 return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
3079__ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) {
3080 return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
3081__ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) {
3082 return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
3083__ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) {
3084 return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
3085
/* vrecpe_<t> / vrecpeq_<t>: single-operand wrappers around
 * __builtin_neon_vrecpe_v (64-bit vectors) and __builtin_neon_vrecpeq_v
 * (128-bit vectors). The operand is cast to int8x8_t / int8x16_t and passed
 * with a type code: 8 (f32) and 18 (u32) for the 64-bit forms, 40 and 50 for
 * the 128-bit forms.
 * NOTE(review): by NEON naming this is a per-lane reciprocal estimate; its
 * precision and semantics are defined by the builtin, not by this header. */
3086__ai float32x2_t vrecpe_f32(float32x2_t __a) {
3087 return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 8); }
3088__ai uint32x2_t vrecpe_u32(uint32x2_t __a) {
3089 return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 18); }
3090__ai float32x4_t vrecpeq_f32(float32x4_t __a) {
3091 return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 40); }
3092__ai uint32x4_t vrecpeq_u32(uint32x4_t __a) {
3093 return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 50); }
3094
/* vrecps_f32 / vrecpsq_f32: two-operand float wrappers around
 * __builtin_neon_vrecps_v (64-bit, type code 8) and __builtin_neon_vrecpsq_v
 * (128-bit, type code 40). Operands are cast to int8x8_t / int8x16_t and the
 * result is cast back to the float vector type.
 * NOTE(review): by NEON naming this is the reciprocal refinement step used
 * together with vrecpe; the formula is in the builtin -- confirm there. */
3095__ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) {
3096 return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 8); }
3097__ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) {
3098 return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
3099
/* vreinterpret_s8_<src>: return the 64-bit vector argument cast to int8x8_t.
 * No builtin is called; each body is a single C-style vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3100__ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) {
3101 return (int8x8_t)__a; }
3102__ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) {
3103 return (int8x8_t)__a; }
3104__ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) {
3105 return (int8x8_t)__a; }
3106__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) {
3107 return (int8x8_t)__a; }
3108__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) {
3109 return (int8x8_t)__a; }
3110__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) {
3111 return (int8x8_t)__a; }
3112__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) {
3113 return (int8x8_t)__a; }
3114__ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) {
3115 return (int8x8_t)__a; }
3116__ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) {
3117 return (int8x8_t)__a; }
3118__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) {
3119 return (int8x8_t)__a; }
3120__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) {
3121 return (int8x8_t)__a; }
/* vreinterpret_s16_<src>: return the 64-bit vector argument cast to
 * int16x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3122__ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) {
3123 return (int16x4_t)__a; }
3124__ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) {
3125 return (int16x4_t)__a; }
3126__ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) {
3127 return (int16x4_t)__a; }
3128__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) {
3129 return (int16x4_t)__a; }
3130__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __a) {
3131 return (int16x4_t)__a; }
3132__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) {
3133 return (int16x4_t)__a; }
3134__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) {
3135 return (int16x4_t)__a; }
3136__ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) {
3137 return (int16x4_t)__a; }
3138__ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) {
3139 return (int16x4_t)__a; }
3140__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) {
3141 return (int16x4_t)__a; }
3142__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) {
3143 return (int16x4_t)__a; }
/* vreinterpret_s32_<src>: return the 64-bit vector argument cast to
 * int32x2_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3144__ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) {
3145 return (int32x2_t)__a; }
3146__ai int32x2_t vreinterpret_s32_s16(int16x4_t __a) {
3147 return (int32x2_t)__a; }
3148__ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) {
3149 return (int32x2_t)__a; }
3150__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) {
3151 return (int32x2_t)__a; }
3152__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) {
3153 return (int32x2_t)__a; }
3154__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) {
3155 return (int32x2_t)__a; }
3156__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) {
3157 return (int32x2_t)__a; }
3158__ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) {
3159 return (int32x2_t)__a; }
3160__ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) {
3161 return (int32x2_t)__a; }
3162__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) {
3163 return (int32x2_t)__a; }
3164__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) {
3165 return (int32x2_t)__a; }
/* vreinterpret_s64_<src>: return the 64-bit vector argument cast to
 * int64x1_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3166__ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) {
3167 return (int64x1_t)__a; }
3168__ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) {
3169 return (int64x1_t)__a; }
3170__ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) {
3171 return (int64x1_t)__a; }
3172__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) {
3173 return (int64x1_t)__a; }
3174__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) {
3175 return (int64x1_t)__a; }
3176__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) {
3177 return (int64x1_t)__a; }
3178__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) {
3179 return (int64x1_t)__a; }
3180__ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) {
3181 return (int64x1_t)__a; }
3182__ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) {
3183 return (int64x1_t)__a; }
3184__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) {
3185 return (int64x1_t)__a; }
3186__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) {
3187 return (int64x1_t)__a; }
/* vreinterpret_u8_<src>: return the 64-bit vector argument cast to
 * uint8x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3188__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) {
3189 return (uint8x8_t)__a; }
3190__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) {
3191 return (uint8x8_t)__a; }
3192__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) {
3193 return (uint8x8_t)__a; }
3194__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) {
3195 return (uint8x8_t)__a; }
3196__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) {
3197 return (uint8x8_t)__a; }
3198__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) {
3199 return (uint8x8_t)__a; }
3200__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) {
3201 return (uint8x8_t)__a; }
3202__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) {
3203 return (uint8x8_t)__a; }
3204__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) {
3205 return (uint8x8_t)__a; }
3206__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) {
3207 return (uint8x8_t)__a; }
3208__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) {
3209 return (uint8x8_t)__a; }
/* vreinterpret_u16_<src>: return the 64-bit vector argument cast to
 * uint16x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3210__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) {
3211 return (uint16x4_t)__a; }
3212__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) {
3213 return (uint16x4_t)__a; }
3214__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) {
3215 return (uint16x4_t)__a; }
3216__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) {
3217 return (uint16x4_t)__a; }
3218__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) {
3219 return (uint16x4_t)__a; }
3220__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) {
3221 return (uint16x4_t)__a; }
3222__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) {
3223 return (uint16x4_t)__a; }
3224__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) {
3225 return (uint16x4_t)__a; }
3226__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) {
3227 return (uint16x4_t)__a; }
3228__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) {
3229 return (uint16x4_t)__a; }
3230__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) {
3231 return (uint16x4_t)__a; }
/* vreinterpret_u32_<src>: return the 64-bit vector argument cast to
 * uint32x2_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3232__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) {
3233 return (uint32x2_t)__a; }
3234__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) {
3235 return (uint32x2_t)__a; }
3236__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __a) {
3237 return (uint32x2_t)__a; }
3238__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) {
3239 return (uint32x2_t)__a; }
3240__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) {
3241 return (uint32x2_t)__a; }
3242__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) {
3243 return (uint32x2_t)__a; }
3244__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) {
3245 return (uint32x2_t)__a; }
3246__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) {
3247 return (uint32x2_t)__a; }
3248__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) {
3249 return (uint32x2_t)__a; }
3250__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) {
3251 return (uint32x2_t)__a; }
3252__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) {
3253 return (uint32x2_t)__a; }
/* vreinterpret_u64_<src>: return the 64-bit vector argument cast to
 * uint64x1_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3254__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) {
3255 return (uint64x1_t)__a; }
3256__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) {
3257 return (uint64x1_t)__a; }
3258__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) {
3259 return (uint64x1_t)__a; }
3260__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) {
3261 return (uint64x1_t)__a; }
3262__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) {
3263 return (uint64x1_t)__a; }
3264__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) {
3265 return (uint64x1_t)__a; }
3266__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) {
3267 return (uint64x1_t)__a; }
3268__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) {
3269 return (uint64x1_t)__a; }
3270__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) {
3271 return (uint64x1_t)__a; }
3272__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) {
3273 return (uint64x1_t)__a; }
3274__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) {
3275 return (uint64x1_t)__a; }
/* vreinterpret_f16_<src>: return the 64-bit vector argument cast to
 * float16x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3276__ai float16x4_t vreinterpret_f16_s8(int8x8_t __a) {
3277 return (float16x4_t)__a; }
3278__ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) {
3279 return (float16x4_t)__a; }
3280__ai float16x4_t vreinterpret_f16_s32(int32x2_t __a) {
3281 return (float16x4_t)__a; }
3282__ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) {
3283 return (float16x4_t)__a; }
3284__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) {
3285 return (float16x4_t)__a; }
3286__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) {
3287 return (float16x4_t)__a; }
3288__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) {
3289 return (float16x4_t)__a; }
3290__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) {
3291 return (float16x4_t)__a; }
3292__ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) {
3293 return (float16x4_t)__a; }
3294__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __a) {
3295 return (float16x4_t)__a; }
3296__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) {
3297 return (float16x4_t)__a; }
/* vreinterpret_f32_<src>: return the 64-bit vector argument cast to
 * float32x2_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3298__ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) {
3299 return (float32x2_t)__a; }
3300__ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) {
3301 return (float32x2_t)__a; }
3302__ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) {
3303 return (float32x2_t)__a; }
3304__ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) {
3305 return (float32x2_t)__a; }
3306__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) {
3307 return (float32x2_t)__a; }
3308__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) {
3309 return (float32x2_t)__a; }
3310__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) {
3311 return (float32x2_t)__a; }
3312__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) {
3313 return (float32x2_t)__a; }
3314__ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) {
3315 return (float32x2_t)__a; }
3316__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) {
3317 return (float32x2_t)__a; }
3318__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) {
3319 return (float32x2_t)__a; }
/* vreinterpret_p8_<src>: return the 64-bit vector argument cast to
 * poly8x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3320__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) {
3321 return (poly8x8_t)__a; }
3322__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) {
3323 return (poly8x8_t)__a; }
3324__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) {
3325 return (poly8x8_t)__a; }
3326__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) {
3327 return (poly8x8_t)__a; }
3328__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) {
3329 return (poly8x8_t)__a; }
3330__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) {
3331 return (poly8x8_t)__a; }
3332__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) {
3333 return (poly8x8_t)__a; }
3334__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) {
3335 return (poly8x8_t)__a; }
3336__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) {
3337 return (poly8x8_t)__a; }
3338__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) {
3339 return (poly8x8_t)__a; }
3340__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) {
3341 return (poly8x8_t)__a; }
/* vreinterpret_p16_<src>: return the 64-bit vector argument cast to
 * poly16x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3342__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __a) {
3343 return (poly16x4_t)__a; }
3344__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) {
3345 return (poly16x4_t)__a; }
3346__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) {
3347 return (poly16x4_t)__a; }
3348__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) {
3349 return (poly16x4_t)__a; }
3350__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) {
3351 return (poly16x4_t)__a; }
3352__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) {
3353 return (poly16x4_t)__a; }
3354__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) {
3355 return (poly16x4_t)__a; }
3356__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) {
3357 return (poly16x4_t)__a; }
3358__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) {
3359 return (poly16x4_t)__a; }
3360__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) {
3361 return (poly16x4_t)__a; }
3362__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) {
3363 return (poly16x4_t)__a; }
/* vreinterpretq_s8_<src>: return the 128-bit vector argument cast to
 * int8x16_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3364__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) {
3365 return (int8x16_t)__a; }
3366__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) {
3367 return (int8x16_t)__a; }
3368__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __a) {
3369 return (int8x16_t)__a; }
3370__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) {
3371 return (int8x16_t)__a; }
3372__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) {
3373 return (int8x16_t)__a; }
3374__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) {
3375 return (int8x16_t)__a; }
3376__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) {
3377 return (int8x16_t)__a; }
3378__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) {
3379 return (int8x16_t)__a; }
3380__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) {
3381 return (int8x16_t)__a; }
3382__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) {
3383 return (int8x16_t)__a; }
3384__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) {
3385 return (int8x16_t)__a; }
/* vreinterpretq_s16_<src>: return the 128-bit vector argument cast to
 * int16x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3386__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) {
3387 return (int16x8_t)__a; }
3388__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) {
3389 return (int16x8_t)__a; }
3390__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __a) {
3391 return (int16x8_t)__a; }
3392__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) {
3393 return (int16x8_t)__a; }
3394__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) {
3395 return (int16x8_t)__a; }
3396__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) {
3397 return (int16x8_t)__a; }
3398__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) {
3399 return (int16x8_t)__a; }
3400__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) {
3401 return (int16x8_t)__a; }
3402__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) {
3403 return (int16x8_t)__a; }
3404__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) {
3405 return (int16x8_t)__a; }
3406__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) {
3407 return (int16x8_t)__a; }
/* vreinterpretq_s32_<src>: return the 128-bit vector argument cast to
 * int32x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3408__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) {
3409 return (int32x4_t)__a; }
3410__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) {
3411 return (int32x4_t)__a; }
3412__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) {
3413 return (int32x4_t)__a; }
3414__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) {
3415 return (int32x4_t)__a; }
3416__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) {
3417 return (int32x4_t)__a; }
3418__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) {
3419 return (int32x4_t)__a; }
3420__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __a) {
3421 return (int32x4_t)__a; }
3422__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) {
3423 return (int32x4_t)__a; }
3424__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) {
3425 return (int32x4_t)__a; }
3426__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) {
3427 return (int32x4_t)__a; }
3428__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) {
3429 return (int32x4_t)__a; }
/* vreinterpretq_s64_<src>: return the 128-bit vector argument cast to
 * int64x2_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3430__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) {
3431 return (int64x2_t)__a; }
3432__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) {
3433 return (int64x2_t)__a; }
3434__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) {
3435 return (int64x2_t)__a; }
3436__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) {
3437 return (int64x2_t)__a; }
3438__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __a) {
3439 return (int64x2_t)__a; }
3440__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) {
3441 return (int64x2_t)__a; }
3442__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) {
3443 return (int64x2_t)__a; }
3444__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) {
3445 return (int64x2_t)__a; }
3446__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) {
3447 return (int64x2_t)__a; }
3448__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) {
3449 return (int64x2_t)__a; }
3450__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) {
3451 return (int64x2_t)__a; }
/* vreinterpretq_u8_<src>: return the 128-bit vector argument cast to
 * uint8x16_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3452__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) {
3453 return (uint8x16_t)__a; }
3454__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) {
3455 return (uint8x16_t)__a; }
3456__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) {
3457 return (uint8x16_t)__a; }
3458__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) {
3459 return (uint8x16_t)__a; }
3460__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) {
3461 return (uint8x16_t)__a; }
3462__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) {
3463 return (uint8x16_t)__a; }
3464__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) {
3465 return (uint8x16_t)__a; }
3466__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) {
3467 return (uint8x16_t)__a; }
3468__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __a) {
3469 return (uint8x16_t)__a; }
3470__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) {
3471 return (uint8x16_t)__a; }
3472__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) {
3473 return (uint8x16_t)__a; }
/* vreinterpretq_u16_<src>: return the 128-bit vector argument cast to
 * uint16x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3474__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) {
3475 return (uint16x8_t)__a; }
3476__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) {
3477 return (uint16x8_t)__a; }
3478__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) {
3479 return (uint16x8_t)__a; }
3480__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) {
3481 return (uint16x8_t)__a; }
3482__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) {
3483 return (uint16x8_t)__a; }
3484__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) {
3485 return (uint16x8_t)__a; }
3486__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) {
3487 return (uint16x8_t)__a; }
3488__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) {
3489 return (uint16x8_t)__a; }
3490__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) {
3491 return (uint16x8_t)__a; }
3492__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) {
3493 return (uint16x8_t)__a; }
3494__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) {
3495 return (uint16x8_t)__a; }
/* vreinterpretq_u32_<src>: return the 128-bit vector argument cast to
 * uint32x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3496__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) {
3497 return (uint32x4_t)__a; }
3498__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) {
3499 return (uint32x4_t)__a; }
3500__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) {
3501 return (uint32x4_t)__a; }
3502__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) {
3503 return (uint32x4_t)__a; }
3504__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) {
3505 return (uint32x4_t)__a; }
3506__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) {
3507 return (uint32x4_t)__a; }
3508__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) {
3509 return (uint32x4_t)__a; }
3510__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) {
3511 return (uint32x4_t)__a; }
3512__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) {
3513 return (uint32x4_t)__a; }
3514__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) {
3515 return (uint32x4_t)__a; }
3516__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) {
3517 return (uint32x4_t)__a; }
/* vreinterpretq_u64_<src>: return the 128-bit vector argument cast to
 * uint64x2_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3518__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) {
3519 return (uint64x2_t)__a; }
3520__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) {
3521 return (uint64x2_t)__a; }
3522__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) {
3523 return (uint64x2_t)__a; }
3524__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) {
3525 return (uint64x2_t)__a; }
3526__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) {
3527 return (uint64x2_t)__a; }
3528__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) {
3529 return (uint64x2_t)__a; }
3530__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) {
3531 return (uint64x2_t)__a; }
3532__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) {
3533 return (uint64x2_t)__a; }
3534__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) {
3535 return (uint64x2_t)__a; }
3536__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) {
3537 return (uint64x2_t)__a; }
3538__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) {
3539 return (uint64x2_t)__a; }
/* vreinterpretq_f16_<src>: return the 128-bit vector argument cast to
 * float16x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3540__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) {
3541 return (float16x8_t)__a; }
3542__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) {
3543 return (float16x8_t)__a; }
3544__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __a) {
3545 return (float16x8_t)__a; }
3546__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) {
3547 return (float16x8_t)__a; }
3548__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) {
3549 return (float16x8_t)__a; }
3550__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) {
3551 return (float16x8_t)__a; }
3552__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) {
3553 return (float16x8_t)__a; }
3554__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) {
3555 return (float16x8_t)__a; }
3556__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) {
3557 return (float16x8_t)__a; }
3558__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) {
3559 return (float16x8_t)__a; }
3560__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) {
3561 return (float16x8_t)__a; }
/* vreinterpretq_f32_<src>: return the 128-bit vector argument cast to
 * float32x4_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3562__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __a) {
3563 return (float32x4_t)__a; }
3564__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) {
3565 return (float32x4_t)__a; }
3566__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) {
3567 return (float32x4_t)__a; }
3568__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) {
3569 return (float32x4_t)__a; }
3570__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) {
3571 return (float32x4_t)__a; }
3572__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) {
3573 return (float32x4_t)__a; }
3574__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) {
3575 return (float32x4_t)__a; }
3576__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) {
3577 return (float32x4_t)__a; }
3578__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) {
3579 return (float32x4_t)__a; }
3580__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) {
3581 return (float32x4_t)__a; }
3582__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) {
3583 return (float32x4_t)__a; }
/* vreinterpretq_p8_<src>: return the 128-bit vector argument cast to
 * poly8x16_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3584__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) {
3585 return (poly8x16_t)__a; }
3586__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) {
3587 return (poly8x16_t)__a; }
3588__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) {
3589 return (poly8x16_t)__a; }
3590__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) {
3591 return (poly8x16_t)__a; }
3592__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) {
3593 return (poly8x16_t)__a; }
3594__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) {
3595 return (poly8x16_t)__a; }
3596__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) {
3597 return (poly8x16_t)__a; }
3598__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) {
3599 return (poly8x16_t)__a; }
3600__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) {
3601 return (poly8x16_t)__a; }
3602__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) {
3603 return (poly8x16_t)__a; }
3604__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) {
3605 return (poly8x16_t)__a; }
/* vreinterpretq_p16_<src>: return the 128-bit vector argument cast to
 * poly16x8_t. No builtin is called; each body is a single vector cast.
 * NOTE(review): this cast is expected to reinterpret the bits rather than
 * convert lane values -- confirm against the compiler's vector-cast rules. */
3606__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) {
3607 return (poly16x8_t)__a; }
3608__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __a) {
3609 return (poly16x8_t)__a; }
3610__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) {
3611 return (poly16x8_t)__a; }
3612__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) {
3613 return (poly16x8_t)__a; }
3614__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) {
3615 return (poly16x8_t)__a; }
3616__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) {
3617 return (poly16x8_t)__a; }
3618__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) {
3619 return (poly16x8_t)__a; }
3620__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) {
3621 return (poly16x8_t)__a; }
3622__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) {
3623 return (poly16x8_t)__a; }
3624__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) {
3625 return (poly16x8_t)__a; }
3626__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __a) {
3627 return (poly16x8_t)__a; }
3628
/* vrev16_<t> / vrev16q_<t>: implemented with __builtin_shufflevector, passing
 * __a as both source vectors. The index list 1,0,3,2,... swaps each adjacent
 * pair of 8-bit lanes, i.e. it reverses lane order inside every 2-lane
 * (16-bit) group. Only 8-bit element types (s8, u8, p8) are provided; the
 * 64-bit forms list 8 indices and the 128-bit "q" forms list 16. */
3629__ai int8x8_t vrev16_s8(int8x8_t __a) {
3630 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3631__ai uint8x8_t vrev16_u8(uint8x8_t __a) {
3632 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3633__ai poly8x8_t vrev16_p8(poly8x8_t __a) {
3634 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3635__ai int8x16_t vrev16q_s8(int8x16_t __a) {
3636 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3637__ai uint8x16_t vrev16q_u8(uint8x16_t __a) {
3638 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3639__ai poly8x16_t vrev16q_p8(poly8x16_t __a) {
3640 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3641
/* vrev32_<t> / vrev32q_<t>: implemented with __builtin_shufflevector, passing
 * __a as both source vectors. The index lists reverse lane order inside every
 * 32-bit group: four lanes at a time for 8-bit elements (3,2,1,0, 7,6,5,4,
 * ...) and two lanes at a time for 16-bit elements (1,0, 3,2, ...).
 * Variants exist for s8/s16/u8/u16/p8/p16 in both 64-bit and 128-bit widths. */
3642__ai int8x8_t vrev32_s8(int8x8_t __a) {
3643 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3644__ai int16x4_t vrev32_s16(int16x4_t __a) {
3645 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3646__ai uint8x8_t vrev32_u8(uint8x8_t __a) {
3647 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3648__ai uint16x4_t vrev32_u16(uint16x4_t __a) {
3649 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3650__ai poly8x8_t vrev32_p8(poly8x8_t __a) {
3651 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3652__ai poly16x4_t vrev32_p16(poly16x4_t __a) {
3653 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3654__ai int8x16_t vrev32q_s8(int8x16_t __a) {
3655 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3656__ai int16x8_t vrev32q_s16(int16x8_t __a) {
3657 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3658__ai uint8x16_t vrev32q_u8(uint8x16_t __a) {
3659 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3660__ai uint16x8_t vrev32q_u16(uint16x8_t __a) {
3661 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3662__ai poly8x16_t vrev32q_p8(poly8x16_t __a) {
3663 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3664__ai poly16x8_t vrev32q_p16(poly16x8_t __a) {
3665 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3666
/* vrev64_<t> / vrev64q_<t>: implemented with __builtin_shufflevector, passing
 * __a as both source vectors. The index lists reverse lane order inside every
 * 64-bit group: eight lanes for 8-bit elements, four for 16-bit, two for
 * 32-bit. For the 64-bit vector forms that reverses the whole vector; for the
 * 128-bit "q" forms each half is reversed independently (e.g. 1,0,3,2 for
 * 32-bit lanes), and the halves themselves are not swapped.
 * Variants exist for s8/s16/s32, u8/u16/u32, p8/p16 and f32. */
3667__ai int8x8_t vrev64_s8(int8x8_t __a) {
3668 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3669__ai int16x4_t vrev64_s16(int16x4_t __a) {
3670 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3671__ai int32x2_t vrev64_s32(int32x2_t __a) {
3672 return __builtin_shufflevector(__a, __a, 1, 0); }
3673__ai uint8x8_t vrev64_u8(uint8x8_t __a) {
3674 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3675__ai uint16x4_t vrev64_u16(uint16x4_t __a) {
3676 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3677__ai uint32x2_t vrev64_u32(uint32x2_t __a) {
3678 return __builtin_shufflevector(__a, __a, 1, 0); }
3679__ai poly8x8_t vrev64_p8(poly8x8_t __a) {
3680 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3681__ai poly16x4_t vrev64_p16(poly16x4_t __a) {
3682 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3683__ai float32x2_t vrev64_f32(float32x2_t __a) {
3684 return __builtin_shufflevector(__a, __a, 1, 0); }
3685__ai int8x16_t vrev64q_s8(int8x16_t __a) {
3686 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
3687__ai int16x8_t vrev64q_s16(int16x8_t __a) {
3688 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3689__ai int32x4_t vrev64q_s32(int32x4_t __a) {
3690 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3691__ai uint8x16_t vrev64q_u8(uint8x16_t __a) {
3692 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
3693__ai uint16x8_t vrev64q_u16(uint16x8_t __a) {
3694 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3695__ai uint32x4_t vrev64q_u32(uint32x4_t __a) {
3696 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3697__ai poly8x16_t vrev64q_p8(poly8x16_t __a) {
3698 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
3699__ai poly16x8_t vrev64q_p16(poly16x8_t __a) {
3700 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3701__ai float32x4_t vrev64q_f32(float32x4_t __a) {
3702 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3703
/* vrhadd_<t>: 64-bit vector wrappers around __builtin_neon_vrhadd_v.
 * Both operands are cast to int8x8_t (the s8 variant already has that type),
 * a type code is appended (0..2 for s8/s16/s32, 16..18 for u8/u16/u32), and
 * the result is cast back to the caller's vector type. Only 8/16/32-bit lane
 * types are provided.
 * NOTE(review): by NEON naming this is a lane-wise rounding halving add; the
 * arithmetic is inside the builtin and cannot be confirmed from this header. */
3704__ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) {
3705 return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); }
3706__ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) {
3707 return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3708__ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) {
3709 return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3710__ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) {
3711 return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
3712__ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) {
3713 return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3714__ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) {
3715 return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
/* vrhaddq_<t>: 128-bit vector wrappers around __builtin_neon_vrhaddq_v.
 * Both operands are cast to int8x16_t (the s8 variant already has that type),
 * a type code is appended (32..34 for s8/s16/s32, 48..50 for u8/u16/u32), and
 * the result is cast back to the caller's vector type.
 * NOTE(review): by NEON naming this is a lane-wise rounding halving add; the
 * arithmetic is inside the builtin and cannot be confirmed from this header. */
3716__ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) {
3717 return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 32); }
3718__ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) {
3719 return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3720__ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) {
3721 return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3722__ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) {
3723 return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
3724__ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) {
3725 return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3726__ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) {
3727 return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3728
/* vrshl_<t>: 64-bit vector wrappers around __builtin_neon_vrshl_v.
 * __a has the named element type; __b is always the SIGNED vector with the
 * same lane width, even for the unsigned variants (e.g. vrshl_u8 takes
 * uint8x8_t __a and int8x8_t __b). Operands are cast to int8x8_t where they
 * do not already have that type. Type codes: 0..3 for s8/s16/s32/s64 and
 * 16..19 for u8/u16/u32/u64, following the type of __a.
 * NOTE(review): by NEON naming this is a rounding shift of __a by the
 * per-lane signed counts in __b; the arithmetic is inside the builtin. */
3729__ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) {
3730 return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); }
3731__ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) {
3732 return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3733__ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) {
3734 return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3735__ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) {
3736 return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3737__ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) {
3738 return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 16); }
3739__ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) {
3740 return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3741__ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) {
3742 return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3743__ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) {
3744 return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
3745__ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) {
3746 return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 32); }
3747__ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) {
3748 return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3749__ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) {
3750 return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3751__ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) {
3752 return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3753__ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) {
3754 return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 48); }
3755__ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) {
3756 return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3757__ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) {
3758 return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3759__ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) {
3760 return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
3761
/* vrshrn_n_*: statement-expression macros around __builtin_neon_vrshrn_n_v.
 * The vector argument `a` is copied once into a typed 128-bit local __a and
 * cast to int8x16_t; `__b` is pasted into the builtin call unchanged.
 * The result is a 64-bit vector whose lanes are half the width of the input
 * lanes (e.g. int16x8_t in, int8x8_t out), and the trailing selector matches
 * the RESULT type: 0/1/2 signed, 16/17/18 unsigned.
 * NOTE(review): presumably macros rather than functions because __b must
 * reach the builtin as a compile-time constant; by name this is "rounding
 * shift right narrow by immediate" -- confirm both against the ARM docs. */
3762#define vrshrn_n_s16(a, __b) __extension__ ({ \
3763 int16x8_t __a = (a); \
3764 (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); })
3765#define vrshrn_n_s32(a, __b) __extension__ ({ \
3766 int32x4_t __a = (a); \
3767 (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); })
3768#define vrshrn_n_s64(a, __b) __extension__ ({ \
3769 int64x2_t __a = (a); \
3770 (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); })
3771#define vrshrn_n_u16(a, __b) __extension__ ({ \
3772 uint16x8_t __a = (a); \
3773 (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 16); })
3774#define vrshrn_n_u32(a, __b) __extension__ ({ \
3775 uint32x4_t __a = (a); \
3776 (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 17); })
3777#define vrshrn_n_u64(a, __b) __extension__ ({ \
3778 uint64x2_t __a = (a); \
3779 (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 18); })
3780
/* vrshr_n_* / vrshrq_n_*: statement-expression macros around
 * __builtin_neon_vrshr_n_v (64-bit) and __builtin_neon_vrshrq_n_v (128-bit).
 * `a` is copied once into a typed local __a, cast to int8x8_t / int8x16_t
 * where it is not already that type, and `__b` is pasted into the builtin
 * call unchanged.  The result has the same vector type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "rounding shift right by immediate";
 * __b is presumably required to be a constant expression -- confirm. */
3781#define vrshr_n_s8(a, __b) __extension__ ({ \
3782 int8x8_t __a = (a); \
3783 (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); })
3784#define vrshr_n_s16(a, __b) __extension__ ({ \
3785 int16x4_t __a = (a); \
3786 (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); })
3787#define vrshr_n_s32(a, __b) __extension__ ({ \
3788 int32x2_t __a = (a); \
3789 (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); })
3790#define vrshr_n_s64(a, __b) __extension__ ({ \
3791 int64x1_t __a = (a); \
3792 (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); })
3793#define vrshr_n_u8(a, __b) __extension__ ({ \
3794 uint8x8_t __a = (a); \
3795 (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 16); })
3796#define vrshr_n_u16(a, __b) __extension__ ({ \
3797 uint16x4_t __a = (a); \
3798 (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 17); })
3799#define vrshr_n_u32(a, __b) __extension__ ({ \
3800 uint32x2_t __a = (a); \
3801 (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 18); })
3802#define vrshr_n_u64(a, __b) __extension__ ({ \
3803 uint64x1_t __a = (a); \
3804 (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 19); })
/* 128-bit (q) forms. */
3805#define vrshrq_n_s8(a, __b) __extension__ ({ \
3806 int8x16_t __a = (a); \
3807 (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 32); })
3808#define vrshrq_n_s16(a, __b) __extension__ ({ \
3809 int16x8_t __a = (a); \
3810 (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 33); })
3811#define vrshrq_n_s32(a, __b) __extension__ ({ \
3812 int32x4_t __a = (a); \
3813 (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 34); })
3814#define vrshrq_n_s64(a, __b) __extension__ ({ \
3815 int64x2_t __a = (a); \
3816 (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 35); })
3817#define vrshrq_n_u8(a, __b) __extension__ ({ \
3818 uint8x16_t __a = (a); \
3819 (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 48); })
3820#define vrshrq_n_u16(a, __b) __extension__ ({ \
3821 uint16x8_t __a = (a); \
3822 (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 49); })
3823#define vrshrq_n_u32(a, __b) __extension__ ({ \
3824 uint32x4_t __a = (a); \
3825 (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 50); })
3826#define vrshrq_n_u64(a, __b) __extension__ ({ \
3827 uint64x2_t __a = (a); \
3828 (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 51); })
3829
/* vrsqrte_* / vrsqrteq_*: one-operand wrappers around
 * __builtin_neon_vrsqrte_v (64-bit) and __builtin_neon_vrsqrteq_v (128-bit).
 * The operand is cast to int8x8_t / int8x16_t and the builtin's result is
 * cast back to the operand's own type.  Only f32 and u32 element types are
 * provided; the trailing selector is 8 (f32) or 18 (u32), plus 32 for q.
 * NOTE(review): by name presumably "reciprocal square root estimate";
 * accuracy and input-range behaviour are not visible here -- see ARM docs. */
3830__ai float32x2_t vrsqrte_f32(float32x2_t __a) {
3831 return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 8); }
3832__ai uint32x2_t vrsqrte_u32(uint32x2_t __a) {
3833 return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 18); }
3834__ai float32x4_t vrsqrteq_f32(float32x4_t __a) {
3835 return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 40); }
3836__ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) {
3837 return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 50); }
3838
/* vrsqrts_f32 / vrsqrtsq_f32: two-operand float wrappers around
 * __builtin_neon_vrsqrts_v (64-bit) and __builtin_neon_vrsqrtsq_v (128-bit).
 * Both operands are cast to int8x8_t / int8x16_t; the result is cast back to
 * the float vector type.  Trailing selector: 8 (64-bit), 40 (128-bit).
 * NOTE(review): by name presumably the "reciprocal square root step" used to
 * refine a vrsqrte estimate -- confirm the exact formula in the ARM docs. */
3839__ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) {
3840 return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 8); }
3841__ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) {
3842 return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
3843
/* vrsra_n_* / vrsraq_n_*: statement-expression macros around
 * __builtin_neon_vrsra_n_v (64-bit) and __builtin_neon_vrsraq_n_v (128-bit).
 * `a` and `b` are each copied once into typed locals __a and __b of the same
 * vector type, cast to int8x8_t / int8x16_t where needed, and `__c` is pasted
 * into the builtin call unchanged.  The result has the same type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "rounding shift right by immediate and
 * accumulate" (a + (b >> c)); __c is presumably required to be a constant
 * expression -- confirm against the ARM docs. */
3844#define vrsra_n_s8(a, b, __c) __extension__ ({ \
3845 int8x8_t __a = (a); int8x8_t __b = (b); \
3846 (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); })
3847#define vrsra_n_s16(a, b, __c) __extension__ ({ \
3848 int16x4_t __a = (a); int16x4_t __b = (b); \
3849 (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
3850#define vrsra_n_s32(a, b, __c) __extension__ ({ \
3851 int32x2_t __a = (a); int32x2_t __b = (b); \
3852 (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
3853#define vrsra_n_s64(a, b, __c) __extension__ ({ \
3854 int64x1_t __a = (a); int64x1_t __b = (b); \
3855 (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
3856#define vrsra_n_u8(a, b, __c) __extension__ ({ \
3857 uint8x8_t __a = (a); uint8x8_t __b = (b); \
3858 (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
3859#define vrsra_n_u16(a, b, __c) __extension__ ({ \
3860 uint16x4_t __a = (a); uint16x4_t __b = (b); \
3861 (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
3862#define vrsra_n_u32(a, b, __c) __extension__ ({ \
3863 uint32x2_t __a = (a); uint32x2_t __b = (b); \
3864 (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
3865#define vrsra_n_u64(a, b, __c) __extension__ ({ \
3866 uint64x1_t __a = (a); uint64x1_t __b = (b); \
3867 (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
/* 128-bit (q) forms. */
3868#define vrsraq_n_s8(a, b, __c) __extension__ ({ \
3869 int8x16_t __a = (a); int8x16_t __b = (b); \
3870 (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 32); })
3871#define vrsraq_n_s16(a, b, __c) __extension__ ({ \
3872 int16x8_t __a = (a); int16x8_t __b = (b); \
3873 (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
3874#define vrsraq_n_s32(a, b, __c) __extension__ ({ \
3875 int32x4_t __a = (a); int32x4_t __b = (b); \
3876 (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
3877#define vrsraq_n_s64(a, b, __c) __extension__ ({ \
3878 int64x2_t __a = (a); int64x2_t __b = (b); \
3879 (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
3880#define vrsraq_n_u8(a, b, __c) __extension__ ({ \
3881 uint8x16_t __a = (a); uint8x16_t __b = (b); \
3882 (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
3883#define vrsraq_n_u16(a, b, __c) __extension__ ({ \
3884 uint16x8_t __a = (a); uint16x8_t __b = (b); \
3885 (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
3886#define vrsraq_n_u32(a, b, __c) __extension__ ({ \
3887 uint32x4_t __a = (a); uint32x4_t __b = (b); \
3888 (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
3889#define vrsraq_n_u64(a, b, __c) __extension__ ({ \
3890 uint64x2_t __a = (a); uint64x2_t __b = (b); \
3891 (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
3892
/* vrsubhn_*: two-operand wrappers around __builtin_neon_vrsubhn_v.
 * Both operands are 128-bit vectors of the same type, cast to int8x16_t for
 * the call.  The result is a 64-bit vector whose lanes are half the width of
 * the input lanes (e.g. int16x8_t in, int8x8_t out).  The trailing selector
 * matches the RESULT type: 0/1/2 signed, 16/17/18 unsigned.
 * NOTE(review): by name presumably "rounding subtract, return high half
 * narrow" -- confirm against the ARM docs. */
3893__ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) {
3894 return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
3895__ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) {
3896 return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
3897__ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) {
3898 return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
3899__ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) {
3900 return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
3901__ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) {
3902 return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
3903__ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) {
3904 return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
3905
/* vset_lane_* / vsetq_lane_*: statement-expression macros that forward to the
 * __builtin_neon_vset_lane_{i8,i16,i32,i64,f32} builtins (64-bit vectors) or
 * the __builtin_neon_vsetq_lane_* builtins (128-bit vectors).
 *   a   -- scalar, copied once into a local __a of the element type
 *   b   -- vector, copied once into a typed local __b
 *   __c -- pasted into the builtin call unchanged
 * There is no trailing type selector here; the builtin is chosen by element
 * width instead.  Unsigned and polynomial vectors are cast to the signed
 * vector type of the same width for the call, and the result is cast back to
 * the type of `b`.
 * NOTE(review): by name these presumably return `b` with lane __c replaced by
 * `a`, and __c is presumably a constant lane index -- confirm in ARM docs. */
3906#define vset_lane_u8(a, b, __c) __extension__ ({ \
3907 uint8_t __a = (a); uint8x8_t __b = (b); \
3908 (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
3909#define vset_lane_u16(a, b, __c) __extension__ ({ \
3910 uint16_t __a = (a); uint16x4_t __b = (b); \
3911 (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
3912#define vset_lane_u32(a, b, __c) __extension__ ({ \
3913 uint32_t __a = (a); uint32x2_t __b = (b); \
3914 (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); })
3915#define vset_lane_s8(a, b, __c) __extension__ ({ \
3916 int8_t __a = (a); int8x8_t __b = (b); \
3917 (int8x8_t)__builtin_neon_vset_lane_i8(__a, __b, __c); })
3918#define vset_lane_s16(a, b, __c) __extension__ ({ \
3919 int16_t __a = (a); int16x4_t __b = (b); \
3920 (int16x4_t)__builtin_neon_vset_lane_i16(__a, __b, __c); })
3921#define vset_lane_s32(a, b, __c) __extension__ ({ \
3922 int32_t __a = (a); int32x2_t __b = (b); \
3923 (int32x2_t)__builtin_neon_vset_lane_i32(__a, __b, __c); })
3924#define vset_lane_p8(a, b, __c) __extension__ ({ \
3925 poly8_t __a = (a); poly8x8_t __b = (b); \
3926 (poly8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
3927#define vset_lane_p16(a, b, __c) __extension__ ({ \
3928 poly16_t __a = (a); poly16x4_t __b = (b); \
3929 (poly16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
3930#define vset_lane_f32(a, b, __c) __extension__ ({ \
3931 float32_t __a = (a); float32x2_t __b = (b); \
3932 (float32x2_t)__builtin_neon_vset_lane_f32(__a, __b, __c); })
/* 128-bit (q) forms for 8/16/32-bit and float lanes. */
3933#define vsetq_lane_u8(a, b, __c) __extension__ ({ \
3934 uint8_t __a = (a); uint8x16_t __b = (b); \
3935 (uint8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
3936#define vsetq_lane_u16(a, b, __c) __extension__ ({ \
3937 uint16_t __a = (a); uint16x8_t __b = (b); \
3938 (uint16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
3939#define vsetq_lane_u32(a, b, __c) __extension__ ({ \
3940 uint32_t __a = (a); uint32x4_t __b = (b); \
3941 (uint32x4_t)__builtin_neon_vsetq_lane_i32(__a, (int32x4_t)__b, __c); })
3942#define vsetq_lane_s8(a, b, __c) __extension__ ({ \
3943 int8_t __a = (a); int8x16_t __b = (b); \
3944 (int8x16_t)__builtin_neon_vsetq_lane_i8(__a, __b, __c); })
3945#define vsetq_lane_s16(a, b, __c) __extension__ ({ \
3946 int16_t __a = (a); int16x8_t __b = (b); \
3947 (int16x8_t)__builtin_neon_vsetq_lane_i16(__a, __b, __c); })
3948#define vsetq_lane_s32(a, b, __c) __extension__ ({ \
3949 int32_t __a = (a); int32x4_t __b = (b); \
3950 (int32x4_t)__builtin_neon_vsetq_lane_i32(__a, __b, __c); })
3951#define vsetq_lane_p8(a, b, __c) __extension__ ({ \
3952 poly8_t __a = (a); poly8x16_t __b = (b); \
3953 (poly8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
3954#define vsetq_lane_p16(a, b, __c) __extension__ ({ \
3955 poly16_t __a = (a); poly16x8_t __b = (b); \
3956 (poly16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
3957#define vsetq_lane_f32(a, b, __c) __extension__ ({ \
3958 float32_t __a = (a); float32x4_t __b = (b); \
3959 (float32x4_t)__builtin_neon_vsetq_lane_f32(__a, __b, __c); })
/* 64-bit-lane forms (both vector widths) are listed last, using the _i64
 * builtins. */
3960#define vset_lane_s64(a, b, __c) __extension__ ({ \
3961 int64_t __a = (a); int64x1_t __b = (b); \
3962 (int64x1_t)__builtin_neon_vset_lane_i64(__a, __b, __c); })
3963#define vset_lane_u64(a, b, __c) __extension__ ({ \
3964 uint64_t __a = (a); uint64x1_t __b = (b); \
3965 (uint64x1_t)__builtin_neon_vset_lane_i64(__a, (int64x1_t)__b, __c); })
3966#define vsetq_lane_s64(a, b, __c) __extension__ ({ \
3967 int64_t __a = (a); int64x2_t __b = (b); \
3968 (int64x2_t)__builtin_neon_vsetq_lane_i64(__a, __b, __c); })
3969#define vsetq_lane_u64(a, b, __c) __extension__ ({ \
3970 uint64_t __a = (a); uint64x2_t __b = (b); \
3971 (uint64x2_t)__builtin_neon_vsetq_lane_i64(__a, (int64x2_t)__b, __c); })
3972
/* vshl_* / vshlq_*: two-operand wrappers around __builtin_neon_vshl_v
 * (64-bit vectors) and __builtin_neon_vshlq_v (128-bit vectors).
 * __a has the same type as the result; __b is always a SIGNED vector with the
 * same lane layout, including for the unsigned (u8..u64) variants.
 * Operands are cast to int8x8_t / int8x16_t where needed, and the builtin's
 * result is cast back to the declared return type.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "shift left by register", with per-lane
 * shift amounts taken from __b -- confirm against the ARM docs. */
3973__ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) {
3974 return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); }
3975__ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) {
3976 return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3977__ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) {
3978 return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3979__ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) {
3980 return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3981__ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) {
3982 return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 16); }
3983__ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) {
3984 return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3985__ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) {
3986 return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3987__ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) {
3988 return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
/* 128-bit (q) forms. */
3989__ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) {
3990 return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 32); }
3991__ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) {
3992 return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3993__ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) {
3994 return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3995__ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) {
3996 return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3997__ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) {
3998 return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 48); }
3999__ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) {
4000 return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
4001__ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) {
4002 return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
4003__ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) {
4004 return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
4005
/* vshll_n_*: statement-expression macros around __builtin_neon_vshll_n_v.
 * The vector argument `a` is copied once into a typed 64-bit local __a and
 * cast to int8x8_t where needed; `__b` is pasted into the call unchanged.
 * The result is a 128-bit vector whose lanes are twice the width of the
 * input lanes (e.g. int8x8_t in, int16x8_t out), and the trailing selector
 * matches the RESULT type: 33/34/35 signed, 49/50/51 unsigned.
 * NOTE(review): by name presumably "shift left long by immediate"; __b is
 * presumably required to be a constant expression -- confirm in ARM docs. */
4006#define vshll_n_s8(a, __b) __extension__ ({ \
4007 int8x8_t __a = (a); \
4008 (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 33); })
4009#define vshll_n_s16(a, __b) __extension__ ({ \
4010 int16x4_t __a = (a); \
4011 (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 34); })
4012#define vshll_n_s32(a, __b) __extension__ ({ \
4013 int32x2_t __a = (a); \
4014 (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 35); })
4015#define vshll_n_u8(a, __b) __extension__ ({ \
4016 uint8x8_t __a = (a); \
4017 (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 49); })
4018#define vshll_n_u16(a, __b) __extension__ ({ \
4019 uint16x4_t __a = (a); \
4020 (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 50); })
4021#define vshll_n_u32(a, __b) __extension__ ({ \
4022 uint32x2_t __a = (a); \
4023 (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 51); })
4024
/* vshl_n_* / vshlq_n_*: statement-expression macros around
 * __builtin_neon_vshl_n_v (64-bit) and __builtin_neon_vshlq_n_v (128-bit).
 * `a` is copied once into a typed local __a, cast to int8x8_t / int8x16_t
 * where it is not already that type, and `__b` is pasted into the builtin
 * call unchanged.  The result has the same vector type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "shift left by immediate"; __b is
 * presumably required to be a constant expression -- confirm in ARM docs. */
4025#define vshl_n_s8(a, __b) __extension__ ({ \
4026 int8x8_t __a = (a); \
4027 (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); })
4028#define vshl_n_s16(a, __b) __extension__ ({ \
4029 int16x4_t __a = (a); \
4030 (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); })
4031#define vshl_n_s32(a, __b) __extension__ ({ \
4032 int32x2_t __a = (a); \
4033 (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); })
4034#define vshl_n_s64(a, __b) __extension__ ({ \
4035 int64x1_t __a = (a); \
4036 (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); })
4037#define vshl_n_u8(a, __b) __extension__ ({ \
4038 uint8x8_t __a = (a); \
4039 (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 16); })
4040#define vshl_n_u16(a, __b) __extension__ ({ \
4041 uint16x4_t __a = (a); \
4042 (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 17); })
4043#define vshl_n_u32(a, __b) __extension__ ({ \
4044 uint32x2_t __a = (a); \
4045 (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 18); })
4046#define vshl_n_u64(a, __b) __extension__ ({ \
4047 uint64x1_t __a = (a); \
4048 (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 19); })
/* 128-bit (q) forms. */
4049#define vshlq_n_s8(a, __b) __extension__ ({ \
4050 int8x16_t __a = (a); \
4051 (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 32); })
4052#define vshlq_n_s16(a, __b) __extension__ ({ \
4053 int16x8_t __a = (a); \
4054 (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 33); })
4055#define vshlq_n_s32(a, __b) __extension__ ({ \
4056 int32x4_t __a = (a); \
4057 (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 34); })
4058#define vshlq_n_s64(a, __b) __extension__ ({ \
4059 int64x2_t __a = (a); \
4060 (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 35); })
4061#define vshlq_n_u8(a, __b) __extension__ ({ \
4062 uint8x16_t __a = (a); \
4063 (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 48); })
4064#define vshlq_n_u16(a, __b) __extension__ ({ \
4065 uint16x8_t __a = (a); \
4066 (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 49); })
4067#define vshlq_n_u32(a, __b) __extension__ ({ \
4068 uint32x4_t __a = (a); \
4069 (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 50); })
4070#define vshlq_n_u64(a, __b) __extension__ ({ \
4071 uint64x2_t __a = (a); \
4072 (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 51); })
4073
/* vshrn_n_*: statement-expression macros around __builtin_neon_vshrn_n_v.
 * The vector argument `a` is copied once into a typed 128-bit local __a and
 * cast to int8x16_t; `__b` is pasted into the builtin call unchanged.
 * The result is a 64-bit vector whose lanes are half the width of the input
 * lanes (e.g. int16x8_t in, int8x8_t out), and the trailing selector matches
 * the RESULT type: 0/1/2 signed, 16/17/18 unsigned.
 * NOTE(review): by name presumably "shift right narrow by immediate"; __b is
 * presumably required to be a constant expression -- confirm in ARM docs. */
4074#define vshrn_n_s16(a, __b) __extension__ ({ \
4075 int16x8_t __a = (a); \
4076 (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); })
4077#define vshrn_n_s32(a, __b) __extension__ ({ \
4078 int32x4_t __a = (a); \
4079 (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); })
4080#define vshrn_n_s64(a, __b) __extension__ ({ \
4081 int64x2_t __a = (a); \
4082 (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); })
4083#define vshrn_n_u16(a, __b) __extension__ ({ \
4084 uint16x8_t __a = (a); \
4085 (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 16); })
4086#define vshrn_n_u32(a, __b) __extension__ ({ \
4087 uint32x4_t __a = (a); \
4088 (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 17); })
4089#define vshrn_n_u64(a, __b) __extension__ ({ \
4090 uint64x2_t __a = (a); \
4091 (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 18); })
4092
/* vshr_n_* / vshrq_n_*: statement-expression macros around
 * __builtin_neon_vshr_n_v (64-bit) and __builtin_neon_vshrq_n_v (128-bit).
 * `a` is copied once into a typed local __a, cast to int8x8_t / int8x16_t
 * where it is not already that type, and `__b` is pasted into the builtin
 * call unchanged.  The result has the same vector type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "shift right by immediate"; __b is
 * presumably required to be a constant expression -- confirm in ARM docs. */
4093#define vshr_n_s8(a, __b) __extension__ ({ \
4094 int8x8_t __a = (a); \
4095 (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); })
4096#define vshr_n_s16(a, __b) __extension__ ({ \
4097 int16x4_t __a = (a); \
4098 (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); })
4099#define vshr_n_s32(a, __b) __extension__ ({ \
4100 int32x2_t __a = (a); \
4101 (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); })
4102#define vshr_n_s64(a, __b) __extension__ ({ \
4103 int64x1_t __a = (a); \
4104 (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); })
4105#define vshr_n_u8(a, __b) __extension__ ({ \
4106 uint8x8_t __a = (a); \
4107 (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 16); })
4108#define vshr_n_u16(a, __b) __extension__ ({ \
4109 uint16x4_t __a = (a); \
4110 (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 17); })
4111#define vshr_n_u32(a, __b) __extension__ ({ \
4112 uint32x2_t __a = (a); \
4113 (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 18); })
4114#define vshr_n_u64(a, __b) __extension__ ({ \
4115 uint64x1_t __a = (a); \
4116 (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 19); })
/* 128-bit (q) forms. */
4117#define vshrq_n_s8(a, __b) __extension__ ({ \
4118 int8x16_t __a = (a); \
4119 (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 32); })
4120#define vshrq_n_s16(a, __b) __extension__ ({ \
4121 int16x8_t __a = (a); \
4122 (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 33); })
4123#define vshrq_n_s32(a, __b) __extension__ ({ \
4124 int32x4_t __a = (a); \
4125 (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 34); })
4126#define vshrq_n_s64(a, __b) __extension__ ({ \
4127 int64x2_t __a = (a); \
4128 (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 35); })
4129#define vshrq_n_u8(a, __b) __extension__ ({ \
4130 uint8x16_t __a = (a); \
4131 (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 48); })
4132#define vshrq_n_u16(a, __b) __extension__ ({ \
4133 uint16x8_t __a = (a); \
4134 (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 49); })
4135#define vshrq_n_u32(a, __b) __extension__ ({ \
4136 uint32x4_t __a = (a); \
4137 (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 50); })
4138#define vshrq_n_u64(a, __b) __extension__ ({ \
4139 uint64x2_t __a = (a); \
4140 (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 51); })
4141
/* vsli_n_* / vsliq_n_*: statement-expression macros around
 * __builtin_neon_vsli_n_v (64-bit) and __builtin_neon_vsliq_n_v (128-bit).
 * `a` and `b` are each copied once into typed locals __a and __b of the same
 * vector type, cast to int8x8_t / int8x16_t where needed, and `__c` is pasted
 * into the builtin call unchanged.  The result has the same type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, 4/5 for p8/p16,
 * plus 32 for the q forms (so 36/37 for the q polynomial variants).
 * NOTE(review): by name presumably "shift left and insert" (b shifted left by
 * __c and merged into a); __c is presumably required to be a constant
 * expression -- confirm against the ARM docs. */
4142#define vsli_n_s8(a, b, __c) __extension__ ({ \
4143 int8x8_t __a = (a); int8x8_t __b = (b); \
4144 (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); })
4145#define vsli_n_s16(a, b, __c) __extension__ ({ \
4146 int16x4_t __a = (a); int16x4_t __b = (b); \
4147 (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4148#define vsli_n_s32(a, b, __c) __extension__ ({ \
4149 int32x2_t __a = (a); int32x2_t __b = (b); \
4150 (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4151#define vsli_n_s64(a, b, __c) __extension__ ({ \
4152 int64x1_t __a = (a); int64x1_t __b = (b); \
4153 (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4154#define vsli_n_u8(a, b, __c) __extension__ ({ \
4155 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4156 (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4157#define vsli_n_u16(a, b, __c) __extension__ ({ \
4158 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4159 (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4160#define vsli_n_u32(a, b, __c) __extension__ ({ \
4161 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4162 (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4163#define vsli_n_u64(a, b, __c) __extension__ ({ \
4164 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4165 (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4166#define vsli_n_p8(a, b, __c) __extension__ ({ \
4167 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4168 (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4169#define vsli_n_p16(a, b, __c) __extension__ ({ \
4170 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4171 (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
/* 128-bit (q) forms. */
4172#define vsliq_n_s8(a, b, __c) __extension__ ({ \
4173 int8x16_t __a = (a); int8x16_t __b = (b); \
4174 (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 32); })
4175#define vsliq_n_s16(a, b, __c) __extension__ ({ \
4176 int16x8_t __a = (a); int16x8_t __b = (b); \
4177 (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4178#define vsliq_n_s32(a, b, __c) __extension__ ({ \
4179 int32x4_t __a = (a); int32x4_t __b = (b); \
4180 (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4181#define vsliq_n_s64(a, b, __c) __extension__ ({ \
4182 int64x2_t __a = (a); int64x2_t __b = (b); \
4183 (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4184#define vsliq_n_u8(a, b, __c) __extension__ ({ \
4185 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4186 (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4187#define vsliq_n_u16(a, b, __c) __extension__ ({ \
4188 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4189 (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4190#define vsliq_n_u32(a, b, __c) __extension__ ({ \
4191 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4192 (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4193#define vsliq_n_u64(a, b, __c) __extension__ ({ \
4194 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4195 (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4196#define vsliq_n_p8(a, b, __c) __extension__ ({ \
4197 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4198 (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4199#define vsliq_n_p16(a, b, __c) __extension__ ({ \
4200 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4201 (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4202
/* vsra_n_* / vsraq_n_*: statement-expression macros around
 * __builtin_neon_vsra_n_v (64-bit) and __builtin_neon_vsraq_n_v (128-bit).
 * `a` and `b` are each copied once into typed locals __a and __b of the same
 * vector type, cast to int8x8_t / int8x16_t where needed, and `__c` is pasted
 * into the builtin call unchanged.  The result has the same type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, plus 32 for q.
 * NOTE(review): by name presumably "shift right by immediate and accumulate"
 * (a + (b >> c)); __c is presumably required to be a constant expression --
 * confirm against the ARM docs. */
4203#define vsra_n_s8(a, b, __c) __extension__ ({ \
4204 int8x8_t __a = (a); int8x8_t __b = (b); \
4205 (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); })
4206#define vsra_n_s16(a, b, __c) __extension__ ({ \
4207 int16x4_t __a = (a); int16x4_t __b = (b); \
4208 (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4209#define vsra_n_s32(a, b, __c) __extension__ ({ \
4210 int32x2_t __a = (a); int32x2_t __b = (b); \
4211 (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4212#define vsra_n_s64(a, b, __c) __extension__ ({ \
4213 int64x1_t __a = (a); int64x1_t __b = (b); \
4214 (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4215#define vsra_n_u8(a, b, __c) __extension__ ({ \
4216 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4217 (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4218#define vsra_n_u16(a, b, __c) __extension__ ({ \
4219 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4220 (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4221#define vsra_n_u32(a, b, __c) __extension__ ({ \
4222 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4223 (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4224#define vsra_n_u64(a, b, __c) __extension__ ({ \
4225 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4226 (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
/* 128-bit (q) forms. */
4227#define vsraq_n_s8(a, b, __c) __extension__ ({ \
4228 int8x16_t __a = (a); int8x16_t __b = (b); \
4229 (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 32); })
4230#define vsraq_n_s16(a, b, __c) __extension__ ({ \
4231 int16x8_t __a = (a); int16x8_t __b = (b); \
4232 (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4233#define vsraq_n_s32(a, b, __c) __extension__ ({ \
4234 int32x4_t __a = (a); int32x4_t __b = (b); \
4235 (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4236#define vsraq_n_s64(a, b, __c) __extension__ ({ \
4237 int64x2_t __a = (a); int64x2_t __b = (b); \
4238 (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4239#define vsraq_n_u8(a, b, __c) __extension__ ({ \
4240 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4241 (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4242#define vsraq_n_u16(a, b, __c) __extension__ ({ \
4243 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4244 (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4245#define vsraq_n_u32(a, b, __c) __extension__ ({ \
4246 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4247 (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4248#define vsraq_n_u64(a, b, __c) __extension__ ({ \
4249 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4250 (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4251
/* vsri_n_* / vsriq_n_*: statement-expression macros around
 * __builtin_neon_vsri_n_v (64-bit) and __builtin_neon_vsriq_n_v (128-bit).
 * `a` and `b` are each copied once into typed locals __a and __b of the same
 * vector type, cast to int8x8_t / int8x16_t where needed, and `__c` is pasted
 * into the builtin call unchanged.  The result has the same type as `a`.
 * Trailing selector: 0..3 for s8..s64, 16..19 for u8..u64, 4/5 for p8/p16,
 * plus 32 for the q forms (so 36/37 for the q polynomial variants).
 * NOTE(review): by name presumably "shift right and insert" (b shifted right
 * by __c and merged into a); __c is presumably required to be a constant
 * expression -- confirm against the ARM docs. */
4252#define vsri_n_s8(a, b, __c) __extension__ ({ \
4253 int8x8_t __a = (a); int8x8_t __b = (b); \
4254 (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); })
4255#define vsri_n_s16(a, b, __c) __extension__ ({ \
4256 int16x4_t __a = (a); int16x4_t __b = (b); \
4257 (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4258#define vsri_n_s32(a, b, __c) __extension__ ({ \
4259 int32x2_t __a = (a); int32x2_t __b = (b); \
4260 (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4261#define vsri_n_s64(a, b, __c) __extension__ ({ \
4262 int64x1_t __a = (a); int64x1_t __b = (b); \
4263 (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4264#define vsri_n_u8(a, b, __c) __extension__ ({ \
4265 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4266 (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4267#define vsri_n_u16(a, b, __c) __extension__ ({ \
4268 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4269 (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4270#define vsri_n_u32(a, b, __c) __extension__ ({ \
4271 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4272 (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4273#define vsri_n_u64(a, b, __c) __extension__ ({ \
4274 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4275 (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4276#define vsri_n_p8(a, b, __c) __extension__ ({ \
4277 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4278 (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4279#define vsri_n_p16(a, b, __c) __extension__ ({ \
4280 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4281 (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
/* 128-bit (q) forms. */
4282#define vsriq_n_s8(a, b, __c) __extension__ ({ \
4283 int8x16_t __a = (a); int8x16_t __b = (b); \
4284 (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 32); })
4285#define vsriq_n_s16(a, b, __c) __extension__ ({ \
4286 int16x8_t __a = (a); int16x8_t __b = (b); \
4287 (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4288#define vsriq_n_s32(a, b, __c) __extension__ ({ \
4289 int32x4_t __a = (a); int32x4_t __b = (b); \
4290 (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4291#define vsriq_n_s64(a, b, __c) __extension__ ({ \
4292 int64x2_t __a = (a); int64x2_t __b = (b); \
4293 (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4294#define vsriq_n_u8(a, b, __c) __extension__ ({ \
4295 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4296 (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4297#define vsriq_n_u16(a, b, __c) __extension__ ({ \
4298 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4299 (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4300#define vsriq_n_u32(a, b, __c) __extension__ ({ \
4301 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4302 (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4303#define vsriq_n_u64(a, b, __c) __extension__ ({ \
4304 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4305 (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4306#define vsriq_n_p8(a, b, __c) __extension__ ({ \
4307 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4308 (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4309#define vsriq_n_p16(a, b, __c) __extension__ ({ \
4310 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4311 (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4312
/* vst1q_<type>(__a, b), 128-bit vectors.
 * b is copied into a typed local (single evaluation plus type check),
 * reinterpreted as int8x16_t and handed to __builtin_neon_vst1q_v together
 * with __a, which is forwarded untouched.  The trailing literal differs per
 * element type (48..51 = u8..u64, 32..35 = s8..s64, 39 = f16, 40 = f32,
 * 36/37 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer of the store --
 * confirm against the builtin's definition. */
#define vst1q_u8(__a, b) __extension__ ({ \
  uint8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 48); })
#define vst1q_u16(__a, b) __extension__ ({ \
  uint16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 49); })
#define vst1q_u32(__a, b) __extension__ ({ \
  uint32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 50); })
#define vst1q_u64(__a, b) __extension__ ({ \
  uint64x2_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 51); })
#define vst1q_s8(__a, b) __extension__ ({ \
  int8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, __b, 32); })
#define vst1q_s16(__a, b) __extension__ ({ \
  int16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 33); })
#define vst1q_s32(__a, b) __extension__ ({ \
  int32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 34); })
#define vst1q_s64(__a, b) __extension__ ({ \
  int64x2_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 35); })
#define vst1q_f16(__a, b) __extension__ ({ \
  float16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 39); })
#define vst1q_f32(__a, b) __extension__ ({ \
  float32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 40); })
#define vst1q_p8(__a, b) __extension__ ({ \
  poly8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 36); })
#define vst1q_p16(__a, b) __extension__ ({ \
  poly16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 37); })
/* vst1_<type>(__a, b), 64-bit vectors.
 * b is copied into a typed local (single evaluation plus type check),
 * reinterpreted as int8x8_t and handed to __builtin_neon_vst1_v together with
 * __a, which is forwarded untouched.  The trailing literal differs per element
 * type (16..19 = u8..u64, 0..3 = s8..s64, 7 = f16, 8 = f32, 4/5 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer of the store --
 * confirm against the builtin's definition. */
#define vst1_u8(__a, b) __extension__ ({ \
  uint8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 16); })
#define vst1_u16(__a, b) __extension__ ({ \
  uint16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 17); })
#define vst1_u32(__a, b) __extension__ ({ \
  uint32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 18); })
#define vst1_u64(__a, b) __extension__ ({ \
  uint64x1_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 19); })
#define vst1_s8(__a, b) __extension__ ({ \
  int8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, __b, 0); })
#define vst1_s16(__a, b) __extension__ ({ \
  int16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); })
#define vst1_s32(__a, b) __extension__ ({ \
  int32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); })
#define vst1_s64(__a, b) __extension__ ({ \
  int64x1_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); })
#define vst1_f16(__a, b) __extension__ ({ \
  float16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); })
#define vst1_f32(__a, b) __extension__ ({ \
  float32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 8); })
#define vst1_p8(__a, b) __extension__ ({ \
  poly8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); })
#define vst1_p16(__a, b) __extension__ ({ \
  poly16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); })
4385
/* vst1q_lane_<type>(__a, b, __c), 128-bit vectors.
 * b is copied into a typed local, reinterpreted as int8x16_t and passed to
 * __builtin_neon_vst1q_lane_v; __a and __c are forwarded untouched.  The
 * trailing literal uses the same per-type values as the vst1q_* macros.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst1q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); })
#define vst1q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); })
#define vst1q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); })
#define vst1q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); })
#define vst1q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, __b, __c, 32); })
#define vst1q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); })
#define vst1q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); })
#define vst1q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); })
#define vst1q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); })
#define vst1q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 40); })
#define vst1q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); })
#define vst1q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); })
/* vst1_lane_<type>(__a, b, __c), 64-bit vectors.
 * b is copied into a typed local, reinterpreted as int8x8_t and passed to
 * __builtin_neon_vst1_lane_v; __a and __c are forwarded untouched.  The
 * trailing literal uses the same per-type values as the vst1_* macros.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst1_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); })
#define vst1_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); })
#define vst1_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); })
#define vst1_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); })
#define vst1_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, __b, __c, 0); })
#define vst1_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); })
#define vst1_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); })
#define vst1_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); })
#define vst1_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); })
#define vst1_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 8); })
#define vst1_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); })
#define vst1_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); })
4458
/* vst2q_<type>(__a, b), pairs of 128-bit vectors.
 * b is a two-vector aggregate; it is copied into a typed local and its
 * val[0] and val[1] members are each reinterpreted as int8x16_t and passed,
 * after the untouched __a, to __builtin_neon_vst2q_v.  The trailing literal
 * differs per element type (48..50 = u8..u32, 32..34 = s8..s32, 39 = f16,
 * 40 = f32, 36/37 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst2q_u8(__a, b) __extension__ ({ \
  uint8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); })
#define vst2q_u16(__a, b) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); })
#define vst2q_u32(__a, b) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); })
#define vst2q_s8(__a, b) __extension__ ({ \
  int8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 32); })
#define vst2q_s16(__a, b) __extension__ ({ \
  int16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); })
#define vst2q_s32(__a, b) __extension__ ({ \
  int32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); })
#define vst2q_f16(__a, b) __extension__ ({ \
  float16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 39); })
#define vst2q_f32(__a, b) __extension__ ({ \
  float32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 40); })
#define vst2q_p8(__a, b) __extension__ ({ \
  poly8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); })
#define vst2q_p16(__a, b) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); })
/* vst2_<type>(__a, b), pairs of 64-bit vectors.
 * b is a two-vector aggregate; it is copied into a typed local and its
 * val[0] and val[1] members are each reinterpreted as int8x8_t and passed,
 * after the untouched __a, to __builtin_neon_vst2_v.  The trailing literal
 * differs per element type (16..19 = u8..u64, 0..3 = s8..s64, 7 = f16,
 * 8 = f32, 4/5 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst2_u8(__a, b) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); })
#define vst2_u16(__a, b) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); })
#define vst2_u32(__a, b) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); })
#define vst2_u64(__a, b) __extension__ ({ \
  uint64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); })
#define vst2_s8(__a, b) __extension__ ({ \
  int8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); })
#define vst2_s16(__a, b) __extension__ ({ \
  int16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); })
#define vst2_s32(__a, b) __extension__ ({ \
  int32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); })
#define vst2_s64(__a, b) __extension__ ({ \
  int64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); })
#define vst2_f16(__a, b) __extension__ ({ \
  float16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); })
#define vst2_f32(__a, b) __extension__ ({ \
  float32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 8); })
#define vst2_p8(__a, b) __extension__ ({ \
  poly8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); })
#define vst2_p16(__a, b) __extension__ ({ \
  poly16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); })
4525
/* vst2q_lane_<type>(__a, b, __c), pairs of 128-bit vectors.
 * b is copied into a typed two-vector local; val[0] and val[1] are each
 * reinterpreted as int8x16_t and passed to __builtin_neon_vst2q_lane_v, with
 * __a and __c forwarded untouched.  Only 16- and 32-bit element variants are
 * defined here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst2q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); })
#define vst2q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); })
#define vst2q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); })
#define vst2q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); })
#define vst2q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); })
#define vst2q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 40); })
#define vst2q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); })
/* vst2_lane_<type>(__a, b, __c), pairs of 64-bit vectors.
 * b is copied into a typed two-vector local; val[0] and val[1] are each
 * reinterpreted as int8x8_t and passed to __builtin_neon_vst2_lane_v, with
 * __a and __c forwarded untouched.  No 64-bit element variants are defined
 * here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst2_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); })
#define vst2_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); })
#define vst2_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); })
#define vst2_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); })
#define vst2_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); })
#define vst2_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); })
#define vst2_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); })
#define vst2_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); })
#define vst2_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); })
#define vst2_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); })
4577
/* vst3q_<type>(__a, b), triples of 128-bit vectors.
 * b is a three-vector aggregate; it is copied into a typed local and
 * val[0..2] are each reinterpreted as int8x16_t and passed, after the
 * untouched __a, to __builtin_neon_vst3q_v.  The trailing literal differs per
 * element type (48..50 = u8..u32, 32..34 = s8..s32, 39 = f16, 40 = f32,
 * 36/37 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst3q_u8(__a, b) __extension__ ({ \
  uint8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); })
#define vst3q_u16(__a, b) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); })
#define vst3q_u32(__a, b) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 50); })
#define vst3q_s8(__a, b) __extension__ ({ \
  int8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); })
#define vst3q_s16(__a, b) __extension__ ({ \
  int16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); })
#define vst3q_s32(__a, b) __extension__ ({ \
  int32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); })
#define vst3q_f16(__a, b) __extension__ ({ \
  float16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); })
#define vst3q_f32(__a, b) __extension__ ({ \
  float32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 40); })
#define vst3q_p8(__a, b) __extension__ ({ \
  poly8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); })
#define vst3q_p16(__a, b) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); })
/* vst3_<type>(__a, b), triples of 64-bit vectors.
 * b is a three-vector aggregate; it is copied into a typed local and
 * val[0..2] are each reinterpreted as int8x8_t and passed, after the
 * untouched __a, to __builtin_neon_vst3_v.  The trailing literal differs per
 * element type (16..19 = u8..u64, 0..3 = s8..s64, 7 = f16, 8 = f32,
 * 4/5 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst3_u8(__a, b) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); })
#define vst3_u16(__a, b) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); })
#define vst3_u32(__a, b) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); })
#define vst3_u64(__a, b) __extension__ ({ \
  uint64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); })
#define vst3_s8(__a, b) __extension__ ({ \
  int8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); })
#define vst3_s16(__a, b) __extension__ ({ \
  int16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); })
#define vst3_s32(__a, b) __extension__ ({ \
  int32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); })
#define vst3_s64(__a, b) __extension__ ({ \
  int64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); })
#define vst3_f16(__a, b) __extension__ ({ \
  float16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); })
#define vst3_f32(__a, b) __extension__ ({ \
  float32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 8); })
#define vst3_p8(__a, b) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); })
#define vst3_p16(__a, b) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); })
4644
/* vst3q_lane_<type>(__a, b, __c), triples of 128-bit vectors.
 * b is copied into a typed three-vector local; val[0..2] are each
 * reinterpreted as int8x16_t and passed to __builtin_neon_vst3q_lane_v, with
 * __a and __c forwarded untouched.  Only 16- and 32-bit element variants are
 * defined here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst3q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); })
#define vst3q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); })
#define vst3q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); })
#define vst3q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); })
#define vst3q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); })
#define vst3q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 40); })
#define vst3q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); })
/* vst3_lane_<type>(__a, b, __c), triples of 64-bit vectors.
 * b is copied into a typed three-vector local; val[0..2] are each
 * reinterpreted as int8x8_t and passed to __builtin_neon_vst3_lane_v, with
 * __a and __c forwarded untouched.  No 64-bit element variants are defined
 * here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst3_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); })
#define vst3_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); })
#define vst3_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); })
#define vst3_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); })
#define vst3_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); })
#define vst3_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); })
#define vst3_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); })
#define vst3_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); })
#define vst3_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); })
#define vst3_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); })
4696
/* vst4q_<type>(__a, b), quadruples of 128-bit vectors.
 * b is a four-vector aggregate; it is copied into a typed local and
 * val[0..3] are each reinterpreted as int8x16_t and passed, after the
 * untouched __a, to __builtin_neon_vst4q_v.  The trailing literal differs per
 * element type (48..50 = u8..u32, 32..34 = s8..s32, 39 = f16, 40 = f32,
 * 36/37 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst4q_u8(__a, b) __extension__ ({ \
  uint8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); })
#define vst4q_u16(__a, b) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); })
#define vst4q_u32(__a, b) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); })
#define vst4q_s8(__a, b) __extension__ ({ \
  int8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); })
#define vst4q_s16(__a, b) __extension__ ({ \
  int16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); })
#define vst4q_s32(__a, b) __extension__ ({ \
  int32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); })
#define vst4q_f16(__a, b) __extension__ ({ \
  float16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); })
#define vst4q_f32(__a, b) __extension__ ({ \
  float32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 40); })
#define vst4q_p8(__a, b) __extension__ ({ \
  poly8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); })
#define vst4q_p16(__a, b) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); })
/* vst4_<type>(__a, b), quadruples of 64-bit vectors.
 * b is a four-vector aggregate; it is copied into a typed local and
 * val[0..3] are each reinterpreted as int8x8_t and passed, after the
 * untouched __a, to __builtin_neon_vst4_v.  The trailing literal differs per
 * element type (16..19 = u8..u64, 0..3 = s8..s64, 7 = f16, 8 = f32,
 * 4/5 = p8/p16).
 * NOTE(review): __a is presumably the destination pointer -- confirm against
 * the builtin's definition. */
#define vst4_u8(__a, b) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); })
#define vst4_u16(__a, b) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); })
#define vst4_u32(__a, b) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); })
#define vst4_u64(__a, b) __extension__ ({ \
  uint64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); })
#define vst4_s8(__a, b) __extension__ ({ \
  int8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); })
#define vst4_s16(__a, b) __extension__ ({ \
  int16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); })
#define vst4_s32(__a, b) __extension__ ({ \
  int32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); })
#define vst4_s64(__a, b) __extension__ ({ \
  int64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); })
#define vst4_f16(__a, b) __extension__ ({ \
  float16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); })
#define vst4_f32(__a, b) __extension__ ({ \
  float32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 8); })
#define vst4_p8(__a, b) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); })
#define vst4_p16(__a, b) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); })
4763
/* vst4q_lane_<type>(__a, b, __c), quadruples of 128-bit vectors.
 * b is copied into a typed four-vector local; val[0..3] are each
 * reinterpreted as int8x16_t and passed to __builtin_neon_vst4q_lane_v, with
 * __a and __c forwarded untouched.  Only 16- and 32-bit element variants are
 * defined here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst4q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); })
#define vst4q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); })
#define vst4q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); })
#define vst4q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); })
#define vst4q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); })
#define vst4q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 40); })
#define vst4q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); })
/* vst4_lane_<type>(__a, b, __c), quadruples of 64-bit vectors.
 * b is copied into a typed four-vector local; val[0..3] are each
 * reinterpreted as int8x8_t and passed to __builtin_neon_vst4_lane_v, with
 * __a and __c forwarded untouched.  No 64-bit element variants are defined
 * here.
 * NOTE(review): __a is presumably the destination pointer and __c a constant
 * lane index -- confirm against the builtin's definition. */
#define vst4_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); })
#define vst4_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); })
#define vst4_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); })
#define vst4_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); })
#define vst4_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); })
#define vst4_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); })
#define vst4_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); })
#define vst4_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); })
#define vst4_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); })
#define vst4_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); })
4815
4816__ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) {
4817 return __a - __b; }
4818__ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) {
4819 return __a - __b; }
4820__ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) {
4821 return __a - __b; }
4822__ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) {
4823 return __a - __b; }
4824__ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) {
4825 return __a - __b; }
4826__ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) {
4827 return __a - __b; }
4828__ai uint16x4_t vsub_u16(uint16x4_t __a, uint16x4_t __b) {
4829 return __a - __b; }
4830__ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) {
4831 return __a - __b; }
4832__ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) {
4833 return __a - __b; }
4834__ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) {
4835 return __a - __b; }
4836__ai int16x8_t vsubq_s16(int16x8_t __a, int16x8_t __b) {
4837 return __a - __b; }
4838__ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) {
4839 return __a - __b; }
4840__ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) {
4841 return __a - __b; }
4842__ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) {
4843 return __a - __b; }
4844__ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) {
4845 return __a - __b; }
4846__ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) {
4847 return __a - __b; }
4848__ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) {
4849 return __a - __b; }
4850__ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) {
4851 return __a - __b; }
4852
4853__ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) {
4854 return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
4855__ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) {
4856 return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
4857__ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) {
4858 return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
4859__ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) {
4860 return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
4861__ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) {
4862 return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
4863__ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) {
4864 return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
4865
4866__ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) {
4867 return vmovl_s8(__a) - vmovl_s8(__b); }
4868__ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) {
4869 return vmovl_s16(__a) - vmovl_s16(__b); }
4870__ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) {
4871 return vmovl_s32(__a) - vmovl_s32(__b); }
4872__ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) {
4873 return vmovl_u8(__a) - vmovl_u8(__b); }
4874__ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) {
4875 return vmovl_u16(__a) - vmovl_u16(__b); }
4876__ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) {
4877 return vmovl_u32(__a) - vmovl_u32(__b); }
4878
4879__ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) {
4880 return __a - vmovl_s8(__b); }
4881__ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) {
4882 return __a - vmovl_s16(__b); }
4883__ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) {
4884 return __a - vmovl_s32(__b); }
4885__ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) {
4886 return __a - vmovl_u8(__b); }
4887__ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) {
4888 return __a - vmovl_u16(__b); }
4889__ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) {
4890 return __a - vmovl_u32(__b); }
4891
4892__ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) {
4893 return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4894__ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) {
4895 return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); }
4896__ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) {
4897 return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 4); }
4898
4899__ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) {
4900 return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 16); }
4901__ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) {
4902 return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); }
4903__ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) {
4904 return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 4); }
4905
4906__ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) {
4907 return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 16); }
4908__ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) {
4909 return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); }
4910__ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) {
4911 return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 4); }
4912
4913__ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) {
4914 return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 16); }
4915__ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) {
4916 return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); }
4917__ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) {
4918 return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 4); }
4919
4920__ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
4921 return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); }
4922__ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
4923 return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); }
4924__ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) {
4925 return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); }
4926
4927__ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) {
4928 return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 16); }
4929__ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) {
4930 return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); }
4931__ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) {
4932 return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 4); }
4933
4934__ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) {
4935 return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 16); }
4936__ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) {
4937 return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }
4938__ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) {
4939 return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 4); }
4940
4941__ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) {
4942 return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 16); }
4943__ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) {
4944 return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }
4945__ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) {
4946 return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 4); }
4947
4948__ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) {
4949 int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; }
4950__ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) {
4951 int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
4952__ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) {
4953 int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
4954__ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) {
4955 uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
4956__ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) {
4957 uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
4958__ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) {
4959 uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
4960__ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) {
4961 float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; }
4962__ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) {
4963 poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
4964__ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) {
4965 poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
4966__ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) {
4967 int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 32); return r; }
4968__ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) {
4969 int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4970__ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) {
4971 int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4972__ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) {
4973 uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
4974__ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) {
4975 uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
4976__ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) {
4977 uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
4978__ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) {
4979 float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 40); return r; }
4980__ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) {
4981 poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
4982__ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) {
4983 poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
4984
4985__ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) {
4986 return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 16); }
4987__ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) {
4988 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4989__ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) {
4990 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4991__ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) {
4992 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4993__ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) {
4994 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4995__ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) {
4996 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4997__ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) {
4998 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4999__ai uint16x4_t vtst_p16(poly16x4_t __a, poly16x4_t __b) {
5000 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
5001__ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) {
5002 return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 48); }
5003__ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) {
5004 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
5005__ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) {
5006 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
5007__ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) {
5008 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
5009__ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) {
5010 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
5011__ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) {
5012 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
5013__ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) {
5014 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
5015__ai uint16x8_t vtstq_p16(poly16x8_t __a, poly16x8_t __b) {
5016 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
5017
5018__ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) {
5019 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; }
5020__ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) {
5021 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
5022__ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) {
5023 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
5024__ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) {
5025 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
5026__ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) {
5027 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
5028__ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) {
5029 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
5030__ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) {
5031 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; }
5032__ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) {
5033 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
5034__ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) {
5035 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
5036__ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) {
5037 int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 32); return r; }
5038__ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) {
5039 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
5040__ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) {
5041 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
5042__ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) {
5043 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
5044__ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) {
5045 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
5046__ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) {
5047 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
5048__ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) {
5049 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 40); return r; }
5050__ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) {
5051 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
5052__ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) {
5053 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
5054
5055__ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) {
5056 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; }
5057__ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) {
5058 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
5059__ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) {
5060 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
5061__ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) {
5062 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
5063__ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) {
5064 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
5065__ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) {
5066 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
5067__ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) {
5068 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; }
5069__ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) {
5070 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
5071__ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) {
5072 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
5073__ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) {
5074 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 32); return r; }
5075__ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) {
5076 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
5077__ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) {
5078 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
5079__ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) {
5080 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
5081__ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) {
5082 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
5083__ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) {
5084 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
5085__ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) {
5086 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 40); return r; }
5087__ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) {
5088 poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
5089__ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) {
5090 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
5091
5092#ifdef __aarch64__
5093__ai int16x8_t vmovl_high_s8(int8x16_t __a) {
5094 int8x8_t __a1 = vget_high_s8(__a);
5095 return (int16x8_t)vshll_n_s8(__a1, 0); }
5096__ai int32x4_t vmovl_high_s16(int16x8_t __a) {
5097 int16x4_t __a1 = vget_high_s16(__a);
5098 return (int32x4_t)vshll_n_s16(__a1, 0); }
5099__ai int64x2_t vmovl_high_s32(int32x4_t __a) {
5100 int32x2_t __a1 = vget_high_s32(__a);
5101 return (int64x2_t)vshll_n_s32(__a1, 0); }
5102__ai uint16x8_t vmovl_high_u8(uint8x16_t __a) {
5103 uint8x8_t __a1 = vget_high_u8(__a);
5104 return (uint16x8_t)vshll_n_u8(__a1, 0); }
5105__ai uint32x4_t vmovl_high_u16(uint16x8_t __a) {
5106 uint16x4_t __a1 = vget_high_u16(__a);
5107 return (uint32x4_t)vshll_n_u16(__a1, 0); }
5108__ai uint64x2_t vmovl_high_u32(uint32x4_t __a) {
5109 uint32x2_t __a1 = vget_high_u32(__a);
5110 return (uint64x2_t)vshll_n_u32(__a1, 0); }
5111
5112__ai int16x8_t vmull_high_s8(int8x16_t __a, int8x16_t __b) {
5113 return vmull_s8(vget_high_s8(__a), vget_high_s8(__b)); }
5114__ai int32x4_t vmull_high_s16(int16x8_t __a, int16x8_t __b) {
5115 return vmull_s16(vget_high_s16(__a), vget_high_s16(__b)); }
5116__ai int64x2_t vmull_high_s32(int32x4_t __a, int32x4_t __b) {
5117 return vmull_s32(vget_high_s32(__a), vget_high_s32(__b)); }
5118__ai uint16x8_t vmull_high_u8(uint8x16_t __a, uint8x16_t __b) {
5119 return vmull_u8(vget_high_u8(__a), vget_high_u8(__b)); }
5120__ai uint32x4_t vmull_high_u16(uint16x8_t __a, uint16x8_t __b) {
5121 return vmull_u16(vget_high_u16(__a), vget_high_u16(__b)); }
5122__ai uint64x2_t vmull_high_u32(uint32x4_t __a, uint32x4_t __b) {
5123 return vmull_u32(vget_high_u32(__a), vget_high_u32(__b)); }
5124__ai poly16x8_t vmull_high_p8(poly8x16_t __a, poly8x16_t __b) {
5125 return vmull_p8(vget_high_p8(__a), vget_high_p8(__b)); }
5126
5127__ai int16x8_t vabdl_high_s8(int8x16_t __a, int8x16_t __b) {
5128 return vabdl_s8(vget_high_s8(__a), vget_high_s8(__b)); }
5129__ai int32x4_t vabdl_high_s16(int16x8_t __a, int16x8_t __b) {
5130 return vabdl_s16(vget_high_s16(__a), vget_high_s16(__b)); }
5131__ai int64x2_t vabdl_high_s32(int32x4_t __a, int32x4_t __b) {
5132 return vabdl_s32(vget_high_s32(__a), vget_high_s32(__b)); }
5133__ai uint16x8_t vabdl_high_u8(uint8x16_t __a, uint8x16_t __b) {
5134 return vabdl_u8(vget_high_u8(__a), vget_high_u8(__b)); }
5135__ai uint32x4_t vabdl_high_u16(uint16x8_t __a, uint16x8_t __b) {
5136 return vabdl_u16(vget_high_u16(__a), vget_high_u16(__b)); }
5137__ai uint64x2_t vabdl_high_u32(uint32x4_t __a, uint32x4_t __b) {
5138 return vabdl_u32(vget_high_u32(__a), vget_high_u32(__b)); }
5139
5140__ai float64x1_t vabd_f64(float64x1_t __a, float64x1_t __b) {
5141 return (float64x1_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 9); }
5142__ai float64x2_t vabdq_f64(float64x2_t __a, float64x2_t __b) {
5143 return (float64x2_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5144
5145__ai int64x1_t vabs_s64(int64x1_t __a) {
5146 return (int64x1_t)__builtin_neon_vabs_v((int8x8_t)__a, 3); }
5147__ai float64x1_t vabs_f64(float64x1_t __a) {
5148 return (float64x1_t)__builtin_neon_vabs_v((int8x8_t)__a, 9); }
5149__ai int64x2_t vabsq_s64(int64x2_t __a) {
5150 return (int64x2_t)__builtin_neon_vabsq_v((int8x16_t)__a, 35); }
5151__ai float64x2_t vabsq_f64(float64x2_t __a) {
5152 return (float64x2_t)__builtin_neon_vabsq_v((int8x16_t)__a, 41); }
5153
5154__ai float64x1_t vadd_f64(float64x1_t __a, float64x1_t __b) {
5155 return __a + __b; }
5156__ai float64x2_t vaddq_f64(float64x2_t __a, float64x2_t __b) {
5157 return __a + __b; }
5158
5159__ai int8x16_t vpaddq_s8(int8x16_t __a, int8x16_t __b) {
5160 return (int8x16_t)__builtin_neon_vpaddq_v(__a, __b, 32); }
5161__ai int16x8_t vpaddq_s16(int16x8_t __a, int16x8_t __b) {
5162 return (int16x8_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
5163__ai int32x4_t vpaddq_s32(int32x4_t __a, int32x4_t __b) {
5164 return (int32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
5165__ai int64x2_t vpaddq_s64(int64x2_t __a, int64x2_t __b) {
5166 return (int64x2_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
5167__ai uint8x16_t vpaddq_u8(uint8x16_t __a, uint8x16_t __b) {
5168 return (uint8x16_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
5169__ai uint16x8_t vpaddq_u16(uint16x8_t __a, uint16x8_t __b) {
5170 return (uint16x8_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
5171__ai uint32x4_t vpaddq_u32(uint32x4_t __a, uint32x4_t __b) {
5172 return (uint32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
5173__ai uint64x2_t vpaddq_u64(uint64x2_t __a, uint64x2_t __b) {
5174 return (uint64x2_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5175__ai float32x4_t vpaddq_f32(float32x4_t __a, float32x4_t __b) {
5176 return (float32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
5177__ai float64x2_t vpaddq_f64(float64x2_t __a, float64x2_t __b) {
5178 return (float64x2_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5179
5180__ai float64x1_t vbsl_f64(uint64x1_t __a, float64x1_t __b, float64x1_t __c) {
5181 return (float64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 9); }
5182__ai float64x2_t vbslq_f64(uint64x2_t __a, float64x2_t __b, float64x2_t __c) {
5183 return (float64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 41); }
5184__ai poly64x1_t vbsl_p64(uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) {
5185 return (poly64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 6); }
5186__ai poly64x2_t vbslq_p64(uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) {
5187 return (poly64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 38); }
5188
5189__ai uint64x1_t vceq_s64(int64x1_t __a, int64x1_t __b) {
5190 return (uint64x1_t)(__a == __b); }
5191__ai uint64x1_t vceq_u64(uint64x1_t __a, uint64x1_t __b) {
5192 return (uint64x1_t)(__a == __b); }
5193__ai uint64x1_t vceq_f64(float64x1_t __a, float64x1_t __b) {
5194 return (uint64x1_t)(__a == __b); }
5195__ai uint64x2_t vceqq_f64(float64x2_t __a, float64x2_t __b) {
5196 return (uint64x2_t)(__a == __b); }
5197__ai uint64x2_t vceqq_u64(uint64x2_t __a, uint64x2_t __b) {
5198 return (uint64x2_t)(__a == __b); }
5199__ai uint64x2_t vceqq_s64(int64x2_t __a, int64x2_t __b) {
5200 return (uint64x2_t)(__a == __b); }
5201__ai uint64x1_t vceq_p64(poly64x1_t __a, poly64x1_t __b) {
5202 return (uint64x1_t)(__a == __b); }
5203__ai uint64x2_t vceqq_p64(poly64x2_t __a, poly64x2_t __b) {
5204 return (uint64x2_t)(__a == __b); }
5205
5206__ai uint64x1_t vcge_s64(int64x1_t __a, int64x1_t __b) {
5207 return (uint64x1_t)(__a >= __b); }
5208__ai uint64x1_t vcge_u64(uint64x1_t __a, uint64x1_t __b) {
5209 return (uint64x1_t)(__a >= __b); }
5210__ai uint64x2_t vcgeq_s64(int64x2_t __a, int64x2_t __b) {
5211 return (uint64x2_t)(__a >= __b); }
5212__ai uint64x2_t vcgeq_u64(uint64x2_t __a, uint64x2_t __b) {
5213 return (uint64x2_t)(__a >= __b); }
5214__ai uint64x1_t vcge_f64(float64x1_t __a, float64x1_t __b) {
5215 return (uint64x1_t)(__a >= __b); }
5216__ai uint64x2_t vcgeq_f64(float64x2_t __a, float64x2_t __b) {
5217 return (uint64x2_t)(__a >= __b); }
5218
5219__ai uint64x1_t vcgt_s64(int64x1_t __a, int64x1_t __b) {
5220 return (uint64x1_t)(__a > __b); }
5221__ai uint64x1_t vcgt_u64(uint64x1_t __a, uint64x1_t __b) {
5222 return (uint64x1_t)(__a > __b); }
5223__ai uint64x2_t vcgtq_s64(int64x2_t __a, int64x2_t __b) {
5224 return (uint64x2_t)(__a > __b); }
5225__ai uint64x2_t vcgtq_u64(uint64x2_t __a, uint64x2_t __b) {
5226 return (uint64x2_t)(__a > __b); }
5227__ai uint64x1_t vcgt_f64(float64x1_t __a, float64x1_t __b) {
5228 return (uint64x1_t)(__a > __b); }
5229__ai uint64x2_t vcgtq_f64(float64x2_t __a, float64x2_t __b) {
5230 return (uint64x2_t)(__a > __b); }
5231
5232__ai uint64x1_t vcle_s64(int64x1_t __a, int64x1_t __b) {
5233 return (uint64x1_t)(__a <= __b); }
5234__ai uint64x1_t vcle_u64(uint64x1_t __a, uint64x1_t __b) {
5235 return (uint64x1_t)(__a <= __b); }
5236__ai uint64x2_t vcleq_s64(int64x2_t __a, int64x2_t __b) {
5237 return (uint64x2_t)(__a <= __b); }
5238__ai uint64x2_t vcleq_u64(uint64x2_t __a, uint64x2_t __b) {
5239 return (uint64x2_t)(__a <= __b); }
5240__ai uint64x1_t vcle_f64(float64x1_t __a, float64x1_t __b) {
5241 return (uint64x1_t)(__a <= __b); }
5242__ai uint64x2_t vcleq_f64(float64x2_t __a, float64x2_t __b) {
5243 return (uint64x2_t)(__a <= __b); }
5244
5245__ai uint64x1_t vclt_s64(int64x1_t __a, int64x1_t __b) {
5246 return (uint64x1_t)(__a < __b); }
5247__ai uint64x1_t vclt_u64(uint64x1_t __a, uint64x1_t __b) {
5248 return (uint64x1_t)(__a < __b); }
5249__ai uint64x2_t vcltq_s64(int64x2_t __a, int64x2_t __b) {
5250 return (uint64x2_t)(__a < __b); }
5251__ai uint64x2_t vcltq_u64(uint64x2_t __a, uint64x2_t __b) {
5252 return (uint64x2_t)(__a < __b); }
5253__ai uint64x1_t vclt_f64(float64x1_t __a, float64x1_t __b) {
5254 return (uint64x1_t)(__a < __b); }
5255__ai uint64x2_t vcltq_f64(float64x2_t __a, float64x2_t __b) {
5256 return (uint64x2_t)(__a < __b); }
5257
5258__ai uint8x8_t vceqz_s8(int8x8_t __a) {
5259 return (uint8x8_t)__builtin_neon_vceqz_v(__a, 16); }
5260__ai uint16x4_t vceqz_s16(int16x4_t __a) {
5261 return (uint16x4_t)__builtin_neon_vceqz_v((int8x8_t)__a, 17); }
5262__ai uint32x2_t vceqz_s32(int32x2_t __a) {
5263 return (uint32x2_t)__builtin_neon_vceqz_v((int8x8_t)__a, 18); }
5264__ai uint64x1_t vceqz_s64(int64x1_t __a) {
5265 return (uint64x1_t)__builtin_neon_vceqz_v((int8x8_t)__a, 19); }
5266__ai uint32x2_t vceqz_f32(float32x2_t __a) {
5267 return (uint32x2_t)__builtin_neon_vceqz_v((int8x8_t)__a, 18); }
5268__ai uint8x8_t vceqz_u8(uint8x8_t __a) {
5269 return (uint8x8_t)__builtin_neon_vceqz_v((int8x8_t)__a, 16); }
5270__ai uint16x4_t vceqz_u16(uint16x4_t __a) {
5271 return (uint16x4_t)__builtin_neon_vceqz_v((int8x8_t)__a, 17); }
5272__ai uint32x2_t vceqz_u32(uint32x2_t __a) {
5273 return (uint32x2_t)__builtin_neon_vceqz_v((int8x8_t)__a, 18); }
5274__ai uint64x1_t vceqz_u64(uint64x1_t __a) {
5275 return (uint64x1_t)__builtin_neon_vceqz_v((int8x8_t)__a, 19); }
5276__ai uint8x8_t vceqz_p8(poly8x8_t __a) {
5277 return (uint8x8_t)__builtin_neon_vceqz_v((int8x8_t)__a, 16); }
5278__ai uint16x4_t vceqz_p16(poly16x4_t __a) {
5279 return (uint16x4_t)__builtin_neon_vceqz_v((int8x8_t)__a, 17); }
5280__ai uint64x1_t vceqz_p64(poly64x1_t __a) {
5281 return (uint64x1_t)__builtin_neon_vceqz_v((int8x8_t)__a, 19); }
5282__ai uint8x16_t vceqzq_s8(int8x16_t __a) {
5283 return (uint8x16_t)__builtin_neon_vceqzq_v(__a, 48); }
5284__ai uint16x8_t vceqzq_s16(int16x8_t __a) {
5285 return (uint16x8_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 49); }
5286__ai uint32x4_t vceqzq_s32(int32x4_t __a) {
5287 return (uint32x4_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 50); }
5288__ai uint64x2_t vceqzq_s64(int64x2_t __a) {
5289 return (uint64x2_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 51); }
5290__ai uint32x4_t vceqzq_f32(float32x4_t __a) {
5291 return (uint32x4_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 50); }
5292__ai uint8x16_t vceqzq_u8(uint8x16_t __a) {
5293 return (uint8x16_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 48); }
5294__ai uint16x8_t vceqzq_u16(uint16x8_t __a) {
5295 return (uint16x8_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 49); }
5296__ai uint32x4_t vceqzq_u32(uint32x4_t __a) {
5297 return (uint32x4_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 50); }
5298__ai uint64x2_t vceqzq_u64(uint64x2_t __a) {
5299 return (uint64x2_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 51); }
5300__ai uint8x16_t vceqzq_p8(poly8x16_t __a) {
5301 return (uint8x16_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 48); }
5302__ai uint16x8_t vceqzq_p16(poly16x8_t __a) {
5303 return (uint16x8_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 49); }
5304__ai uint64x1_t vceqz_f64(float64x1_t __a) {
5305 return (uint64x1_t)__builtin_neon_vceqz_v((int8x8_t)__a, 19); }
5306__ai uint64x2_t vceqzq_f64(float64x2_t __a) {
5307 return (uint64x2_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 51); }
5308__ai uint64x2_t vceqzq_p64(poly64x2_t __a) {
5309 return (uint64x2_t)__builtin_neon_vceqzq_v((int8x16_t)__a, 51); }
5310
5311__ai uint8x8_t vcgez_s8(int8x8_t __a) {
5312 return (uint8x8_t)__builtin_neon_vcgez_v(__a, 16); }
5313__ai uint16x4_t vcgez_s16(int16x4_t __a) {
5314 return (uint16x4_t)__builtin_neon_vcgez_v((int8x8_t)__a, 17); }
5315__ai uint32x2_t vcgez_s32(int32x2_t __a) {
5316 return (uint32x2_t)__builtin_neon_vcgez_v((int8x8_t)__a, 18); }
5317__ai uint64x1_t vcgez_s64(int64x1_t __a) {
5318 return (uint64x1_t)__builtin_neon_vcgez_v((int8x8_t)__a, 19); }
5319__ai uint32x2_t vcgez_f32(float32x2_t __a) {
5320 return (uint32x2_t)__builtin_neon_vcgez_v((int8x8_t)__a, 18); }
5321__ai uint64x1_t vcgez_f64(float64x1_t __a) {
5322 return (uint64x1_t)__builtin_neon_vcgez_v((int8x8_t)__a, 19); }
5323__ai uint8x16_t vcgezq_s8(int8x16_t __a) {
5324 return (uint8x16_t)__builtin_neon_vcgezq_v(__a, 48); }
5325__ai uint16x8_t vcgezq_s16(int16x8_t __a) {
5326 return (uint16x8_t)__builtin_neon_vcgezq_v((int8x16_t)__a, 49); }
5327__ai uint32x4_t vcgezq_s32(int32x4_t __a) {
5328 return (uint32x4_t)__builtin_neon_vcgezq_v((int8x16_t)__a, 50); }
5329__ai uint64x2_t vcgezq_s64(int64x2_t __a) {
5330 return (uint64x2_t)__builtin_neon_vcgezq_v((int8x16_t)__a, 51); }
5331__ai uint32x4_t vcgezq_f32(float32x4_t __a) {
5332 return (uint32x4_t)__builtin_neon_vcgezq_v((int8x16_t)__a, 50); }
5333__ai uint64x2_t vcgezq_f64(float64x2_t __a) {
5334 return (uint64x2_t)__builtin_neon_vcgezq_v((int8x16_t)__a, 51); }
5335
5336__ai uint8x8_t vcgtz_s8(int8x8_t __a) {
5337 return (uint8x8_t)__builtin_neon_vcgtz_v(__a, 16); }
5338__ai uint16x4_t vcgtz_s16(int16x4_t __a) {
5339 return (uint16x4_t)__builtin_neon_vcgtz_v((int8x8_t)__a, 17); }
5340__ai uint32x2_t vcgtz_s32(int32x2_t __a) {
5341 return (uint32x2_t)__builtin_neon_vcgtz_v((int8x8_t)__a, 18); }
5342__ai uint64x1_t vcgtz_s64(int64x1_t __a) {
5343 return (uint64x1_t)__builtin_neon_vcgtz_v((int8x8_t)__a, 19); }
5344__ai uint32x2_t vcgtz_f32(float32x2_t __a) {
5345 return (uint32x2_t)__builtin_neon_vcgtz_v((int8x8_t)__a, 18); }
5346__ai uint64x1_t vcgtz_f64(float64x1_t __a) {
5347 return (uint64x1_t)__builtin_neon_vcgtz_v((int8x8_t)__a, 19); }
5348__ai uint8x16_t vcgtzq_s8(int8x16_t __a) {
5349 return (uint8x16_t)__builtin_neon_vcgtzq_v(__a, 48); }
5350__ai uint16x8_t vcgtzq_s16(int16x8_t __a) {
5351 return (uint16x8_t)__builtin_neon_vcgtzq_v((int8x16_t)__a, 49); }
5352__ai uint32x4_t vcgtzq_s32(int32x4_t __a) {
5353 return (uint32x4_t)__builtin_neon_vcgtzq_v((int8x16_t)__a, 50); }
5354__ai uint64x2_t vcgtzq_s64(int64x2_t __a) {
5355 return (uint64x2_t)__builtin_neon_vcgtzq_v((int8x16_t)__a, 51); }
5356__ai uint32x4_t vcgtzq_f32(float32x4_t __a) {
5357 return (uint32x4_t)__builtin_neon_vcgtzq_v((int8x16_t)__a, 50); }
5358__ai uint64x2_t vcgtzq_f64(float64x2_t __a) {
5359 return (uint64x2_t)__builtin_neon_vcgtzq_v((int8x16_t)__a, 51); }
5360
5361__ai uint8x8_t vclez_s8(int8x8_t __a) {
5362 return (uint8x8_t)__builtin_neon_vclez_v(__a, 16); }
5363__ai uint16x4_t vclez_s16(int16x4_t __a) {
5364 return (uint16x4_t)__builtin_neon_vclez_v((int8x8_t)__a, 17); }
5365__ai uint32x2_t vclez_s32(int32x2_t __a) {
5366 return (uint32x2_t)__builtin_neon_vclez_v((int8x8_t)__a, 18); }
5367__ai uint64x1_t vclez_s64(int64x1_t __a) {
5368 return (uint64x1_t)__builtin_neon_vclez_v((int8x8_t)__a, 19); }
5369__ai uint32x2_t vclez_f32(float32x2_t __a) {
5370 return (uint32x2_t)__builtin_neon_vclez_v((int8x8_t)__a, 18); }
5371__ai uint64x1_t vclez_f64(float64x1_t __a) {
5372 return (uint64x1_t)__builtin_neon_vclez_v((int8x8_t)__a, 19); }
5373__ai uint8x16_t vclezq_s8(int8x16_t __a) {
5374 return (uint8x16_t)__builtin_neon_vclezq_v(__a, 48); }
5375__ai uint16x8_t vclezq_s16(int16x8_t __a) {
5376 return (uint16x8_t)__builtin_neon_vclezq_v((int8x16_t)__a, 49); }
5377__ai uint32x4_t vclezq_s32(int32x4_t __a) {
5378 return (uint32x4_t)__builtin_neon_vclezq_v((int8x16_t)__a, 50); }
5379__ai uint64x2_t vclezq_s64(int64x2_t __a) {
5380 return (uint64x2_t)__builtin_neon_vclezq_v((int8x16_t)__a, 51); }
5381__ai uint32x4_t vclezq_f32(float32x4_t __a) {
5382 return (uint32x4_t)__builtin_neon_vclezq_v((int8x16_t)__a, 50); }
5383__ai uint64x2_t vclezq_f64(float64x2_t __a) {
5384 return (uint64x2_t)__builtin_neon_vclezq_v((int8x16_t)__a, 51); }
5385
5386__ai uint8x8_t vcltz_s8(int8x8_t __a) {
5387 return (uint8x8_t)__builtin_neon_vcltz_v(__a, 16); }
5388__ai uint16x4_t vcltz_s16(int16x4_t __a) {
5389 return (uint16x4_t)__builtin_neon_vcltz_v((int8x8_t)__a, 17); }
5390__ai uint32x2_t vcltz_s32(int32x2_t __a) {
5391 return (uint32x2_t)__builtin_neon_vcltz_v((int8x8_t)__a, 18); }
5392__ai uint64x1_t vcltz_s64(int64x1_t __a) {
5393 return (uint64x1_t)__builtin_neon_vcltz_v((int8x8_t)__a, 19); }
5394__ai uint32x2_t vcltz_f32(float32x2_t __a) {
5395 return (uint32x2_t)__builtin_neon_vcltz_v((int8x8_t)__a, 18); }
5396__ai uint64x1_t vcltz_f64(float64x1_t __a) {
5397 return (uint64x1_t)__builtin_neon_vcltz_v((int8x8_t)__a, 19); }
5398__ai uint8x16_t vcltzq_s8(int8x16_t __a) {
5399 return (uint8x16_t)__builtin_neon_vcltzq_v(__a, 48); }
5400__ai uint16x8_t vcltzq_s16(int16x8_t __a) {
5401 return (uint16x8_t)__builtin_neon_vcltzq_v((int8x16_t)__a, 49); }
5402__ai uint32x4_t vcltzq_s32(int32x4_t __a) {
5403 return (uint32x4_t)__builtin_neon_vcltzq_v((int8x16_t)__a, 50); }
5404__ai uint64x2_t vcltzq_s64(int64x2_t __a) {
5405 return (uint64x2_t)__builtin_neon_vcltzq_v((int8x16_t)__a, 51); }
5406__ai uint32x4_t vcltzq_f32(float32x4_t __a) {
5407 return (uint32x4_t)__builtin_neon_vcltzq_v((int8x16_t)__a, 50); }
5408__ai uint64x2_t vcltzq_f64(float64x2_t __a) {
5409 return (uint64x2_t)__builtin_neon_vcltzq_v((int8x16_t)__a, 51); }
5410
5411__ai uint64x1_t vtst_s64(int64x1_t __a, int64x1_t __b) {
5412 return (uint64x1_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5413__ai uint64x1_t vtst_u64(uint64x1_t __a, uint64x1_t __b) {
5414 return (uint64x1_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5415__ai uint64x2_t vtstq_s64(int64x2_t __a, int64x2_t __b) {
5416 return (uint64x2_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5417__ai uint64x2_t vtstq_u64(uint64x2_t __a, uint64x2_t __b) {
5418 return (uint64x2_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5419__ai uint64x1_t vtst_p64(poly64x1_t __a, poly64x1_t __b) {
5420 return (uint64x1_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5421__ai uint64x2_t vtstq_p64(poly64x2_t __a, poly64x2_t __b) {
5422 return (uint64x2_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5423
5424__ai float64x2_t vcombine_f64(float64x1_t __a, float64x1_t __b) {
5425 return (float64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
5426__ai poly64x2_t vcombine_p64(poly64x1_t __a, poly64x1_t __b) {
5427 return (poly64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
5428
/*
 * vcopyq_lane_<t>(a1, __b1, c1, __d1)
 * Statement-expression macros. Each one:
 *   1. evaluates a1 once into a 128-bit vector local (__a1),
 *   2. evaluates c1 once into a 64-bit vector local (__c1),
 *   3. reads lane __d1 of __c1 with vget_lane_<t>,
 *   4. calls vsetq_lane_<t> to place that element at lane __b1 of __a1.
 * The macro's value is whatever vsetq_lane_<t> yields.
 * __b1 and __d1 are pasted straight into the accessor calls rather than
 * captured in locals (presumably because lane indices must be constant
 * expressions -- confirm). Local names are deliberately left unchanged:
 * the lane accessors may be macros with their own "__a" local.
 */
#define vcopyq_lane_s8(a1, __b1, c1, __d1) __extension__ ({ \
  int8x16_t __a1 = (a1); \
  int8x8_t __c1 = (c1); \
  int8_t __c2 = vget_lane_s8(__c1, __d1); \
  vsetq_lane_s8(__c2, __a1, __b1); \
})
#define vcopyq_lane_s16(a1, __b1, c1, __d1) __extension__ ({ \
  int16x8_t __a1 = (a1); \
  int16x4_t __c1 = (c1); \
  int16_t __c2 = vget_lane_s16(__c1, __d1); \
  vsetq_lane_s16(__c2, __a1, __b1); \
})
#define vcopyq_lane_s32(a1, __b1, c1, __d1) __extension__ ({ \
  int32x4_t __a1 = (a1); \
  int32x2_t __c1 = (c1); \
  int32_t __c2 = vget_lane_s32(__c1, __d1); \
  vsetq_lane_s32(__c2, __a1, __b1); \
})
#define vcopyq_lane_s64(a1, __b1, c1, __d1) __extension__ ({ \
  int64x2_t __a1 = (a1); \
  int64x1_t __c1 = (c1); \
  int64_t __c2 = vget_lane_s64(__c1, __d1); \
  vsetq_lane_s64(__c2, __a1, __b1); \
})
#define vcopyq_lane_u8(a1, __b1, c1, __d1) __extension__ ({ \
  uint8x16_t __a1 = (a1); \
  uint8x8_t __c1 = (c1); \
  uint8_t __c2 = vget_lane_u8(__c1, __d1); \
  vsetq_lane_u8(__c2, __a1, __b1); \
})
#define vcopyq_lane_u16(a1, __b1, c1, __d1) __extension__ ({ \
  uint16x8_t __a1 = (a1); \
  uint16x4_t __c1 = (c1); \
  uint16_t __c2 = vget_lane_u16(__c1, __d1); \
  vsetq_lane_u16(__c2, __a1, __b1); \
})
#define vcopyq_lane_u32(a1, __b1, c1, __d1) __extension__ ({ \
  uint32x4_t __a1 = (a1); \
  uint32x2_t __c1 = (c1); \
  uint32_t __c2 = vget_lane_u32(__c1, __d1); \
  vsetq_lane_u32(__c2, __a1, __b1); \
})
#define vcopyq_lane_u64(a1, __b1, c1, __d1) __extension__ ({ \
  uint64x2_t __a1 = (a1); \
  uint64x1_t __c1 = (c1); \
  uint64_t __c2 = vget_lane_u64(__c1, __d1); \
  vsetq_lane_u64(__c2, __a1, __b1); \
})
#define vcopyq_lane_p8(a1, __b1, c1, __d1) __extension__ ({ \
  poly8x16_t __a1 = (a1); \
  poly8x8_t __c1 = (c1); \
  poly8_t __c2 = vget_lane_p8(__c1, __d1); \
  vsetq_lane_p8(__c2, __a1, __b1); \
})
#define vcopyq_lane_p16(a1, __b1, c1, __d1) __extension__ ({ \
  poly16x8_t __a1 = (a1); \
  poly16x4_t __c1 = (c1); \
  poly16_t __c2 = vget_lane_p16(__c1, __d1); \
  vsetq_lane_p16(__c2, __a1, __b1); \
})
#define vcopyq_lane_f32(a1, __b1, c1, __d1) __extension__ ({ \
  float32x4_t __a1 = (a1); \
  float32x2_t __c1 = (c1); \
  float32_t __c2 = vget_lane_f32(__c1, __d1); \
  vsetq_lane_f32(__c2, __a1, __b1); \
})
#define vcopyq_lane_f64(a1, __b1, c1, __d1) __extension__ ({ \
  float64x2_t __a1 = (a1); \
  float64x1_t __c1 = (c1); \
  float64_t __c2 = vget_lane_f64(__c1, __d1); \
  vsetq_lane_f64(__c2, __a1, __b1); \
})
#define vcopyq_lane_p64(a1, __b1, c1, __d1) __extension__ ({ \
  poly64x2_t __a1 = (a1); \
  poly64x1_t __c1 = (c1); \
  poly64_t __c2 = vget_lane_p64(__c1, __d1); \
  vsetq_lane_p64(__c2, __a1, __b1); \
})
5481
/*
 * vcopyq_laneq_<t>(a1, __b1, c1, __d1)
 * Statement-expression macros where both a1 and c1 are 128-bit vectors.
 * Each one evaluates a1 and c1 once into locals, reads lane __d1 of the
 * source with vgetq_lane_<t>, and calls vsetq_lane_<t> to place that element
 * at lane __b1 of the destination. The macro's value is whatever
 * vsetq_lane_<t> yields.
 * __b1 and __d1 are pasted straight into the accessor calls. Local names are
 * deliberately left unchanged: the lane accessors may be macros with their
 * own "__a" local.
 */
#define vcopyq_laneq_s8(a1, __b1, c1, __d1) __extension__ ({ \
  int8x16_t __a1 = (a1); \
  int8x16_t __c1 = (c1); \
  int8_t __c2 = vgetq_lane_s8(__c1, __d1); \
  vsetq_lane_s8(__c2, __a1, __b1); \
})
#define vcopyq_laneq_s16(a1, __b1, c1, __d1) __extension__ ({ \
  int16x8_t __a1 = (a1); \
  int16x8_t __c1 = (c1); \
  int16_t __c2 = vgetq_lane_s16(__c1, __d1); \
  vsetq_lane_s16(__c2, __a1, __b1); \
})
#define vcopyq_laneq_s32(a1, __b1, c1, __d1) __extension__ ({ \
  int32x4_t __a1 = (a1); \
  int32x4_t __c1 = (c1); \
  int32_t __c2 = vgetq_lane_s32(__c1, __d1); \
  vsetq_lane_s32(__c2, __a1, __b1); \
})
#define vcopyq_laneq_s64(a1, __b1, c1, __d1) __extension__ ({ \
  int64x2_t __a1 = (a1); \
  int64x2_t __c1 = (c1); \
  int64_t __c2 = vgetq_lane_s64(__c1, __d1); \
  vsetq_lane_s64(__c2, __a1, __b1); \
})
#define vcopyq_laneq_u8(a1, __b1, c1, __d1) __extension__ ({ \
  uint8x16_t __a1 = (a1); \
  uint8x16_t __c1 = (c1); \
  uint8_t __c2 = vgetq_lane_u8(__c1, __d1); \
  vsetq_lane_u8(__c2, __a1, __b1); \
})
#define vcopyq_laneq_u16(a1, __b1, c1, __d1) __extension__ ({ \
  uint16x8_t __a1 = (a1); \
  uint16x8_t __c1 = (c1); \
  uint16_t __c2 = vgetq_lane_u16(__c1, __d1); \
  vsetq_lane_u16(__c2, __a1, __b1); \
})
#define vcopyq_laneq_u32(a1, __b1, c1, __d1) __extension__ ({ \
  uint32x4_t __a1 = (a1); \
  uint32x4_t __c1 = (c1); \
  uint32_t __c2 = vgetq_lane_u32(__c1, __d1); \
  vsetq_lane_u32(__c2, __a1, __b1); \
})
#define vcopyq_laneq_u64(a1, __b1, c1, __d1) __extension__ ({ \
  uint64x2_t __a1 = (a1); \
  uint64x2_t __c1 = (c1); \
  uint64_t __c2 = vgetq_lane_u64(__c1, __d1); \
  vsetq_lane_u64(__c2, __a1, __b1); \
})
#define vcopyq_laneq_p8(a1, __b1, c1, __d1) __extension__ ({ \
  poly8x16_t __a1 = (a1); \
  poly8x16_t __c1 = (c1); \
  poly8_t __c2 = vgetq_lane_p8(__c1, __d1); \
  vsetq_lane_p8(__c2, __a1, __b1); \
})
#define vcopyq_laneq_p16(a1, __b1, c1, __d1) __extension__ ({ \
  poly16x8_t __a1 = (a1); \
  poly16x8_t __c1 = (c1); \
  poly16_t __c2 = vgetq_lane_p16(__c1, __d1); \
  vsetq_lane_p16(__c2, __a1, __b1); \
})
#define vcopyq_laneq_f32(a1, __b1, c1, __d1) __extension__ ({ \
  float32x4_t __a1 = (a1); \
  float32x4_t __c1 = (c1); \
  float32_t __c2 = vgetq_lane_f32(__c1, __d1); \
  vsetq_lane_f32(__c2, __a1, __b1); \
})
#define vcopyq_laneq_f64(a1, __b1, c1, __d1) __extension__ ({ \
  float64x2_t __a1 = (a1); \
  float64x2_t __c1 = (c1); \
  float64_t __c2 = vgetq_lane_f64(__c1, __d1); \
  vsetq_lane_f64(__c2, __a1, __b1); \
})
#define vcopyq_laneq_p64(a1, __b1, c1, __d1) __extension__ ({ \
  poly64x2_t __a1 = (a1); \
  poly64x2_t __c1 = (c1); \
  poly64_t __c2 = vgetq_lane_p64(__c1, __d1); \
  vsetq_lane_p64(__c2, __a1, __b1); \
})
5534
/*
 * vcopy_lane_<t>(a1, __b1, c1, __d1)
 * Statement-expression macros where both a1 and c1 are 64-bit vectors.
 * Each one evaluates a1 and c1 once into locals, reads lane __d1 of the
 * source with vget_lane_<t>, and calls vset_lane_<t> to place that element
 * at lane __b1 of the destination. The macro's value is whatever
 * vset_lane_<t> yields.
 * __b1 and __d1 are pasted straight into the accessor calls. Local names are
 * deliberately left unchanged: the lane accessors may be macros with their
 * own "__a" local.
 */
#define vcopy_lane_s8(a1, __b1, c1, __d1) __extension__ ({ \
  int8x8_t __a1 = (a1); \
  int8x8_t __c1 = (c1); \
  int8_t __c2 = vget_lane_s8(__c1, __d1); \
  vset_lane_s8(__c2, __a1, __b1); \
})
#define vcopy_lane_s16(a1, __b1, c1, __d1) __extension__ ({ \
  int16x4_t __a1 = (a1); \
  int16x4_t __c1 = (c1); \
  int16_t __c2 = vget_lane_s16(__c1, __d1); \
  vset_lane_s16(__c2, __a1, __b1); \
})
#define vcopy_lane_s32(a1, __b1, c1, __d1) __extension__ ({ \
  int32x2_t __a1 = (a1); \
  int32x2_t __c1 = (c1); \
  int32_t __c2 = vget_lane_s32(__c1, __d1); \
  vset_lane_s32(__c2, __a1, __b1); \
})
#define vcopy_lane_s64(a1, __b1, c1, __d1) __extension__ ({ \
  int64x1_t __a1 = (a1); \
  int64x1_t __c1 = (c1); \
  int64_t __c2 = vget_lane_s64(__c1, __d1); \
  vset_lane_s64(__c2, __a1, __b1); \
})
#define vcopy_lane_p8(a1, __b1, c1, __d1) __extension__ ({ \
  poly8x8_t __a1 = (a1); \
  poly8x8_t __c1 = (c1); \
  poly8_t __c2 = vget_lane_p8(__c1, __d1); \
  vset_lane_p8(__c2, __a1, __b1); \
})
#define vcopy_lane_p16(a1, __b1, c1, __d1) __extension__ ({ \
  poly16x4_t __a1 = (a1); \
  poly16x4_t __c1 = (c1); \
  poly16_t __c2 = vget_lane_p16(__c1, __d1); \
  vset_lane_p16(__c2, __a1, __b1); \
})
#define vcopy_lane_u8(a1, __b1, c1, __d1) __extension__ ({ \
  uint8x8_t __a1 = (a1); \
  uint8x8_t __c1 = (c1); \
  uint8_t __c2 = vget_lane_u8(__c1, __d1); \
  vset_lane_u8(__c2, __a1, __b1); \
})
#define vcopy_lane_u16(a1, __b1, c1, __d1) __extension__ ({ \
  uint16x4_t __a1 = (a1); \
  uint16x4_t __c1 = (c1); \
  uint16_t __c2 = vget_lane_u16(__c1, __d1); \
  vset_lane_u16(__c2, __a1, __b1); \
})
#define vcopy_lane_u32(a1, __b1, c1, __d1) __extension__ ({ \
  uint32x2_t __a1 = (a1); \
  uint32x2_t __c1 = (c1); \
  uint32_t __c2 = vget_lane_u32(__c1, __d1); \
  vset_lane_u32(__c2, __a1, __b1); \
})
#define vcopy_lane_u64(a1, __b1, c1, __d1) __extension__ ({ \
  uint64x1_t __a1 = (a1); \
  uint64x1_t __c1 = (c1); \
  uint64_t __c2 = vget_lane_u64(__c1, __d1); \
  vset_lane_u64(__c2, __a1, __b1); \
})
#define vcopy_lane_p64(a1, __b1, c1, __d1) __extension__ ({ \
  poly64x1_t __a1 = (a1); \
  poly64x1_t __c1 = (c1); \
  poly64_t __c2 = vget_lane_p64(__c1, __d1); \
  vset_lane_p64(__c2, __a1, __b1); \
})
#define vcopy_lane_f32(a1, __b1, c1, __d1) __extension__ ({ \
  float32x2_t __a1 = (a1); \
  float32x2_t __c1 = (c1); \
  float32_t __c2 = vget_lane_f32(__c1, __d1); \
  vset_lane_f32(__c2, __a1, __b1); \
})
#define vcopy_lane_f64(a1, __b1, c1, __d1) __extension__ ({ \
  float64x1_t __a1 = (a1); \
  float64x1_t __c1 = (c1); \
  float64_t __c2 = vget_lane_f64(__c1, __d1); \
  vset_lane_f64(__c2, __a1, __b1); \
})
5587
/*
 * vcopy_laneq_<t>(a1, __b1, c1, __d1)
 * Statement-expression macros where a1 is a 64-bit vector and c1 is a
 * 128-bit vector. Each one evaluates a1 and c1 once into locals, reads lane
 * __d1 of the source with vgetq_lane_<t>, and calls vset_lane_<t> to place
 * that element at lane __b1 of the destination. The macro's value is
 * whatever vset_lane_<t> yields.
 * __b1 and __d1 are pasted straight into the accessor calls. Local names are
 * deliberately left unchanged: the lane accessors may be macros with their
 * own "__a" local.
 */
#define vcopy_laneq_s8(a1, __b1, c1, __d1) __extension__ ({ \
  int8x8_t __a1 = (a1); \
  int8x16_t __c1 = (c1); \
  int8_t __c2 = vgetq_lane_s8(__c1, __d1); \
  vset_lane_s8(__c2, __a1, __b1); \
})
#define vcopy_laneq_s16(a1, __b1, c1, __d1) __extension__ ({ \
  int16x4_t __a1 = (a1); \
  int16x8_t __c1 = (c1); \
  int16_t __c2 = vgetq_lane_s16(__c1, __d1); \
  vset_lane_s16(__c2, __a1, __b1); \
})
#define vcopy_laneq_s32(a1, __b1, c1, __d1) __extension__ ({ \
  int32x2_t __a1 = (a1); \
  int32x4_t __c1 = (c1); \
  int32_t __c2 = vgetq_lane_s32(__c1, __d1); \
  vset_lane_s32(__c2, __a1, __b1); \
})
#define vcopy_laneq_s64(a1, __b1, c1, __d1) __extension__ ({ \
  int64x1_t __a1 = (a1); \
  int64x2_t __c1 = (c1); \
  int64_t __c2 = vgetq_lane_s64(__c1, __d1); \
  vset_lane_s64(__c2, __a1, __b1); \
})
#define vcopy_laneq_p8(a1, __b1, c1, __d1) __extension__ ({ \
  poly8x8_t __a1 = (a1); \
  poly8x16_t __c1 = (c1); \
  poly8_t __c2 = vgetq_lane_p8(__c1, __d1); \
  vset_lane_p8(__c2, __a1, __b1); \
})
#define vcopy_laneq_p16(a1, __b1, c1, __d1) __extension__ ({ \
  poly16x4_t __a1 = (a1); \
  poly16x8_t __c1 = (c1); \
  poly16_t __c2 = vgetq_lane_p16(__c1, __d1); \
  vset_lane_p16(__c2, __a1, __b1); \
})
#define vcopy_laneq_p64(a1, __b1, c1, __d1) __extension__ ({ \
  poly64x1_t __a1 = (a1); \
  poly64x2_t __c1 = (c1); \
  poly64_t __c2 = vgetq_lane_p64(__c1, __d1); \
  vset_lane_p64(__c2, __a1, __b1); \
})
#define vcopy_laneq_u8(a1, __b1, c1, __d1) __extension__ ({ \
  uint8x8_t __a1 = (a1); \
  uint8x16_t __c1 = (c1); \
  uint8_t __c2 = vgetq_lane_u8(__c1, __d1); \
  vset_lane_u8(__c2, __a1, __b1); \
})
#define vcopy_laneq_u16(a1, __b1, c1, __d1) __extension__ ({ \
  uint16x4_t __a1 = (a1); \
  uint16x8_t __c1 = (c1); \
  uint16_t __c2 = vgetq_lane_u16(__c1, __d1); \
  vset_lane_u16(__c2, __a1, __b1); \
})
#define vcopy_laneq_u32(a1, __b1, c1, __d1) __extension__ ({ \
  uint32x2_t __a1 = (a1); \
  uint32x4_t __c1 = (c1); \
  uint32_t __c2 = vgetq_lane_u32(__c1, __d1); \
  vset_lane_u32(__c2, __a1, __b1); \
})
#define vcopy_laneq_u64(a1, __b1, c1, __d1) __extension__ ({ \
  uint64x1_t __a1 = (a1); \
  uint64x2_t __c1 = (c1); \
  uint64_t __c2 = vgetq_lane_u64(__c1, __d1); \
  vset_lane_u64(__c2, __a1, __b1); \
})
#define vcopy_laneq_f32(a1, __b1, c1, __d1) __extension__ ({ \
  float32x2_t __a1 = (a1); \
  float32x4_t __c1 = (c1); \
  float32_t __c2 = vgetq_lane_f32(__c1, __d1); \
  vset_lane_f32(__c2, __a1, __b1); \
})
#define vcopy_laneq_f64(a1, __b1, c1, __d1) __extension__ ({ \
  float64x1_t __a1 = (a1); \
  float64x2_t __c1 = (c1); \
  float64_t __c2 = vgetq_lane_f64(__c1, __d1); \
  vset_lane_f64(__c2, __a1, __b1); \
})
5640
5641__ai float64x1_t vcreate_f64(uint64_t __a) {
5642 return (float64x1_t)__a; }
5643__ai poly64x1_t vcreate_p64(uint64_t __a) {
5644 return (poly64x1_t)__a; }
5645
/*
 * vcvt_n_f64_{s64,u64} / vcvtq_n_f64_{s64,u64}(a, __b)
 * Statement-expression macros. `a` is evaluated once into a typed local,
 * viewed as a byte vector and passed, together with __b and a selector
 * constant, to __builtin_neon_vcvt_n_f64_v / __builtin_neon_vcvtq_n_f64_v.
 * The result is cast to float64x1_t / float64x2_t. __b is pasted directly
 * into the builtin call.
 * NOTE(review): presumably a fixed-point-to-double conversion with __b
 * fraction bits, per NEON naming -- confirm against the intrinsics reference.
 */
#define vcvt_n_f64_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (float64x1_t)__builtin_neon_vcvt_n_f64_v((int8x8_t)__a, __b, 3); \
})
#define vcvt_n_f64_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (float64x1_t)__builtin_neon_vcvt_n_f64_v((int8x8_t)__a, __b, 19); \
})
#define vcvtq_n_f64_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (float64x2_t)__builtin_neon_vcvtq_n_f64_v((int8x16_t)__a, __b, 35); \
})
#define vcvtq_n_f64_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (float64x2_t)__builtin_neon_vcvtq_n_f64_v((int8x16_t)__a, __b, 51); \
})
5658
5659__ai float64x1_t vdup_n_f64(float64_t __a) {
5660 return (float64x1_t){ __a }; }
5661__ai float64x2_t vdupq_n_f64(float64_t __a) {
5662 return (float64x2_t){ __a, __a }; }
5663__ai poly64x1_t vdup_n_p64(poly64_t __a) {
5664 return (poly64x1_t){ __a }; }
5665__ai poly64x2_t vdupq_n_p64(poly64_t __a) {
5666 return (poly64x2_t){ __a, __a }; }
5667
5668__ai uint64x1_t vcage_f64(float64x1_t __a, float64x1_t __b) {
5669 return (uint64x1_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5670__ai uint64x2_t vcageq_f64(float64x2_t __a, float64x2_t __b) {
5671 return (uint64x2_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5672
5673__ai uint64x1_t vcagt_f64(float64x1_t __a, float64x1_t __b) {
5674 return (uint64x1_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5675__ai uint64x2_t vcagtq_f64(float64x2_t __a, float64x2_t __b) {
5676 return (uint64x2_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5677
5678__ai uint64x1_t vcale_f64(float64x1_t __a, float64x1_t __b) {
5679 return (uint64x1_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5680__ai uint64x2_t vcaleq_f64(float64x2_t __a, float64x2_t __b) {
5681 return (uint64x2_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5682
5683__ai uint64x1_t vcalt_f64(float64x1_t __a, float64x1_t __b) {
5684 return (uint64x1_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 19); }
5685__ai uint64x2_t vcaltq_f64(float64x2_t __a, float64x2_t __b) {
5686 return (uint64x2_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
5687
5688__ai int32x2_t vcvta_s32_f32(float32x2_t __a) {
5689 return (int32x2_t)__builtin_neon_vcvta_s32_v((int8x8_t)__a, 2); }
5690__ai int32x4_t vcvtaq_s32_f32(float32x4_t __a) {
5691 return (int32x4_t)__builtin_neon_vcvtaq_s32_v((int8x16_t)__a, 34); }
5692
5693__ai int64x1_t vcvta_s64_f64(float64x1_t __a) {
5694 return (int64x1_t)__builtin_neon_vcvta_s64_v((int8x8_t)__a, 3); }
5695__ai int64x2_t vcvtaq_s64_f64(float64x2_t __a) {
5696 return (int64x2_t)__builtin_neon_vcvtaq_s64_v((int8x16_t)__a, 35); }
5697
5698__ai uint32x2_t vcvta_u32_f32(float32x2_t __a) {
5699 return (uint32x2_t)__builtin_neon_vcvta_u32_v((int8x8_t)__a, 18); }
5700__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __a) {
5701 return (uint32x4_t)__builtin_neon_vcvtaq_u32_v((int8x16_t)__a, 50); }
5702
5703__ai uint64x1_t vcvta_u64_f64(float64x1_t __a) {
5704 return (uint64x1_t)__builtin_neon_vcvta_u64_v((int8x8_t)__a, 19); }
5705__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __a) {
5706 return (uint64x2_t)__builtin_neon_vcvtaq_u64_v((int8x16_t)__a, 51); }
5707
5708__ai int32x2_t vcvtm_s32_f32(float32x2_t __a) {
5709 return (int32x2_t)__builtin_neon_vcvtm_s32_v((int8x8_t)__a, 2); }
5710__ai int32x4_t vcvtmq_s32_f32(float32x4_t __a) {
5711 return (int32x4_t)__builtin_neon_vcvtmq_s32_v((int8x16_t)__a, 34); }
5712
5713__ai int64x1_t vcvtm_s64_f64(float64x1_t __a) {
5714 return (int64x1_t)__builtin_neon_vcvtm_s64_v((int8x8_t)__a, 3); }
5715__ai int64x2_t vcvtmq_s64_f64(float64x2_t __a) {
5716 return (int64x2_t)__builtin_neon_vcvtmq_s64_v((int8x16_t)__a, 35); }
5717
5718__ai uint32x2_t vcvtm_u32_f32(float32x2_t __a) {
5719 return (uint32x2_t)__builtin_neon_vcvtm_u32_v((int8x8_t)__a, 18); }
5720__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __a) {
5721 return (uint32x4_t)__builtin_neon_vcvtmq_u32_v((int8x16_t)__a, 50); }
5722
5723__ai uint64x1_t vcvtm_u64_f64(float64x1_t __a) {
5724 return (uint64x1_t)__builtin_neon_vcvtm_u64_v((int8x8_t)__a, 19); }
5725__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __a) {
5726 return (uint64x2_t)__builtin_neon_vcvtmq_u64_v((int8x16_t)__a, 51); }
5727
5728__ai int32x2_t vcvtn_s32_f32(float32x2_t __a) {
5729 return (int32x2_t)__builtin_neon_vcvtn_s32_v((int8x8_t)__a, 2); }
5730__ai int32x4_t vcvtnq_s32_f32(float32x4_t __a) {
5731 return (int32x4_t)__builtin_neon_vcvtnq_s32_v((int8x16_t)__a, 34); }
5732
5733__ai int64x1_t vcvtn_s64_f64(float64x1_t __a) {
5734 return (int64x1_t)__builtin_neon_vcvtn_s64_v((int8x8_t)__a, 3); }
5735__ai int64x2_t vcvtnq_s64_f64(float64x2_t __a) {
5736 return (int64x2_t)__builtin_neon_vcvtnq_s64_v((int8x16_t)__a, 35); }
5737
5738__ai uint32x2_t vcvtn_u32_f32(float32x2_t __a) {
5739 return (uint32x2_t)__builtin_neon_vcvtn_u32_v((int8x8_t)__a, 18); }
5740__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __a) {
5741 return (uint32x4_t)__builtin_neon_vcvtnq_u32_v((int8x16_t)__a, 50); }
5742
5743__ai uint64x1_t vcvtn_u64_f64(float64x1_t __a) {
5744 return (uint64x1_t)__builtin_neon_vcvtn_u64_v((int8x8_t)__a, 19); }
5745__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __a) {
5746 return (uint64x2_t)__builtin_neon_vcvtnq_u64_v((int8x16_t)__a, 51); }
5747
5748__ai int32x2_t vcvtp_s32_f32(float32x2_t __a) {
5749 return (int32x2_t)__builtin_neon_vcvtp_s32_v((int8x8_t)__a, 2); }
5750__ai int32x4_t vcvtpq_s32_f32(float32x4_t __a) {
5751 return (int32x4_t)__builtin_neon_vcvtpq_s32_v((int8x16_t)__a, 34); }
5752
5753__ai int64x1_t vcvtp_s64_f64(float64x1_t __a) {
5754 return (int64x1_t)__builtin_neon_vcvtp_s64_v((int8x8_t)__a, 3); }
5755__ai int64x2_t vcvtpq_s64_f64(float64x2_t __a) {
5756 return (int64x2_t)__builtin_neon_vcvtpq_s64_v((int8x16_t)__a, 35); }
5757
5758__ai uint32x2_t vcvtp_u32_f32(float32x2_t __a) {
5759 return (uint32x2_t)__builtin_neon_vcvtp_u32_v((int8x8_t)__a, 18); }
5760__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __a) {
5761 return (uint32x4_t)__builtin_neon_vcvtpq_u32_v((int8x16_t)__a, 50); }
5762
5763__ai uint64x1_t vcvtp_u64_f64(float64x1_t __a) {
5764 return (uint64x1_t)__builtin_neon_vcvtp_u64_v((int8x8_t)__a, 19); }
5765__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __a) {
5766 return (uint64x2_t)__builtin_neon_vcvtpq_u64_v((int8x16_t)__a, 51); }
5767
/*
 * vcvt_n_{s64,u64}_f64 / vcvtq_n_{s64,u64}_f64(a, __b)
 * Statement-expression macros. `a` is evaluated once into a float64 vector
 * local, viewed as a byte vector and passed, together with __b and a
 * selector constant, to the matching __builtin_neon_vcvt*_n_{s64,u64}_v
 * builtin. The result is cast to the 64-bit integer vector type. __b is
 * pasted directly into the builtin call.
 * NOTE(review): presumably a double-to-fixed-point conversion with __b
 * fraction bits, per NEON naming -- confirm against the intrinsics reference.
 */
#define vcvt_n_s64_f64(a, __b) __extension__ ({ \
  float64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vcvt_n_s64_v((int8x8_t)__a, __b, 3); \
})
#define vcvtq_n_s64_f64(a, __b) __extension__ ({ \
  float64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vcvtq_n_s64_v((int8x16_t)__a, __b, 35); \
})

#define vcvt_n_u64_f64(a, __b) __extension__ ({ \
  float64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vcvt_n_u64_v((int8x8_t)__a, __b, 19); \
})
#define vcvtq_n_u64_f64(a, __b) __extension__ ({ \
  float64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vcvtq_n_u64_v((int8x16_t)__a, __b, 51); \
})
5781
5782__ai float32x2_t vdiv_f32(float32x2_t __a, float32x2_t __b) {
5783 return __a / __b; }
5784__ai float64x1_t vdiv_f64(float64x1_t __a, float64x1_t __b) {
5785 return __a / __b; }
5786__ai float32x4_t vdivq_f32(float32x4_t __a, float32x4_t __b) {
5787 return __a / __b; }
5788__ai float64x2_t vdivq_f64(float64x2_t __a, float64x2_t __b) {
5789 return __a / __b; }
5790
5791__ai float32x2_t vmaxnm_f32(float32x2_t __a, float32x2_t __b) {
5792 return (float32x2_t)__builtin_neon_vmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 8); }
5793__ai float64x1_t vmaxnm_f64(float64x1_t __a, float64x1_t __b) {
5794 return (float64x1_t)__builtin_neon_vmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 9); }
5795__ai float32x4_t vmaxnmq_f32(float32x4_t __a, float32x4_t __b) {
5796 return (float32x4_t)__builtin_neon_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
5797__ai float64x2_t vmaxnmq_f64(float64x2_t __a, float64x2_t __b) {
5798 return (float64x2_t)__builtin_neon_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5799
5800__ai float32x2_t vpmaxnm_f32(float32x2_t __a, float32x2_t __b) {
5801 return (float32x2_t)__builtin_neon_vpmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 8); }
5802__ai float32x4_t vpmaxnmq_f32(float32x4_t __a, float32x4_t __b) {
5803 return (float32x4_t)__builtin_neon_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
5804__ai float64x2_t vpmaxnmq_f64(float64x2_t __a, float64x2_t __b) {
5805 return (float64x2_t)__builtin_neon_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5806
5807__ai float32_t vmaxnmv_f32(float32x2_t __a) {
5808 return (float32_t)__builtin_neon_vmaxnmv_f32(__a); }
5809__ai float32_t vmaxnmvq_f32(float32x4_t __a) {
5810 return (float32_t)__builtin_neon_vmaxnmvq_f32(__a); }
5811__ai float64_t vmaxnmvq_f64(float64x2_t __a) {
5812 return (float64_t)__builtin_neon_vmaxnmvq_f64(__a); }
5813
5814__ai float32x2_t vminnm_f32(float32x2_t __a, float32x2_t __b) {
5815 return (float32x2_t)__builtin_neon_vminnm_v((int8x8_t)__a, (int8x8_t)__b, 8); }
5816__ai float64x1_t vminnm_f64(float64x1_t __a, float64x1_t __b) {
5817 return (float64x1_t)__builtin_neon_vminnm_v((int8x8_t)__a, (int8x8_t)__b, 9); }
5818__ai float32x4_t vminnmq_f32(float32x4_t __a, float32x4_t __b) {
5819 return (float32x4_t)__builtin_neon_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
5820__ai float64x2_t vminnmq_f64(float64x2_t __a, float64x2_t __b) {
5821 return (float64x2_t)__builtin_neon_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5822
5823__ai float32x2_t vpminnm_f32(float32x2_t __a, float32x2_t __b) {
5824 return (float32x2_t)__builtin_neon_vpminnm_v((int8x8_t)__a, (int8x8_t)__b, 8); }
5825__ai float32x4_t vpminnmq_f32(float32x4_t __a, float32x4_t __b) {
5826 return (float32x4_t)__builtin_neon_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
5827__ai float64x2_t vpminnmq_f64(float64x2_t __a, float64x2_t __b) {
5828 return (float64x2_t)__builtin_neon_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5829
5830__ai float32_t vminnmv_f32(float32x2_t __a) {
5831 return (float32_t)__builtin_neon_vminnmv_f32(__a); }
5832__ai float32_t vminnmvq_f32(float32x4_t __a) {
5833 return (float32_t)__builtin_neon_vminnmvq_f32(__a); }
5834__ai float64_t vminnmvq_f64(float64x2_t __a) {
5835 return (float64_t)__builtin_neon_vminnmvq_f64(__a); }
5836
5837__ai float64x1_t vfma_f64(float64x1_t __a, float64x1_t __b, float64x1_t __c) {
5838 return (float64x1_t)__builtin_neon_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 9); }
5839__ai float64x2_t vfmaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) {
5840 return (float64x2_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 41); }
5841
5842__ai float32x2_t vfma_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
5843 return vfma_f32(__a, __b, (float32x2_t){ __c, __c }); }
5844__ai float32x4_t vfmaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
5845 return vfmaq_f32(__a, __b, (float32x4_t){ __c, __c, __c, __c }); }
5846
5847__ai float32x2_t vfms_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
5848 return (float32x2_t)__builtin_neon_vfms_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 8); }
5849__ai float64x1_t vfms_f64(float64x1_t __a, float64x1_t __b, float64x1_t __c) {
5850 return (float64x1_t)__builtin_neon_vfms_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 9); }
5851__ai float32x4_t vfmsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
5852 return (float32x4_t)__builtin_neon_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); }
5853__ai float64x2_t vfmsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) {
5854 return (float64x2_t)__builtin_neon_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 41); }
5855
5856__ai float32x2_t vfms_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
5857 return vfms_f32(__a, __b, (float32x2_t){ __c, __c }); }
5858__ai float32x4_t vfmsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
5859 return vfmsq_f32(__a, __b, (float32x4_t){ __c, __c, __c, __c }); }
5860
5861__ai float64x1_t vrecpe_f64(float64x1_t __a) {
5862 return (float64x1_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 9); }
5863__ai float64x2_t vrecpeq_f64(float64x2_t __a) {
5864 return (float64x2_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 41); }
5865
5866__ai float64x1_t vrecps_f64(float64x1_t __a, float64x1_t __b) {
5867 return (float64x1_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 9); }
5868__ai float64x2_t vrecpsq_f64(float64x2_t __a, float64x2_t __b) {
5869 return (float64x2_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5870
5871__ai float32x2_t vrnda_f32(float32x2_t __a) {
5872 return (float32x2_t)__builtin_neon_vrnda_v((int8x8_t)__a, 8); }
5873__ai float64x1_t vrnda_f64(float64x1_t __a) {
5874 return (float64x1_t)__builtin_neon_vrnda_v((int8x8_t)__a, 9); }
5875__ai float32x4_t vrndaq_f32(float32x4_t __a) {
5876 return (float32x4_t)__builtin_neon_vrndaq_v((int8x16_t)__a, 40); }
5877__ai float64x2_t vrndaq_f64(float64x2_t __a) {
5878 return (float64x2_t)__builtin_neon_vrndaq_v((int8x16_t)__a, 41); }
5879
5880__ai float32x2_t vrndi_f32(float32x2_t __a) {
5881 return (float32x2_t)__builtin_neon_vrndi_v((int8x8_t)__a, 8); }
5882__ai float64x1_t vrndi_f64(float64x1_t __a) {
5883 return (float64x1_t)__builtin_neon_vrndi_v((int8x8_t)__a, 9); }
5884__ai float32x4_t vrndiq_f32(float32x4_t __a) {
5885 return (float32x4_t)__builtin_neon_vrndiq_v((int8x16_t)__a, 40); }
5886__ai float64x2_t vrndiq_f64(float64x2_t __a) {
5887 return (float64x2_t)__builtin_neon_vrndiq_v((int8x16_t)__a, 41); }
5888
5889__ai float32x2_t vrndm_f32(float32x2_t __a) {
5890 return (float32x2_t)__builtin_neon_vrndm_v((int8x8_t)__a, 8); }
5891__ai float64x1_t vrndm_f64(float64x1_t __a) {
5892 return (float64x1_t)__builtin_neon_vrndm_v((int8x8_t)__a, 9); }
5893__ai float32x4_t vrndmq_f32(float32x4_t __a) {
5894 return (float32x4_t)__builtin_neon_vrndmq_v((int8x16_t)__a, 40); }
5895__ai float64x2_t vrndmq_f64(float64x2_t __a) {
5896 return (float64x2_t)__builtin_neon_vrndmq_v((int8x16_t)__a, 41); }
5897
5898__ai float32x2_t vrndn_f32(float32x2_t __a) {
5899 return (float32x2_t)__builtin_neon_vrndn_v((int8x8_t)__a, 8); }
5900__ai float64x1_t vrndn_f64(float64x1_t __a) {
5901 return (float64x1_t)__builtin_neon_vrndn_v((int8x8_t)__a, 9); }
5902__ai float32x4_t vrndnq_f32(float32x4_t __a) {
5903 return (float32x4_t)__builtin_neon_vrndnq_v((int8x16_t)__a, 40); }
5904__ai float64x2_t vrndnq_f64(float64x2_t __a) {
5905 return (float64x2_t)__builtin_neon_vrndnq_v((int8x16_t)__a, 41); }
5906
5907__ai float32x2_t vrndp_f32(float32x2_t __a) {
5908 return (float32x2_t)__builtin_neon_vrndp_v((int8x8_t)__a, 8); }
5909__ai float64x1_t vrndp_f64(float64x1_t __a) {
5910 return (float64x1_t)__builtin_neon_vrndp_v((int8x8_t)__a, 9); }
5911__ai float32x4_t vrndpq_f32(float32x4_t __a) {
5912 return (float32x4_t)__builtin_neon_vrndpq_v((int8x16_t)__a, 40); }
5913__ai float64x2_t vrndpq_f64(float64x2_t __a) {
5914 return (float64x2_t)__builtin_neon_vrndpq_v((int8x16_t)__a, 41); }
5915
5916__ai float32x2_t vrndx_f32(float32x2_t __a) {
5917 return (float32x2_t)__builtin_neon_vrndx_v((int8x8_t)__a, 8); }
5918__ai float64x1_t vrndx_f64(float64x1_t __a) {
5919 return (float64x1_t)__builtin_neon_vrndx_v((int8x8_t)__a, 9); }
5920__ai float32x4_t vrndxq_f32(float32x4_t __a) {
5921 return (float32x4_t)__builtin_neon_vrndxq_v((int8x16_t)__a, 40); }
5922__ai float64x2_t vrndxq_f64(float64x2_t __a) {
5923 return (float64x2_t)__builtin_neon_vrndxq_v((int8x16_t)__a, 41); }
5924
5925__ai float32x2_t vrnd_f32(float32x2_t __a) {
5926 return (float32x2_t)__builtin_neon_vrnd_v((int8x8_t)__a, 8); }
5927__ai float64x1_t vrnd_f64(float64x1_t __a) {
5928 return (float64x1_t)__builtin_neon_vrnd_v((int8x8_t)__a, 9); }
5929__ai float32x4_t vrndq_f32(float32x4_t __a) {
5930 return (float32x4_t)__builtin_neon_vrndq_v((int8x16_t)__a, 40); }
5931__ai float64x2_t vrndq_f64(float64x2_t __a) {
5932 return (float64x2_t)__builtin_neon_vrndq_v((int8x16_t)__a, 41); }
5933
5934__ai float64x1_t vrsqrte_f64(float64x1_t __a) {
5935 return (float64x1_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 9); }
5936__ai float64x2_t vrsqrteq_f64(float64x2_t __a) {
5937 return (float64x2_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 41); }
5938
5939__ai float64x1_t vrsqrts_f64(float64x1_t __a, float64x1_t __b) {
5940 return (float64x1_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 9); }
5941__ai float64x2_t vrsqrtsq_f64(float64x2_t __a, float64x2_t __b) {
5942 return (float64x2_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
5943
5944__ai float32x2_t vsqrt_f32(float32x2_t __a) {
5945 return (float32x2_t)__builtin_neon_vsqrt_v((int8x8_t)__a, 8); }
5946__ai float64x1_t vsqrt_f64(float64x1_t __a) {
5947 return (float64x1_t)__builtin_neon_vsqrt_v((int8x8_t)__a, 9); }
5948__ai float32x4_t vsqrtq_f32(float32x4_t __a) {
5949 return (float32x4_t)__builtin_neon_vsqrtq_v((int8x16_t)__a, 40); }
5950__ai float64x2_t vsqrtq_f64(float64x2_t __a) {
5951 return (float64x2_t)__builtin_neon_vsqrtq_v((int8x16_t)__a, 41); }
5952
/* Lane extraction for float64 / poly64 vectors.  The vector argument is copied
 * into a typed local (so it is expanded exactly once), then handed to the
 * builtin together with the lane index __b.  The poly64 variants reuse the
 * i64 builtins by viewing the vector as int64x1_t / int64x2_t. */
#define vget_lane_f64(a, __b) __extension__ ({ \
  float64x1_t __a = (a); \
  float64_t __ret = (float64_t)__builtin_neon_vget_lane_f64(__a, __b); \
  __ret; })
#define vgetq_lane_f64(a, __b) __extension__ ({ \
  float64x2_t __a = (a); \
  float64_t __ret = (float64_t)__builtin_neon_vgetq_lane_f64(__a, __b); \
  __ret; })
#define vget_lane_p64(a, __b) __extension__ ({ \
  poly64x1_t __a = (a); \
  poly64_t __ret = (poly64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); \
  __ret; })
#define vgetq_lane_p64(a, __b) __extension__ ({ \
  poly64x2_t __a = (a); \
  poly64_t __ret = (poly64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); \
  __ret; })
5965
/* vld1 for float64 / poly64 vectors: passes the pointer __a and a per-type
 * constant to __builtin_neon_vld1{q}_v and casts the loaded value to the
 * requested vector type. */
#define vld1q_f64(__a) __extension__ ({ \
  float64x2_t __ret = (float64x2_t)__builtin_neon_vld1q_v(__a, 41); \
  __ret; })
#define vld1_f64(__a) __extension__ ({ \
  float64x1_t __ret = (float64x1_t)__builtin_neon_vld1_v(__a, 9); \
  __ret; })
#define vld1_p64(__a) __extension__ ({ \
  poly64x1_t __ret = (poly64x1_t)__builtin_neon_vld1_v(__a, 6); \
  __ret; })
#define vld1q_p64(__a) __extension__ ({ \
  poly64x2_t __ret = (poly64x2_t)__builtin_neon_vld1q_v(__a, 38); \
  __ret; })
5974
/* vld1_dup for float64 / poly64 vectors: passes the pointer __a and a per-type
 * constant to __builtin_neon_vld1{q}_dup_v and casts the result to the
 * requested vector type. */
#define vld1q_dup_f64(__a) __extension__ ({ \
  float64x2_t __ret = (float64x2_t)__builtin_neon_vld1q_dup_v(__a, 41); \
  __ret; })
#define vld1q_dup_p64(__a) __extension__ ({ \
  poly64x2_t __ret = (poly64x2_t)__builtin_neon_vld1q_dup_v(__a, 38); \
  __ret; })
#define vld1_dup_f64(__a) __extension__ ({ \
  float64x1_t __ret = (float64x1_t)__builtin_neon_vld1_dup_v(__a, 9); \
  __ret; })
#define vld1_dup_p64(__a) __extension__ ({ \
  poly64x1_t __ret = (poly64x1_t)__builtin_neon_vld1_dup_v(__a, 6); \
  __ret; })
5983
/* vld1_lane for float64 / poly64 vectors.  The existing vector b is copied
 * into a typed local, reinterpreted as a byte vector and passed with the
 * pointer __a and lane index __c to __builtin_neon_vld1{q}_lane_v; the result
 * is cast back to the vector type. */
#define vld1q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2_t __b = (b); \
  float64x2_t __ret = \
      (float64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 41); \
  __ret; })
#define vld1q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2_t __b = (b); \
  poly64x2_t __ret = \
      (poly64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); \
  __ret; })
#define vld1_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1_t __b = (b); \
  float64x1_t __ret = \
      (float64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 9); \
  __ret; })
#define vld1_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1_t __b = (b); \
  poly64x1_t __ret = \
      (poly64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); \
  __ret; })
5996
/* vld1*_x2 family: each macro declares a two-vector result tuple, lets
 * __builtin_neon_vld1{q}_x2_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) rather than a plain
 * identifier, so a caller argument that happens to be spelled like the
 * temporary is not captured by the macro's own local. */
#define vld1q_u8_x2(__a) __extension__ ({ \
  uint8x16x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 48); __ret; })
#define vld1q_u16_x2(__a) __extension__ ({ \
  uint16x8x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 49); __ret; })
#define vld1q_u32_x2(__a) __extension__ ({ \
  uint32x4x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 50); __ret; })
#define vld1q_u64_x2(__a) __extension__ ({ \
  uint64x2x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 51); __ret; })
#define vld1q_s8_x2(__a) __extension__ ({ \
  int8x16x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 32); __ret; })
#define vld1q_s16_x2(__a) __extension__ ({ \
  int16x8x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 33); __ret; })
#define vld1q_s32_x2(__a) __extension__ ({ \
  int32x4x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 34); __ret; })
#define vld1q_s64_x2(__a) __extension__ ({ \
  int64x2x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 35); __ret; })
#define vld1q_f16_x2(__a) __extension__ ({ \
  float16x8x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 39); __ret; })
#define vld1q_f32_x2(__a) __extension__ ({ \
  float32x4x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 40); __ret; })
#define vld1q_f64_x2(__a) __extension__ ({ \
  float64x2x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 41); __ret; })
#define vld1q_p8_x2(__a) __extension__ ({ \
  poly8x16x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 36); __ret; })
#define vld1q_p16_x2(__a) __extension__ ({ \
  poly16x8x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 37); __ret; })
#define vld1q_p64_x2(__a) __extension__ ({ \
  poly64x2x2_t __ret; __builtin_neon_vld1q_x2_v(&__ret, __a, 38); __ret; })
#define vld1_u8_x2(__a) __extension__ ({ \
  uint8x8x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 16); __ret; })
#define vld1_u16_x2(__a) __extension__ ({ \
  uint16x4x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 17); __ret; })
#define vld1_u32_x2(__a) __extension__ ({ \
  uint32x2x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 18); __ret; })
#define vld1_u64_x2(__a) __extension__ ({ \
  uint64x1x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 19); __ret; })
#define vld1_s8_x2(__a) __extension__ ({ \
  int8x8x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 0); __ret; })
#define vld1_s16_x2(__a) __extension__ ({ \
  int16x4x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 1); __ret; })
#define vld1_s32_x2(__a) __extension__ ({ \
  int32x2x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 2); __ret; })
#define vld1_s64_x2(__a) __extension__ ({ \
  int64x1x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 3); __ret; })
#define vld1_f16_x2(__a) __extension__ ({ \
  float16x4x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 7); __ret; })
#define vld1_f32_x2(__a) __extension__ ({ \
  float32x2x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 8); __ret; })
#define vld1_f64_x2(__a) __extension__ ({ \
  float64x1x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 9); __ret; })
#define vld1_p8_x2(__a) __extension__ ({ \
  poly8x8x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 4); __ret; })
#define vld1_p16_x2(__a) __extension__ ({ \
  poly16x4x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 5); __ret; })
#define vld1_p64_x2(__a) __extension__ ({ \
  poly64x1x2_t __ret; __builtin_neon_vld1_x2_v(&__ret, __a, 6); __ret; })
6053
/* vld2 (64-bit element variants): declares a two-vector result tuple, lets
 * __builtin_neon_vld2{q}_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld2q_u64(__a) __extension__ ({ \
  uint64x2x2_t __ret; __builtin_neon_vld2q_v(&__ret, __a, 51); __ret; })
#define vld2q_s64(__a) __extension__ ({ \
  int64x2x2_t __ret; __builtin_neon_vld2q_v(&__ret, __a, 35); __ret; })
#define vld2q_f64(__a) __extension__ ({ \
  float64x2x2_t __ret; __builtin_neon_vld2q_v(&__ret, __a, 41); __ret; })
#define vld2_f64(__a) __extension__ ({ \
  float64x1x2_t __ret; __builtin_neon_vld2_v(&__ret, __a, 9); __ret; })
#define vld2_p64(__a) __extension__ ({ \
  poly64x1x2_t __ret; __builtin_neon_vld2_v(&__ret, __a, 6); __ret; })
#define vld2q_p64(__a) __extension__ ({ \
  poly64x2x2_t __ret; __builtin_neon_vld2q_v(&__ret, __a, 38); __ret; })
6066
/* vld2*_dup family: declares a two-vector result tuple, lets
 * __builtin_neon_vld2{q}_dup_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld2q_dup_u8(__a) __extension__ ({ \
  uint8x16x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 48); __ret; })
#define vld2q_dup_u16(__a) __extension__ ({ \
  uint16x8x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 49); __ret; })
#define vld2q_dup_u32(__a) __extension__ ({ \
  uint32x4x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 50); __ret; })
#define vld2q_dup_u64(__a) __extension__ ({ \
  uint64x2x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 51); __ret; })
#define vld2q_dup_s8(__a) __extension__ ({ \
  int8x16x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 32); __ret; })
#define vld2q_dup_s16(__a) __extension__ ({ \
  int16x8x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 33); __ret; })
#define vld2q_dup_s32(__a) __extension__ ({ \
  int32x4x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 34); __ret; })
#define vld2q_dup_s64(__a) __extension__ ({ \
  int64x2x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 35); __ret; })
#define vld2q_dup_f16(__a) __extension__ ({ \
  float16x8x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 39); __ret; })
#define vld2q_dup_f32(__a) __extension__ ({ \
  float32x4x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 40); __ret; })
#define vld2q_dup_f64(__a) __extension__ ({ \
  float64x2x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 41); __ret; })
#define vld2q_dup_p8(__a) __extension__ ({ \
  poly8x16x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 36); __ret; })
#define vld2q_dup_p16(__a) __extension__ ({ \
  poly16x8x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 37); __ret; })
#define vld2q_dup_p64(__a) __extension__ ({ \
  poly64x2x2_t __ret; __builtin_neon_vld2q_dup_v(&__ret, __a, 38); __ret; })
#define vld2_dup_f64(__a) __extension__ ({ \
  float64x1x2_t __ret; __builtin_neon_vld2_dup_v(&__ret, __a, 9); __ret; })
#define vld2_dup_p64(__a) __extension__ ({ \
  poly64x1x2_t __ret; __builtin_neon_vld2_dup_v(&__ret, __a, 6); __ret; })
6099
/* vld2*_lane family: the incoming tuple b is copied into a typed local, its
 * two member vectors are passed (viewed as byte vectors) together with the
 * pointer __a and lane index __c to __builtin_neon_vld2{q}_lane_v, which
 * fills the result tuple through a pointer.
 * The result temporary is named __ret (implementation-reserved) so that a
 * caller argument with the same spelling as the temporary cannot be captured
 * when __a / __c are expanded after its declaration. */
#define vld2q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x2_t __b = (b); \
  uint8x16x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 48); __ret; })
#define vld2q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x2_t __b = (b); \
  uint64x2x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 51); __ret; })
#define vld2q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x2_t __b = (b); \
  int8x16x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, __b.val[0], __b.val[1], __c, 32); __ret; })
#define vld2q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x2_t __b = (b); \
  int64x2x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 35); __ret; })
#define vld2q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x2_t __b = (b); \
  float64x2x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 41); __ret; })
#define vld2q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x2_t __b = (b); \
  poly8x16x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 36); __ret; })
#define vld2q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x2_t __b = (b); \
  poly64x2x2_t __ret; __builtin_neon_vld2q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); __ret; })
#define vld2_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x2_t __b = (b); \
  uint64x1x2_t __ret; __builtin_neon_vld2_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 19); __ret; })
#define vld2_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x2_t __b = (b); \
  int64x1x2_t __ret; __builtin_neon_vld2_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 3); __ret; })
#define vld2_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x2_t __b = (b); \
  float64x1x2_t __ret; __builtin_neon_vld2_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 9); __ret; })
#define vld2_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x2_t __b = (b); \
  poly64x1x2_t __ret; __builtin_neon_vld2_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); __ret; })
6133
/* vld3 (64-bit element variants): declares a three-vector result tuple, lets
 * __builtin_neon_vld3{q}_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld3q_u64(__a) __extension__ ({ \
  uint64x2x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 51); __ret; })
#define vld3q_s64(__a) __extension__ ({ \
  int64x2x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 35); __ret; })
#define vld3q_f64(__a) __extension__ ({ \
  float64x2x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 41); __ret; })
#define vld3_f64(__a) __extension__ ({ \
  float64x1x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 9); __ret; })
#define vld3_p64(__a) __extension__ ({ \
  poly64x1x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 6); __ret; })
#define vld3q_p64(__a) __extension__ ({ \
  poly64x2x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 38); __ret; })
6146
/* vld3*_dup family: declares a three-vector result tuple, lets
 * __builtin_neon_vld3{q}_dup_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld3q_dup_u8(__a) __extension__ ({ \
  uint8x16x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 48); __ret; })
#define vld3q_dup_u16(__a) __extension__ ({ \
  uint16x8x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 49); __ret; })
#define vld3q_dup_u32(__a) __extension__ ({ \
  uint32x4x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 50); __ret; })
#define vld3q_dup_u64(__a) __extension__ ({ \
  uint64x2x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 51); __ret; })
#define vld3q_dup_s8(__a) __extension__ ({ \
  int8x16x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 32); __ret; })
#define vld3q_dup_s16(__a) __extension__ ({ \
  int16x8x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 33); __ret; })
#define vld3q_dup_s32(__a) __extension__ ({ \
  int32x4x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 34); __ret; })
#define vld3q_dup_s64(__a) __extension__ ({ \
  int64x2x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 35); __ret; })
#define vld3q_dup_f16(__a) __extension__ ({ \
  float16x8x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 39); __ret; })
#define vld3q_dup_f32(__a) __extension__ ({ \
  float32x4x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 40); __ret; })
#define vld3q_dup_f64(__a) __extension__ ({ \
  float64x2x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 41); __ret; })
#define vld3q_dup_p8(__a) __extension__ ({ \
  poly8x16x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 36); __ret; })
#define vld3q_dup_p16(__a) __extension__ ({ \
  poly16x8x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 37); __ret; })
#define vld3q_dup_p64(__a) __extension__ ({ \
  poly64x2x3_t __ret; __builtin_neon_vld3q_dup_v(&__ret, __a, 38); __ret; })
#define vld3_dup_f64(__a) __extension__ ({ \
  float64x1x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 9); __ret; })
#define vld3_dup_p64(__a) __extension__ ({ \
  poly64x1x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 6); __ret; })
6179
/* vld3*_lane family: the incoming tuple b is copied into a typed local, its
 * three member vectors are passed (viewed as byte vectors) together with the
 * pointer __a and lane index __c to __builtin_neon_vld3{q}_lane_v, which
 * fills the result tuple through a pointer.
 * The result temporary is named __ret (implementation-reserved) so that a
 * caller argument with the same spelling as the temporary cannot be captured
 * when __a / __c are expanded after its declaration. */
#define vld3q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x3_t __b = (b); \
  uint8x16x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 48); __ret; })
#define vld3q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x3_t __b = (b); \
  uint64x2x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 51); __ret; })
#define vld3q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x3_t __b = (b); \
  int8x16x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, __b.val[0], __b.val[1], __b.val[2], __c, 32); __ret; })
#define vld3q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x3_t __b = (b); \
  int64x2x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 35); __ret; })
#define vld3q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x3_t __b = (b); \
  float64x2x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 41); __ret; })
#define vld3q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x3_t __b = (b); \
  poly8x16x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 36); __ret; })
#define vld3q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x3_t __b = (b); \
  poly64x2x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); __ret; })
#define vld3_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x3_t __b = (b); \
  uint64x1x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 19); __ret; })
#define vld3_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x3_t __b = (b); \
  int64x1x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 3); __ret; })
#define vld3_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x3_t __b = (b); \
  float64x1x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 9); __ret; })
#define vld3_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x3_t __b = (b); \
  poly64x1x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); __ret; })
6213
/* vld1*_x3 family: each macro declares a three-vector result tuple, lets
 * __builtin_neon_vld1{q}_x3_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld1q_u8_x3(__a) __extension__ ({ \
  uint8x16x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 48); __ret; })
#define vld1q_u16_x3(__a) __extension__ ({ \
  uint16x8x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 49); __ret; })
#define vld1q_u32_x3(__a) __extension__ ({ \
  uint32x4x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 50); __ret; })
#define vld1q_u64_x3(__a) __extension__ ({ \
  uint64x2x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 51); __ret; })
#define vld1q_s8_x3(__a) __extension__ ({ \
  int8x16x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 32); __ret; })
#define vld1q_s16_x3(__a) __extension__ ({ \
  int16x8x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 33); __ret; })
#define vld1q_s32_x3(__a) __extension__ ({ \
  int32x4x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 34); __ret; })
#define vld1q_s64_x3(__a) __extension__ ({ \
  int64x2x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 35); __ret; })
#define vld1q_f16_x3(__a) __extension__ ({ \
  float16x8x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 39); __ret; })
#define vld1q_f32_x3(__a) __extension__ ({ \
  float32x4x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 40); __ret; })
#define vld1q_f64_x3(__a) __extension__ ({ \
  float64x2x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 41); __ret; })
#define vld1q_p8_x3(__a) __extension__ ({ \
  poly8x16x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 36); __ret; })
#define vld1q_p16_x3(__a) __extension__ ({ \
  poly16x8x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 37); __ret; })
#define vld1q_p64_x3(__a) __extension__ ({ \
  poly64x2x3_t __ret; __builtin_neon_vld1q_x3_v(&__ret, __a, 38); __ret; })
#define vld1_u8_x3(__a) __extension__ ({ \
  uint8x8x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 16); __ret; })
#define vld1_u16_x3(__a) __extension__ ({ \
  uint16x4x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 17); __ret; })
#define vld1_u32_x3(__a) __extension__ ({ \
  uint32x2x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 18); __ret; })
#define vld1_u64_x3(__a) __extension__ ({ \
  uint64x1x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 19); __ret; })
#define vld1_s8_x3(__a) __extension__ ({ \
  int8x8x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 0); __ret; })
#define vld1_s16_x3(__a) __extension__ ({ \
  int16x4x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 1); __ret; })
#define vld1_s32_x3(__a) __extension__ ({ \
  int32x2x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 2); __ret; })
#define vld1_s64_x3(__a) __extension__ ({ \
  int64x1x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 3); __ret; })
#define vld1_f16_x3(__a) __extension__ ({ \
  float16x4x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 7); __ret; })
#define vld1_f32_x3(__a) __extension__ ({ \
  float32x2x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 8); __ret; })
#define vld1_f64_x3(__a) __extension__ ({ \
  float64x1x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 9); __ret; })
#define vld1_p8_x3(__a) __extension__ ({ \
  poly8x8x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 4); __ret; })
#define vld1_p16_x3(__a) __extension__ ({ \
  poly16x4x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 5); __ret; })
#define vld1_p64_x3(__a) __extension__ ({ \
  poly64x1x3_t __ret; __builtin_neon_vld1_x3_v(&__ret, __a, 6); __ret; })
6270
/* vld4 (64-bit element variants): declares a four-vector result tuple, lets
 * __builtin_neon_vld4{q}_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld4q_u64(__a) __extension__ ({ \
  uint64x2x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 51); __ret; })
#define vld4q_s64(__a) __extension__ ({ \
  int64x2x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 35); __ret; })
#define vld4q_f64(__a) __extension__ ({ \
  float64x2x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 41); __ret; })
#define vld4_f64(__a) __extension__ ({ \
  float64x1x4_t __ret; __builtin_neon_vld4_v(&__ret, __a, 9); __ret; })
#define vld4_p64(__a) __extension__ ({ \
  poly64x1x4_t __ret; __builtin_neon_vld4_v(&__ret, __a, 6); __ret; })
#define vld4q_p64(__a) __extension__ ({ \
  poly64x2x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 38); __ret; })
6283
/* vld4*_dup family: declares a four-vector result tuple, lets
 * __builtin_neon_vld4{q}_dup_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld4q_dup_u8(__a) __extension__ ({ \
  uint8x16x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 48); __ret; })
#define vld4q_dup_u16(__a) __extension__ ({ \
  uint16x8x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 49); __ret; })
#define vld4q_dup_u32(__a) __extension__ ({ \
  uint32x4x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 50); __ret; })
#define vld4q_dup_u64(__a) __extension__ ({ \
  uint64x2x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 51); __ret; })
#define vld4q_dup_s8(__a) __extension__ ({ \
  int8x16x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 32); __ret; })
#define vld4q_dup_s16(__a) __extension__ ({ \
  int16x8x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 33); __ret; })
#define vld4q_dup_s32(__a) __extension__ ({ \
  int32x4x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 34); __ret; })
#define vld4q_dup_s64(__a) __extension__ ({ \
  int64x2x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 35); __ret; })
#define vld4q_dup_f16(__a) __extension__ ({ \
  float16x8x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 39); __ret; })
#define vld4q_dup_f32(__a) __extension__ ({ \
  float32x4x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 40); __ret; })
#define vld4q_dup_f64(__a) __extension__ ({ \
  float64x2x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 41); __ret; })
#define vld4q_dup_p8(__a) __extension__ ({ \
  poly8x16x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 36); __ret; })
#define vld4q_dup_p16(__a) __extension__ ({ \
  poly16x8x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 37); __ret; })
#define vld4q_dup_p64(__a) __extension__ ({ \
  poly64x2x4_t __ret; __builtin_neon_vld4q_dup_v(&__ret, __a, 38); __ret; })
#define vld4_dup_f64(__a) __extension__ ({ \
  float64x1x4_t __ret; __builtin_neon_vld4_dup_v(&__ret, __a, 9); __ret; })
#define vld4_dup_p64(__a) __extension__ ({ \
  poly64x1x4_t __ret; __builtin_neon_vld4_dup_v(&__ret, __a, 6); __ret; })
6316
/* vld4*_lane family: the incoming tuple b is copied into a typed local, its
 * four member vectors are passed (viewed as byte vectors) together with the
 * pointer __a and lane index __c to __builtin_neon_vld4{q}_lane_v, which
 * fills the result tuple through a pointer.
 * The result temporary is named __ret (implementation-reserved) so that a
 * caller argument with the same spelling as the temporary cannot be captured
 * when __a / __c are expanded after its declaration. */
#define vld4q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x4_t __b = (b); \
  uint8x16x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 48); __ret; })
#define vld4q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x4_t __b = (b); \
  uint64x2x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 51); __ret; })
#define vld4q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x4_t __b = (b); \
  int8x16x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 32); __ret; })
#define vld4q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x4_t __b = (b); \
  int64x2x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 35); __ret; })
#define vld4q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x4_t __b = (b); \
  float64x2x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 41); __ret; })
#define vld4q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x4_t __b = (b); \
  poly8x16x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 36); __ret; })
#define vld4q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x4_t __b = (b); \
  poly64x2x4_t __ret; __builtin_neon_vld4q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); __ret; })
#define vld4_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x4_t __b = (b); \
  uint64x1x4_t __ret; __builtin_neon_vld4_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 19); __ret; })
#define vld4_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x4_t __b = (b); \
  int64x1x4_t __ret; __builtin_neon_vld4_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 3); __ret; })
#define vld4_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x4_t __b = (b); \
  float64x1x4_t __ret; __builtin_neon_vld4_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 9); __ret; })
#define vld4_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x4_t __b = (b); \
  poly64x1x4_t __ret; __builtin_neon_vld4_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); __ret; })
6350
/* vld1*_x4 family: each macro declares a four-vector result tuple, lets
 * __builtin_neon_vld1{q}_x4_v fill it through a pointer (source pointer __a,
 * per-type constant last) and yields the tuple.
 * The temporary is named __ret (implementation-reserved) so that a caller
 * argument with the same spelling as the temporary cannot be captured. */
#define vld1q_u8_x4(__a) __extension__ ({ \
  uint8x16x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 48); __ret; })
#define vld1q_u16_x4(__a) __extension__ ({ \
  uint16x8x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 49); __ret; })
#define vld1q_u32_x4(__a) __extension__ ({ \
  uint32x4x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 50); __ret; })
#define vld1q_u64_x4(__a) __extension__ ({ \
  uint64x2x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 51); __ret; })
#define vld1q_s8_x4(__a) __extension__ ({ \
  int8x16x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 32); __ret; })
#define vld1q_s16_x4(__a) __extension__ ({ \
  int16x8x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 33); __ret; })
#define vld1q_s32_x4(__a) __extension__ ({ \
  int32x4x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 34); __ret; })
#define vld1q_s64_x4(__a) __extension__ ({ \
  int64x2x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 35); __ret; })
#define vld1q_f16_x4(__a) __extension__ ({ \
  float16x8x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 39); __ret; })
#define vld1q_f32_x4(__a) __extension__ ({ \
  float32x4x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 40); __ret; })
#define vld1q_f64_x4(__a) __extension__ ({ \
  float64x2x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 41); __ret; })
#define vld1q_p8_x4(__a) __extension__ ({ \
  poly8x16x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 36); __ret; })
#define vld1q_p16_x4(__a) __extension__ ({ \
  poly16x8x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 37); __ret; })
#define vld1q_p64_x4(__a) __extension__ ({ \
  poly64x2x4_t __ret; __builtin_neon_vld1q_x4_v(&__ret, __a, 38); __ret; })
#define vld1_u8_x4(__a) __extension__ ({ \
  uint8x8x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 16); __ret; })
#define vld1_u16_x4(__a) __extension__ ({ \
  uint16x4x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 17); __ret; })
#define vld1_u32_x4(__a) __extension__ ({ \
  uint32x2x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 18); __ret; })
#define vld1_u64_x4(__a) __extension__ ({ \
  uint64x1x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 19); __ret; })
#define vld1_s8_x4(__a) __extension__ ({ \
  int8x8x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 0); __ret; })
#define vld1_s16_x4(__a) __extension__ ({ \
  int16x4x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 1); __ret; })
#define vld1_s32_x4(__a) __extension__ ({ \
  int32x2x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 2); __ret; })
#define vld1_s64_x4(__a) __extension__ ({ \
  int64x1x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 3); __ret; })
#define vld1_f16_x4(__a) __extension__ ({ \
  float16x4x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 7); __ret; })
#define vld1_f32_x4(__a) __extension__ ({ \
  float32x2x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 8); __ret; })
#define vld1_f64_x4(__a) __extension__ ({ \
  float64x1x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 9); __ret; })
#define vld1_p8_x4(__a) __extension__ ({ \
  poly8x8x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 4); __ret; })
#define vld1_p16_x4(__a) __extension__ ({ \
  poly16x4x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 5); __ret; })
#define vld1_p64_x4(__a) __extension__ ({ \
  poly64x1x4_t __ret; __builtin_neon_vld1_x4_v(&__ret, __a, 6); __ret; })
6407
6408__ai float64x1_t vmax_f64(float64x1_t __a, float64x1_t __b) {
6409 return (float64x1_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 9); }
6410__ai float64x2_t vmaxq_f64(float64x2_t __a, float64x2_t __b) {
6411 return (float64x2_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
6412
6413__ai int8x16_t vpmaxq_s8(int8x16_t __a, int8x16_t __b) {
6414 return (int8x16_t)__builtin_neon_vpmaxq_v(__a, __b, 32); }
6415__ai int16x8_t vpmaxq_s16(int16x8_t __a, int16x8_t __b) {
6416 return (int16x8_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
6417__ai int32x4_t vpmaxq_s32(int32x4_t __a, int32x4_t __b) {
6418 return (int32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
6419__ai uint8x16_t vpmaxq_u8(uint8x16_t __a, uint8x16_t __b) {
6420 return (uint8x16_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
6421__ai uint16x8_t vpmaxq_u16(uint16x8_t __a, uint16x8_t __b) {
6422 return (uint16x8_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
6423__ai uint32x4_t vpmaxq_u32(uint32x4_t __a, uint32x4_t __b) {
6424 return (uint32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
6425__ai float32x4_t vpmaxq_f32(float32x4_t __a, float32x4_t __b) {
6426 return (float32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
6427__ai float64x2_t vpmaxq_f64(float64x2_t __a, float64x2_t __b) {
6428 return (float64x2_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
6429
6430__ai float64x1_t vmin_f64(float64x1_t __a, float64x1_t __b) {
6431 return (float64x1_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 9); }
6432__ai float64x2_t vminq_f64(float64x2_t __a, float64x2_t __b) {
6433 return (float64x2_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
6434
6435__ai int8x16_t vpminq_s8(int8x16_t __a, int8x16_t __b) {
6436 return (int8x16_t)__builtin_neon_vpminq_v(__a, __b, 32); }
6437__ai int16x8_t vpminq_s16(int16x8_t __a, int16x8_t __b) {
6438 return (int16x8_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
6439__ai int32x4_t vpminq_s32(int32x4_t __a, int32x4_t __b) {
6440 return (int32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
6441__ai uint8x16_t vpminq_u8(uint8x16_t __a, uint8x16_t __b) {
6442 return (uint8x16_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
6443__ai uint16x8_t vpminq_u16(uint16x8_t __a, uint16x8_t __b) {
6444 return (uint16x8_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
6445__ai uint32x4_t vpminq_u32(uint32x4_t __a, uint32x4_t __b) {
6446 return (uint32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
6447__ai float32x4_t vpminq_f32(float32x4_t __a, float32x4_t __b) {
6448 return (float32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
6449__ai float64x2_t vpminq_f64(float64x2_t __a, float64x2_t __b) {
6450 return (float64x2_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
6451
6452__ai float64x1_t vmla_f64(float64x1_t __a, float64x1_t __b, float64x1_t __c) {
6453 return __a + (__b * __c); }
6454__ai float64x2_t vmlaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) {
6455 return __a + (__b * __c); }
6456
6457__ai float64x1_t vmls_f64(float64x1_t __a, float64x1_t __b, float64x1_t __c) {
6458 return __a - (__b * __c); }
6459__ai float64x2_t vmlsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) {
6460 return __a - (__b * __c); }
6461
6462__ai float64x1_t vmov_n_f64(float64_t __a) {
6463 return (float64x1_t){ __a }; }
6464__ai float64x2_t vmovq_n_f64(float64_t __a) {
6465 return (float64x2_t){ __a, __a }; }
6466
6467__ai float64x1_t vmul_f64(float64x1_t __a, float64x1_t __b) {
6468 return __a * __b; }
6469__ai float64x2_t vmulq_f64(float64x2_t __a, float64x2_t __b) {
6470 return __a * __b; }
6471
6472__ai float32x2_t vmulx_f32(float32x2_t __a, float32x2_t __b) {
6473 return (float32x2_t)__builtin_neon_vmulx_v((int8x8_t)__a, (int8x8_t)__b, 8); }
6474__ai float64x1_t vmulx_f64(float64x1_t __a, float64x1_t __b) {
6475 return (float64x1_t)__builtin_neon_vmulx_v((int8x8_t)__a, (int8x8_t)__b, 9); }
6476__ai float32x4_t vmulxq_f32(float32x4_t __a, float32x4_t __b) {
6477 return (float32x4_t)__builtin_neon_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 40); }
6478__ai float64x2_t vmulxq_f64(float64x2_t __a, float64x2_t __b) {
6479 return (float64x2_t)__builtin_neon_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 41); }
6480
6481__ai int64x1_t vneg_s64(int64x1_t __a) {
6482 return -__a; }
6483__ai float64x1_t vneg_f64(float64x1_t __a) {
6484 return -__a; }
6485__ai float64x2_t vnegq_f64(float64x2_t __a) {
6486 return -__a; }
6487__ai int64x2_t vnegq_s64(int64x2_t __a) {
6488 return -__a; }
6489
6490__ai int64x1_t vqabs_s64(int64x1_t __a) {
6491 return (int64x1_t)__builtin_neon_vqabs_v((int8x8_t)__a, 3); }
6492__ai int64x2_t vqabsq_s64(int64x2_t __a) {
6493 return (int64x2_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 35); }
6494
6495__ai int64x1_t vqneg_s64(int64x1_t __a) {
6496 return (int64x1_t)__builtin_neon_vqneg_v((int8x8_t)__a, 3); }
6497__ai int64x2_t vqnegq_s64(int64x2_t __a) {
6498 return (int64x2_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 35); }
6499
/* vqrshrn_high_n_<t>(a, b, n): applies vqrshrn_n_<t> to b with immediate n and
 * joins the result after a using vcombine (a is the first operand).
 *
 * vcombine is invoked at the element width of b, not of a, so both operands
 * are cast explicitly.  The casts only reinterpret the 64 bits; they replace
 * the implicit (lax) vector conversions the macros previously depended on, so
 * the macros also compile when lax vector conversions are disabled. */
#define vqrshrn_high_n_s16(a, b, __c) \
  __extension__ ({ \
    int8x8_t __a = (a); \
    int16x8_t __b = (b); \
    (int8x16_t)vcombine_s16((int16x4_t)__a, \
                            (int16x4_t)(vqrshrn_n_s16(__b, __c))); \
  })
#define vqrshrn_high_n_s32(a, b, __c) \
  __extension__ ({ \
    int16x4_t __a = (a); \
    int32x4_t __b = (b); \
    (int16x8_t)vcombine_s32((int32x2_t)__a, \
                            (int32x2_t)(vqrshrn_n_s32(__b, __c))); \
  })
#define vqrshrn_high_n_s64(a, b, __c) \
  __extension__ ({ \
    int32x2_t __a = (a); \
    int64x2_t __b = (b); \
    (int32x4_t)vcombine_s64((int64x1_t)__a, \
                            (int64x1_t)(vqrshrn_n_s64(__b, __c))); \
  })
#define vqrshrn_high_n_u16(a, b, __c) \
  __extension__ ({ \
    uint8x8_t __a = (a); \
    uint16x8_t __b = (b); \
    (uint8x16_t)vcombine_u16((uint16x4_t)__a, \
                             (uint16x4_t)(vqrshrn_n_u16(__b, __c))); \
  })
#define vqrshrn_high_n_u32(a, b, __c) \
  __extension__ ({ \
    uint16x4_t __a = (a); \
    uint32x4_t __b = (b); \
    (uint16x8_t)vcombine_u32((uint32x2_t)__a, \
                             (uint32x2_t)(vqrshrn_n_u32(__b, __c))); \
  })
#define vqrshrn_high_n_u64(a, b, __c) \
  __extension__ ({ \
    uint32x2_t __a = (a); \
    uint64x2_t __b = (b); \
    (uint32x4_t)vcombine_u64((uint64x1_t)__a, \
                             (uint64x1_t)(vqrshrn_n_u64(__b, __c))); \
  })
6518
/* vqrshrun_high_n_<t>(a, b, n): applies vqrshrun_n_<t> to b with immediate n
 * and joins the result after a using vcombine (a is the first operand).
 *
 * Both vcombine operands are cast explicitly; the casts only reinterpret the
 * 64 bits and remove the dependence on implicit (lax) vector conversions.
 *
 * NOTE(review): ACLE appears to declare these intrinsics with unsigned `a` and
 * result types; the signed types are kept here so existing callers are
 * unaffected -- confirm before changing. */
#define vqrshrun_high_n_s16(a, b, __c) \
  __extension__ ({ \
    int8x8_t __a = (a); \
    int16x8_t __b = (b); \
    (int8x16_t)vcombine_s16((int16x4_t)__a, \
                            (int16x4_t)(vqrshrun_n_s16(__b, __c))); \
  })
#define vqrshrun_high_n_s32(a, b, __c) \
  __extension__ ({ \
    int16x4_t __a = (a); \
    int32x4_t __b = (b); \
    (int16x8_t)vcombine_s32((int32x2_t)__a, \
                            (int32x2_t)(vqrshrun_n_s32(__b, __c))); \
  })
#define vqrshrun_high_n_s64(a, b, __c) \
  __extension__ ({ \
    int32x2_t __a = (a); \
    int64x2_t __b = (b); \
    (int32x4_t)vcombine_s64((int64x1_t)__a, \
                            (int64x1_t)(vqrshrun_n_s64(__b, __c))); \
  })
6528
/* vqshrn_high_n_<t>(a, b, n): applies vqshrn_n_<t> to b with immediate n and
 * joins the result after a using vcombine (a is the first operand).
 *
 * vcombine is invoked at the element width of b, so both operands are cast
 * explicitly.  The casts only reinterpret the 64 bits and let the macros
 * compile when implicit (lax) vector conversions are disabled. */
#define vqshrn_high_n_s16(a, b, __c) \
  __extension__ ({ \
    int8x8_t __a = (a); \
    int16x8_t __b = (b); \
    (int8x16_t)vcombine_s16((int16x4_t)__a, \
                            (int16x4_t)(vqshrn_n_s16(__b, __c))); \
  })
#define vqshrn_high_n_s32(a, b, __c) \
  __extension__ ({ \
    int16x4_t __a = (a); \
    int32x4_t __b = (b); \
    (int16x8_t)vcombine_s32((int32x2_t)__a, \
                            (int32x2_t)(vqshrn_n_s32(__b, __c))); \
  })
#define vqshrn_high_n_s64(a, b, __c) \
  __extension__ ({ \
    int32x2_t __a = (a); \
    int64x2_t __b = (b); \
    (int32x4_t)vcombine_s64((int64x1_t)__a, \
                            (int64x1_t)(vqshrn_n_s64(__b, __c))); \
  })
#define vqshrn_high_n_u16(a, b, __c) \
  __extension__ ({ \
    uint8x8_t __a = (a); \
    uint16x8_t __b = (b); \
    (uint8x16_t)vcombine_u16((uint16x4_t)__a, \
                             (uint16x4_t)(vqshrn_n_u16(__b, __c))); \
  })
#define vqshrn_high_n_u32(a, b, __c) \
  __extension__ ({ \
    uint16x4_t __a = (a); \
    uint32x4_t __b = (b); \
    (uint16x8_t)vcombine_u32((uint32x2_t)__a, \
                             (uint32x2_t)(vqshrn_n_u32(__b, __c))); \
  })
#define vqshrn_high_n_u64(a, b, __c) \
  __extension__ ({ \
    uint32x2_t __a = (a); \
    uint64x2_t __b = (b); \
    (uint32x4_t)vcombine_u64((uint64x1_t)__a, \
                             (uint64x1_t)(vqshrn_n_u64(__b, __c))); \
  })
6547
/* vqshrun_high_n_<t>(a, b, n): applies vqshrun_n_<t> to b with immediate n and
 * joins the result after a using vcombine (a is the first operand).
 *
 * Both vcombine operands are cast explicitly; the casts only reinterpret the
 * 64 bits and remove the dependence on implicit (lax) vector conversions.
 *
 * NOTE(review): ACLE appears to declare these intrinsics with unsigned `a` and
 * result types; the signed types are kept here so existing callers are
 * unaffected -- confirm before changing. */
#define vqshrun_high_n_s16(a, b, __c) \
  __extension__ ({ \
    int8x8_t __a = (a); \
    int16x8_t __b = (b); \
    (int8x16_t)vcombine_s16((int16x4_t)__a, \
                            (int16x4_t)(vqshrun_n_s16(__b, __c))); \
  })
#define vqshrun_high_n_s32(a, b, __c) \
  __extension__ ({ \
    int16x4_t __a = (a); \
    int32x4_t __b = (b); \
    (int16x8_t)vcombine_s32((int32x2_t)__a, \
                            (int32x2_t)(vqshrun_n_s32(__b, __c))); \
  })
#define vqshrun_high_n_s64(a, b, __c) \
  __extension__ ({ \
    int32x2_t __a = (a); \
    int64x2_t __b = (b); \
    (int32x4_t)vcombine_s64((int64x1_t)__a, \
                            (int64x1_t)(vqshrun_n_s64(__b, __c))); \
  })
6557
6558__ai int8x16_t vqmovn_high_s16(int8x8_t __a, int16x8_t __b) {
6559 int8x8_t __a1 = vqmovn_s16(__b);
6560 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
6561__ai int16x8_t vqmovn_high_s32(int16x4_t __a, int32x4_t __b) {
6562 int16x4_t __a1 = vqmovn_s32(__b);
6563 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
6564__ai int32x4_t vqmovn_high_s64(int32x2_t __a, int64x2_t __b) {
6565 int32x2_t __a1 = vqmovn_s64(__b);
6566 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
6567__ai uint8x16_t vqmovn_high_u16(uint8x8_t __a, uint16x8_t __b) {
6568 uint8x8_t __a1 = vqmovn_u16(__b);
6569 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
6570__ai uint16x8_t vqmovn_high_u32(uint16x4_t __a, uint32x4_t __b) {
6571 uint16x4_t __a1 = vqmovn_u32(__b);
6572 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
6573__ai uint32x4_t vqmovn_high_u64(uint32x2_t __a, uint64x2_t __b) {
6574 uint32x2_t __a1 = vqmovn_u64(__b);
6575 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
6576
6577__ai int8x8_t vrbit_s8(int8x8_t __a) {
6578 return (int8x8_t)__builtin_neon_vrbit_v(__a, 0); }
6579__ai uint8x8_t vrbit_u8(uint8x8_t __a) {
6580 return (uint8x8_t)__builtin_neon_vrbit_v((int8x8_t)__a, 16); }
6581__ai poly8x8_t vrbit_p8(poly8x8_t __a) {
6582 return (poly8x8_t)__builtin_neon_vrbit_v((int8x8_t)__a, 4); }
6583__ai int8x16_t vrbitq_s8(int8x16_t __a) {
6584 return (int8x16_t)__builtin_neon_vrbitq_v(__a, 32); }
6585__ai uint8x16_t vrbitq_u8(uint8x16_t __a) {
6586 return (uint8x16_t)__builtin_neon_vrbitq_v((int8x16_t)__a, 48); }
6587__ai poly8x16_t vrbitq_p8(poly8x16_t __a) {
6588 return (poly8x16_t)__builtin_neon_vrbitq_v((int8x16_t)__a, 36); }
6589
6590__ai int8x8_t vreinterpret_s8_f64(float64x1_t __a) {
6591 return (int8x8_t)__a; }
6592__ai int8x8_t vreinterpret_s8_p64(poly64x1_t __a) {
6593 return (int8x8_t)__a; }
6594__ai int16x4_t vreinterpret_s16_f64(float64x1_t __a) {
6595 return (int16x4_t)__a; }
6596__ai int16x4_t vreinterpret_s16_p64(poly64x1_t __a) {
6597 return (int16x4_t)__a; }
6598__ai int32x2_t vreinterpret_s32_f64(float64x1_t __a) {
6599 return (int32x2_t)__a; }
6600__ai int32x2_t vreinterpret_s32_p64(poly64x1_t __a) {
6601 return (int32x2_t)__a; }
6602__ai int64x1_t vreinterpret_s64_f64(float64x1_t __a) {
6603 return (int64x1_t)__a; }
6604__ai int64x1_t vreinterpret_s64_p64(poly64x1_t __a) {
6605 return (int64x1_t)__a; }
6606__ai uint8x8_t vreinterpret_u8_f64(float64x1_t __a) {
6607 return (uint8x8_t)__a; }
6608__ai uint8x8_t vreinterpret_u8_p64(poly64x1_t __a) {
6609 return (uint8x8_t)__a; }
6610__ai uint16x4_t vreinterpret_u16_f64(float64x1_t __a) {
6611 return (uint16x4_t)__a; }
6612__ai uint16x4_t vreinterpret_u16_p64(poly64x1_t __a) {
6613 return (uint16x4_t)__a; }
6614__ai uint32x2_t vreinterpret_u32_f64(float64x1_t __a) {
6615 return (uint32x2_t)__a; }
6616__ai uint32x2_t vreinterpret_u32_p64(poly64x1_t __a) {
6617 return (uint32x2_t)__a; }
6618__ai uint64x1_t vreinterpret_u64_f64(float64x1_t __a) {
6619 return (uint64x1_t)__a; }
6620__ai uint64x1_t vreinterpret_u64_p64(poly64x1_t __a) {
6621 return (uint64x1_t)__a; }
6622__ai float16x4_t vreinterpret_f16_f64(float64x1_t __a) {
6623 return (float16x4_t)__a; }
6624__ai float16x4_t vreinterpret_f16_p64(poly64x1_t __a) {
6625 return (float16x4_t)__a; }
6626__ai float32x2_t vreinterpret_f32_f64(float64x1_t __a) {
6627 return (float32x2_t)__a; }
6628__ai float32x2_t vreinterpret_f32_p64(poly64x1_t __a) {
6629 return (float32x2_t)__a; }
6630__ai float64x1_t vreinterpret_f64_s8(int8x8_t __a) {
6631 return (float64x1_t)__a; }
6632__ai float64x1_t vreinterpret_f64_s16(int16x4_t __a) {
6633 return (float64x1_t)__a; }
6634__ai float64x1_t vreinterpret_f64_s32(int32x2_t __a) {
6635 return (float64x1_t)__a; }
6636__ai float64x1_t vreinterpret_f64_s64(int64x1_t __a) {
6637 return (float64x1_t)__a; }
6638__ai float64x1_t vreinterpret_f64_u8(uint8x8_t __a) {
6639 return (float64x1_t)__a; }
6640__ai float64x1_t vreinterpret_f64_u16(uint16x4_t __a) {
6641 return (float64x1_t)__a; }
6642__ai float64x1_t vreinterpret_f64_u32(uint32x2_t __a) {
6643 return (float64x1_t)__a; }
6644__ai float64x1_t vreinterpret_f64_u64(uint64x1_t __a) {
6645 return (float64x1_t)__a; }
6646__ai float64x1_t vreinterpret_f64_f16(float16x4_t __a) {
6647 return (float64x1_t)__a; }
6648__ai float64x1_t vreinterpret_f64_f32(float32x2_t __a) {
6649 return (float64x1_t)__a; }
6650__ai float64x1_t vreinterpret_f64_p8(poly8x8_t __a) {
6651 return (float64x1_t)__a; }
6652__ai float64x1_t vreinterpret_f64_p16(poly16x4_t __a) {
6653 return (float64x1_t)__a; }
6654__ai float64x1_t vreinterpret_f64_p64(poly64x1_t __a) {
6655 return (float64x1_t)__a; }
6656__ai poly8x8_t vreinterpret_p8_f64(float64x1_t __a) {
6657 return (poly8x8_t)__a; }
6658__ai poly8x8_t vreinterpret_p8_p64(poly64x1_t __a) {
6659 return (poly8x8_t)__a; }
6660__ai poly16x4_t vreinterpret_p16_f64(float64x1_t __a) {
6661 return (poly16x4_t)__a; }
6662__ai poly16x4_t vreinterpret_p16_p64(poly64x1_t __a) {
6663 return (poly16x4_t)__a; }
6664__ai poly64x1_t vreinterpret_p64_s8(int8x8_t __a) {
6665 return (poly64x1_t)__a; }
6666__ai poly64x1_t vreinterpret_p64_s16(int16x4_t __a) {
6667 return (poly64x1_t)__a; }
6668__ai poly64x1_t vreinterpret_p64_s32(int32x2_t __a) {
6669 return (poly64x1_t)__a; }
6670__ai poly64x1_t vreinterpret_p64_s64(int64x1_t __a) {
6671 return (poly64x1_t)__a; }
6672__ai poly64x1_t vreinterpret_p64_u8(uint8x8_t __a) {
6673 return (poly64x1_t)__a; }
6674__ai poly64x1_t vreinterpret_p64_u16(uint16x4_t __a) {
6675 return (poly64x1_t)__a; }
6676__ai poly64x1_t vreinterpret_p64_u32(uint32x2_t __a) {
6677 return (poly64x1_t)__a; }
6678__ai poly64x1_t vreinterpret_p64_u64(uint64x1_t __a) {
6679 return (poly64x1_t)__a; }
6680__ai poly64x1_t vreinterpret_p64_f16(float16x4_t __a) {
6681 return (poly64x1_t)__a; }
6682__ai poly64x1_t vreinterpret_p64_f32(float32x2_t __a) {
6683 return (poly64x1_t)__a; }
6684__ai poly64x1_t vreinterpret_p64_f64(float64x1_t __a) {
6685 return (poly64x1_t)__a; }
6686__ai poly64x1_t vreinterpret_p64_p8(poly8x8_t __a) {
6687 return (poly64x1_t)__a; }
6688__ai poly64x1_t vreinterpret_p64_p16(poly16x4_t __a) {
6689 return (poly64x1_t)__a; }
6690__ai int8x16_t vreinterpretq_s8_f64(float64x2_t __a) {
6691 return (int8x16_t)__a; }
6692__ai int8x16_t vreinterpretq_s8_p64(poly64x2_t __a) {
6693 return (int8x16_t)__a; }
6694__ai int16x8_t vreinterpretq_s16_f64(float64x2_t __a) {
6695 return (int16x8_t)__a; }
6696__ai int16x8_t vreinterpretq_s16_p64(poly64x2_t __a) {
6697 return (int16x8_t)__a; }
6698__ai int32x4_t vreinterpretq_s32_f64(float64x2_t __a) {
6699 return (int32x4_t)__a; }
6700__ai int32x4_t vreinterpretq_s32_p64(poly64x2_t __a) {
6701 return (int32x4_t)__a; }
6702__ai int64x2_t vreinterpretq_s64_f64(float64x2_t __a) {
6703 return (int64x2_t)__a; }
6704__ai int64x2_t vreinterpretq_s64_p64(poly64x2_t __a) {
6705 return (int64x2_t)__a; }
6706__ai uint8x16_t vreinterpretq_u8_f64(float64x2_t __a) {
6707 return (uint8x16_t)__a; }
6708__ai uint8x16_t vreinterpretq_u8_p64(poly64x2_t __a) {
6709 return (uint8x16_t)__a; }
6710__ai uint16x8_t vreinterpretq_u16_f64(float64x2_t __a) {
6711 return (uint16x8_t)__a; }
6712__ai uint16x8_t vreinterpretq_u16_p64(poly64x2_t __a) {
6713 return (uint16x8_t)__a; }
6714__ai uint32x4_t vreinterpretq_u32_f64(float64x2_t __a) {
6715 return (uint32x4_t)__a; }
6716__ai uint32x4_t vreinterpretq_u32_p64(poly64x2_t __a) {
6717 return (uint32x4_t)__a; }
6718__ai uint64x2_t vreinterpretq_u64_f64(float64x2_t __a) {
6719 return (uint64x2_t)__a; }
6720__ai uint64x2_t vreinterpretq_u64_p64(poly64x2_t __a) {
6721 return (uint64x2_t)__a; }
6722__ai float16x8_t vreinterpretq_f16_f64(float64x2_t __a) {
6723 return (float16x8_t)__a; }
6724__ai float16x8_t vreinterpretq_f16_p64(poly64x2_t __a) {
6725 return (float16x8_t)__a; }
6726__ai float32x4_t vreinterpretq_f32_f64(float64x2_t __a) {
6727 return (float32x4_t)__a; }
6728__ai float32x4_t vreinterpretq_f32_p64(poly64x2_t __a) {
6729 return (float32x4_t)__a; }
6730__ai float64x2_t vreinterpretq_f64_s8(int8x16_t __a) {
6731 return (float64x2_t)__a; }
6732__ai float64x2_t vreinterpretq_f64_s16(int16x8_t __a) {
6733 return (float64x2_t)__a; }
6734__ai float64x2_t vreinterpretq_f64_s32(int32x4_t __a) {
6735 return (float64x2_t)__a; }
6736__ai float64x2_t vreinterpretq_f64_s64(int64x2_t __a) {
6737 return (float64x2_t)__a; }
6738__ai float64x2_t vreinterpretq_f64_u8(uint8x16_t __a) {
6739 return (float64x2_t)__a; }
6740__ai float64x2_t vreinterpretq_f64_u16(uint16x8_t __a) {
6741 return (float64x2_t)__a; }
6742__ai float64x2_t vreinterpretq_f64_u32(uint32x4_t __a) {
6743 return (float64x2_t)__a; }
6744__ai float64x2_t vreinterpretq_f64_u64(uint64x2_t __a) {
6745 return (float64x2_t)__a; }
6746__ai float64x2_t vreinterpretq_f64_f16(float16x8_t __a) {
6747 return (float64x2_t)__a; }
6748__ai float64x2_t vreinterpretq_f64_f32(float32x4_t __a) {
6749 return (float64x2_t)__a; }
6750__ai float64x2_t vreinterpretq_f64_p8(poly8x16_t __a) {
6751 return (float64x2_t)__a; }
6752__ai float64x2_t vreinterpretq_f64_p16(poly16x8_t __a) {
6753 return (float64x2_t)__a; }
6754__ai float64x2_t vreinterpretq_f64_p64(poly64x2_t __a) {
6755 return (float64x2_t)__a; }
6756__ai poly8x16_t vreinterpretq_p8_f64(float64x2_t __a) {
6757 return (poly8x16_t)__a; }
6758__ai poly8x16_t vreinterpretq_p8_p64(poly64x2_t __a) {
6759 return (poly8x16_t)__a; }
6760__ai poly16x8_t vreinterpretq_p16_f64(float64x2_t __a) {
6761 return (poly16x8_t)__a; }
6762__ai poly16x8_t vreinterpretq_p16_p64(poly64x2_t __a) {
6763 return (poly16x8_t)__a; }
6764__ai poly64x2_t vreinterpretq_p64_s8(int8x16_t __a) {
6765 return (poly64x2_t)__a; }
6766__ai poly64x2_t vreinterpretq_p64_s16(int16x8_t __a) {
6767 return (poly64x2_t)__a; }
6768__ai poly64x2_t vreinterpretq_p64_s32(int32x4_t __a) {
6769 return (poly64x2_t)__a; }
6770__ai poly64x2_t vreinterpretq_p64_s64(int64x2_t __a) {
6771 return (poly64x2_t)__a; }
6772__ai poly64x2_t vreinterpretq_p64_u8(uint8x16_t __a) {
6773 return (poly64x2_t)__a; }
6774__ai poly64x2_t vreinterpretq_p64_u16(uint16x8_t __a) {
6775 return (poly64x2_t)__a; }
6776__ai poly64x2_t vreinterpretq_p64_u32(uint32x4_t __a) {
6777 return (poly64x2_t)__a; }
6778__ai poly64x2_t vreinterpretq_p64_u64(uint64x2_t __a) {
6779 return (poly64x2_t)__a; }
6780__ai poly64x2_t vreinterpretq_p64_f16(float16x8_t __a) {
6781 return (poly64x2_t)__a; }
6782__ai poly64x2_t vreinterpretq_p64_f32(float32x4_t __a) {
6783 return (poly64x2_t)__a; }
6784__ai poly64x2_t vreinterpretq_p64_f64(float64x2_t __a) {
6785 return (poly64x2_t)__a; }
6786__ai poly64x2_t vreinterpretq_p64_p8(poly8x16_t __a) {
6787 return (poly64x2_t)__a; }
6788__ai poly64x2_t vreinterpretq_p64_p16(poly16x8_t __a) {
6789 return (poly64x2_t)__a; }
6790
/* vrshrn_high_n_<t>(a, b, n): applies vrshrn_n_<t> to b with immediate n and
 * joins the result after a using vcombine (a is the first operand).
 *
 * vcombine is invoked at the element width of b, so both operands are cast
 * explicitly.  The casts only reinterpret the 64 bits and let the macros
 * compile when implicit (lax) vector conversions are disabled. */
#define vrshrn_high_n_s16(a, b, __c) \
  __extension__ ({ \
    int8x8_t __a = (a); \
    int16x8_t __b = (b); \
    (int8x16_t)vcombine_s16((int16x4_t)__a, \
                            (int16x4_t)(vrshrn_n_s16(__b, __c))); \
  })
#define vrshrn_high_n_s32(a, b, __c) \
  __extension__ ({ \
    int16x4_t __a = (a); \
    int32x4_t __b = (b); \
    (int16x8_t)vcombine_s32((int32x2_t)__a, \
                            (int32x2_t)(vrshrn_n_s32(__b, __c))); \
  })
#define vrshrn_high_n_s64(a, b, __c) \
  __extension__ ({ \
    int32x2_t __a = (a); \
    int64x2_t __b = (b); \
    (int32x4_t)vcombine_s64((int64x1_t)__a, \
                            (int64x1_t)(vrshrn_n_s64(__b, __c))); \
  })
#define vrshrn_high_n_u16(a, b, __c) \
  __extension__ ({ \
    uint8x8_t __a = (a); \
    uint16x8_t __b = (b); \
    (uint8x16_t)vcombine_u16((uint16x4_t)__a, \
                             (uint16x4_t)(vrshrn_n_u16(__b, __c))); \
  })
#define vrshrn_high_n_u32(a, b, __c) \
  __extension__ ({ \
    uint16x4_t __a = (a); \
    uint32x4_t __b = (b); \
    (uint16x8_t)vcombine_u32((uint32x2_t)__a, \
                             (uint32x2_t)(vrshrn_n_u32(__b, __c))); \
  })
#define vrshrn_high_n_u64(a, b, __c) \
  __extension__ ({ \
    uint32x2_t __a = (a); \
    uint64x2_t __b = (b); \
    (uint32x4_t)vcombine_u64((uint64x1_t)__a, \
                             (uint64x1_t)(vrshrn_n_u64(__b, __c))); \
  })
6809
6810__ai float32_t vabds_f32(float32_t __a, float32_t __b) {
6811 return (float32_t)__builtin_neon_vabds_f32(__a, __b); }
6812__ai float64_t vabdd_f64(float64_t __a, float64_t __b) {
6813 return (float64_t)__builtin_neon_vabdd_f64(__a, __b); }
6814
6815__ai int64_t vabsd_s64(int64_t __a) {
6816 return (int64_t)__builtin_neon_vabsd_s64(__a); }
6817
6818__ai int64_t vaddd_s64(int64_t __a, int64_t __b) {
6819 return (int64_t)__builtin_neon_vaddd_s64(__a, __b); }
6820__ai uint64_t vaddd_u64(uint64_t __a, uint64_t __b) {
6821 return (uint64_t)__builtin_neon_vaddd_u64(__a, __b); }
6822
6823__ai float32_t vpadds_f32(float32x2_t __a) {
6824 return (float32_t)__builtin_neon_vpadds_f32(__a); }
6825__ai int64_t vpaddd_s64(int64x2_t __a) {
6826 return (int64_t)__builtin_neon_vpaddd_s64(__a); }
6827__ai float64_t vpaddd_f64(float64x2_t __a) {
6828 return (float64_t)__builtin_neon_vpaddd_f64(__a); }
6829__ai uint64_t vpaddd_u64(uint64x2_t __a) {
6830 return (uint64_t)__builtin_neon_vpaddd_u64((int64x2_t)__a); }
6831
6832__ai int64_t vceqd_s64(int64_t __a, int64_t __b) {
6833 return (int64_t)__builtin_neon_vceqd_s64(__a, __b); }
6834__ai uint64_t vceqd_u64(uint64_t __a, uint64_t __b) {
6835 return (uint64_t)__builtin_neon_vceqd_u64(__a, __b); }
6836
6837__ai int64_t vceqzd_s64(int64_t __a) {
6838 return (int64_t)__builtin_neon_vceqzd_s64(__a); }
6839__ai uint64_t vceqzd_u64(uint64_t __a) {
6840 return (uint64_t)__builtin_neon_vceqzd_u64(__a); }
6841
6842__ai int64_t vcged_s64(int64_t __a, int64_t __b) {
6843 return (int64_t)__builtin_neon_vcged_s64(__a, __b); }
6844
6845__ai int64_t vcgezd_s64(int64_t __a) {
6846 return (int64_t)__builtin_neon_vcgezd_s64(__a); }
6847
6848__ai int64_t vcgtd_s64(int64_t __a, int64_t __b) {
6849 return (int64_t)__builtin_neon_vcgtd_s64(__a, __b); }
6850
6851__ai int64_t vcgtzd_s64(int64_t __a) {
6852 return (int64_t)__builtin_neon_vcgtzd_s64(__a); }
6853
6854__ai uint64_t vcgtd_u64(uint64_t __a, uint64_t __b) {
6855 return (uint64_t)__builtin_neon_vcgtd_u64(__a, __b); }
6856
6857__ai uint64_t vcged_u64(uint64_t __a, uint64_t __b) {
6858 return (uint64_t)__builtin_neon_vcged_u64(__a, __b); }
6859
6860__ai int64_t vcled_s64(int64_t __a, int64_t __b) {
6861 return (int64_t)__builtin_neon_vcled_s64(__a, __b); }
6862__ai uint64_t vcled_u64(uint64_t __a, uint64_t __b) {
6863 return (uint64_t)__builtin_neon_vcled_u64(__a, __b); }
6864
6865__ai int64_t vclezd_s64(int64_t __a) {
6866 return (int64_t)__builtin_neon_vclezd_s64(__a); }
6867
6868__ai int64_t vcltd_s64(int64_t __a, int64_t __b) {
6869 return (int64_t)__builtin_neon_vcltd_s64(__a, __b); }
6870__ai uint64_t vcltd_u64(uint64_t __a, uint64_t __b) {
6871 return (uint64_t)__builtin_neon_vcltd_u64(__a, __b); }
6872
6873__ai int64_t vcltzd_s64(int64_t __a) {
6874 return (int64_t)__builtin_neon_vcltzd_s64(__a); }
6875
6876__ai int64_t vtstd_s64(int64_t __a, int64_t __b) {
6877 return (int64_t)__builtin_neon_vtstd_s64(__a, __b); }
6878__ai uint64_t vtstd_u64(uint64_t __a, uint64_t __b) {
6879 return (uint64_t)__builtin_neon_vtstd_u64(__a, __b); }
6880
6881__ai uint32_t vcages_f32(float32_t __a, float32_t __b) {
6882 return (uint32_t)__builtin_neon_vcages_f32(__a, __b); }
6883__ai uint64_t vcaged_f64(float64_t __a, float64_t __b) {
6884 return (uint64_t)__builtin_neon_vcaged_f64(__a, __b); }
6885
6886__ai uint32_t vcagts_f32(float32_t __a, float32_t __b) {
6887 return (uint32_t)__builtin_neon_vcagts_f32(__a, __b); }
6888__ai uint64_t vcagtd_f64(float64_t __a, float64_t __b) {
6889 return (uint64_t)__builtin_neon_vcagtd_f64(__a, __b); }
6890
6891__ai uint32_t vcales_f32(float32_t __a, float32_t __b) {
6892 return (uint32_t)__builtin_neon_vcales_f32(__a, __b); }
6893__ai uint64_t vcaled_f64(float64_t __a, float64_t __b) {
6894 return (uint64_t)__builtin_neon_vcaled_f64(__a, __b); }
6895
6896__ai uint32_t vcalts_f32(float32_t __a, float32_t __b) {
6897 return (uint32_t)__builtin_neon_vcalts_f32(__a, __b); }
6898__ai uint64_t vcaltd_f64(float64_t __a, float64_t __b) {
6899 return (uint64_t)__builtin_neon_vcaltd_f64(__a, __b); }
6900
6901__ai uint32_t vceqs_f32(float32_t __a, float32_t __b) {
6902 return (uint32_t)__builtin_neon_vceqs_f32(__a, __b); }
6903__ai uint64_t vceqd_f64(float64_t __a, float64_t __b) {
6904 return (uint64_t)__builtin_neon_vceqd_f64(__a, __b); }
6905
6906__ai uint32_t vceqzs_f32(float32_t __a) {
6907 return (uint32_t)__builtin_neon_vceqzs_f32(__a); }
6908__ai uint64_t vceqzd_f64(float64_t __a) {
6909 return (uint64_t)__builtin_neon_vceqzd_f64(__a); }
6910
6911__ai uint32_t vcges_f32(float32_t __a, float32_t __b) {
6912 return (uint32_t)__builtin_neon_vcges_f32(__a, __b); }
6913__ai uint64_t vcged_f64(float64_t __a, float64_t __b) {
6914 return (uint64_t)__builtin_neon_vcged_f64(__a, __b); }
6915
6916__ai uint32_t vcgezs_f32(float32_t __a) {
6917 return (uint32_t)__builtin_neon_vcgezs_f32(__a); }
6918__ai uint64_t vcgezd_f64(float64_t __a) {
6919 return (uint64_t)__builtin_neon_vcgezd_f64(__a); }
6920
6921__ai uint32_t vcgts_f32(float32_t __a, float32_t __b) {
6922 return (uint32_t)__builtin_neon_vcgts_f32(__a, __b); }
6923__ai uint64_t vcgtd_f64(float64_t __a, float64_t __b) {
6924 return (uint64_t)__builtin_neon_vcgtd_f64(__a, __b); }
6925
6926__ai uint32_t vcgtzs_f32(float32_t __a) {
6927 return (uint32_t)__builtin_neon_vcgtzs_f32(__a); }
6928__ai uint64_t vcgtzd_f64(float64_t __a) {
6929 return (uint64_t)__builtin_neon_vcgtzd_f64(__a); }
6930
6931__ai uint32_t vcles_f32(float32_t __a, float32_t __b) {
6932 return (uint32_t)__builtin_neon_vcles_f32(__a, __b); }
6933__ai uint64_t vcled_f64(float64_t __a, float64_t __b) {
6934 return (uint64_t)__builtin_neon_vcled_f64(__a, __b); }
6935
6936__ai uint32_t vclezs_f32(float32_t __a) {
6937 return (uint32_t)__builtin_neon_vclezs_f32(__a); }
6938__ai uint64_t vclezd_f64(float64_t __a) {
6939 return (uint64_t)__builtin_neon_vclezd_f64(__a); }
6940
6941__ai uint32_t vclts_f32(float32_t __a, float32_t __b) {
6942 return (uint32_t)__builtin_neon_vclts_f32(__a, __b); }
6943__ai uint64_t vcltd_f64(float64_t __a, float64_t __b) {
6944 return (uint64_t)__builtin_neon_vcltd_f64(__a, __b); }
6945
6946__ai uint32_t vcltzs_f32(float32_t __a) {
6947 return (uint32_t)__builtin_neon_vcltzs_f32(__a); }
6948__ai uint64_t vcltzd_f64(float64_t __a) {
6949 return (uint64_t)__builtin_neon_vcltzd_f64(__a); }
6950
6951__ai int64_t vcvtad_s64_f64(float64_t __a) {
6952 return (int64_t)__builtin_neon_vcvtad_s64_f64(__a); }
6953
6954__ai int32_t vcvtas_s32_f32(float32_t __a) {
6955 return (int32_t)__builtin_neon_vcvtas_s32_f32(__a); }
6956
6957__ai uint64_t vcvtad_u64_f64(float64_t __a) {
6958 return (uint64_t)__builtin_neon_vcvtad_u64_f64(__a); }
6959
6960__ai uint32_t vcvtas_u32_f32(float32_t __a) {
6961 return (uint32_t)__builtin_neon_vcvtas_u32_f32(__a); }
6962
6963__ai int64_t vcvtmd_s64_f64(float64_t __a) {
6964 return (int64_t)__builtin_neon_vcvtmd_s64_f64(__a); }
6965
6966__ai int32_t vcvtms_s32_f32(float32_t __a) {
6967 return (int32_t)__builtin_neon_vcvtms_s32_f32(__a); }
6968
6969__ai uint64_t vcvtmd_u64_f64(float64_t __a) {
6970 return (uint64_t)__builtin_neon_vcvtmd_u64_f64(__a); }
6971
6972__ai uint32_t vcvtms_u32_f32(float32_t __a) {
6973 return (uint32_t)__builtin_neon_vcvtms_u32_f32(__a); }
6974
6975__ai int64_t vcvtnd_s64_f64(float64_t __a) {
6976 return (int64_t)__builtin_neon_vcvtnd_s64_f64(__a); }
6977
6978__ai int32_t vcvtns_s32_f32(float32_t __a) {
6979 return (int32_t)__builtin_neon_vcvtns_s32_f32(__a); }
6980
6981__ai uint64_t vcvtnd_u64_f64(float64_t __a) {
6982 return (uint64_t)__builtin_neon_vcvtnd_u64_f64(__a); }
6983
6984__ai uint32_t vcvtns_u32_f32(float32_t __a) {
6985 return (uint32_t)__builtin_neon_vcvtns_u32_f32(__a); }
6986
6987__ai int64_t vcvtpd_s64_f64(float64_t __a) {
6988 return (int64_t)__builtin_neon_vcvtpd_s64_f64(__a); }
6989
6990__ai int32_t vcvtps_s32_f32(float32_t __a) {
6991 return (int32_t)__builtin_neon_vcvtps_s32_f32(__a); }
6992
6993__ai uint64_t vcvtpd_u64_f64(float64_t __a) {
6994 return (uint64_t)__builtin_neon_vcvtpd_u64_f64(__a); }
6995
6996__ai uint32_t vcvtps_u32_f32(float32_t __a) {
6997 return (uint32_t)__builtin_neon_vcvtps_u32_f32(__a); }
6998
6999__ai float32_t vcvtxd_f32_f64(float64_t __a) {
7000 return (float32_t)__builtin_neon_vcvtxd_f32_f64(__a); }
7001
7002__ai int64_t vcvtd_s64_f64(float64_t __a) {
7003 return (int64_t)__builtin_neon_vcvtd_s64_f64(__a); }
7004
7005__ai int32_t vcvts_s32_f32(float32_t __a) {
7006 return (int32_t)__builtin_neon_vcvts_s32_f32(__a); }
7007
7008#define vcvts_n_s32_f32(a, __b) __extension__ ({ \
7009 float32_t __a = (a); \
7010 (int32_t)__builtin_neon_vcvts_n_s32_f32(__a, __b); })
7011
7012#define vcvtd_n_s64_f64(a, __b) __extension__ ({ \
7013 float64_t __a = (a); \
7014 (int64_t)__builtin_neon_vcvtd_n_s64_f64(__a, __b); })
7015
7016__ai uint64_t vcvtd_u64_f64(float64_t __a) {
7017 return (uint64_t)__builtin_neon_vcvtd_u64_f64(__a); }
7018
7019__ai uint32_t vcvts_u32_f32(float32_t __a) {
7020 return (uint32_t)__builtin_neon_vcvts_u32_f32(__a); }
7021
7022#define vcvts_n_u32_f32(a, __b) __extension__ ({ \
7023 float32_t __a = (a); \
7024 (uint32_t)__builtin_neon_vcvts_n_u32_f32(__a, __b); })
7025
7026#define vcvtd_n_u64_f64(a, __b) __extension__ ({ \
7027 float64_t __a = (a); \
7028 (uint64_t)__builtin_neon_vcvtd_n_u64_f64(__a, __b); })
7029
7030__ai float32_t vpmaxnms_f32(float32x2_t __a) {
7031 return (float32_t)__builtin_neon_vpmaxnms_f32(__a); }
7032__ai float64_t vpmaxnmqd_f64(float64x2_t __a) {
7033 return (float64_t)__builtin_neon_vpmaxnmqd_f64(__a); }
7034
7035__ai float32_t vpmaxs_f32(float32x2_t __a) {
7036 return (float32_t)__builtin_neon_vpmaxs_f32(__a); }
7037__ai float64_t vpmaxqd_f64(float64x2_t __a) {
7038 return (float64_t)__builtin_neon_vpmaxqd_f64(__a); }
7039
7040__ai float32_t vpminnms_f32(float32x2_t __a) {
7041 return (float32_t)__builtin_neon_vpminnms_f32(__a); }
7042__ai float64_t vpminnmqd_f64(float64x2_t __a) {
7043 return (float64_t)__builtin_neon_vpminnmqd_f64(__a); }
7044
7045__ai float32_t vpmins_f32(float32x2_t __a) {
7046 return (float32_t)__builtin_neon_vpmins_f32(__a); }
7047__ai float64_t vpminqd_f64(float64x2_t __a) {
7048 return (float64_t)__builtin_neon_vpminqd_f64(__a); }
7049
/* Scalar vfma-by-lane macros.  a and b are scalars, c is the vector and __d is
 * handed unchanged to the builtin as its last argument.  The _lane forms take
 * a 64-bit vector c, the _laneq forms a 128-bit vector c. */
#define vfmas_lane_f32(a, b, c, __d) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32_t __b = (b); \
    float32x2_t __c = (c); \
    (float32_t)__builtin_neon_vfmas_lane_f32(__a, __b, __c, __d); \
  })
#define vfmad_lane_f64(a, b, c, __d) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64_t __b = (b); \
    float64x1_t __c = (c); \
    (float64_t)__builtin_neon_vfmad_lane_f64(__a, __b, __c, __d); \
  })

#define vfmas_laneq_f32(a, b, c, __d) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32_t __b = (b); \
    float32x4_t __c = (c); \
    (float32_t)__builtin_neon_vfmas_laneq_f32(__a, __b, __c, __d); \
  })
#define vfmad_laneq_f64(a, b, c, __d) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64_t __b = (b); \
    float64x2_t __c = (c); \
    (float64_t)__builtin_neon_vfmad_laneq_f64(__a, __b, __c, __d); \
  })
7063
/* Scalar vfms-by-lane macros, expressed as the matching vfma macro applied to
 * the negated vector c.
 *
 * The second set of copies (__a1, __b1, __c1) is required: the vfma macros
 * declare their own __a/__b/__c, so passing the outer names would make those
 * inner declarations initialise from themselves. */
#define vfmss_lane_f32(a, b, c, __d) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32_t __b = (b); \
    float32x2_t __c = (c); \
    float32_t __a1 = __a; \
    float32_t __b1 = __b; \
    float32x2_t __c1 = __c; \
    vfmas_lane_f32(__a1, __b1, -__c1, __d); \
  })
#define vfmsd_lane_f64(a, b, c, __d) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64_t __b = (b); \
    float64x1_t __c = (c); \
    float64_t __a1 = __a; \
    float64_t __b1 = __b; \
    float64x1_t __c1 = __c; \
    vfmad_lane_f64(__a1, __b1, -__c1, __d); \
  })

#define vfmss_laneq_f32(a, b, c, __d) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32_t __b = (b); \
    float32x4_t __c = (c); \
    float32_t __a1 = __a; \
    float32_t __b1 = __b; \
    float32x4_t __c1 = __c; \
    vfmas_laneq_f32(__a1, __b1, -__c1, __d); \
  })
#define vfmsd_laneq_f64(a, b, c, __d) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64_t __b = (b); \
    float64x2_t __c = (c); \
    float64_t __a1 = __a; \
    float64_t __b1 = __b; \
    float64x2_t __c1 = __c; \
    vfmad_laneq_f64(__a1, __b1, -__c1, __d); \
  })
7089
7090__ai float32_t vmulxs_f32(float32_t __a, float32_t __b) {
7091 return (float32_t)__builtin_neon_vmulxs_f32(__a, __b); }
7092__ai float64_t vmulxd_f64(float64_t __a, float64_t __b) {
7093 return (float64_t)__builtin_neon_vmulxd_f64(__a, __b); }
7094
7095#define vmulxs_lane_f32(a, b, __c) __extension__ ({ \
7096 float32_t __a = (a); float32x2_t __b = (b); \
7097 float32_t __d1 = vget_lane_f32(__b, __c);\
7098 vmulxs_f32(__a, __d1); })
7099#define vmulxd_lane_f64(a, b, __c) __extension__ ({ \
7100 float64_t __a = (a); float64x1_t __b = (b); \
7101 float64_t __d1 = vget_lane_f64(__b, __c);\
7102 vmulxd_f64(__a, __d1); })
7103
7104#define vmulxs_laneq_f32(a, b, __c) __extension__ ({ \
7105 float32_t __a = (a); float32x4_t __b = (b); \
7106 float32_t __d1 = vgetq_lane_f32(__b, __c);\
7107 vmulxs_f32(__a, __d1); })
7108#define vmulxd_laneq_f64(a, b, __c) __extension__ ({ \
7109 float64_t __a = (a); float64x2_t __b = (b); \
7110 float64_t __d1 = vgetq_lane_f64(__b, __c);\
7111 vmulxd_f64(__a, __d1); })
7112
/* Scalar multiply-by-lane macros: lane __c of the vector b is extracted with
 * vget_lane / vgetq_lane and multiplied with the scalar a using the ordinary
 * `*` operator. */
#define vmuls_lane_f32(a, b, __c) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32x2_t __b = (b); \
    float32_t __d1 = vget_lane_f32(__b, __c); \
    __a * __d1; \
  })
#define vmuld_lane_f64(a, b, __c) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64x1_t __b = (b); \
    float64_t __d1 = vget_lane_f64(__b, __c); \
    __a * __d1; \
  })

#define vmuls_laneq_f32(a, b, __c) \
  __extension__ ({ \
    float32_t __a = (a); \
    float32x4_t __b = (b); \
    float32_t __d1 = vgetq_lane_f32(__b, __c); \
    __a * __d1; \
  })
#define vmuld_laneq_f64(a, b, __c) \
  __extension__ ({ \
    float64_t __a = (a); \
    float64x2_t __b = (b); \
    float64_t __d1 = vgetq_lane_f64(__b, __c); \
    __a * __d1; \
  })
7130
7131__ai float32_t vrecpes_f32(float32_t __a) {
7132 return (float32_t)__builtin_neon_vrecpes_f32(__a); }
7133__ai float64_t vrecped_f64(float64_t __a) {
7134 return (float64_t)__builtin_neon_vrecped_f64(__a); }
7135
7136__ai float32_t vrecpss_f32(float32_t __a, float32_t __b) {
7137 return (float32_t)__builtin_neon_vrecpss_f32(__a, __b); }
7138__ai float64_t vrecpsd_f64(float64_t __a, float64_t __b) {
7139 return (float64_t)__builtin_neon_vrecpsd_f64(__a, __b); }
7140
7141__ai float32_t vrecpxs_f32(float32_t __a) {
7142 return (float32_t)__builtin_neon_vrecpxs_f32(__a); }
7143__ai float64_t vrecpxd_f64(float64_t __a) {
7144 return (float64_t)__builtin_neon_vrecpxd_f64(__a); }
7145
7146__ai float32_t vrsqrtes_f32(float32_t __a) {
7147 return (float32_t)__builtin_neon_vrsqrtes_f32(__a); }
7148__ai float64_t vrsqrted_f64(float64_t __a) {
7149 return (float64_t)__builtin_neon_vrsqrted_f64(__a); }
7150
7151__ai float32_t vrsqrtss_f32(float32_t __a, float32_t __b) {
7152 return (float32_t)__builtin_neon_vrsqrtss_f32(__a, __b); }
7153__ai float64_t vrsqrtsd_f64(float64_t __a, float64_t __b) {
7154 return (float64_t)__builtin_neon_vrsqrtsd_f64(__a, __b); }
7155
/* Lane reads for float16 vectors.
 *
 * The vector is viewed as s16 so the integer lane accessor can be used, and
 * the 16 lane bits are then handed back as float16_t through a union.
 * Yielding the int16_t directly (as these macros did before) gave the macro an
 * integer type, so assigning the result to a float16_t converted the integer
 * value numerically instead of preserving the half-precision bit pattern.
 *
 * __a1 is kept as a separate local because vget_lane_s16 / vgetq_lane_s16 may
 * themselves be macros that declare __a. */
#define vget_lane_f16(a, __b) \
  __extension__ ({ \
    float16x4_t __a = (a); \
    int16x4_t __a1 = vreinterpret_s16_f16(__a); \
    union { int16_t __i; float16_t __f; } __u1; \
    __u1.__i = vget_lane_s16(__a1, __b); \
    __u1.__f; \
  })
#define vgetq_lane_f16(a, __b) \
  __extension__ ({ \
    float16x8_t __a = (a); \
    int16x8_t __a1 = vreinterpretq_s16_f16(__a); \
    union { int16_t __i; float16_t __f; } __u1; \
    __u1.__i = vgetq_lane_s16(__a1, __b); \
    __u1.__f; \
  })
7164
7165__ai int64_t vnegd_s64(int64_t __a) {
7166 return (int64_t)__builtin_neon_vnegd_s64(__a); }
7167
7168__ai int8_t vqaddb_s8(int8_t __a, int8_t __b) {
7169 return (int8_t)__builtin_neon_vqaddb_s8(__a, __b); }
7170__ai int16_t vqaddh_s16(int16_t __a, int16_t __b) {
7171 return (int16_t)__builtin_neon_vqaddh_s16(__a, __b); }
7172__ai int32_t vqadds_s32(int32_t __a, int32_t __b) {
7173 return (int32_t)__builtin_neon_vqadds_s32(__a, __b); }
7174__ai int64_t vqaddd_s64(int64_t __a, int64_t __b) {
7175 return (int64_t)__builtin_neon_vqaddd_s64(__a, __b); }
7176__ai uint8_t vqaddb_u8(uint8_t __a, uint8_t __b) {
7177 return (uint8_t)__builtin_neon_vqaddb_u8(__a, __b); }
7178__ai uint16_t vqaddh_u16(uint16_t __a, uint16_t __b) {
7179 return (uint16_t)__builtin_neon_vqaddh_u16(__a, __b); }
7180__ai uint32_t vqadds_u32(uint32_t __a, uint32_t __b) {
7181 return (uint32_t)__builtin_neon_vqadds_u32(__a, __b); }
7182__ai uint64_t vqaddd_u64(uint64_t __a, uint64_t __b) {
7183 return (uint64_t)__builtin_neon_vqaddd_u64(__a, __b); }
7184
7185__ai int8_t vqrshlb_s8(int8_t __a, int8_t __b) {
7186 return (int8_t)__builtin_neon_vqrshlb_s8(__a, __b); }
7187__ai int16_t vqrshlh_s16(int16_t __a, int16_t __b) {
7188 return (int16_t)__builtin_neon_vqrshlh_s16(__a, __b); }
7189__ai int32_t vqrshls_s32(int32_t __a, int32_t __b) {
7190 return (int32_t)__builtin_neon_vqrshls_s32(__a, __b); }
7191__ai int64_t vqrshld_s64(int64_t __a, int64_t __b) {
7192 return (int64_t)__builtin_neon_vqrshld_s64(__a, __b); }
7193__ai uint8_t vqrshlb_u8(uint8_t __a, uint8_t __b) {
7194 return (uint8_t)__builtin_neon_vqrshlb_u8(__a, __b); }
7195__ai uint16_t vqrshlh_u16(uint16_t __a, uint16_t __b) {
7196 return (uint16_t)__builtin_neon_vqrshlh_u16(__a, __b); }
7197__ai uint32_t vqrshls_u32(uint32_t __a, uint32_t __b) {
7198 return (uint32_t)__builtin_neon_vqrshls_u32(__a, __b); }
7199__ai uint64_t vqrshld_u64(uint64_t __a, uint64_t __b) {
7200 return (uint64_t)__builtin_neon_vqrshld_u64(__a, __b); }
7201
7202__ai int8_t vqshlb_s8(int8_t __a, int8_t __b) {
7203 return (int8_t)__builtin_neon_vqshlb_s8(__a, __b); }
7204__ai int16_t vqshlh_s16(int16_t __a, int16_t __b) {
7205 return (int16_t)__builtin_neon_vqshlh_s16(__a, __b); }
7206__ai int32_t vqshls_s32(int32_t __a, int32_t __b) {
7207 return (int32_t)__builtin_neon_vqshls_s32(__a, __b); }
7208__ai int64_t vqshld_s64(int64_t __a, int64_t __b) {
7209 return (int64_t)__builtin_neon_vqshld_s64(__a, __b); }
7210__ai uint8_t vqshlb_u8(uint8_t __a, uint8_t __b) {
7211 return (uint8_t)__builtin_neon_vqshlb_u8(__a, __b); }
7212__ai uint16_t vqshlh_u16(uint16_t __a, uint16_t __b) {
7213 return (uint16_t)__builtin_neon_vqshlh_u16(__a, __b); }
7214__ai uint32_t vqshls_u32(uint32_t __a, uint32_t __b) {
7215 return (uint32_t)__builtin_neon_vqshls_u32(__a, __b); }
7216__ai uint64_t vqshld_u64(uint64_t __a, uint64_t __b) {
7217 return (uint64_t)__builtin_neon_vqshld_u64(__a, __b); }
7218
7219__ai int8_t vqsubb_s8(int8_t __a, int8_t __b) {
7220 return (int8_t)__builtin_neon_vqsubb_s8(__a, __b); }
7221__ai int16_t vqsubh_s16(int16_t __a, int16_t __b) {
7222 return (int16_t)__builtin_neon_vqsubh_s16(__a, __b); }
7223__ai int32_t vqsubs_s32(int32_t __a, int32_t __b) {
7224 return (int32_t)__builtin_neon_vqsubs_s32(__a, __b); }
7225__ai int64_t vqsubd_s64(int64_t __a, int64_t __b) {
7226 return (int64_t)__builtin_neon_vqsubd_s64(__a, __b); }
7227__ai uint8_t vqsubb_u8(uint8_t __a, uint8_t __b) {
7228 return (uint8_t)__builtin_neon_vqsubb_u8(__a, __b); }
7229__ai uint16_t vqsubh_u16(uint16_t __a, uint16_t __b) {
7230 return (uint16_t)__builtin_neon_vqsubh_u16(__a, __b); }
7231__ai uint32_t vqsubs_u32(uint32_t __a, uint32_t __b) {
7232 return (uint32_t)__builtin_neon_vqsubs_u32(__a, __b); }
7233__ai uint64_t vqsubd_u64(uint64_t __a, uint64_t __b) {
7234 return (uint64_t)__builtin_neon_vqsubd_u64(__a, __b); }
7235
7236__ai int64_t vrshld_s64(int64_t __a, int64_t __b) {
7237 return (int64_t)__builtin_neon_vrshld_s64(__a, __b); }
7238__ai uint64_t vrshld_u64(uint64_t __a, uint64_t __b) {
7239 return (uint64_t)__builtin_neon_vrshld_u64(__a, __b); }
7240
7241__ai float64_t vcvtd_f64_s64(int64_t __a) {
7242 return (float64_t)__builtin_neon_vcvtd_f64_s64(__a); }
7243
7244__ai float32_t vcvts_f32_s32(int32_t __a) {
7245 return (float32_t)__builtin_neon_vcvts_f32_s32(__a); }
7246
/* vcvt*_n_* macros: `a` is evaluated once into a typed temporary; `__b` is
 * substituted textually into the builtin call. The value of each macro is
 * the builtin result cast to float32_t / float64_t. */
#define vcvts_n_f32_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (float32_t)__builtin_neon_vcvts_n_f32_s32(__s0, __b); \
})
#define vcvts_n_f32_u32(a, __b) __extension__ ({ \
  uint32_t __s0 = (a); \
  (float32_t)__builtin_neon_vcvts_n_f32_u32(__s0, __b); \
})

#define vcvtd_n_f64_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (float64_t)__builtin_neon_vcvtd_n_f64_s64(__s0, __b); \
})
#define vcvtd_n_f64_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (float64_t)__builtin_neon_vcvtd_n_f64_u64(__s0, __b); \
})
7260
/* Return a copy of half-precision vector `b` with lane __c replaced by `a`.
 *
 * The new element must be inserted with its bit pattern intact, so it is
 * copied bit-for-bit into an int16_t before the integer lane setter is used.
 * (A plain `(int16_t)` cast converts the value numerically and stores the
 * wrong bits.) The vector argument is read only through the local `__b`, so
 * `b` is evaluated exactly once.
 *
 * a   - float16_t value to insert (evaluated once).
 * b   - float16x4_t source vector (evaluated once).
 * __c - lane index, passed straight through to vset_lane_s16. */
#define vset_lane_f16(a, b, __c) __extension__ ({ \
  float16_t __a = (a); float16x4_t __b = (b); \
  int16_t __a1; \
  __builtin_memcpy(&__a1, &__a, sizeof(__a1)); \
  int16x4_t __b1 = vreinterpret_s16_f16(__b); \
  int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c); \
  vreinterpret_f16_s16(__b2); })
/* 128-bit counterpart of vset_lane_f16: same bit-preserving insertion into a
 * float16x8_t, with each macro argument evaluated once. */
#define vsetq_lane_f16(a, b, __c) __extension__ ({ \
  float16_t __a = (a); float16x8_t __b = (b); \
  int16_t __a1; \
  __builtin_memcpy(&__a1, &__a, sizeof(__a1)); \
  int16x8_t __b1 = vreinterpretq_s16_f16(__b); \
  int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c); \
  vreinterpretq_f16_s16(__b2); })
7273
7274__ai int64_t vshld_s64(int64_t __a, int64_t __b) {
7275 return (int64_t)__builtin_neon_vshld_s64(__a, __b); }
7276__ai uint64_t vshld_u64(uint64_t __a, uint64_t __b) {
7277 return (uint64_t)__builtin_neon_vshld_u64(__a, __b); }
7278
/* vshld_n macros: `a` is captured once in a 64-bit temporary and `__b` is
 * substituted textually into the builtin call. */
#define vshld_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int64_t)__builtin_neon_vshld_n_s64(__s0, __b); \
})
#define vshld_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint64_t)__builtin_neon_vshld_n_u64(__s0, __b); \
})
7285
/* vslid_n macros: `a` and `b` are each captured once (in that order) and
 * passed with the textual `__c` to the matching builtin. */
#define vslid_n_s64(a, b, __c) __extension__ ({ \
  int64_t __s0 = (a); \
  int64_t __s1 = (b); \
  (int64_t)__builtin_neon_vslid_n_s64(__s0, __s1, __c); \
})
#define vslid_n_u64(a, b, __c) __extension__ ({ \
  uint64_t __s0 = (a); \
  uint64_t __s1 = (b); \
  (uint64_t)__builtin_neon_vslid_n_u64(__s0, __s1, __c); \
})
7292
7293__ai int8_t vqabsb_s8(int8_t __a) {
7294 return (int8_t)__builtin_neon_vqabsb_s8(__a); }
7295__ai int16_t vqabsh_s16(int16_t __a) {
7296 return (int16_t)__builtin_neon_vqabsh_s16(__a); }
7297__ai int32_t vqabss_s32(int32_t __a) {
7298 return (int32_t)__builtin_neon_vqabss_s32(__a); }
7299__ai int64_t vqabsd_s64(int64_t __a) {
7300 return (int64_t)__builtin_neon_vqabsd_s64(__a); }
7301
7302__ai int32_t vqdmlalh_s16(int32_t __a, int16_t __b, int16_t __c) {
7303 return (int32_t)__builtin_neon_vqdmlalh_s16(__a, __b, __c); }
7304__ai int64_t vqdmlals_s32(int64_t __a, int32_t __b, int32_t __c) {
7305 return (int64_t)__builtin_neon_vqdmlals_s32(__a, __b, __c); }
7306
/* vqdmlal lane macros (64-bit vector operand): a, b and c are each captured
 * once, in that order; `__d` is substituted textually into the builtin
 * call. */
#define vqdmlalh_lane_s16(a, b, c, __d) __extension__ ({ \
  int32_t __s0 = (a); \
  int16_t __s1 = (b); \
  int16x4_t __s2 = (c); \
  (int32_t)__builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __s2, __d); \
})
#define vqdmlals_lane_s32(a, b, c, __d) __extension__ ({ \
  int64_t __s0 = (a); \
  int32_t __s1 = (b); \
  int32x2_t __s2 = (c); \
  (int64_t)__builtin_neon_vqdmlals_lane_s32(__s0, __s1, __s2, __d); \
})
7313
/* vqdmlal laneq macros (128-bit vector operand): a, b and c are each
 * captured once, in that order; `__d` is substituted textually into the
 * builtin call. */
#define vqdmlalh_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32_t __s0 = (a); \
  int16_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  (int32_t)__builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __s2, __d); \
})
#define vqdmlals_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64_t __s0 = (a); \
  int32_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  (int64_t)__builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __s2, __d); \
})
7320
7321__ai int32_t vqdmlslh_s16(int32_t __a, int16_t __b, int16_t __c) {
7322 return (int32_t)__builtin_neon_vqdmlslh_s16(__a, __b, __c); }
7323__ai int64_t vqdmlsls_s32(int64_t __a, int32_t __b, int32_t __c) {
7324 return (int64_t)__builtin_neon_vqdmlsls_s32(__a, __b, __c); }
7325
/* vqdmlsl lane macros (64-bit vector operand): a, b and c are each captured
 * once, in that order; `__d` is substituted textually into the builtin
 * call. */
#define vqdmlslh_lane_s16(a, b, c, __d) __extension__ ({ \
  int32_t __s0 = (a); \
  int16_t __s1 = (b); \
  int16x4_t __s2 = (c); \
  (int32_t)__builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __s2, __d); \
})
#define vqdmlsls_lane_s32(a, b, c, __d) __extension__ ({ \
  int64_t __s0 = (a); \
  int32_t __s1 = (b); \
  int32x2_t __s2 = (c); \
  (int64_t)__builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __s2, __d); \
})
7332
/* vqdmlsl laneq macros (128-bit vector operand): a, b and c are each
 * captured once, in that order; `__d` is substituted textually into the
 * builtin call. */
#define vqdmlslh_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32_t __s0 = (a); \
  int16_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  (int32_t)__builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __s2, __d); \
})
#define vqdmlsls_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64_t __s0 = (a); \
  int32_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  (int64_t)__builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __s2, __d); \
})
7339
7340__ai int16_t vqdmulhh_s16(int16_t __a, int16_t __b) {
7341 return (int16_t)__builtin_neon_vqdmulhh_s16(__a, __b); }
7342__ai int32_t vqdmulhs_s32(int32_t __a, int32_t __b) {
7343 return (int32_t)__builtin_neon_vqdmulhs_s32(__a, __b); }
7344
/* vqdmulh lane macros: the scalar `a` and the 64-bit vector `b` are each
 * captured once; lane __c of the vector is read with vget_lane_* and the
 * pair is handed to the scalar vqdmulh wrapper. */
#define vqdmulhh_lane_s16(a, b, __c) __extension__ ({ \
  int16_t __s0 = (a); \
  int16x4_t __s1 = (b); \
  vqdmulhh_s16(__s0, vget_lane_s16(__s1, __c)); \
})
#define vqdmulhs_lane_s32(a, b, __c) __extension__ ({ \
  int32_t __s0 = (a); \
  int32x2_t __s1 = (b); \
  vqdmulhs_s32(__s0, vget_lane_s32(__s1, __c)); \
})
7351
/* vqdmulh laneq macros: same shape as the lane variants, but the vector
 * operand is 128 bits wide and the lane is read with vgetq_lane_*. */
#define vqdmulhh_laneq_s16(a, b, __c) __extension__ ({ \
  int16_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqdmulhh_s16(__s0, vgetq_lane_s16(__s1, __c)); \
})
#define vqdmulhs_laneq_s32(a, b, __c) __extension__ ({ \
  int32_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqdmulhs_s32(__s0, vgetq_lane_s32(__s1, __c)); \
})
7358
7359__ai int32_t vqdmullh_s16(int16_t __a, int16_t __b) {
7360 return (int32_t)__builtin_neon_vqdmullh_s16(__a, __b); }
7361__ai int64_t vqdmulls_s32(int32_t __a, int32_t __b) {
7362 return (int64_t)__builtin_neon_vqdmulls_s32(__a, __b); }
7363
/* vqdmull lane macros: multiply scalar `a` with lane __c of the 64-bit
 * vector `b` via the scalar vqdmull wrapper.
 *
 * The lane is read from the local copy `__b`, not from the raw macro
 * argument, so `b` is evaluated exactly once (side effects in the argument
 * are not repeated) and the argument is type-checked against the vector
 * type. This matches the vqdmulh*_lane_* macros.
 *
 * a   - narrow scalar operand (evaluated once).
 * b   - vector operand (evaluated once).
 * __c - lane index, passed straight through to vget_lane_*. */
#define vqdmullh_lane_s16(a, b, __c) __extension__ ({ \
  int16_t __a = (a); int16x4_t __b = (b); \
  vqdmullh_s16(__a, vget_lane_s16(__b, __c)); })
#define vqdmulls_lane_s32(a, b, __c) __extension__ ({ \
  int32_t __a = (a); int32x2_t __b = (b); \
  vqdmulls_s32(__a, vget_lane_s32(__b, __c)); })
7370
/* vqdmull laneq macros: multiply scalar `a` with lane __c of the 128-bit
 * vector `b` via the scalar vqdmull wrapper.
 *
 * The lane is read from the local copy `__b`, not from the raw macro
 * argument, so `b` is evaluated exactly once and is type-checked against
 * the vector type. This matches the vqdmulh*_laneq_* macros.
 *
 * a   - narrow scalar operand (evaluated once).
 * b   - vector operand (evaluated once).
 * __c - lane index, passed straight through to vgetq_lane_*. */
#define vqdmullh_laneq_s16(a, b, __c) __extension__ ({ \
  int16_t __a = (a); int16x8_t __b = (b); \
  vqdmullh_s16(__a, vgetq_lane_s16(__b, __c)); })
#define vqdmulls_laneq_s32(a, b, __c) __extension__ ({ \
  int32_t __a = (a); int32x4_t __b = (b); \
  vqdmulls_s32(__a, vgetq_lane_s32(__b, __c)); })
7377
7378__ai int8_t vqnegb_s8(int8_t __a) {
7379 return (int8_t)__builtin_neon_vqnegb_s8(__a); }
7380__ai int16_t vqnegh_s16(int16_t __a) {
7381 return (int16_t)__builtin_neon_vqnegh_s16(__a); }
7382__ai int32_t vqnegs_s32(int32_t __a) {
7383 return (int32_t)__builtin_neon_vqnegs_s32(__a); }
7384__ai int64_t vqnegd_s64(int64_t __a) {
7385 return (int64_t)__builtin_neon_vqnegd_s64(__a); }
7386
7387__ai int16_t vqrdmulhh_s16(int16_t __a, int16_t __b) {
7388 return (int16_t)__builtin_neon_vqrdmulhh_s16(__a, __b); }
7389__ai int32_t vqrdmulhs_s32(int32_t __a, int32_t __b) {
7390 return (int32_t)__builtin_neon_vqrdmulhs_s32(__a, __b); }
7391
/* vqrdmulh lane macros: the scalar `a` and the 64-bit vector `b` are each
 * captured once; lane __c of the vector is read with vget_lane_* and the
 * pair is handed to the scalar vqrdmulh wrapper. */
#define vqrdmulhh_lane_s16(a, b, __c) __extension__ ({ \
  int16_t __s0 = (a); \
  int16x4_t __s1 = (b); \
  vqrdmulhh_s16(__s0, vget_lane_s16(__s1, __c)); \
})
#define vqrdmulhs_lane_s32(a, b, __c) __extension__ ({ \
  int32_t __s0 = (a); \
  int32x2_t __s1 = (b); \
  vqrdmulhs_s32(__s0, vget_lane_s32(__s1, __c)); \
})
7398
/* vqrdmulh laneq macros: same shape as the lane variants, but the vector
 * operand is 128 bits wide and the lane is read with vgetq_lane_*. */
#define vqrdmulhh_laneq_s16(a, b, __c) __extension__ ({ \
  int16_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqrdmulhh_s16(__s0, vgetq_lane_s16(__s1, __c)); \
})
#define vqrdmulhs_laneq_s32(a, b, __c) __extension__ ({ \
  int32_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqrdmulhs_s32(__s0, vgetq_lane_s32(__s1, __c)); \
})
7405
/* vqrshrn*_n_* macros: `a` is captured once in a temporary of the source
 * width, `__b` is substituted textually, and the builtin result is cast to
 * the integer type of half that width. */
#define vqrshrnh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int8_t)__builtin_neon_vqrshrnh_n_s16(__s0, __b); \
})
#define vqrshrns_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int16_t)__builtin_neon_vqrshrns_n_s32(__s0, __b); \
})
#define vqrshrnd_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int32_t)__builtin_neon_vqrshrnd_n_s64(__s0, __b); \
})
#define vqrshrnh_n_u16(a, __b) __extension__ ({ \
  uint16_t __s0 = (a); \
  (uint8_t)__builtin_neon_vqrshrnh_n_u16(__s0, __b); \
})
#define vqrshrns_n_u32(a, __b) __extension__ ({ \
  uint32_t __s0 = (a); \
  (uint16_t)__builtin_neon_vqrshrns_n_u32(__s0, __b); \
})
#define vqrshrnd_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint32_t)__builtin_neon_vqrshrnd_n_u64(__s0, __b); \
})
7424
/* vqrshrun*_n_* macros: `a` is captured once, `__b` is substituted
 * textually, and the builtin result is cast to a signed integer of half
 * the source width.
 * NOTE(review): the results are cast to signed types here; confirm against
 * the ACLE prototypes, which may declare them unsigned. */
#define vqrshrunh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int8_t)__builtin_neon_vqrshrunh_n_s16(__s0, __b); \
})
#define vqrshruns_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int16_t)__builtin_neon_vqrshruns_n_s32(__s0, __b); \
})
#define vqrshrund_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int32_t)__builtin_neon_vqrshrund_n_s64(__s0, __b); \
})
7434
/* vqshlu*_n_* macros: `a` is captured once, `__b` is substituted textually,
 * and the builtin result is cast to the signed type of the same width.
 * NOTE(review): the results are cast to signed types here; confirm against
 * the ACLE prototypes, which may declare them unsigned. */
#define vqshlub_n_s8(a, __b) __extension__ ({ \
  int8_t __s0 = (a); \
  (int8_t)__builtin_neon_vqshlub_n_s8(__s0, __b); \
})
#define vqshluh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int16_t)__builtin_neon_vqshluh_n_s16(__s0, __b); \
})
#define vqshlus_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int32_t)__builtin_neon_vqshlus_n_s32(__s0, __b); \
})
#define vqshlud_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int64_t)__builtin_neon_vqshlud_n_s64(__s0, __b); \
})
7447
/* vqshl*_n_* macros: `a` is captured once in a temporary of the element
 * type, `__b` is substituted textually, and the builtin result is cast back
 * to that element type. */
#define vqshlb_n_s8(a, __b) __extension__ ({ \
  int8_t __s0 = (a); \
  (int8_t)__builtin_neon_vqshlb_n_s8(__s0, __b); \
})
#define vqshlh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int16_t)__builtin_neon_vqshlh_n_s16(__s0, __b); \
})
#define vqshls_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int32_t)__builtin_neon_vqshls_n_s32(__s0, __b); \
})
#define vqshld_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int64_t)__builtin_neon_vqshld_n_s64(__s0, __b); \
})
#define vqshlb_n_u8(a, __b) __extension__ ({ \
  uint8_t __s0 = (a); \
  (uint8_t)__builtin_neon_vqshlb_n_u8(__s0, __b); \
})
#define vqshlh_n_u16(a, __b) __extension__ ({ \
  uint16_t __s0 = (a); \
  (uint16_t)__builtin_neon_vqshlh_n_u16(__s0, __b); \
})
#define vqshls_n_u32(a, __b) __extension__ ({ \
  uint32_t __s0 = (a); \
  (uint32_t)__builtin_neon_vqshls_n_u32(__s0, __b); \
})
#define vqshld_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint64_t)__builtin_neon_vqshld_n_u64(__s0, __b); \
})
7472
/* vqshrn*_n_* macros: `a` is captured once in a temporary of the source
 * width, `__b` is substituted textually, and the builtin result is cast to
 * the integer type of half that width. */
#define vqshrnh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int8_t)__builtin_neon_vqshrnh_n_s16(__s0, __b); \
})
#define vqshrns_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int16_t)__builtin_neon_vqshrns_n_s32(__s0, __b); \
})
#define vqshrnd_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int32_t)__builtin_neon_vqshrnd_n_s64(__s0, __b); \
})
#define vqshrnh_n_u16(a, __b) __extension__ ({ \
  uint16_t __s0 = (a); \
  (uint8_t)__builtin_neon_vqshrnh_n_u16(__s0, __b); \
})
#define vqshrns_n_u32(a, __b) __extension__ ({ \
  uint32_t __s0 = (a); \
  (uint16_t)__builtin_neon_vqshrns_n_u32(__s0, __b); \
})
#define vqshrnd_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint32_t)__builtin_neon_vqshrnd_n_u64(__s0, __b); \
})
7491
/* vqshrun*_n_* macros: `a` is captured once, `__b` is substituted
 * textually, and the builtin result is cast to a signed integer of half
 * the source width.
 * NOTE(review): the results are cast to signed types here; confirm against
 * the ACLE prototypes, which may declare them unsigned. */
#define vqshrunh_n_s16(a, __b) __extension__ ({ \
  int16_t __s0 = (a); \
  (int8_t)__builtin_neon_vqshrunh_n_s16(__s0, __b); \
})
#define vqshruns_n_s32(a, __b) __extension__ ({ \
  int32_t __s0 = (a); \
  (int16_t)__builtin_neon_vqshruns_n_s32(__s0, __b); \
})
#define vqshrund_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int32_t)__builtin_neon_vqshrund_n_s64(__s0, __b); \
})
7501
7502__ai int8_t vqmovnh_s16(int16_t __a) {
7503 return (int8_t)__builtin_neon_vqmovnh_s16(__a); }
7504__ai int16_t vqmovns_s32(int32_t __a) {
7505 return (int16_t)__builtin_neon_vqmovns_s32(__a); }
7506__ai int32_t vqmovnd_s64(int64_t __a) {
7507 return (int32_t)__builtin_neon_vqmovnd_s64(__a); }
7508
7509__ai int8_t vqmovunh_s16(int16_t __a) {
7510 return (int8_t)__builtin_neon_vqmovunh_s16(__a); }
7511__ai int16_t vqmovuns_s32(int32_t __a) {
7512 return (int16_t)__builtin_neon_vqmovuns_s32(__a); }
7513__ai int32_t vqmovund_s64(int64_t __a) {
7514 return (int32_t)__builtin_neon_vqmovund_s64(__a); }
7515
/* vsrid_n macros: `a` and `b` are each captured once (in that order) and
 * passed with the textual `__c` to the matching builtin. */
#define vsrid_n_s64(a, b, __c) __extension__ ({ \
  int64_t __s0 = (a); \
  int64_t __s1 = (b); \
  (int64_t)__builtin_neon_vsrid_n_s64(__s0, __s1, __c); \
})
#define vsrid_n_u64(a, b, __c) __extension__ ({ \
  uint64_t __s0 = (a); \
  uint64_t __s1 = (b); \
  (uint64_t)__builtin_neon_vsrid_n_u64(__s0, __s1, __c); \
})
7522
/* vrshrd_n macros: `a` is captured once in a 64-bit temporary and `__b` is
 * substituted textually into the builtin call. */
#define vrshrd_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int64_t)__builtin_neon_vrshrd_n_s64(__s0, __b); \
})
#define vrshrd_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint64_t)__builtin_neon_vrshrd_n_u64(__s0, __b); \
})
7529
/* vrsrad_n macros: `a` and `b` are each captured once (in that order) and
 * passed with the textual `__c` to the matching builtin. */
#define vrsrad_n_s64(a, b, __c) __extension__ ({ \
  int64_t __s0 = (a); \
  int64_t __s1 = (b); \
  (int64_t)__builtin_neon_vrsrad_n_s64(__s0, __s1, __c); \
})
#define vrsrad_n_u64(a, b, __c) __extension__ ({ \
  uint64_t __s0 = (a); \
  uint64_t __s1 = (b); \
  (uint64_t)__builtin_neon_vrsrad_n_u64(__s0, __s1, __c); \
})
7536
/* vshrd_n macros: `a` is captured once in a 64-bit temporary and `__b` is
 * substituted textually into the builtin call. */
#define vshrd_n_s64(a, __b) __extension__ ({ \
  int64_t __s0 = (a); \
  (int64_t)__builtin_neon_vshrd_n_s64(__s0, __b); \
})
#define vshrd_n_u64(a, __b) __extension__ ({ \
  uint64_t __s0 = (a); \
  (uint64_t)__builtin_neon_vshrd_n_u64(__s0, __b); \
})
7543
/* vsrad_n macros: `a` and `b` are each captured once (in that order) and
 * passed with the textual `__c` to the matching builtin. */
#define vsrad_n_s64(a, b, __c) __extension__ ({ \
  int64_t __s0 = (a); \
  int64_t __s1 = (b); \
  (int64_t)__builtin_neon_vsrad_n_s64(__s0, __s1, __c); \
})
#define vsrad_n_u64(a, b, __c) __extension__ ({ \
  uint64_t __s0 = (a); \
  uint64_t __s1 = (b); \
  (uint64_t)__builtin_neon_vsrad_n_u64(__s0, __s1, __c); \
})
7550
7551__ai int64_t vsubd_s64(int64_t __a, int64_t __b) {
7552 return (int64_t)__builtin_neon_vsubd_s64(__a, __b); }
7553__ai uint64_t vsubd_u64(uint64_t __a, uint64_t __b) {
7554 return (uint64_t)__builtin_neon_vsubd_u64(__a, __b); }
7555
7556__ai int8_t vuqaddb_s8(int8_t __a, int8_t __b) {
7557 return (int8_t)__builtin_neon_vuqaddb_s8(__a, __b); }
7558__ai int16_t vuqaddh_s16(int16_t __a, int16_t __b) {
7559 return (int16_t)__builtin_neon_vuqaddh_s16(__a, __b); }
7560__ai int32_t vuqadds_s32(int32_t __a, int32_t __b) {
7561 return (int32_t)__builtin_neon_vuqadds_s32(__a, __b); }
7562__ai int64_t vuqaddd_s64(int64_t __a, int64_t __b) {
7563 return (int64_t)__builtin_neon_vuqaddd_s64(__a, __b); }
7564
7565__ai float64_t vcvtd_f64_u64(uint64_t __a) {
7566 return (float64_t)__builtin_neon_vcvtd_f64_u64(__a); }
7567
7568__ai float32_t vcvts_f32_u32(uint32_t __a) {
7569 return (float32_t)__builtin_neon_vcvts_f32_u32(__a); }
7570
7571__ai uint8_t vqmovnh_u16(uint16_t __a) {
7572 return (uint8_t)__builtin_neon_vqmovnh_u16(__a); }
7573__ai uint16_t vqmovns_u32(uint32_t __a) {
7574 return (uint16_t)__builtin_neon_vqmovns_u32(__a); }
7575__ai uint32_t vqmovnd_u64(uint64_t __a) {
7576 return (uint32_t)__builtin_neon_vqmovnd_u64(__a); }
7577
7578__ai uint8_t vsqaddb_u8(uint8_t __a, uint8_t __b) {
7579 return (uint8_t)__builtin_neon_vsqaddb_u8(__a, __b); }
7580__ai uint16_t vsqaddh_u16(uint16_t __a, uint16_t __b) {
7581 return (uint16_t)__builtin_neon_vsqaddh_u16(__a, __b); }
7582__ai uint32_t vsqadds_u32(uint32_t __a, uint32_t __b) {
7583 return (uint32_t)__builtin_neon_vsqadds_u32(__a, __b); }
7584__ai uint64_t vsqaddd_u64(uint64_t __a, uint64_t __b) {
7585 return (uint64_t)__builtin_neon_vsqaddd_u64(__a, __b); }
7586
/* vdup{b,h,s,d}_lane_* macros (64-bit source vector): `a` is captured once
 * and passed, with the textual lane index `__b`, to the width-specific
 * builtin. Unsigned and polynomial vectors are cast to the signed vector
 * type the integer builtins are called with; the result is cast to the
 * element type named by the macro. */
#define vdupb_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __s0 = (a); \
  (int8_t)__builtin_neon_vdupb_lane_i8(__s0, __b); \
})
#define vduph_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __s0 = (a); \
  (int16_t)__builtin_neon_vduph_lane_i16(__s0, __b); \
})
#define vdups_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __s0 = (a); \
  (int32_t)__builtin_neon_vdups_lane_i32(__s0, __b); \
})
#define vdupd_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __s0 = (a); \
  (int64_t)__builtin_neon_vdupd_lane_i64(__s0, __b); \
})
#define vdups_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __s0 = (a); \
  (float32_t)__builtin_neon_vdups_lane_f32(__s0, __b); \
})
#define vdupd_lane_f64(a, __b) __extension__ ({ \
  float64x1_t __s0 = (a); \
  (float64_t)__builtin_neon_vdupd_lane_f64(__s0, __b); \
})
#define vdupb_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __s0 = (a); \
  (uint8_t)__builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __b); \
})
#define vduph_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __s0 = (a); \
  (uint16_t)__builtin_neon_vduph_lane_i16((int16x4_t)__s0, __b); \
})
#define vdups_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __s0 = (a); \
  (uint32_t)__builtin_neon_vdups_lane_i32((int32x2_t)__s0, __b); \
})
#define vdupd_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __s0 = (a); \
  (uint64_t)__builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __b); \
})
#define vdupb_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __s0 = (a); \
  (poly8_t)__builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __b); \
})
#define vduph_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __s0 = (a); \
  (poly16_t)__builtin_neon_vduph_lane_i16((int16x4_t)__s0, __b); \
})
7623
/* vdup{b,h,s,d}_laneq_* macros (128-bit source vector): `a` is captured
 * once and passed, with the textual lane index `__b`, to the width-specific
 * builtin. Unsigned and polynomial vectors are cast to the signed vector
 * type the integer builtins are called with; the result is cast to the
 * element type named by the macro. */
#define vdupb_laneq_s8(a, __b) __extension__ ({ \
  int8x16_t __s0 = (a); \
  (int8_t)__builtin_neon_vdupb_laneq_i8(__s0, __b); \
})
#define vduph_laneq_s16(a, __b) __extension__ ({ \
  int16x8_t __s0 = (a); \
  (int16_t)__builtin_neon_vduph_laneq_i16(__s0, __b); \
})
#define vdups_laneq_s32(a, __b) __extension__ ({ \
  int32x4_t __s0 = (a); \
  (int32_t)__builtin_neon_vdups_laneq_i32(__s0, __b); \
})
#define vdupd_laneq_s64(a, __b) __extension__ ({ \
  int64x2_t __s0 = (a); \
  (int64_t)__builtin_neon_vdupd_laneq_i64(__s0, __b); \
})
#define vdups_laneq_f32(a, __b) __extension__ ({ \
  float32x4_t __s0 = (a); \
  (float32_t)__builtin_neon_vdups_laneq_f32(__s0, __b); \
})
#define vdupd_laneq_f64(a, __b) __extension__ ({ \
  float64x2_t __s0 = (a); \
  (float64_t)__builtin_neon_vdupd_laneq_f64(__s0, __b); \
})
#define vdupb_laneq_u8(a, __b) __extension__ ({ \
  uint8x16_t __s0 = (a); \
  (uint8_t)__builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __b); \
})
#define vduph_laneq_u16(a, __b) __extension__ ({ \
  uint16x8_t __s0 = (a); \
  (uint16_t)__builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __b); \
})
#define vdups_laneq_u32(a, __b) __extension__ ({ \
  uint32x4_t __s0 = (a); \
  (uint32_t)__builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __b); \
})
#define vdupd_laneq_u64(a, __b) __extension__ ({ \
  uint64x2_t __s0 = (a); \
  (uint64_t)__builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __b); \
})
#define vdupb_laneq_p8(a, __b) __extension__ ({ \
  poly8x16_t __s0 = (a); \
  (poly8_t)__builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __b); \
})
#define vduph_laneq_p16(a, __b) __extension__ ({ \
  poly16x8_t __s0 = (a); \
  (poly16_t)__builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __b); \
})
7660
/* Multiply lane 0 of `a` by lane __c of `b` with vmulxd_f64 and return the
 * product in a float64x1_t.
 *
 * Two robustness points:
 *  - The result vector __g1 is seeded from the first operand before the
 *    lane is overwritten, so no indeterminate vector value is ever read.
 *  - The argument copies are named __a1/__b1 rather than __a/__b. If
 *    vget_lane_f64 is a macro with its own `__a` local (not visible here;
 *    vset_lane_f64 in this file does use __a/__b locals), passing `__a`
 *    would expand to a self-initialisation. The __a1-style names follow the
 *    convention the f16 lane macros already use for nested lane macros.
 *
 * a   - float64x1_t first operand (evaluated once).
 * b   - float64x1_t vector supplying the second operand (evaluated once).
 * __c - lane index into `b`, also used as the destination lane. */
#define vmulx_lane_f64(a, b, __c) __extension__ ({ \
  float64x1_t __a1 = (a); float64x1_t __b1 = (b); \
  float64_t __d1 = vget_lane_f64(__a1, 0); \
  float64_t __e1 = vget_lane_f64(__b1, __c); \
  float64_t __f1 = vmulxd_f64(__d1, __e1); \
  float64x1_t __g1 = __a1; \
  vset_lane_f64(__f1, __g1, __c); })
7668
/* Multiply lane 0 of `a` by lane __c of the 128-bit vector `b` with
 * vmulxd_f64 and return the product in lane 0 of a float64x1_t.
 *
 * Two robustness points:
 *  - The result vector __g1 is seeded from the first operand before lane 0
 *    is overwritten, so no indeterminate vector value is ever read.
 *  - The argument copies are named __a1/__b1 rather than __a/__b. If
 *    vget_lane_f64 / vgetq_lane_f64 are macros with their own `__a` local
 *    (not visible here), passing `__a` would expand to a
 *    self-initialisation.
 *
 * a   - float64x1_t first operand (evaluated once).
 * b   - float64x2_t vector supplying the second operand (evaluated once).
 * __c - lane index into `b`. */
#define vmulx_laneq_f64(a, b, __c) __extension__ ({ \
  float64x1_t __a1 = (a); float64x2_t __b1 = (b); \
  float64_t __d1 = vget_lane_f64(__a1, 0); \
  float64_t __e1 = vgetq_lane_f64(__b1, __c); \
  float64_t __f1 = vmulxd_f64(__d1, __e1); \
  float64x1_t __g1 = __a1; \
  vset_lane_f64(__f1, __g1, 0); })
7676
/* vmul_lane_f64 / vmul_laneq_f64: both operands are captured once, cast to
 * the generic byte-vector types, and passed with the textual lane index
 * `__c` and type code 9 to the generic multiply-by-lane builtin. The result
 * is cast back to float64x1_t. */
#define vmul_lane_f64(a, b, __c) __extension__ ({ \
  float64x1_t __s0 = (a); \
  float64x1_t __s1 = (b); \
  (float64x1_t)__builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __c, 9); \
})

#define vmul_laneq_f64(a, b, __c) __extension__ ({ \
  float64x1_t __s0 = (a); \
  float64x2_t __s1 = (b); \
  (float64x1_t)__builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __c, 9); \
})
7684
7685__ai float64x1_t vmul_n_f64(float64x1_t __a, float64_t __b) {
7686 return (float64x1_t)__builtin_neon_vmul_n_f64(__a, __b); }
7687
/* 64-bit-element lane setters: the scalar `a` and the vector `b` are each
 * captured once (in that order) and passed with the textual lane index
 * `__c` to the builtin. The poly64 variants go through the i64 builtins,
 * casting the vector to the signed 64-bit vector type on the way in and
 * back to the poly type on the way out. */
#define vset_lane_f64(a, b, __c) __extension__ ({ \
  float64_t __s0 = (a); \
  float64x1_t __s1 = (b); \
  (float64x1_t)__builtin_neon_vset_lane_f64(__s0, __s1, __c); \
})
#define vsetq_lane_f64(a, b, __c) __extension__ ({ \
  float64_t __s0 = (a); \
  float64x2_t __s1 = (b); \
  (float64x2_t)__builtin_neon_vsetq_lane_f64(__s0, __s1, __c); \
})
#define vset_lane_p64(a, b, __c) __extension__ ({ \
  poly64_t __s0 = (a); \
  poly64x1_t __s1 = (b); \
  (poly64x1_t)__builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __c); \
})
#define vsetq_lane_p64(a, b, __c) __extension__ ({ \
  poly64_t __s0 = (a); \
  poly64x2_t __s1 = (b); \
  (poly64x2_t)__builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __c); \
})
7700
/* vshll_high_n_* macros: capture the 128-bit vector `a` once, take its
 * upper half with vget_high_*, and apply the corresponding vshll_n_* with
 * the textual `__b`. The result is cast to the vector type with elements
 * of twice the width. */
#define vshll_high_n_s8(a, __b) __extension__ ({ \
  int8x16_t __s0 = (a); \
  int8x8_t __hi = vget_high_s8(__s0); \
  (int16x8_t)vshll_n_s8(__hi, __b); \
})
#define vshll_high_n_s16(a, __b) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x4_t __hi = vget_high_s16(__s0); \
  (int32x4_t)vshll_n_s16(__hi, __b); \
})
#define vshll_high_n_s32(a, __b) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x2_t __hi = vget_high_s32(__s0); \
  (int64x2_t)vshll_n_s32(__hi, __b); \
})
#define vshll_high_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __s0 = (a); \
  uint8x8_t __hi = vget_high_u8(__s0); \
  (uint16x8_t)vshll_n_u8(__hi, __b); \
})
#define vshll_high_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __s0 = (a); \
  uint16x4_t __hi = vget_high_u16(__s0); \
  (uint32x4_t)vshll_n_u16(__hi, __b); \
})
#define vshll_high_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __s0 = (a); \
  uint32x2_t __hi = vget_high_u32(__s0); \
  (uint64x2_t)vshll_n_u32(__hi, __b); \
})
7725
/* vshrn_high_n_* macros: narrow the wide vector `b` with vshrn_n_* (shift
 * amount __c) and append the result to the already-narrow vector `a`.
 *
 * The halves are joined with the vcombine_* variant whose element type
 * matches `__a` (for example vcombine_s8 for int8x8_t). Combining through
 * a wider-element vcombine would compile only by relying on implicit (lax)
 * conversions between vector types of different element widths.
 * NOTE(review): this assumes each vshrn_n_* yields the same narrow vector
 * type as `__a`, and that the narrow-element vcombine_* intrinsics are
 * declared earlier in this header - confirm.
 *
 * a   - narrow vector placed before the narrowed result (evaluated once).
 * b   - wide vector to be narrowed (evaluated once).
 * __c - shift amount, passed straight through to vshrn_n_*. */
#define vshrn_high_n_s16(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int16x8_t __b = (b); \
  (int8x16_t)vcombine_s8(__a, vshrn_n_s16(__b, __c)); })
#define vshrn_high_n_s32(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int32x4_t __b = (b); \
  (int16x8_t)vcombine_s16(__a, vshrn_n_s32(__b, __c)); })
#define vshrn_high_n_s64(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int64x2_t __b = (b); \
  (int32x4_t)vcombine_s32(__a, vshrn_n_s64(__b, __c)); })
#define vshrn_high_n_u16(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint16x8_t __b = (b); \
  (uint8x16_t)vcombine_u8(__a, vshrn_n_u16(__b, __c)); })
#define vshrn_high_n_u32(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint32x4_t __b = (b); \
  (uint16x8_t)vcombine_u16(__a, vshrn_n_u32(__b, __c)); })
#define vshrn_high_n_u64(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint64x2_t __b = (b); \
  (uint32x4_t)vcombine_u32(__a, vshrn_n_u64(__b, __c)); })
7744
/* vsli_n_p64 / vsliq_n_p64: both poly64 vectors are captured once, cast to
 * the generic byte-vector type, and passed with the textual `__c` and the
 * type code (6 for the 64-bit form, 38 for the 128-bit form) to the generic
 * vsli builtin. The result is cast back to the poly64 vector type. */
#define vsli_n_p64(a, b, __c) __extension__ ({ \
  poly64x1_t __s0 = (a); \
  poly64x1_t __s1 = (b); \
  (poly64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __c, 6); \
})
#define vsliq_n_p64(a, b, __c) __extension__ ({ \
  poly64x2_t __s0 = (a); \
  poly64x2_t __s1 = (b); \
  (poly64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __c, 38); \
})
7751
7752__ai int8x16_t vqmovun_high_s16(int8x8_t __a, int16x8_t __b) {
7753 int8x8_t __a1 = vqmovun_s16(__b);
7754 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
7755__ai int16x8_t vqmovun_high_s32(int16x4_t __a, int32x4_t __b) {
7756 int16x4_t __a1 = vqmovun_s32(__b);
7757 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
7758__ai int32x4_t vqmovun_high_s64(int32x2_t __a, int64x2_t __b) {
7759 int32x2_t __a1 = vqmovun_s64(__b);
7760 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
7761
/* vsri_n_p64 / vsriq_n_p64: both poly64 vectors are captured once, cast to
 * the generic byte-vector type, and passed with the textual `__c` and the
 * type code (6 for the 64-bit form, 38 for the 128-bit form) to the generic
 * vsri builtin. The result is cast back to the poly64 vector type. */
#define vsri_n_p64(a, b, __c) __extension__ ({ \
  poly64x1_t __s0 = (a); \
  poly64x1_t __s1 = (b); \
  (poly64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __c, 6); \
})
#define vsriq_n_p64(a, b, __c) __extension__ ({ \
  poly64x2_t __s0 = (a); \
  poly64x2_t __s1 = (b); \
  (poly64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __c, 38); \
})
7768
/* vst1 / vst1q for f64 and p64: the vector `b` is captured once, cast to
 * the generic byte-vector type, and handed with the pointer argument `__a`
 * (substituted textually) and a type code to the generic vst1 builtin. */
#define vst1q_f64(__a, b) __extension__ ({ \
  float64x2_t __s1 = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__s1, 41); \
})
#define vst1_f64(__a, b) __extension__ ({ \
  float64x1_t __s1 = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__s1, 9); \
})
#define vst1_p64(__a, b) __extension__ ({ \
  poly64x1_t __s1 = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__s1, 6); \
})
#define vst1q_p64(__a, b) __extension__ ({ \
  poly64x2_t __s1 = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__s1, 38); \
})
7781
/* vst1(q)_lane for f64 and p64: the vector `b` is captured once, cast to
 * the generic byte-vector type, and handed with the pointer argument `__a`,
 * the lane index `__c` (both substituted textually) and a type code to the
 * generic lane-store builtin. */
#define vst1q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2_t __s1 = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__s1, __c, 41); \
})
#define vst1q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2_t __s1 = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__s1, __c, 38); \
})
#define vst1_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1_t __s1 = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__s1, __c, 9); \
})
#define vst1_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1_t __s1 = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__s1, __c, 6); \
})
7794
/* vst1q_*_x2 / vst1_*_x2: two-vector stores. The x2 structure `b` is
 * captured once; its val[0] and val[1] members are cast to the generic
 * byte-vector type (the s8 forms already have that type and are passed
 * uncast) and handed, with the pointer argument `__a` (substituted
 * textually) and a per-element-type code, to the generic x2 store builtin. */
#define vst1q_u8_x2(__a, b) __extension__ ({ \
  uint8x16x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \
})
#define vst1q_u16_x2(__a, b) __extension__ ({ \
  uint16x8x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \
})
#define vst1q_u32_x2(__a, b) __extension__ ({ \
  uint32x4x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \
})
#define vst1q_u64_x2(__a, b) __extension__ ({ \
  uint64x2x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \
})
#define vst1q_s8_x2(__a, b) __extension__ ({ \
  int8x16x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, __s1.val[0], __s1.val[1], 32); \
})
#define vst1q_s16_x2(__a, b) __extension__ ({ \
  int16x8x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \
})
#define vst1q_s32_x2(__a, b) __extension__ ({ \
  int32x4x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \
})
#define vst1q_s64_x2(__a, b) __extension__ ({ \
  int64x2x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \
})
#define vst1q_f16_x2(__a, b) __extension__ ({ \
  float16x8x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 39); \
})
#define vst1q_f32_x2(__a, b) __extension__ ({ \
  float32x4x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \
})
#define vst1q_f64_x2(__a, b) __extension__ ({ \
  float64x2x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \
})
#define vst1q_p8_x2(__a, b) __extension__ ({ \
  poly8x16x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \
})
#define vst1q_p16_x2(__a, b) __extension__ ({ \
  poly16x8x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \
})
#define vst1q_p64_x2(__a, b) __extension__ ({ \
  poly64x2x2_t __s1 = (b); \
  __builtin_neon_vst1q_x2_v(__a, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \
})
#define vst1_u8_x2(__a, b) __extension__ ({ \
  uint8x8x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \
})
#define vst1_u16_x2(__a, b) __extension__ ({ \
  uint16x4x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \
})
#define vst1_u32_x2(__a, b) __extension__ ({ \
  uint32x2x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \
})
#define vst1_u64_x2(__a, b) __extension__ ({ \
  uint64x1x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \
})
#define vst1_s8_x2(__a, b) __extension__ ({ \
  int8x8x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, __s1.val[0], __s1.val[1], 0); \
})
#define vst1_s16_x2(__a, b) __extension__ ({ \
  int16x4x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \
})
#define vst1_s32_x2(__a, b) __extension__ ({ \
  int32x2x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \
})
#define vst1_s64_x2(__a, b) __extension__ ({ \
  int64x1x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \
})
#define vst1_f16_x2(__a, b) __extension__ ({ \
  float16x4x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 7); \
})
#define vst1_f32_x2(__a, b) __extension__ ({ \
  float32x2x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \
})
#define vst1_f64_x2(__a, b) __extension__ ({ \
  float64x1x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \
})
#define vst1_p8_x2(__a, b) __extension__ ({ \
  poly8x8x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \
})
#define vst1_p16_x2(__a, b) __extension__ ({ \
  poly16x4x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \
})
#define vst1_p64_x2(__a, b) __extension__ ({ \
  poly64x1x2_t __s1 = (b); \
  __builtin_neon_vst1_x2_v(__a, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \
})
7879
/* vst1q_<type>_x3(__a, b): copy the three-vector tuple b into a local, then
 * hand the pointer __a, the three vectors reinterpreted as int8x16_t, and a
 * per-type integer constant to __builtin_neon_vst1q_x3_v.  The s8 variant
 * passes its vectors uncast. */
#define vst1q_u8_x3(__a, b) __extension__ ({ \
  uint8x16x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    48); \
})
#define vst1q_u16_x3(__a, b) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    49); \
})
#define vst1q_u32_x3(__a, b) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    50); \
})
#define vst1q_u64_x3(__a, b) __extension__ ({ \
  uint64x2x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    51); \
})
#define vst1q_s8_x3(__a, b) __extension__ ({ \
  int8x16x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    __b.val[0], __b.val[1], __b.val[2], \
    32); \
})
#define vst1q_s16_x3(__a, b) __extension__ ({ \
  int16x8x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    33); \
})
#define vst1q_s32_x3(__a, b) __extension__ ({ \
  int32x4x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    34); \
})
#define vst1q_s64_x3(__a, b) __extension__ ({ \
  int64x2x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    35); \
})
#define vst1q_f16_x3(__a, b) __extension__ ({ \
  float16x8x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    39); \
})
#define vst1q_f32_x3(__a, b) __extension__ ({ \
  float32x4x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    40); \
})
#define vst1q_f64_x3(__a, b) __extension__ ({ \
  float64x2x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    41); \
})
#define vst1q_p8_x3(__a, b) __extension__ ({ \
  poly8x16x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    36); \
})
#define vst1q_p16_x3(__a, b) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    37); \
})
#define vst1q_p64_x3(__a, b) __extension__ ({ \
  poly64x2x3_t __b = (b); \
  __builtin_neon_vst1q_x3_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    38); \
})
/* vst1_<type>_x3(__a, b): copy the three-vector tuple b into a local, then
 * hand the pointer __a, the three vectors reinterpreted as int8x8_t, and a
 * per-type integer constant to __builtin_neon_vst1_x3_v.  The s8 variant
 * passes its vectors uncast. */
#define vst1_u8_x3(__a, b) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    16); \
})
#define vst1_u16_x3(__a, b) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    17); \
})
#define vst1_u32_x3(__a, b) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    18); \
})
#define vst1_u64_x3(__a, b) __extension__ ({ \
  uint64x1x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    19); \
})
#define vst1_s8_x3(__a, b) __extension__ ({ \
  int8x8x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    __b.val[0], __b.val[1], __b.val[2], \
    0); \
})
#define vst1_s16_x3(__a, b) __extension__ ({ \
  int16x4x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    1); \
})
#define vst1_s32_x3(__a, b) __extension__ ({ \
  int32x2x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    2); \
})
#define vst1_s64_x3(__a, b) __extension__ ({ \
  int64x1x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    3); \
})
#define vst1_f16_x3(__a, b) __extension__ ({ \
  float16x4x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    7); \
})
#define vst1_f32_x3(__a, b) __extension__ ({ \
  float32x2x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    8); \
})
#define vst1_f64_x3(__a, b) __extension__ ({ \
  float64x1x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    9); \
})
#define vst1_p8_x3(__a, b) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    4); \
})
#define vst1_p16_x3(__a, b) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    5); \
})
#define vst1_p64_x3(__a, b) __extension__ ({ \
  poly64x1x3_t __b = (b); \
  __builtin_neon_vst1_x3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    6); \
})
7964
/* vst1q_<type>_x4(__a, b): copy the four-vector tuple b into a local, then
 * hand the pointer __a, the four vectors reinterpreted as int8x16_t, and a
 * per-type integer constant to __builtin_neon_vst1q_x4_v.  The s8 variant
 * passes its vectors uncast. */
#define vst1q_u8_x4(__a, b) __extension__ ({ \
  uint8x16x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    48); \
})
#define vst1q_u16_x4(__a, b) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    49); \
})
#define vst1q_u32_x4(__a, b) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    50); \
})
#define vst1q_u64_x4(__a, b) __extension__ ({ \
  uint64x2x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    51); \
})
#define vst1q_s8_x4(__a, b) __extension__ ({ \
  int8x16x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    __b.val[0], __b.val[1], \
    __b.val[2], __b.val[3], \
    32); \
})
#define vst1q_s16_x4(__a, b) __extension__ ({ \
  int16x8x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    33); \
})
#define vst1q_s32_x4(__a, b) __extension__ ({ \
  int32x4x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    34); \
})
#define vst1q_s64_x4(__a, b) __extension__ ({ \
  int64x2x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    35); \
})
#define vst1q_f16_x4(__a, b) __extension__ ({ \
  float16x8x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    39); \
})
#define vst1q_f32_x4(__a, b) __extension__ ({ \
  float32x4x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    40); \
})
#define vst1q_f64_x4(__a, b) __extension__ ({ \
  float64x2x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    41); \
})
#define vst1q_p8_x4(__a, b) __extension__ ({ \
  poly8x16x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    36); \
})
#define vst1q_p16_x4(__a, b) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    37); \
})
#define vst1q_p64_x4(__a, b) __extension__ ({ \
  poly64x2x4_t __b = (b); \
  __builtin_neon_vst1q_x4_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    38); \
})
/* vst1_<type>_x4(__a, b): copy the four-vector tuple b into a local, then
 * hand the pointer __a, the four vectors reinterpreted as int8x8_t, and a
 * per-type integer constant to __builtin_neon_vst1_x4_v.  The s8 variant
 * passes its vectors uncast. */
#define vst1_u8_x4(__a, b) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    16); \
})
#define vst1_u16_x4(__a, b) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    17); \
})
#define vst1_u32_x4(__a, b) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    18); \
})
#define vst1_u64_x4(__a, b) __extension__ ({ \
  uint64x1x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    19); \
})
#define vst1_s8_x4(__a, b) __extension__ ({ \
  int8x8x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    __b.val[0], __b.val[1], \
    __b.val[2], __b.val[3], \
    0); \
})
#define vst1_s16_x4(__a, b) __extension__ ({ \
  int16x4x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    1); \
})
#define vst1_s32_x4(__a, b) __extension__ ({ \
  int32x2x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    2); \
})
#define vst1_s64_x4(__a, b) __extension__ ({ \
  int64x1x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    3); \
})
#define vst1_f16_x4(__a, b) __extension__ ({ \
  float16x4x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    7); \
})
#define vst1_f32_x4(__a, b) __extension__ ({ \
  float32x2x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    8); \
})
#define vst1_f64_x4(__a, b) __extension__ ({ \
  float64x1x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    9); \
})
#define vst1_p8_x4(__a, b) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    4); \
})
#define vst1_p16_x4(__a, b) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    5); \
})
#define vst1_p64_x4(__a, b) __extension__ ({ \
  poly64x1x4_t __b = (b); \
  __builtin_neon_vst1_x4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    6); \
})
8049
/* vst2 / vst2q for 64-bit element types: copy the two-vector tuple b into a
 * local, then forward the pointer __a, both vectors reinterpreted as a byte
 * vector (int8x16_t for the q forms, int8x8_t otherwise) and a per-type
 * integer constant to __builtin_neon_vst2q_v / __builtin_neon_vst2_v. */
#define vst2q_u64(__a, b) __extension__ ({ \
  uint64x2x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    51); \
})
#define vst2q_s64(__a, b) __extension__ ({ \
  int64x2x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    35); \
})
#define vst2q_f64(__a, b) __extension__ ({ \
  float64x2x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    41); \
})
#define vst2_f64(__a, b) __extension__ ({ \
  float64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    9); \
})
#define vst2_p64(__a, b) __extension__ ({ \
  poly64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    6); \
})
#define vst2q_p64(__a, b) __extension__ ({ \
  poly64x2x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    38); \
})
8068
/* vst2q_lane_<type>(__a, b, __c): copy the two-vector tuple b into a local,
 * then forward the pointer __a, both vectors reinterpreted as int8x16_t, the
 * lane argument __c and a per-type integer constant to
 * __builtin_neon_vst2q_lane_v.  The s8 variant passes its vectors uncast. */
#define vst2q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 48); \
})
#define vst2q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 51); \
})
#define vst2q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    __b.val[0], __b.val[1], \
    __c, 32); \
})
#define vst2q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 35); \
})
#define vst2q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 41); \
})
#define vst2q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 36); \
})
#define vst2q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    __c, 38); \
})
/* vst2_lane_<type>(__a, b, __c) for 64-bit element types: copy the
 * two-vector tuple b into a local, then forward the pointer __a, both
 * vectors reinterpreted as int8x8_t, the lane argument __c and a per-type
 * integer constant to __builtin_neon_vst2_lane_v. */
#define vst2_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    __c, 19); \
})
#define vst2_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    __c, 3); \
})
#define vst2_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    __c, 9); \
})
#define vst2_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    __c, 6); \
})
8102
/* vst3 / vst3q for 64-bit element types: copy the three-vector tuple b into
 * a local, then forward the pointer __a, the three vectors reinterpreted as
 * a byte vector (int8x16_t for the q forms, int8x8_t otherwise) and a
 * per-type integer constant to __builtin_neon_vst3q_v / __builtin_neon_vst3_v. */
#define vst3q_u64(__a, b) __extension__ ({ \
  uint64x2x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    51); \
})
#define vst3q_s64(__a, b) __extension__ ({ \
  int64x2x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    35); \
})
#define vst3q_f64(__a, b) __extension__ ({ \
  float64x2x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    41); \
})
#define vst3_f64(__a, b) __extension__ ({ \
  float64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    9); \
})
#define vst3_p64(__a, b) __extension__ ({ \
  poly64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    6); \
})
#define vst3q_p64(__a, b) __extension__ ({ \
  poly64x2x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    38); \
})
8121
/* vst3q_lane_<type>(__a, b, __c): copy the three-vector tuple b into a
 * local, then forward the pointer __a, the three vectors reinterpreted as
 * int8x16_t, the lane argument __c and a per-type integer constant to
 * __builtin_neon_vst3q_lane_v.  The s8 variant passes its vectors uncast. */
#define vst3q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 48); \
})
#define vst3q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 51); \
})
#define vst3q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    __b.val[0], __b.val[1], __b.val[2], \
    __c, 32); \
})
#define vst3q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 35); \
})
#define vst3q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 41); \
})
#define vst3q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 36); \
})
#define vst3q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], \
    __c, 38); \
})
/* vst3_lane_<type>(__a, b, __c) for 64-bit element types: copy the
 * three-vector tuple b into a local, then forward the pointer __a, the three
 * vectors reinterpreted as int8x8_t, the lane argument __c and a per-type
 * integer constant to __builtin_neon_vst3_lane_v. */
#define vst3_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    __c, 19); \
})
#define vst3_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    __c, 3); \
})
#define vst3_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    __c, 9); \
})
#define vst3_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], \
    __c, 6); \
})
8155
/* vst4 / vst4q for 64-bit element types: copy the four-vector tuple b into
 * a local, then forward the pointer __a, the four vectors reinterpreted as
 * a byte vector (int8x16_t for the q forms, int8x8_t otherwise) and a
 * per-type integer constant to __builtin_neon_vst4q_v / __builtin_neon_vst4_v. */
#define vst4q_u64(__a, b) __extension__ ({ \
  uint64x2x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    51); \
})
#define vst4q_s64(__a, b) __extension__ ({ \
  int64x2x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    35); \
})
#define vst4q_f64(__a, b) __extension__ ({ \
  float64x2x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    41); \
})
#define vst4_f64(__a, b) __extension__ ({ \
  float64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    9); \
})
#define vst4_p64(__a, b) __extension__ ({ \
  poly64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    6); \
})
#define vst4q_p64(__a, b) __extension__ ({ \
  poly64x2x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    38); \
})
8174
/* vst4q_lane_<type>(__a, b, __c): copy the four-vector tuple b into a
 * local, then forward the pointer __a, the four vectors reinterpreted as
 * int8x16_t, the lane argument __c and a per-type integer constant to
 * __builtin_neon_vst4q_lane_v.  The s8 variant passes its vectors uncast. */
#define vst4q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 48); \
})
#define vst4q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 51); \
})
#define vst4q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    __b.val[0], __b.val[1], \
    __b.val[2], __b.val[3], \
    __c, 32); \
})
#define vst4q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 35); \
})
#define vst4q_lane_f64(__a, b, __c) __extension__ ({ \
  float64x2x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 41); \
})
#define vst4q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 36); \
})
#define vst4q_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x2x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, \
    (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], \
    (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], \
    __c, 38); \
})
/* vst4_lane_<type>(__a, b, __c) for 64-bit element types: copy the
 * four-vector tuple b into a local, then forward the pointer __a, the four
 * vectors reinterpreted as int8x8_t, the lane argument __c and a per-type
 * integer constant to __builtin_neon_vst4_lane_v. */
#define vst4_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    __c, 19); \
})
#define vst4_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    __c, 3); \
})
#define vst4_lane_f64(__a, b, __c) __extension__ ({ \
  float64x1x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    __c, 9); \
})
#define vst4_lane_p64(__a, b, __c) __extension__ ({ \
  poly64x1x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, \
    (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], \
    (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], \
    __c, 6); \
})
8208
8209__ai float64x1_t vsub_f64(float64x1_t __a, float64x1_t __b) {
8210 return __a - __b; }
8211__ai float64x2_t vsubq_f64(float64x2_t __a, float64x2_t __b) {
8212 return __a - __b; }
8213
8214__ai int8x8_t vuqadd_s8(int8x8_t __a, int8x8_t __b) {
8215 return (int8x8_t)__builtin_neon_vuqadd_v(__a, __b, 0); }
8216__ai int16x4_t vuqadd_s16(int16x4_t __a, int16x4_t __b) {
8217 return (int16x4_t)__builtin_neon_vuqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
8218__ai int32x2_t vuqadd_s32(int32x2_t __a, int32x2_t __b) {
8219 return (int32x2_t)__builtin_neon_vuqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
8220__ai int64x1_t vuqadd_s64(int64x1_t __a, int64x1_t __b) {
8221 return (int64x1_t)__builtin_neon_vuqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); }
8222__ai int8x16_t vuqaddq_s8(int8x16_t __a, int8x16_t __b) {
8223 return (int8x16_t)__builtin_neon_vuqaddq_v(__a, __b, 32); }
8224__ai int16x8_t vuqaddq_s16(int16x8_t __a, int16x8_t __b) {
8225 return (int16x8_t)__builtin_neon_vuqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
8226__ai int32x4_t vuqaddq_s32(int32x4_t __a, int32x4_t __b) {
8227 return (int32x4_t)__builtin_neon_vuqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
8228__ai int64x2_t vuqaddq_s64(int64x2_t __a, int64x2_t __b) {
8229 return (int64x2_t)__builtin_neon_vuqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
8230
8231__ai uint8x8_t vsqadd_u8(uint8x8_t __a, uint8x8_t __b) {
8232 return (uint8x8_t)__builtin_neon_vsqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
8233__ai uint16x4_t vsqadd_u16(uint16x4_t __a, uint16x4_t __b) {
8234 return (uint16x4_t)__builtin_neon_vsqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
8235__ai uint32x2_t vsqadd_u32(uint32x2_t __a, uint32x2_t __b) {
8236 return (uint32x2_t)__builtin_neon_vsqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
8237__ai uint64x1_t vsqadd_u64(uint64x1_t __a, uint64x1_t __b) {
8238 return (uint64x1_t)__builtin_neon_vsqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); }
8239__ai uint8x16_t vsqaddq_u8(uint8x16_t __a, uint8x16_t __b) {
8240 return (uint8x16_t)__builtin_neon_vsqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
8241__ai uint16x8_t vsqaddq_u16(uint16x8_t __a, uint16x8_t __b) {
8242 return (uint16x8_t)__builtin_neon_vsqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
8243__ai uint32x4_t vsqaddq_u32(uint32x4_t __a, uint32x4_t __b) {
8244 return (uint32x4_t)__builtin_neon_vsqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
8245__ai uint64x2_t vsqaddq_u64(uint64x2_t __a, uint64x2_t __b) {
8246 return (uint64x2_t)__builtin_neon_vsqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
8247
8248__ai int16x8_t vabal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) {
8249 return vabal_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); }
8250__ai int32x4_t vabal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) {
8251 return vabal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); }
8252__ai int64x2_t vabal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) {
8253 return vabal_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); }
8254__ai uint16x8_t vabal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) {
8255 return vabal_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); }
8256__ai uint32x4_t vabal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) {
8257 return vabal_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); }
8258__ai uint64x2_t vabal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) {
8259 return vabal_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); }
8260
8261
8262__ai int8x16_t vaddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) {
8263 return vcombine_s8(__a, vaddhn_s16(__b, __c)); }
8264__ai int16x8_t vaddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) {
8265 return vcombine_s16(__a, vaddhn_s32(__b, __c)); }
8266__ai int32x4_t vaddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) {
8267 return vcombine_s32(__a, vaddhn_s64(__b, __c)); }
8268__ai uint8x16_t vaddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) {
8269 return vcombine_u8(__a, vaddhn_u16(__b, __c)); }
8270__ai uint16x8_t vaddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) {
8271 return vcombine_u16(__a, vaddhn_u32(__b, __c)); }
8272__ai uint32x4_t vaddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) {
8273 return vcombine_u32(__a, vaddhn_u64(__b, __c)); }
8274
8275__ai int16_t vaddlv_s8(int8x8_t __a) {
8276 return (int16_t)__builtin_neon_vaddlv_s8(__a); }
8277__ai int32_t vaddlv_s16(int16x4_t __a) {
8278 return (int32_t)__builtin_neon_vaddlv_s16(__a); }
8279__ai int64_t vaddlv_s32(int32x2_t __a) {
8280 return (int64_t)__builtin_neon_vaddlv_s32(__a); }
8281__ai uint16_t vaddlv_u8(uint8x8_t __a) {
8282 return (uint16_t)__builtin_neon_vaddlv_u8((int8x8_t)__a); }
8283__ai uint32_t vaddlv_u16(uint16x4_t __a) {
8284 return (uint32_t)__builtin_neon_vaddlv_u16((int16x4_t)__a); }
8285__ai uint64_t vaddlv_u32(uint32x2_t __a) {
8286 return (uint64_t)__builtin_neon_vaddlv_u32((int32x2_t)__a); }
8287__ai int16_t vaddlvq_s8(int8x16_t __a) {
8288 return (int16_t)__builtin_neon_vaddlvq_s8(__a); }
8289__ai int32_t vaddlvq_s16(int16x8_t __a) {
8290 return (int32_t)__builtin_neon_vaddlvq_s16(__a); }
8291__ai int64_t vaddlvq_s32(int32x4_t __a) {
8292 return (int64_t)__builtin_neon_vaddlvq_s32(__a); }
8293__ai uint16_t vaddlvq_u8(uint8x16_t __a) {
8294 return (uint16_t)__builtin_neon_vaddlvq_u8((int8x16_t)__a); }
8295__ai uint32_t vaddlvq_u16(uint16x8_t __a) {
8296 return (uint32_t)__builtin_neon_vaddlvq_u16((int16x8_t)__a); }
8297__ai uint64_t vaddlvq_u32(uint32x4_t __a) {
8298 return (uint64_t)__builtin_neon_vaddlvq_u32((int32x4_t)__a); }
8299
8300__ai int16x8_t vaddl_high_s8(int8x16_t __a, int8x16_t __b) {
8301 return vmovl_high_s8(__a) + vmovl_high_s8(__b); }
8302__ai int32x4_t vaddl_high_s16(int16x8_t __a, int16x8_t __b) {
8303 return vmovl_high_s16(__a) + vmovl_high_s16(__b); }
8304__ai int64x2_t vaddl_high_s32(int32x4_t __a, int32x4_t __b) {
8305 return vmovl_high_s32(__a) + vmovl_high_s32(__b); }
8306__ai uint16x8_t vaddl_high_u8(uint8x16_t __a, uint8x16_t __b) {
8307 return vmovl_high_u8(__a) + vmovl_high_u8(__b); }
8308__ai uint32x4_t vaddl_high_u16(uint16x8_t __a, uint16x8_t __b) {
8309 return vmovl_high_u16(__a) + vmovl_high_u16(__b); }
8310__ai uint64x2_t vaddl_high_u32(uint32x4_t __a, uint32x4_t __b) {
8311 return vmovl_high_u32(__a) + vmovl_high_u32(__b); }
8312
8313__ai int8_t vaddv_s8(int8x8_t __a) {
8314 return (int8_t)__builtin_neon_vaddv_s8(__a); }
8315__ai int16_t vaddv_s16(int16x4_t __a) {
8316 return (int16_t)__builtin_neon_vaddv_s16(__a); }
8317__ai int32_t vaddv_s32(int32x2_t __a) {
8318 return (int32_t)__builtin_neon_vaddv_s32(__a); }
8319__ai float32_t vaddv_f32(float32x2_t __a) {
8320 return (float32_t)__builtin_neon_vaddv_f32(__a); }
8321__ai uint8_t vaddv_u8(uint8x8_t __a) {
8322 return (uint8_t)__builtin_neon_vaddv_u8((int8x8_t)__a); }
8323__ai uint16_t vaddv_u16(uint16x4_t __a) {
8324 return (uint16_t)__builtin_neon_vaddv_u16((int16x4_t)__a); }
8325__ai uint32_t vaddv_u32(uint32x2_t __a) {
8326 return (uint32_t)__builtin_neon_vaddv_u32((int32x2_t)__a); }
8327__ai int8_t vaddvq_s8(int8x16_t __a) {
8328 return (int8_t)__builtin_neon_vaddvq_s8(__a); }
8329__ai int16_t vaddvq_s16(int16x8_t __a) {
8330 return (int16_t)__builtin_neon_vaddvq_s16(__a); }
8331__ai int32_t vaddvq_s32(int32x4_t __a) {
8332 return (int32_t)__builtin_neon_vaddvq_s32(__a); }
8333__ai uint8_t vaddvq_u8(uint8x16_t __a) {
8334 return (uint8_t)__builtin_neon_vaddvq_u8((int8x16_t)__a); }
8335__ai uint16_t vaddvq_u16(uint16x8_t __a) {
8336 return (uint16_t)__builtin_neon_vaddvq_u16((int16x8_t)__a); }
8337__ai uint32_t vaddvq_u32(uint32x4_t __a) {
8338 return (uint32_t)__builtin_neon_vaddvq_u32((int32x4_t)__a); }
8339__ai float32_t vaddvq_f32(float32x4_t __a) {
8340 return (float32_t)__builtin_neon_vaddvq_f32(__a); }
8341__ai float64_t vaddvq_f64(float64x2_t __a) {
8342 return (float64_t)__builtin_neon_vaddvq_f64(__a); }
8343__ai int64_t vaddvq_s64(int64x2_t __a) {
8344 return (int64_t)__builtin_neon_vaddvq_s64(__a); }
8345__ai uint64_t vaddvq_u64(uint64x2_t __a) {
8346 return (uint64_t)__builtin_neon_vaddvq_u64((int64x2_t)__a); }
8347
8348__ai int16x8_t vaddw_high_s8(int16x8_t __a, int8x16_t __b) {
8349 return __a + vmovl_high_s8(__b); }
8350__ai int32x4_t vaddw_high_s16(int32x4_t __a, int16x8_t __b) {
8351 return __a + vmovl_high_s16(__b); }
8352__ai int64x2_t vaddw_high_s32(int64x2_t __a, int32x4_t __b) {
8353 return __a + vmovl_high_s32(__b); }
8354__ai uint16x8_t vaddw_high_u8(uint16x8_t __a, uint8x16_t __b) {
8355 return __a + vmovl_high_u8(__b); }
8356__ai uint32x4_t vaddw_high_u16(uint32x4_t __a, uint16x8_t __b) {
8357 return __a + vmovl_high_u16(__b); }
8358__ai uint64x2_t vaddw_high_u32(uint64x2_t __a, uint32x4_t __b) {
8359 return __a + vmovl_high_u32(__b); }
8360
8361__ai float32x2_t vcvtx_f32_f64(float64x2_t __a) {
8362 return (float32x2_t)__builtin_neon_vcvtx_f32_v((int8x16_t)__a, 41); }
8363
8364__ai float32x4_t vcvtx_high_f32_f64(float32x2_t __a, float64x2_t __b) {
8365 float32x2_t __a1 = vcvtx_f32_f64(__b);
8366 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
8367
8368__ai float32x2_t vcvt_f32_f64(float64x2_t __a) {
8369 return (float32x2_t)__builtin_neon_vcvt_f32_f64((int8x16_t)__a, 41); }
8370
8371__ai float64x1_t vcvt_f64_s64(int64x1_t __a) {
8372 return (float64x1_t)__builtin_neon_vcvt_f64_v((int8x8_t)__a, 3); }
8373__ai float64x1_t vcvt_f64_u64(uint64x1_t __a) {
8374 return (float64x1_t)__builtin_neon_vcvt_f64_v((int8x8_t)__a, 19); }
8375__ai float64x2_t vcvtq_f64_s64(int64x2_t __a) {
8376 return (float64x2_t)__builtin_neon_vcvtq_f64_v((int8x16_t)__a, 35); }
8377__ai float64x2_t vcvtq_f64_u64(uint64x2_t __a) {
8378 return (float64x2_t)__builtin_neon_vcvtq_f64_v((int8x16_t)__a, 51); }
8379
8380__ai float64x2_t vcvt_f64_f32(float32x2_t __a) {
8381 return (float64x2_t)__builtin_neon_vcvt_f64_f32((int8x8_t)__a, 41); }
8382
8383__ai float16x8_t vcvt_high_f16_f32(float16x4_t __a, float32x4_t __b) {
8384 float16x4_t __a1 = vcvt_f16_f32(__b);
8385 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
8386
8387__ai float32x4_t vcvt_high_f32_f16(float16x8_t __a) {
8388 float16x4_t __a1 = vget_high_f16(__a);
8389 return vcvt_f32_f16(__a1); }
8390
8391__ai float32x4_t vcvt_high_f32_f64(float32x2_t __a, float64x2_t __b) {
8392 float32x2_t __a1 = vcvt_f32_f64(__b);
8393 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
8394
8395__ai float64x2_t vcvt_high_f64_f32(float32x4_t __a) {
8396 float32x2_t __a1 = vget_high_f32(__a);
8397 return vcvt_f64_f32(__a1); }
8398
8399__ai int64x1_t vcvt_s64_f64(float64x1_t __a) {
8400 return (int64x1_t)__builtin_neon_vcvt_s64_v((int8x8_t)__a, 3); }
8401__ai int64x2_t vcvtq_s64_f64(float64x2_t __a) {
8402 return (int64x2_t)__builtin_neon_vcvtq_s64_v((int8x16_t)__a, 35); }
8403
8404__ai uint64x1_t vcvt_u64_f64(float64x1_t __a) {
8405 return (uint64x1_t)__builtin_neon_vcvt_u64_v((int8x8_t)__a, 19); }
8406__ai uint64x2_t vcvtq_u64_f64(float64x2_t __a) {
8407 return (uint64x2_t)__builtin_neon_vcvtq_u64_v((int8x16_t)__a, 51); }
8408
/* vdup_lane_* / vdupq_lane_* (f16, f64, p64): broadcast lane __b of a 64-bit
 * source vector. The source is evaluated once into the typed local __a and
 * __builtin_shufflevector repeats index __b once per result lane (the number
 * of indices fixes the result width). __b is pasted into the shuffle index
 * list, which clang requires to be constant integers. */
#define vdup_lane_f16(a, __b) __extension__ ({ \
  float16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdup_lane_f64(a, __b) __extension__ ({ \
  float64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
#define vdupq_lane_f16(a, __b) __extension__ ({ \
  float16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_lane_f64(a, __b) __extension__ ({ \
  float64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdup_lane_p64(a, __b) __extension__ ({ \
  poly64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
#define vdupq_lane_p64(a, __b) __extension__ ({ \
  poly64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
8427
/* vdup_laneq_*: broadcast lane __b of a 128-bit source vector into a result
 * with half as many lanes. The source is evaluated once into the typed local
 * __a; __builtin_shufflevector repeats index __b once per result lane.
 * __b is pasted into the shuffle index list, which clang requires to be
 * constant integers. */
#define vdup_laneq_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdup_laneq_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
#define vdup_laneq_p8(a, __b) __extension__ ({ \
  poly8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_p16(a, __b) __extension__ ({ \
  poly16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdup_laneq_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
#define vdup_laneq_f16(a, __b) __extension__ ({ \
  float16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdup_laneq_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdup_laneq_f64(a, __b) __extension__ ({ \
  float64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
/* vdupq_laneq_*: broadcast lane __b of a 128-bit source vector into a result
 * with the same lane count. The source is evaluated once into the typed local
 * __a; __builtin_shufflevector repeats index __b once per result lane.
 * __b is pasted into the shuffle index list, which clang requires to be
 * constant integers. */
#define vdupq_laneq_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdupq_laneq_p8(a, __b) __extension__ ({ \
  poly8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_p16(a, __b) __extension__ ({ \
  poly16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
#define vdupq_laneq_f16(a, __b) __extension__ ({ \
  float16x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b), \
                          (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b), (__b), (__b)); \
})
#define vdupq_laneq_f64(a, __b) __extension__ ({ \
  float64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
/* vdup_laneq_p64 / vdupq_laneq_p64: broadcast lane __b of a poly64x2_t source
 * into a one-lane or two-lane result. The source is evaluated once into the
 * typed local __a; the number of shuffle indices fixes the result width. */
#define vdup_laneq_p64(a, __b) __extension__ ({ \
  poly64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b)); \
})
#define vdupq_laneq_p64(a, __b) __extension__ ({ \
  poly64x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, (__b), (__b)); \
})
8512
/* vext_f64 / vextq_f64 / vext_p64 / vextq_p64: forward to the generic
 * __builtin_neon_vext_v / __builtin_neon_vextq_v builtins. Both vectors are
 * evaluated once into typed locals, cast to int8x8_t / int8x16_t for the call,
 * and the result is cast back to the caller-visible type. __c is forwarded
 * as-is; the trailing literal (9, 41, 6, 38) is the per-type code the builtin
 * expects (NOTE(review): presumably element type plus a quad flag -- confirm
 * against the NEON builtin emitter). */
#define vext_f64(a, b, __c) __extension__ ({ \
  float64x1_t __a = (a); \
  float64x1_t __b = (b); \
  (float64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, \
                                     (__c), 9); \
})
#define vextq_f64(a, b, __c) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  (float64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, \
                                      (__c), 41); \
})
#define vext_p64(a, b, __c) __extension__ ({ \
  poly64x1_t __a = (a); \
  poly64x1_t __b = (b); \
  (poly64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, \
                                    (__c), 6); \
})
#define vextq_p64(a, b, __c) __extension__ ({ \
  poly64x2_t __a = (a); \
  poly64x2_t __b = (b); \
  (poly64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, \
                                     (__c), 38); \
})
8525
/* vfma_lane_* / vfmaq_lane_*: forward to __builtin_neon_vfma_lane_v /
 * __builtin_neon_vfmaq_lane_v. The lane source __c is always a 64-bit vector.
 * Each vector argument is evaluated once into a typed local and cast to
 * int8x8_t / int8x16_t for the call; __d is the lane index, and the trailing
 * literal (8, 9, 40, 41) is the type code handed to the builtin. */
#define vfma_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __c = (c); \
  (float32x2_t)__builtin_neon_vfma_lane_v((int8x8_t)__a, (int8x8_t)__b, \
                                          (int8x8_t)__c, (__d), 8); \
})
#define vfma_lane_f64(a, b, c, __d) __extension__ ({ \
  float64x1_t __a = (a); \
  float64x1_t __b = (b); \
  float64x1_t __c = (c); \
  (float64x1_t)__builtin_neon_vfma_lane_v((int8x8_t)__a, (int8x8_t)__b, \
                                          (int8x8_t)__c, (__d), 9); \
})
#define vfmaq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x2_t __c = (c); \
  (float32x4_t)__builtin_neon_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, \
                                           (int8x8_t)__c, (__d), 40); \
})
#define vfmaq_lane_f64(a, b, c, __d) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  float64x1_t __c = (c); \
  (float64x2_t)__builtin_neon_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, \
                                           (int8x8_t)__c, (__d), 41); \
})
8538
/* vfma_laneq_* / vfmaq_laneq_*: forward to __builtin_neon_vfma_laneq_v /
 * __builtin_neon_vfmaq_laneq_v. The lane source __c is always a 128-bit
 * vector. Each vector argument is evaluated once into a typed local and cast
 * to int8x8_t / int8x16_t for the call; __d is the lane index, and the
 * trailing literal (8, 9, 40, 41) is the type code handed to the builtin. */
#define vfma_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x4_t __c = (c); \
  (float32x2_t)__builtin_neon_vfma_laneq_v((int8x8_t)__a, (int8x8_t)__b, \
                                           (int8x16_t)__c, (__d), 8); \
})
#define vfma_laneq_f64(a, b, c, __d) __extension__ ({ \
  float64x1_t __a = (a); \
  float64x1_t __b = (b); \
  float64x2_t __c = (c); \
  (float64x1_t)__builtin_neon_vfma_laneq_v((int8x8_t)__a, (int8x8_t)__b, \
                                           (int8x16_t)__c, (__d), 9); \
})
#define vfmaq_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x4_t __c = (c); \
  (float32x4_t)__builtin_neon_vfmaq_laneq_v((int8x16_t)__a, (int8x16_t)__b, \
                                            (int8x16_t)__c, (__d), 40); \
})
#define vfmaq_laneq_f64(a, b, c, __d) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  float64x2_t __c = (c); \
  (float64x2_t)__builtin_neon_vfmaq_laneq_v((int8x16_t)__a, (int8x16_t)__b, \
                                            (int8x16_t)__c, (__d), 41); \
})
8551
/* vfms_lane_* / vfmsq_lane_*: implemented as the matching vfma*_lane_* macro
 * with the lane-source vector negated (-__c1).
 * The __a1/__b1/__c1 copies are deliberate: the vfma*_lane_* macros declare
 * their own locals named __a/__b/__c, so passing __a/__b/__c straight through
 * would expand to self-referential initializers such as `__a = (__a)`. */
#define vfms_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __c = (c); \
  float32x2_t __a1 = __a; \
  float32x2_t __b1 = __b; \
  float32x2_t __c1 = __c; \
  vfma_lane_f32(__a1, __b1, -__c1, (__d)); \
})
#define vfms_lane_f64(a, b, c, __d) __extension__ ({ \
  float64x1_t __a = (a); \
  float64x1_t __b = (b); \
  float64x1_t __c = (c); \
  float64x1_t __a1 = __a; \
  float64x1_t __b1 = __b; \
  float64x1_t __c1 = __c; \
  vfma_lane_f64(__a1, __b1, -__c1, (__d)); \
})
#define vfmsq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x2_t __c = (c); \
  float32x4_t __a1 = __a; \
  float32x4_t __b1 = __b; \
  float32x2_t __c1 = __c; \
  vfmaq_lane_f32(__a1, __b1, -__c1, (__d)); \
})
#define vfmsq_lane_f64(a, b, c, __d) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  float64x1_t __c = (c); \
  float64x2_t __a1 = __a; \
  float64x2_t __b1 = __b; \
  float64x1_t __c1 = __c; \
  vfmaq_lane_f64(__a1, __b1, -__c1, (__d)); \
})
8576
/* vfms_laneq_* / vfmsq_laneq_*: implemented as the matching vfma*_laneq_*
 * macro with the 128-bit lane-source vector negated (-__c1).
 * The __a1/__b1/__c1 copies are deliberate: the vfma*_laneq_* macros declare
 * their own locals named __a/__b/__c, so passing __a/__b/__c straight through
 * would expand to self-referential initializers such as `__a = (__a)`. */
#define vfms_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x4_t __c = (c); \
  float32x2_t __a1 = __a; \
  float32x2_t __b1 = __b; \
  float32x4_t __c1 = __c; \
  vfma_laneq_f32(__a1, __b1, -__c1, (__d)); \
})
#define vfms_laneq_f64(a, b, c, __d) __extension__ ({ \
  float64x1_t __a = (a); \
  float64x1_t __b = (b); \
  float64x2_t __c = (c); \
  float64x1_t __a1 = __a; \
  float64x1_t __b1 = __b; \
  float64x2_t __c1 = __c; \
  vfma_laneq_f64(__a1, __b1, -__c1, (__d)); \
})
#define vfmsq_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x4_t __c = (c); \
  float32x4_t __a1 = __a; \
  float32x4_t __b1 = __b; \
  float32x4_t __c1 = __c; \
  vfmaq_laneq_f32(__a1, __b1, -__c1, (__d)); \
})
#define vfmsq_laneq_f64(a, b, c, __d) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  float64x2_t __c = (c); \
  float64x2_t __a1 = __a; \
  float64x2_t __b1 = __b; \
  float64x2_t __c1 = __c; \
  vfmaq_laneq_f64(__a1, __b1, -__c1, (__d)); \
})
8601
8602__ai float64x1_t vget_high_f64(float64x2_t __a) {
8603 return __builtin_shufflevector(__a, __a, 1); }
8604__ai poly64x1_t vget_high_p64(poly64x2_t __a) {
8605 return __builtin_shufflevector(__a, __a, 1); }
8606
8607__ai float64x1_t vget_low_f64(float64x2_t __a) {
8608 return __builtin_shufflevector(__a, __a, 0); }
8609__ai poly64x1_t vget_low_p64(poly64x2_t __a) {
8610 return __builtin_shufflevector(__a, __a, 0); }
8611
8612__ai int8_t vmaxv_s8(int8x8_t __a) {
8613 return (int8_t)__builtin_neon_vmaxv_s8(__a); }
8614__ai int16_t vmaxv_s16(int16x4_t __a) {
8615 return (int16_t)__builtin_neon_vmaxv_s16(__a); }
8616__ai int32_t vmaxv_s32(int32x2_t __a) {
8617 return (int32_t)__builtin_neon_vmaxv_s32(__a); }
8618__ai float32_t vmaxv_f32(float32x2_t __a) {
8619 return (float32_t)__builtin_neon_vmaxv_f32(__a); }
8620__ai uint8_t vmaxv_u8(uint8x8_t __a) {
8621 return (uint8_t)__builtin_neon_vmaxv_u8((int8x8_t)__a); }
8622__ai uint16_t vmaxv_u16(uint16x4_t __a) {
8623 return (uint16_t)__builtin_neon_vmaxv_u16((int16x4_t)__a); }
8624__ai uint32_t vmaxv_u32(uint32x2_t __a) {
8625 return (uint32_t)__builtin_neon_vmaxv_u32((int32x2_t)__a); }
8626__ai int8_t vmaxvq_s8(int8x16_t __a) {
8627 return (int8_t)__builtin_neon_vmaxvq_s8(__a); }
8628__ai int16_t vmaxvq_s16(int16x8_t __a) {
8629 return (int16_t)__builtin_neon_vmaxvq_s16(__a); }
8630__ai int32_t vmaxvq_s32(int32x4_t __a) {
8631 return (int32_t)__builtin_neon_vmaxvq_s32(__a); }
8632__ai uint8_t vmaxvq_u8(uint8x16_t __a) {
8633 return (uint8_t)__builtin_neon_vmaxvq_u8((int8x16_t)__a); }
8634__ai uint16_t vmaxvq_u16(uint16x8_t __a) {
8635 return (uint16_t)__builtin_neon_vmaxvq_u16((int16x8_t)__a); }
8636__ai uint32_t vmaxvq_u32(uint32x4_t __a) {
8637 return (uint32_t)__builtin_neon_vmaxvq_u32((int32x4_t)__a); }
8638__ai float32_t vmaxvq_f32(float32x4_t __a) {
8639 return (float32_t)__builtin_neon_vmaxvq_f32(__a); }
8640__ai float64_t vmaxvq_f64(float64x2_t __a) {
8641 return (float64_t)__builtin_neon_vmaxvq_f64(__a); }
8642
8643__ai int8_t vminv_s8(int8x8_t __a) {
8644 return (int8_t)__builtin_neon_vminv_s8(__a); }
8645__ai int16_t vminv_s16(int16x4_t __a) {
8646 return (int16_t)__builtin_neon_vminv_s16(__a); }
8647__ai int32_t vminv_s32(int32x2_t __a) {
8648 return (int32_t)__builtin_neon_vminv_s32(__a); }
8649__ai float32_t vminv_f32(float32x2_t __a) {
8650 return (float32_t)__builtin_neon_vminv_f32(__a); }
8651__ai uint8_t vminv_u8(uint8x8_t __a) {
8652 return (uint8_t)__builtin_neon_vminv_u8((int8x8_t)__a); }
8653__ai uint16_t vminv_u16(uint16x4_t __a) {
8654 return (uint16_t)__builtin_neon_vminv_u16((int16x4_t)__a); }
8655__ai uint32_t vminv_u32(uint32x2_t __a) {
8656 return (uint32_t)__builtin_neon_vminv_u32((int32x2_t)__a); }
8657__ai int8_t vminvq_s8(int8x16_t __a) {
8658 return (int8_t)__builtin_neon_vminvq_s8(__a); }
8659__ai int16_t vminvq_s16(int16x8_t __a) {
8660 return (int16_t)__builtin_neon_vminvq_s16(__a); }
8661__ai int32_t vminvq_s32(int32x4_t __a) {
8662 return (int32_t)__builtin_neon_vminvq_s32(__a); }
8663__ai uint8_t vminvq_u8(uint8x16_t __a) {
8664 return (uint8_t)__builtin_neon_vminvq_u8((int8x16_t)__a); }
8665__ai uint16_t vminvq_u16(uint16x8_t __a) {
8666 return (uint16_t)__builtin_neon_vminvq_u16((int16x8_t)__a); }
8667__ai uint32_t vminvq_u32(uint32x4_t __a) {
8668 return (uint32_t)__builtin_neon_vminvq_u32((int32x4_t)__a); }
8669__ai float32_t vminvq_f32(float32x4_t __a) {
8670 return (float32_t)__builtin_neon_vminvq_f32(__a); }
8671__ai float64_t vminvq_f64(float64x2_t __a) {
8672 return (float64_t)__builtin_neon_vminvq_f64(__a); }
8673
8674__ai int16x8_t vmlal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) {
8675 return vmlal_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); }
8676__ai int32x4_t vmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) {
8677 return vmlal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); }
8678__ai int64x2_t vmlal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) {
8679 return vmlal_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); }
8680__ai uint16x8_t vmlal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) {
8681 return vmlal_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); }
8682__ai uint32x4_t vmlal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) {
8683 return vmlal_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); }
8684__ai uint64x2_t vmlal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) {
8685 return vmlal_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); }
8686
/* vmlal_high_lane_*: __a + vmull_*(vget_high_*(__b), splat), where splat is
 * lane __d of the 64-bit vector __c repeated once per lane of the narrow
 * multiplicand. Each vector argument is evaluated once into a typed local;
 * __d is pasted into the shuffle index list and must be a constant. */
#define vmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x8_t __b = (b); \
  int16x4_t __c = (c); \
  __a + vmull_s16(vget_high_s16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x4_t __b = (b); \
  int32x2_t __c = (c); \
  __a + vmull_s32(vget_high_s32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlal_high_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x4_t __c = (c); \
  __a + vmull_u16(vget_high_u16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlal_high_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x2_t __c = (c); \
  __a + vmull_u32(vget_high_u32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8699
/* vmlal_high_laneq_*: __a + vmull_*(vget_high_*(__b), splat), where splat is
 * lane __d of the 128-bit vector __c repeated once per lane of the narrow
 * multiplicand. Each vector argument is evaluated once into a typed local;
 * __d is pasted into the shuffle index list and must be a constant. */
#define vmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x8_t __b = (b); \
  int16x8_t __c = (c); \
  __a + vmull_s16(vget_high_s16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x4_t __b = (b); \
  int32x4_t __c = (c); \
  __a + vmull_s32(vget_high_s32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlal_high_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x8_t __c = (c); \
  __a + vmull_u16(vget_high_u16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlal_high_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x4_t __c = (c); \
  __a + vmull_u32(vget_high_u32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8712
8713__ai int32x4_t vmlal_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) {
8714 return vmlal_n_s16(__a, vget_high_s16(__b), __c); }
8715__ai int64x2_t vmlal_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) {
8716 return vmlal_n_s32(__a, vget_high_s32(__b), __c); }
8717__ai uint32x4_t vmlal_high_n_u16(uint32x4_t __a, uint16x8_t __b, uint16_t __c) {
8718 return vmlal_n_u16(__a, vget_high_u16(__b), __c); }
8719__ai uint64x2_t vmlal_high_n_u32(uint64x2_t __a, uint32x4_t __b, uint32_t __c) {
8720 return vmlal_n_u32(__a, vget_high_u32(__b), __c); }
8721
/* vmlal_laneq_*: __a + vmull_*(__b, splat), where splat is lane __d of the
 * 128-bit vector __c repeated once per lane of __b. Each vector argument is
 * evaluated once into a typed local; __d is pasted into the shuffle index
 * list and must be a constant. */
#define vmlal_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x8_t __c = (c); \
  __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, \
                                               (__d), (__d), (__d), (__d))); \
})
#define vmlal_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x4_t __c = (c); \
  __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlal_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x8_t __c = (c); \
  __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, \
                                               (__d), (__d), (__d), (__d))); \
})
#define vmlal_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x4_t __c = (c); \
  __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8734
/* vmla_laneq_*: __a + (__b * splat) on 64-bit vectors, where splat is lane __d
 * of the 128-bit vector __c repeated once per lane of __b. Written with plain
 * vector operators (a separate multiply and add in the source). Each vector
 * argument is evaluated once into a typed local; __d must be a constant. */
#define vmla_laneq_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x8_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmla_laneq_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmla_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x8_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmla_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmla_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
/* vmlaq_laneq_*: __a + (__b * splat) on 128-bit vectors, where splat is lane
 * __d of the 128-bit vector __c repeated once per lane. Written with plain
 * vector operators (a separate multiply and add in the source). Each vector
 * argument is evaluated once into a typed local; __d must be a constant. */
#define vmlaq_laneq_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  int16x8_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d), \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlaq_laneq_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  int32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlaq_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x8_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d), \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlaq_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlaq_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
8765
8766__ai int16x8_t vmlsl_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) {
8767 return vmlsl_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); }
8768__ai int32x4_t vmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) {
8769 return vmlsl_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); }
8770__ai int64x2_t vmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) {
8771 return vmlsl_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); }
8772__ai uint16x8_t vmlsl_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) {
8773 return vmlsl_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); }
8774__ai uint32x4_t vmlsl_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) {
8775 return vmlsl_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); }
8776__ai uint64x2_t vmlsl_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) {
8777 return vmlsl_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); }
8778
/* vmlsl_high_lane_*: __a - vmull_*(vget_high_*(__b), splat), where splat is
 * lane __d of the 64-bit vector __c repeated once per lane of the narrow
 * multiplicand. Each vector argument is evaluated once into a typed local;
 * __d is pasted into the shuffle index list and must be a constant. */
#define vmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x8_t __b = (b); \
  int16x4_t __c = (c); \
  __a - vmull_s16(vget_high_s16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x4_t __b = (b); \
  int32x2_t __c = (c); \
  __a - vmull_s32(vget_high_s32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlsl_high_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x4_t __c = (c); \
  __a - vmull_u16(vget_high_u16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlsl_high_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x2_t __c = (c); \
  __a - vmull_u32(vget_high_u32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8791
/* vmlsl_high_laneq_*: __a - vmull_*(vget_high_*(__b), splat), where splat is
 * lane __d of the 128-bit vector __c repeated once per lane of the narrow
 * multiplicand. Each vector argument is evaluated once into a typed local;
 * __d is pasted into the shuffle index list and must be a constant. */
#define vmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x8_t __b = (b); \
  int16x8_t __c = (c); \
  __a - vmull_s16(vget_high_s16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x4_t __b = (b); \
  int32x4_t __c = (c); \
  __a - vmull_s32(vget_high_s32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlsl_high_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x8_t __c = (c); \
  __a - vmull_u16(vget_high_u16(__b), \
                  __builtin_shufflevector(__c, __c, \
                                          (__d), (__d), (__d), (__d))); \
})
#define vmlsl_high_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x4_t __c = (c); \
  __a - vmull_u32(vget_high_u32(__b), \
                  __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8804
8805__ai int32x4_t vmlsl_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) {
8806 return vmlsl_n_s16(__a, vget_high_s16(__b), __c); }
8807__ai int64x2_t vmlsl_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) {
8808 return vmlsl_n_s32(__a, vget_high_s32(__b), __c); }
8809__ai uint32x4_t vmlsl_high_n_u16(uint32x4_t __a, uint16x8_t __b, uint16_t __c) {
8810 return vmlsl_n_u16(__a, vget_high_u16(__b), __c); }
8811__ai uint64x2_t vmlsl_high_n_u32(uint64x2_t __a, uint32x4_t __b, uint32_t __c) {
8812 return vmlsl_n_u32(__a, vget_high_u32(__b), __c); }
8813
/* vmlsl_laneq_*: __a - vmull_*(__b, splat), where splat is lane __d of the
 * 128-bit vector __c repeated once per lane of __b. Each vector argument is
 * evaluated once into a typed local; __d is pasted into the shuffle index
 * list and must be a constant. */
#define vmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x8_t __c = (c); \
  __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, \
                                               (__d), (__d), (__d), (__d))); \
})
#define vmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x4_t __c = (c); \
  __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmlsl_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x8_t __c = (c); \
  __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, \
                                               (__d), (__d), (__d), (__d))); \
})
#define vmlsl_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x4_t __c = (c); \
  __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
8826
/* vmls_laneq_*: __a - (__b * splat) on 64-bit vectors, where splat is lane __d
 * of the 128-bit vector __c repeated once per lane of __b. Written with plain
 * vector operators (a separate multiply and subtract in the source). Each
 * vector argument is evaluated once into a typed local; __d must be a
 * constant. */
#define vmls_laneq_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x8_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmls_laneq_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmls_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x8_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmls_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
#define vmls_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, (__d), (__d))); \
})
/* vmlsq_laneq_*: __a - (__b * splat) on 128-bit vectors, where splat is lane
 * __d of the 128-bit vector __c repeated once per lane. Written with plain
 * vector operators (a separate multiply and subtract in the source). Each
 * vector argument is evaluated once into a typed local; __d must be a
 * constant. */
#define vmlsq_laneq_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  int16x8_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d), \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlsq_laneq_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  int32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlsq_laneq_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x8_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d), \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlsq_laneq_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
#define vmlsq_laneq_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, \
                                       (__d), (__d), (__d), (__d))); \
})
8857
8858
8859
/* vmull_high_lane_*: vmull_*(vget_high_*(__a), splat), where splat is lane __c
 * of the 64-bit vector __b repeated once per lane of the narrow multiplicand.
 * Each vector argument is evaluated once into a typed local; __c is pasted
 * into the shuffle index list and must be a constant. */
#define vmull_high_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x4_t __b = (b); \
  vmull_s16(vget_high_s16(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_high_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x2_t __b = (b); \
  vmull_s32(vget_high_s32(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
#define vmull_high_lane_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x4_t __b = (b); \
  vmull_u16(vget_high_u16(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_high_lane_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x2_t __b = (b); \
  vmull_u32(vget_high_u32(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
8872
/* vmull_high_laneq_*: vmull_*(vget_high_*(__a), splat), where splat is lane
 * __c of the 128-bit vector __b repeated once per lane of the narrow
 * multiplicand. Each vector argument is evaluated once into a typed local;
 * __c is pasted into the shuffle index list and must be a constant. */
#define vmull_high_laneq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  vmull_s16(vget_high_s16(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_high_laneq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  vmull_s32(vget_high_s32(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
#define vmull_high_laneq_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  vmull_u16(vget_high_u16(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_high_laneq_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  vmull_u32(vget_high_u32(__a), \
            __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
8885
8886__ai int32x4_t vmull_high_n_s16(int16x8_t __a, int16_t __b) {
8887 return vmull_n_s16(vget_high_s16(__a), __b); }
8888__ai int64x2_t vmull_high_n_s32(int32x4_t __a, int32_t __b) {
8889 return vmull_n_s32(vget_high_s32(__a), __b); }
8890__ai uint32x4_t vmull_high_n_u16(uint16x8_t __a, uint16_t __b) {
8891 return vmull_n_u16(vget_high_u16(__a), __b); }
8892__ai uint64x2_t vmull_high_n_u32(uint32x4_t __a, uint32_t __b) {
8893 return vmull_n_u32(vget_high_u32(__a), __b); }
8894
/* vmull_laneq_*: vmull_*(__a, splat), where splat is lane __c of the 128-bit
 * vector __b repeated once per lane of __a. Each vector argument is evaluated
 * once into a typed local; __c is pasted into the shuffle index list and must
 * be a constant. */
#define vmull_laneq_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x8_t __b = (b); \
  vmull_s16(__a, \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_laneq_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x4_t __b = (b); \
  vmull_s32(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
#define vmull_laneq_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x8_t __b = (b); \
  vmull_u16(__a, \
            __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmull_laneq_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x4_t __b = (b); \
  vmull_u32(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
8907
/* vmulx_lane_f32 / vmulxq_lane_f32 / vmulxq_lane_f64: call the matching
 * vmulx*_ intrinsic with __a and a splat of lane __c taken from the 64-bit
 * vector __b (one repeated index per lane of __a). Each vector argument is
 * evaluated once into a typed local; __c must be a constant. */
#define vmulx_lane_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  vmulx_f32(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
#define vmulxq_lane_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x2_t __b = (b); \
  vmulxq_f32(__a, \
             __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmulxq_lane_f64(a, b, __c) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x1_t __b = (b); \
  vmulxq_f64(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
8917
/* vmulx_laneq_f32 / vmulxq_laneq_f32 / vmulxq_laneq_f64: call the matching
 * vmulx*_ intrinsic with __a and a splat of lane __c taken from the 128-bit
 * vector __b (one repeated index per lane of __a). Each vector argument is
 * evaluated once into a typed local; __c must be a constant. */
#define vmulx_laneq_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x4_t __b = (b); \
  vmulx_f32(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
#define vmulxq_laneq_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  vmulxq_f32(__a, \
             __builtin_shufflevector(__b, __b, (__c), (__c), (__c), (__c))); \
})
#define vmulxq_laneq_f64(a, b, __c) __extension__ ({ \
  float64x2_t __a = (a); \
  float64x2_t __b = (b); \
  vmulxq_f64(__a, __builtin_shufflevector(__b, __b, (__c), (__c))); \
})
8927
/*
 * vmul_laneq_*(a, b, lane): multiply a, element by element, with a vector
 * in which every element is element `lane` of b.  The product is formed
 * with the vector `*` operator; no other intrinsic is called.
 */
#define vmul_laneq_s16(a, b, __c) __extension__ ({ \
  int16x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c); \
})
#define vmul_laneq_s32(a, b, __c) __extension__ ({ \
  int32x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c); \
})
#define vmul_laneq_f32(a, b, __c) __extension__ ({ \
  float32x2_t __s0 = (a); \
  float32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c); \
})
#define vmul_laneq_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __s0 = (a); \
  uint16x8_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c); \
})
#define vmul_laneq_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __s0 = (a); \
  uint32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c); \
})
/*
 * vmulq_laneq_*(a, b, lane): a and b have the same vector type.  Element
 * `lane` of b is replicated into every position and the result is
 * multiplied with a using the vector `*` operator.
 */
#define vmulq_laneq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c, \
                                 __c, __c, __c, __c); \
})
#define vmulq_laneq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c); \
})
#define vmulq_laneq_f32(a, b, __c) __extension__ ({ \
  float32x4_t __s0 = (a); \
  float32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c); \
})
#define vmulq_laneq_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __s0 = (a); \
  uint16x8_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c, \
                                 __c, __c, __c, __c); \
})
#define vmulq_laneq_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __s0 = (a); \
  uint32x4_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c); \
})
#define vmulq_laneq_f64(a, b, __c) __extension__ ({ \
  float64x2_t __s0 = (a); \
  float64x2_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c); \
})
8961
/*
 * vmulq_lane_f64(a, b, lane): b is a float64x1_t; element `lane` of b is
 * replicated into a two-element vector which is multiplied with a using the
 * vector `*` operator.
 */
#define vmulq_lane_f64(a, b, __c) __extension__ ({ \
  float64x2_t __s0 = (a); \
  float64x1_t __s1 = (b); \
  __s0 * __builtin_shufflevector(__s1, __s1, __c, __c); \
})
8965
8966__ai float64x2_t vmulq_n_f64(float64x2_t __a, float64_t __b) {
8967 return __a * (float64x2_t){ __b, __b }; }
8968
8969__ai int32x4_t vqdmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) {
8970 return vqdmlal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); }
8971__ai int64x2_t vqdmlal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) {
8972 return vqdmlal_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); }
8973
/*
 * vqdmlal_high_lane_*(a, b, c, lane): calls vqdmlal_* with a, with
 * vget_high_*(b), and with a vector in which every element is element
 * `lane` of c.
 */
#define vqdmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  int16x4_t __s2 = (c); \
  vqdmlal_s16(__s0, vget_high_s16(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  int32x2_t __s2 = (c); \
  vqdmlal_s32(__s0, vget_high_s32(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
8980
/*
 * vqdmlal_high_laneq_*(a, b, c, lane): as the _high_lane forms, but c is
 * declared with the same vector type as b.  vqdmlal_* receives a,
 * vget_high_*(b), and element `lane` of c replicated across a vector.
 */
#define vqdmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  vqdmlal_s16(__s0, vget_high_s16(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  vqdmlal_s32(__s0, vget_high_s32(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
8987
8988__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) {
8989 return vqdmlal_n_s16(__a, vget_high_s16(__b), __c); }
8990__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) {
8991 return vqdmlal_n_s32(__a, vget_high_s32(__b), __c); }
8992
/*
 * vqdmlal_laneq_*(a, b, c, lane): calls vqdmlal_* with a, b, and a vector
 * in which every element is element `lane` of c.
 */
#define vqdmlal_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x4_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  vqdmlal_s16(__s0, __s1, \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlal_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x2_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  vqdmlal_s32(__s0, __s1, \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
8999
9000__ai int32x4_t vqdmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) {
9001 return vqdmlsl_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); }
9002__ai int64x2_t vqdmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) {
9003 return vqdmlsl_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); }
9004
/*
 * vqdmlsl_high_lane_*(a, b, c, lane): calls vqdmlsl_* with a, with
 * vget_high_*(b), and with a vector in which every element is element
 * `lane` of c.
 */
#define vqdmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  int16x4_t __s2 = (c); \
  vqdmlsl_s16(__s0, vget_high_s16(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  int32x2_t __s2 = (c); \
  vqdmlsl_s32(__s0, vget_high_s32(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
9011
/*
 * vqdmlsl_high_laneq_*(a, b, c, lane): as the _high_lane forms, but c is
 * declared with the same vector type as b.  vqdmlsl_* receives a,
 * vget_high_*(b), and element `lane` of c replicated across a vector.
 */
#define vqdmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  vqdmlsl_s16(__s0, vget_high_s16(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  vqdmlsl_s32(__s0, vget_high_s32(__s1), \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
9018
9019__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) {
9020 return vqdmlsl_n_s16(__a, vget_high_s16(__b), __c); }
9021__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) {
9022 return vqdmlsl_n_s32(__a, vget_high_s32(__b), __c); }
9023
/*
 * vqdmlsl_laneq_*(a, b, c, lane): calls vqdmlsl_* with a, b, and a vector
 * in which every element is element `lane` of c.
 */
#define vqdmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int16x4_t __s1 = (b); \
  int16x8_t __s2 = (c); \
  vqdmlsl_s16(__s0, __s1, \
              __builtin_shufflevector(__s2, __s2, __d, __d, __d, __d)); \
})
#define vqdmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __s0 = (a); \
  int32x2_t __s1 = (b); \
  int32x4_t __s2 = (c); \
  vqdmlsl_s32(__s0, __s1, \
              __builtin_shufflevector(__s2, __s2, __d, __d)); \
})
9030
/*
 * vqdmulh_laneq_* / vqdmulhq_laneq_*(a, b, lane): replicate element `lane`
 * of b into a vector with as many elements as a, then call vqdmulh_* /
 * vqdmulhq_* with a and that vector.
 */
#define vqdmulh_laneq_s16(a, b, __c) __extension__ ({ \
  int16x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
#define vqdmulh_laneq_s32(a, b, __c) __extension__ ({ \
  int32x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __c, __c)); \
})
#define vqdmulhq_laneq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c, \
                                             __c, __c, __c, __c)); \
})
#define vqdmulhq_laneq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
9043
9044__ai int32x4_t vqdmull_high_s16(int16x8_t __a, int16x8_t __b) {
9045 return vqdmull_s16(vget_high_s16(__a), vget_high_s16(__b)); }
9046__ai int64x2_t vqdmull_high_s32(int32x4_t __a, int32x4_t __b) {
9047 return vqdmull_s32(vget_high_s32(__a), vget_high_s32(__b)); }
9048
/*
 * vqdmull_high_lane_*(a, b, lane): calls vqdmull_* with vget_high_*(a) and
 * a vector in which every element is element `lane` of b.
 */
#define vqdmull_high_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x4_t __s1 = (b); \
  vqdmull_s16(vget_high_s16(__s0), \
              __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
#define vqdmull_high_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x2_t __s1 = (b); \
  vqdmull_s32(vget_high_s32(__s0), \
              __builtin_shufflevector(__s1, __s1, __c, __c)); \
})
9055
/*
 * vqdmull_high_laneq_*(a, b, lane): as the _high_lane forms, but b has the
 * same vector type as a.  vqdmull_* receives vget_high_*(a) and element
 * `lane` of b replicated across a vector.
 */
#define vqdmull_high_laneq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqdmull_s16(vget_high_s16(__s0), \
              __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
#define vqdmull_high_laneq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqdmull_s32(vget_high_s32(__s0), \
              __builtin_shufflevector(__s1, __s1, __c, __c)); \
})
9062
9063__ai int32x4_t vqdmull_high_n_s16(int16x8_t __a, int16_t __b) {
9064 return vqdmull_n_s16(vget_high_s16(__a), __b); }
9065__ai int64x2_t vqdmull_high_n_s32(int32x4_t __a, int32_t __b) {
9066 return vqdmull_n_s32(vget_high_s32(__a), __b); }
9067
/*
 * vqdmull_laneq_*(a, b, lane): calls vqdmull_* with a and a vector in which
 * every element is element `lane` of b.
 */
#define vqdmull_laneq_s16(a, b, __c) __extension__ ({ \
  int16x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqdmull_s16(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
#define vqdmull_laneq_s32(a, b, __c) __extension__ ({ \
  int32x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqdmull_s32(__s0, __builtin_shufflevector(__s1, __s1, __c, __c)); \
})
9074
/*
 * vqrdmulh_laneq_* / vqrdmulhq_laneq_*(a, b, lane): replicate element
 * `lane` of b into a vector with as many elements as a, then call
 * vqrdmulh_* / vqrdmulhq_* with a and that vector.
 */
#define vqrdmulh_laneq_s16(a, b, __c) __extension__ ({ \
  int16x4_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqrdmulh_s16(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
#define vqrdmulh_laneq_s32(a, b, __c) __extension__ ({ \
  int32x2_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqrdmulh_s32(__s0, __builtin_shufflevector(__s1, __s1, __c, __c)); \
})
#define vqrdmulhq_laneq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __s0 = (a); \
  int16x8_t __s1 = (b); \
  vqrdmulhq_s16(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c, \
                                              __c, __c, __c, __c)); \
})
#define vqrdmulhq_laneq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __s0 = (a); \
  int32x4_t __s1 = (b); \
  vqrdmulhq_s32(__s0, __builtin_shufflevector(__s1, __s1, __c, __c, __c, __c)); \
})
9087
9088__ai uint8x8_t vqtbl1_u8(uint8x16_t __a, uint8x8_t __b) {
9089 return (uint8x8_t)__builtin_neon_vqtbl1_v((int8x16_t)__a, (int8x8_t)__b, 16); }
9090__ai int8x8_t vqtbl1_s8(int8x16_t __a, int8x8_t __b) {
9091 return (int8x8_t)__builtin_neon_vqtbl1_v(__a, __b, 0); }
9092__ai poly8x8_t vqtbl1_p8(poly8x16_t __a, uint8x8_t __b) {
9093 return (poly8x8_t)__builtin_neon_vqtbl1_v((int8x16_t)__a, (int8x8_t)__b, 4); }
9094__ai uint8x16_t vqtbl1q_u8(uint8x16_t __a, uint8x16_t __b) {
9095 return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__a, (int8x16_t)__b, 48); }
9096__ai int8x16_t vqtbl1q_s8(int8x16_t __a, int8x16_t __b) {
9097 return (int8x16_t)__builtin_neon_vqtbl1q_v(__a, __b, 32); }
9098__ai poly8x16_t vqtbl1q_p8(poly8x16_t __a, uint8x16_t __b) {
9099 return (poly8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__a, (int8x16_t)__b, 36); }
9100
9101__ai uint8x8_t vqtbl2_u8(uint8x16x2_t __a, uint8x8_t __b) {
9102 return (uint8x8_t)__builtin_neon_vqtbl2_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x8_t)__b, 16); }
9103__ai int8x8_t vqtbl2_s8(int8x16x2_t __a, int8x8_t __b) {
9104 return (int8x8_t)__builtin_neon_vqtbl2_v(__a.val[0], __a.val[1], __b, 0); }
9105__ai poly8x8_t vqtbl2_p8(poly8x16x2_t __a, uint8x8_t __b) {
9106 return (poly8x8_t)__builtin_neon_vqtbl2_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x8_t)__b, 4); }
9107__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __a, uint8x16_t __b) {
9108 return (uint8x16_t)__builtin_neon_vqtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__b, 48); }
9109__ai int8x16_t vqtbl2q_s8(int8x16x2_t __a, int8x16_t __b) {
9110 return (int8x16_t)__builtin_neon_vqtbl2q_v(__a.val[0], __a.val[1], __b, 32); }
9111__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __a, uint8x16_t __b) {
9112 return (poly8x16_t)__builtin_neon_vqtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__b, 36); }
9113
9114__ai uint8x8_t vqtbl3_u8(uint8x16x3_t __a, uint8x8_t __b) {
9115 return (uint8x8_t)__builtin_neon_vqtbl3_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x8_t)__b, 16); }
9116__ai int8x8_t vqtbl3_s8(int8x16x3_t __a, int8x8_t __b) {
9117 return (int8x8_t)__builtin_neon_vqtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); }
9118__ai poly8x8_t vqtbl3_p8(poly8x16x3_t __a, uint8x8_t __b) {
9119 return (poly8x8_t)__builtin_neon_vqtbl3_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x8_t)__b, 4); }
9120__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __a, uint8x16_t __b) {
9121 return (uint8x16_t)__builtin_neon_vqtbl3q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__b, 48); }
9122__ai int8x16_t vqtbl3q_s8(int8x16x3_t __a, int8x16_t __b) {
9123 return (int8x16_t)__builtin_neon_vqtbl3q_v(__a.val[0], __a.val[1], __a.val[2], __b, 32); }
9124__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __a, uint8x16_t __b) {
9125 return (poly8x16_t)__builtin_neon_vqtbl3q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__b, 36); }
9126
9127__ai uint8x8_t vqtbl4_u8(uint8x16x4_t __a, uint8x8_t __b) {
9128 return (uint8x8_t)__builtin_neon_vqtbl4_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x8_t)__b, 16); }
9129__ai int8x8_t vqtbl4_s8(int8x16x4_t __a, int8x8_t __b) {
9130 return (int8x8_t)__builtin_neon_vqtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); }
9131__ai poly8x8_t vqtbl4_p8(poly8x16x4_t __a, uint8x8_t __b) {
9132 return (poly8x8_t)__builtin_neon_vqtbl4_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x8_t)__b, 4); }
9133__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __a, uint8x16_t __b) {
9134 return (uint8x16_t)__builtin_neon_vqtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x16_t)__b, 48); }
9135__ai int8x16_t vqtbl4q_s8(int8x16x4_t __a, int8x16_t __b) {
9136 return (int8x16_t)__builtin_neon_vqtbl4q_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 32); }
9137__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __a, uint8x16_t __b) {
9138 return (poly8x16_t)__builtin_neon_vqtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x16_t)__b, 36); }
9139
9140__ai uint8x8_t vqtbx1_u8(uint8x8_t __a, uint8x16_t __b, uint8x8_t __c) {
9141 return (uint8x8_t)__builtin_neon_vqtbx1_v((int8x8_t)__a, (int8x16_t)__b, (int8x8_t)__c, 16); }
9142__ai int8x8_t vqtbx1_s8(int8x8_t __a, int8x16_t __b, int8x8_t __c) {
9143 return (int8x8_t)__builtin_neon_vqtbx1_v(__a, __b, __c, 0); }
9144__ai poly8x8_t vqtbx1_p8(poly8x8_t __a, poly8x16_t __b, uint8x8_t __c) {
9145 return (poly8x8_t)__builtin_neon_vqtbx1_v((int8x8_t)__a, (int8x16_t)__b, (int8x8_t)__c, 4); }
9146__ai uint8x16_t vqtbx1q_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
9147 return (uint8x16_t)__builtin_neon_vqtbx1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); }
9148__ai int8x16_t vqtbx1q_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
9149 return (int8x16_t)__builtin_neon_vqtbx1q_v(__a, __b, __c, 32); }
9150__ai poly8x16_t vqtbx1q_p8(poly8x16_t __a, poly8x16_t __b, uint8x16_t __c) {
9151 return (poly8x16_t)__builtin_neon_vqtbx1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); }
9152
9153__ai uint8x8_t vqtbx2_u8(uint8x8_t __a, uint8x16x2_t __b, uint8x8_t __c) {
9154 return (uint8x8_t)__builtin_neon_vqtbx2_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x8_t)__c, 16); }
9155__ai int8x8_t vqtbx2_s8(int8x8_t __a, int8x16x2_t __b, int8x8_t __c) {
9156 return (int8x8_t)__builtin_neon_vqtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); }
9157__ai poly8x8_t vqtbx2_p8(poly8x8_t __a, poly8x16x2_t __b, uint8x8_t __c) {
9158 return (poly8x8_t)__builtin_neon_vqtbx2_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x8_t)__c, 4); }
9159__ai uint8x16_t vqtbx2q_u8(uint8x16_t __a, uint8x16x2_t __b, uint8x16_t __c) {
9160 return (uint8x16_t)__builtin_neon_vqtbx2q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__c, 48); }
9161__ai int8x16_t vqtbx2q_s8(int8x16_t __a, int8x16x2_t __b, int8x16_t __c) {
9162 return (int8x16_t)__builtin_neon_vqtbx2q_v(__a, __b.val[0], __b.val[1], __c, 32); }
9163__ai poly8x16_t vqtbx2q_p8(poly8x16_t __a, poly8x16x2_t __b, uint8x16_t __c) {
9164 return (poly8x16_t)__builtin_neon_vqtbx2q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__c, 36); }
9165
9166__ai uint8x8_t vqtbx3_u8(uint8x8_t __a, uint8x16x3_t __b, uint8x8_t __c) {
9167 return (uint8x8_t)__builtin_neon_vqtbx3_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x8_t)__c, 16); }
9168__ai int8x8_t vqtbx3_s8(int8x8_t __a, int8x16x3_t __b, int8x8_t __c) {
9169 return (int8x8_t)__builtin_neon_vqtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }
9170__ai poly8x8_t vqtbx3_p8(poly8x8_t __a, poly8x16x3_t __b, uint8x8_t __c) {
9171 return (poly8x8_t)__builtin_neon_vqtbx3_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x8_t)__c, 4); }
9172__ai uint8x16_t vqtbx3q_u8(uint8x16_t __a, uint8x16x3_t __b, uint8x16_t __c) {
9173 return (uint8x16_t)__builtin_neon_vqtbx3q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__c, 48); }
9174__ai int8x16_t vqtbx3q_s8(int8x16_t __a, int8x16x3_t __b, int8x16_t __c) {
9175 return (int8x16_t)__builtin_neon_vqtbx3q_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 32); }
9176__ai poly8x16_t vqtbx3q_p8(poly8x16_t __a, poly8x16x3_t __b, uint8x16_t __c) {
9177 return (poly8x16_t)__builtin_neon_vqtbx3q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__c, 36); }
9178
9179__ai uint8x8_t vqtbx4_u8(uint8x8_t __a, uint8x16x4_t __b, uint8x8_t __c) {
9180 return (uint8x8_t)__builtin_neon_vqtbx4_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x8_t)__c, 16); }
9181__ai int8x8_t vqtbx4_s8(int8x8_t __a, int8x16x4_t __b, int8x8_t __c) {
9182 return (int8x8_t)__builtin_neon_vqtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }
9183__ai poly8x8_t vqtbx4_p8(poly8x8_t __a, poly8x16x4_t __b, uint8x8_t __c) {
9184 return (poly8x8_t)__builtin_neon_vqtbx4_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x8_t)__c, 4); }
9185__ai uint8x16_t vqtbx4q_u8(uint8x16_t __a, uint8x16x4_t __b, uint8x16_t __c) {
9186 return (uint8x16_t)__builtin_neon_vqtbx4q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x16_t)__c, 48); }
9187__ai int8x16_t vqtbx4q_s8(int8x16_t __a, int8x16x4_t __b, int8x16_t __c) {
9188 return (int8x16_t)__builtin_neon_vqtbx4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 32); }
9189__ai poly8x16_t vqtbx4q_p8(poly8x16_t __a, poly8x16x4_t __b, uint8x16_t __c) {
9190 return (poly8x16_t)__builtin_neon_vqtbx4q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x16_t)__c, 36); }
9191
9192__ai int8x16_t vraddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) {
9193 return vcombine_s8(__a, vraddhn_s16(__b, __c)); }
9194__ai int16x8_t vraddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) {
9195 return vcombine_s16(__a, vraddhn_s32(__b, __c)); }
9196__ai int32x4_t vraddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) {
9197 return vcombine_s32(__a, vraddhn_s64(__b, __c)); }
9198__ai uint8x16_t vraddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) {
9199 return vcombine_u8(__a, vraddhn_u16(__b, __c)); }
9200__ai uint16x8_t vraddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9201 return vcombine_u16(__a, vraddhn_u32(__b, __c)); }
9202__ai uint32x4_t vraddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) {
9203 return vcombine_u32(__a, vraddhn_u64(__b, __c)); }
9204
9205__ai int8x16_t vrsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) {
9206 return vcombine_s8(__a, vrsubhn_s16(__b, __c)); }
9207__ai int16x8_t vrsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) {
9208 return vcombine_s16(__a, vrsubhn_s32(__b, __c)); }
9209__ai int32x4_t vrsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) {
9210 return vcombine_s32(__a, vrsubhn_s64(__b, __c)); }
9211__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) {
9212 return vcombine_u8(__a, vrsubhn_u16(__b, __c)); }
9213__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9214 return vcombine_u16(__a, vrsubhn_u32(__b, __c)); }
9215__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) {
9216 return vcombine_u32(__a, vrsubhn_u64(__b, __c)); }
9217
9218__ai int8x16_t vsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) {
9219 return vcombine_s8(__a, vsubhn_s16(__b, __c)); }
9220__ai int16x8_t vsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) {
9221 return vcombine_s16(__a, vsubhn_s32(__b, __c)); }
9222__ai int32x4_t vsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) {
9223 return vcombine_s32(__a, vsubhn_s64(__b, __c)); }
9224__ai uint8x16_t vsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) {
9225 return vcombine_u8(__a, vsubhn_u16(__b, __c)); }
9226__ai uint16x8_t vsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9227 return vcombine_u16(__a, vsubhn_u32(__b, __c)); }
9228__ai uint32x4_t vsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) {
9229 return vcombine_u32(__a, vsubhn_u64(__b, __c)); }
9230
9231__ai int16x8_t vsubl_high_s8(int8x16_t __a, int8x16_t __b) {
9232 return vmovl_high_s8(__a) - vmovl_high_s8(__b); }
9233__ai int32x4_t vsubl_high_s16(int16x8_t __a, int16x8_t __b) {
9234 return vmovl_high_s16(__a) - vmovl_high_s16(__b); }
9235__ai int64x2_t vsubl_high_s32(int32x4_t __a, int32x4_t __b) {
9236 return vmovl_high_s32(__a) - vmovl_high_s32(__b); }
9237__ai uint16x8_t vsubl_high_u8(uint8x16_t __a, uint8x16_t __b) {
9238 return vmovl_high_u8(__a) - vmovl_high_u8(__b); }
9239__ai uint32x4_t vsubl_high_u16(uint16x8_t __a, uint16x8_t __b) {
9240 return vmovl_high_u16(__a) - vmovl_high_u16(__b); }
9241__ai uint64x2_t vsubl_high_u32(uint32x4_t __a, uint32x4_t __b) {
9242 return vmovl_high_u32(__a) - vmovl_high_u32(__b); }
9243
9244__ai int16x8_t vsubw_high_s8(int16x8_t __a, int8x16_t __b) {
9245 return __a - vmovl_high_s8(__b); }
9246__ai int32x4_t vsubw_high_s16(int32x4_t __a, int16x8_t __b) {
9247 return __a - vmovl_high_s16(__b); }
9248__ai int64x2_t vsubw_high_s32(int64x2_t __a, int32x4_t __b) {
9249 return __a - vmovl_high_s32(__b); }
9250__ai uint16x8_t vsubw_high_u8(uint16x8_t __a, uint8x16_t __b) {
9251 return __a - vmovl_high_u8(__b); }
9252__ai uint32x4_t vsubw_high_u16(uint32x4_t __a, uint16x8_t __b) {
9253 return __a - vmovl_high_u16(__b); }
9254__ai uint64x2_t vsubw_high_u32(uint64x2_t __a, uint32x4_t __b) {
9255 return __a - vmovl_high_u32(__b); }
9256
9257__ai int8x8_t vtrn1_s8(int8x8_t __a, int8x8_t __b) {
9258 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9259__ai int16x4_t vtrn1_s16(int16x4_t __a, int16x4_t __b) {
9260 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9261__ai int32x2_t vtrn1_s32(int32x2_t __a, int32x2_t __b) {
9262 return __builtin_shufflevector(__a, __b, 0, 2); }
9263__ai uint8x8_t vtrn1_u8(uint8x8_t __a, uint8x8_t __b) {
9264 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9265__ai uint16x4_t vtrn1_u16(uint16x4_t __a, uint16x4_t __b) {
9266 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9267__ai uint32x2_t vtrn1_u32(uint32x2_t __a, uint32x2_t __b) {
9268 return __builtin_shufflevector(__a, __b, 0, 2); }
9269__ai float32x2_t vtrn1_f32(float32x2_t __a, float32x2_t __b) {
9270 return __builtin_shufflevector(__a, __b, 0, 2); }
9271__ai poly8x8_t vtrn1_p8(poly8x8_t __a, poly8x8_t __b) {
9272 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9273__ai poly16x4_t vtrn1_p16(poly16x4_t __a, poly16x4_t __b) {
9274 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9275__ai int8x16_t vtrn1q_s8(int8x16_t __a, int8x16_t __b) {
9276 return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); }
9277__ai int16x8_t vtrn1q_s16(int16x8_t __a, int16x8_t __b) {
9278 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9279__ai int32x4_t vtrn1q_s32(int32x4_t __a, int32x4_t __b) {
9280 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9281__ai int64x2_t vtrn1q_s64(int64x2_t __a, int64x2_t __b) {
9282 return __builtin_shufflevector(__a, __b, 0, 2); }
9283__ai uint8x16_t vtrn1q_u8(uint8x16_t __a, uint8x16_t __b) {
9284 return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); }
9285__ai uint16x8_t vtrn1q_u16(uint16x8_t __a, uint16x8_t __b) {
9286 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9287__ai uint32x4_t vtrn1q_u32(uint32x4_t __a, uint32x4_t __b) {
9288 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9289__ai uint64x2_t vtrn1q_u64(uint64x2_t __a, uint64x2_t __b) {
9290 return __builtin_shufflevector(__a, __b, 0, 2); }
9291__ai float32x4_t vtrn1q_f32(float32x4_t __a, float32x4_t __b) {
9292 return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); }
9293__ai float64x2_t vtrn1q_f64(float64x2_t __a, float64x2_t __b) {
9294 return __builtin_shufflevector(__a, __b, 0, 2); }
9295__ai poly8x16_t vtrn1q_p8(poly8x16_t __a, poly8x16_t __b) {
9296 return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); }
9297__ai poly16x8_t vtrn1q_p16(poly16x8_t __a, poly16x8_t __b) {
9298 return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); }
9299__ai poly64x2_t vtrn1q_p64(poly64x2_t __a, poly64x2_t __b) {
9300 return __builtin_shufflevector(__a, __b, 0, 2); }
9301
9302__ai int8x8_t vtrn2_s8(int8x8_t __a, int8x8_t __b) {
9303 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9304__ai int16x4_t vtrn2_s16(int16x4_t __a, int16x4_t __b) {
9305 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9306__ai int32x2_t vtrn2_s32(int32x2_t __a, int32x2_t __b) {
9307 return __builtin_shufflevector(__a, __b, 1, 3); }
9308__ai uint8x8_t vtrn2_u8(uint8x8_t __a, uint8x8_t __b) {
9309 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9310__ai uint16x4_t vtrn2_u16(uint16x4_t __a, uint16x4_t __b) {
9311 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9312__ai uint32x2_t vtrn2_u32(uint32x2_t __a, uint32x2_t __b) {
9313 return __builtin_shufflevector(__a, __b, 1, 3); }
9314__ai float32x2_t vtrn2_f32(float32x2_t __a, float32x2_t __b) {
9315 return __builtin_shufflevector(__a, __b, 1, 3); }
9316__ai poly8x8_t vtrn2_p8(poly8x8_t __a, poly8x8_t __b) {
9317 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9318__ai poly16x4_t vtrn2_p16(poly16x4_t __a, poly16x4_t __b) {
9319 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9320__ai int8x16_t vtrn2q_s8(int8x16_t __a, int8x16_t __b) {
9321 return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); }
9322__ai int16x8_t vtrn2q_s16(int16x8_t __a, int16x8_t __b) {
9323 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9324__ai int32x4_t vtrn2q_s32(int32x4_t __a, int32x4_t __b) {
9325 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9326__ai int64x2_t vtrn2q_s64(int64x2_t __a, int64x2_t __b) {
9327 return __builtin_shufflevector(__a, __b, 1, 3); }
9328__ai uint8x16_t vtrn2q_u8(uint8x16_t __a, uint8x16_t __b) {
9329 return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); }
9330__ai uint16x8_t vtrn2q_u16(uint16x8_t __a, uint16x8_t __b) {
9331 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9332__ai uint32x4_t vtrn2q_u32(uint32x4_t __a, uint32x4_t __b) {
9333 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9334__ai uint64x2_t vtrn2q_u64(uint64x2_t __a, uint64x2_t __b) {
9335 return __builtin_shufflevector(__a, __b, 1, 3); }
9336__ai float32x4_t vtrn2q_f32(float32x4_t __a, float32x4_t __b) {
9337 return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); }
9338__ai float64x2_t vtrn2q_f64(float64x2_t __a, float64x2_t __b) {
9339 return __builtin_shufflevector(__a, __b, 1, 3); }
9340__ai poly8x16_t vtrn2q_p8(poly8x16_t __a, poly8x16_t __b) {
9341 return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); }
9342__ai poly16x8_t vtrn2q_p16(poly16x8_t __a, poly16x8_t __b) {
9343 return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); }
9344__ai poly64x2_t vtrn2q_p64(poly64x2_t __a, poly64x2_t __b) {
9345 return __builtin_shufflevector(__a, __b, 1, 3); }
9346
9347__ai int8x8_t vuzp1_s8(int8x8_t __a, int8x8_t __b) {
9348 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9349__ai int16x4_t vuzp1_s16(int16x4_t __a, int16x4_t __b) {
9350 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9351__ai int32x2_t vuzp1_s32(int32x2_t __a, int32x2_t __b) {
9352 return __builtin_shufflevector(__a, __b, 0, 2); }
9353__ai uint8x8_t vuzp1_u8(uint8x8_t __a, uint8x8_t __b) {
9354 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9355__ai uint16x4_t vuzp1_u16(uint16x4_t __a, uint16x4_t __b) {
9356 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9357__ai uint32x2_t vuzp1_u32(uint32x2_t __a, uint32x2_t __b) {
9358 return __builtin_shufflevector(__a, __b, 0, 2); }
9359__ai float32x2_t vuzp1_f32(float32x2_t __a, float32x2_t __b) {
9360 return __builtin_shufflevector(__a, __b, 0, 2); }
9361__ai poly8x8_t vuzp1_p8(poly8x8_t __a, poly8x8_t __b) {
9362 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9363__ai poly16x4_t vuzp1_p16(poly16x4_t __a, poly16x4_t __b) {
9364 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9365__ai int8x16_t vuzp1q_s8(int8x16_t __a, int8x16_t __b) {
9366 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); }
9367__ai int16x8_t vuzp1q_s16(int16x8_t __a, int16x8_t __b) {
9368 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9369__ai int32x4_t vuzp1q_s32(int32x4_t __a, int32x4_t __b) {
9370 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9371__ai int64x2_t vuzp1q_s64(int64x2_t __a, int64x2_t __b) {
9372 return __builtin_shufflevector(__a, __b, 0, 2); }
9373__ai uint8x16_t vuzp1q_u8(uint8x16_t __a, uint8x16_t __b) {
9374 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); }
9375__ai uint16x8_t vuzp1q_u16(uint16x8_t __a, uint16x8_t __b) {
9376 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9377__ai uint32x4_t vuzp1q_u32(uint32x4_t __a, uint32x4_t __b) {
9378 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9379__ai uint64x2_t vuzp1q_u64(uint64x2_t __a, uint64x2_t __b) {
9380 return __builtin_shufflevector(__a, __b, 0, 2); }
9381__ai float32x4_t vuzp1q_f32(float32x4_t __a, float32x4_t __b) {
9382 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); }
9383__ai float64x2_t vuzp1q_f64(float64x2_t __a, float64x2_t __b) {
9384 return __builtin_shufflevector(__a, __b, 0, 2); }
9385__ai poly8x16_t vuzp1q_p8(poly8x16_t __a, poly8x16_t __b) {
9386 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); }
9387__ai poly16x8_t vuzp1q_p16(poly16x8_t __a, poly16x8_t __b) {
9388 return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); }
9389__ai poly64x2_t vuzp1q_p64(poly64x2_t __a, poly64x2_t __b) {
9390 return __builtin_shufflevector(__a, __b, 0, 2); }
9391
9392__ai int8x8_t vuzp2_s8(int8x8_t __a, int8x8_t __b) {
9393 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9394__ai int16x4_t vuzp2_s16(int16x4_t __a, int16x4_t __b) {
9395 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9396__ai int32x2_t vuzp2_s32(int32x2_t __a, int32x2_t __b) {
9397 return __builtin_shufflevector(__a, __b, 1, 3); }
9398__ai uint8x8_t vuzp2_u8(uint8x8_t __a, uint8x8_t __b) {
9399 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9400__ai uint16x4_t vuzp2_u16(uint16x4_t __a, uint16x4_t __b) {
9401 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9402__ai uint32x2_t vuzp2_u32(uint32x2_t __a, uint32x2_t __b) {
9403 return __builtin_shufflevector(__a, __b, 1, 3); }
9404__ai float32x2_t vuzp2_f32(float32x2_t __a, float32x2_t __b) {
9405 return __builtin_shufflevector(__a, __b, 1, 3); }
9406__ai poly8x8_t vuzp2_p8(poly8x8_t __a, poly8x8_t __b) {
9407 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9408__ai poly16x4_t vuzp2_p16(poly16x4_t __a, poly16x4_t __b) {
9409 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9410__ai int8x16_t vuzp2q_s8(int8x16_t __a, int8x16_t __b) {
9411 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); }
9412__ai int16x8_t vuzp2q_s16(int16x8_t __a, int16x8_t __b) {
9413 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9414__ai int32x4_t vuzp2q_s32(int32x4_t __a, int32x4_t __b) {
9415 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9416__ai int64x2_t vuzp2q_s64(int64x2_t __a, int64x2_t __b) {
9417 return __builtin_shufflevector(__a, __b, 1, 3); }
9418__ai uint8x16_t vuzp2q_u8(uint8x16_t __a, uint8x16_t __b) {
9419 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); }
9420__ai uint16x8_t vuzp2q_u16(uint16x8_t __a, uint16x8_t __b) {
9421 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9422__ai uint32x4_t vuzp2q_u32(uint32x4_t __a, uint32x4_t __b) {
9423 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9424__ai uint64x2_t vuzp2q_u64(uint64x2_t __a, uint64x2_t __b) {
9425 return __builtin_shufflevector(__a, __b, 1, 3); }
9426__ai float32x4_t vuzp2q_f32(float32x4_t __a, float32x4_t __b) {
9427 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); }
9428__ai float64x2_t vuzp2q_f64(float64x2_t __a, float64x2_t __b) {
9429 return __builtin_shufflevector(__a, __b, 1, 3); }
9430__ai poly8x16_t vuzp2q_p8(poly8x16_t __a, poly8x16_t __b) {
9431 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); }
9432__ai poly16x8_t vuzp2q_p16(poly16x8_t __a, poly16x8_t __b) {
9433 return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); }
9434__ai poly64x2_t vuzp2q_p64(poly64x2_t __a, poly64x2_t __b) {
9435 return __builtin_shufflevector(__a, __b, 1, 3); }
9436
9437__ai int8x8_t vzip1_s8(int8x8_t __a, int8x8_t __b) {
9438 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9439__ai int16x4_t vzip1_s16(int16x4_t __a, int16x4_t __b) {
9440 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9441__ai int32x2_t vzip1_s32(int32x2_t __a, int32x2_t __b) {
9442 return __builtin_shufflevector(__a, __b, 0, 2); }
9443__ai uint8x8_t vzip1_u8(uint8x8_t __a, uint8x8_t __b) {
9444 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9445__ai uint16x4_t vzip1_u16(uint16x4_t __a, uint16x4_t __b) {
9446 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9447__ai uint32x2_t vzip1_u32(uint32x2_t __a, uint32x2_t __b) {
9448 return __builtin_shufflevector(__a, __b, 0, 2); }
9449__ai float32x2_t vzip1_f32(float32x2_t __a, float32x2_t __b) {
9450 return __builtin_shufflevector(__a, __b, 0, 2); }
9451__ai poly8x8_t vzip1_p8(poly8x8_t __a, poly8x8_t __b) {
9452 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9453__ai poly16x4_t vzip1_p16(poly16x4_t __a, poly16x4_t __b) {
9454 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9455__ai int8x16_t vzip1q_s8(int8x16_t __a, int8x16_t __b) {
9456 return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); }
9457__ai int16x8_t vzip1q_s16(int16x8_t __a, int16x8_t __b) {
9458 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9459__ai int32x4_t vzip1q_s32(int32x4_t __a, int32x4_t __b) {
9460 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9461__ai int64x2_t vzip1q_s64(int64x2_t __a, int64x2_t __b) {
9462 return __builtin_shufflevector(__a, __b, 0, 2); }
9463__ai uint8x16_t vzip1q_u8(uint8x16_t __a, uint8x16_t __b) {
9464 return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); }
9465__ai uint16x8_t vzip1q_u16(uint16x8_t __a, uint16x8_t __b) {
9466 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9467__ai uint32x4_t vzip1q_u32(uint32x4_t __a, uint32x4_t __b) {
9468 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9469__ai uint64x2_t vzip1q_u64(uint64x2_t __a, uint64x2_t __b) {
9470 return __builtin_shufflevector(__a, __b, 0, 2); }
9471__ai float32x4_t vzip1q_f32(float32x4_t __a, float32x4_t __b) {
9472 return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); }
9473__ai float64x2_t vzip1q_f64(float64x2_t __a, float64x2_t __b) {
9474 return __builtin_shufflevector(__a, __b, 0, 2); }
9475__ai poly8x16_t vzip1q_p8(poly8x16_t __a, poly8x16_t __b) {
9476 return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); }
9477__ai poly16x8_t vzip1q_p16(poly16x8_t __a, poly16x8_t __b) {
9478 return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); }
9479__ai poly64x2_t vzip1q_p64(poly64x2_t __a, poly64x2_t __b) {
9480 return __builtin_shufflevector(__a, __b, 0, 2); }
9481
9482__ai int8x8_t vzip2_s8(int8x8_t __a, int8x8_t __b) {
9483 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9484__ai int16x4_t vzip2_s16(int16x4_t __a, int16x4_t __b) {
9485 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9486__ai int32x2_t vzip2_s32(int32x2_t __a, int32x2_t __b) {
9487 return __builtin_shufflevector(__a, __b, 1, 3); }
9488__ai uint8x8_t vzip2_u8(uint8x8_t __a, uint8x8_t __b) {
9489 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9490__ai uint16x4_t vzip2_u16(uint16x4_t __a, uint16x4_t __b) {
9491 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9492__ai uint32x2_t vzip2_u32(uint32x2_t __a, uint32x2_t __b) {
9493 return __builtin_shufflevector(__a, __b, 1, 3); }
9494__ai float32x2_t vzip2_f32(float32x2_t __a, float32x2_t __b) {
9495 return __builtin_shufflevector(__a, __b, 1, 3); }
9496__ai poly8x8_t vzip2_p8(poly8x8_t __a, poly8x8_t __b) {
9497 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9498__ai poly16x4_t vzip2_p16(poly16x4_t __a, poly16x4_t __b) {
9499 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9500__ai int8x16_t vzip2q_s8(int8x16_t __a, int8x16_t __b) {
9501 return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); }
9502__ai int16x8_t vzip2q_s16(int16x8_t __a, int16x8_t __b) {
9503 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9504__ai int32x4_t vzip2q_s32(int32x4_t __a, int32x4_t __b) {
9505 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9506__ai int64x2_t vzip2q_s64(int64x2_t __a, int64x2_t __b) {
9507 return __builtin_shufflevector(__a, __b, 1, 3); }
9508__ai uint8x16_t vzip2q_u8(uint8x16_t __a, uint8x16_t __b) {
9509 return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); }
9510__ai uint16x8_t vzip2q_u16(uint16x8_t __a, uint16x8_t __b) {
9511 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9512__ai uint32x4_t vzip2q_u32(uint32x4_t __a, uint32x4_t __b) {
9513 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9514__ai uint64x2_t vzip2q_u64(uint64x2_t __a, uint64x2_t __b) {
9515 return __builtin_shufflevector(__a, __b, 1, 3); }
9516__ai float32x4_t vzip2q_f32(float32x4_t __a, float32x4_t __b) {
9517 return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); }
9518__ai float64x2_t vzip2q_f64(float64x2_t __a, float64x2_t __b) {
9519 return __builtin_shufflevector(__a, __b, 1, 3); }
9520__ai poly8x16_t vzip2q_p8(poly8x16_t __a, poly8x16_t __b) {
9521 return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); }
9522__ai poly16x8_t vzip2q_p16(poly16x8_t __a, poly16x8_t __b) {
9523 return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); }
9524__ai poly64x2_t vzip2q_p64(poly64x2_t __a, poly64x2_t __b) {
9525 return __builtin_shufflevector(__a, __b, 1, 3); }
9526
9527__ai int8x16_t vmovn_high_s16(int8x8_t __a, int16x8_t __b) {
9528 int8x8_t __a1 = vmovn_s16(__b);
9529 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
9530__ai int16x8_t vmovn_high_s32(int16x4_t __a, int32x4_t __b) {
9531 int16x4_t __a1 = vmovn_s32(__b);
9532 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
9533__ai int32x4_t vmovn_high_s64(int32x2_t __a, int64x2_t __b) {
9534 int32x2_t __a1 = vmovn_s64(__b);
9535 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
9536__ai uint8x16_t vmovn_high_u16(uint8x8_t __a, uint16x8_t __b) {
9537 uint8x8_t __a1 = vmovn_u16(__b);
9538 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); }
9539__ai uint16x8_t vmovn_high_u32(uint16x4_t __a, uint32x4_t __b) {
9540 uint16x4_t __a1 = vmovn_u32(__b);
9541 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3, 4, 5, 6, 7); }
9542__ai uint32x4_t vmovn_high_u64(uint32x2_t __a, uint64x2_t __b) {
9543 uint32x2_t __a1 = vmovn_u64(__b);
9544 return __builtin_shufflevector(__a, __a1, 0, 1, 2, 3); }
9545
9546#ifdef __ARM_FEATURE_CRYPTO
9547__ai uint8x16_t vaesdq_u8(uint8x16_t __a, uint8x16_t __b) {
9548 return (uint8x16_t)__builtin_neon_vaesdq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
9549
9550__ai uint8x16_t vaeseq_u8(uint8x16_t __a, uint8x16_t __b) {
9551 return (uint8x16_t)__builtin_neon_vaeseq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
9552
9553__ai uint8x16_t vaesimcq_u8(uint8x16_t __a) {
9554 return (uint8x16_t)__builtin_neon_vaesimcq_v((int8x16_t)__a, 48); }
9555
9556__ai uint8x16_t vaesmcq_u8(uint8x16_t __a) {
9557 return (uint8x16_t)__builtin_neon_vaesmcq_v((int8x16_t)__a, 48); }
9558
9559__ai uint32x4_t vsha1cq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) {
9560 return (uint32x4_t)__builtin_neon_vsha1cq_u32((int32x4_t)__a, __b, (int32x4_t)__c); }
9561
9562__ai uint32_t vsha1h_u32(uint32_t __a) {
9563 return (uint32_t)__builtin_neon_vsha1h_u32(__a); }
9564
9565__ai uint32x4_t vsha1mq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) {
9566 return (uint32x4_t)__builtin_neon_vsha1mq_u32((int32x4_t)__a, __b, (int32x4_t)__c); }
9567
9568__ai uint32x4_t vsha1pq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) {
9569 return (uint32x4_t)__builtin_neon_vsha1pq_u32((int32x4_t)__a, __b, (int32x4_t)__c); }
9570
9571__ai uint32x4_t vsha1su0q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9572 return (uint32x4_t)__builtin_neon_vsha1su0q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
9573
9574__ai uint32x4_t vsha1su1q_u32(uint32x4_t __a, uint32x4_t __b) {
9575 return (uint32x4_t)__builtin_neon_vsha1su1q_v((int8x16_t)__a, (int8x16_t)__b, 50); }
9576
9577__ai uint32x4_t vsha256hq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9578 return (uint32x4_t)__builtin_neon_vsha256hq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
9579
9580__ai uint32x4_t vsha256h2q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9581 return (uint32x4_t)__builtin_neon_vsha256h2q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
9582
9583__ai uint32x4_t vsha256su0q_u32(uint32x4_t __a, uint32x4_t __b) {
9584 return (uint32x4_t)__builtin_neon_vsha256su0q_v((int8x16_t)__a, (int8x16_t)__b, 50); }
9585
9586__ai uint32x4_t vsha256su1q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
9587 return (uint32x4_t)__builtin_neon_vsha256su1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
9588
9589#endif
9590
9591#endif
9592
9593#undef __ai
9594
9595#endif /* __ARM_NEON_H */