1/* ARM NEON intrinsics include file.
2
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27#ifndef _AARCH64_NEON_H_
28#define _AARCH64_NEON_H_
29
30#include <stdint.h>
31
32#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33#define __AARCH64_INT64_C(__C) ((int64_t) __C)
34
35typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41typedef int64_t int64x1_t;
42typedef int32_t int32x1_t;
43typedef int16_t int16x1_t;
44typedef int8_t int8x1_t;
45typedef double float64x1_t;
46typedef __builtin_aarch64_simd_sf float32x2_t
47 __attribute__ ((__vector_size__ (8)));
48typedef __builtin_aarch64_simd_poly8 poly8x8_t
49 __attribute__ ((__vector_size__ (8)));
50typedef __builtin_aarch64_simd_poly16 poly16x4_t
51 __attribute__ ((__vector_size__ (8)));
52typedef __builtin_aarch64_simd_uqi uint8x8_t
53 __attribute__ ((__vector_size__ (8)));
54typedef __builtin_aarch64_simd_uhi uint16x4_t
55 __attribute__ ((__vector_size__ (8)));
56typedef __builtin_aarch64_simd_usi uint32x2_t
57 __attribute__ ((__vector_size__ (8)));
58typedef uint64_t uint64x1_t;
59typedef uint32_t uint32x1_t;
60typedef uint16_t uint16x1_t;
61typedef uint8_t uint8x1_t;
62typedef __builtin_aarch64_simd_qi int8x16_t
63 __attribute__ ((__vector_size__ (16)));
64typedef __builtin_aarch64_simd_hi int16x8_t
65 __attribute__ ((__vector_size__ (16)));
66typedef __builtin_aarch64_simd_si int32x4_t
67 __attribute__ ((__vector_size__ (16)));
68typedef __builtin_aarch64_simd_di int64x2_t
69 __attribute__ ((__vector_size__ (16)));
70typedef __builtin_aarch64_simd_sf float32x4_t
71 __attribute__ ((__vector_size__ (16)));
72typedef __builtin_aarch64_simd_df float64x2_t
73 __attribute__ ((__vector_size__ (16)));
74typedef __builtin_aarch64_simd_poly8 poly8x16_t
75 __attribute__ ((__vector_size__ (16)));
76typedef __builtin_aarch64_simd_poly16 poly16x8_t
77 __attribute__ ((__vector_size__ (16)));
78typedef __builtin_aarch64_simd_poly64 poly64x2_t
79 __attribute__ ((__vector_size__ (16)));
80typedef __builtin_aarch64_simd_uqi uint8x16_t
81 __attribute__ ((__vector_size__ (16)));
82typedef __builtin_aarch64_simd_uhi uint16x8_t
83 __attribute__ ((__vector_size__ (16)));
84typedef __builtin_aarch64_simd_usi uint32x4_t
85 __attribute__ ((__vector_size__ (16)));
86typedef __builtin_aarch64_simd_udi uint64x2_t
87 __attribute__ ((__vector_size__ (16)));
88
89typedef float float32_t;
90typedef double float64_t;
91typedef __builtin_aarch64_simd_poly8 poly8_t;
92typedef __builtin_aarch64_simd_poly16 poly16_t;
93typedef __builtin_aarch64_simd_poly64 poly64_t;
94typedef __builtin_aarch64_simd_poly128 poly128_t;
95
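/* Editorial sketch, not part of the GCC header: the typedefs above follow
   the naming pattern <element>x<lanes>_t, with a total width of either
   64 bits (D-register types such as int16x4_t) or 128 bits (Q-register
   types such as int16x8_t).  In this version of the header the
   single-lane 64-bit types (int64x1_t, uint64x1_t, float64x1_t) are plain
   scalars rather than vector types.  Assuming GCC's vector-extension
   initializer syntax, a vector can be built directly:

     int32x4_t q = { 1, 2, 3, 4 };    four 32-bit lanes, 128 bits
     int32x2_t d = { 5, 6 };          two 32-bit lanes, 64 bits
     float64x1_t s = 7.0;             a plain double in this header  */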
96typedef struct int8x8x2_t
97{
98 int8x8_t val[2];
99} int8x8x2_t;
100
101typedef struct int8x16x2_t
102{
103 int8x16_t val[2];
104} int8x16x2_t;
105
106typedef struct int16x4x2_t
107{
108 int16x4_t val[2];
109} int16x4x2_t;
110
111typedef struct int16x8x2_t
112{
113 int16x8_t val[2];
114} int16x8x2_t;
115
116typedef struct int32x2x2_t
117{
118 int32x2_t val[2];
119} int32x2x2_t;
120
121typedef struct int32x4x2_t
122{
123 int32x4_t val[2];
124} int32x4x2_t;
125
126typedef struct int64x1x2_t
127{
128 int64x1_t val[2];
129} int64x1x2_t;
130
131typedef struct int64x2x2_t
132{
133 int64x2_t val[2];
134} int64x2x2_t;
135
136typedef struct uint8x8x2_t
137{
138 uint8x8_t val[2];
139} uint8x8x2_t;
140
141typedef struct uint8x16x2_t
142{
143 uint8x16_t val[2];
144} uint8x16x2_t;
145
146typedef struct uint16x4x2_t
147{
148 uint16x4_t val[2];
149} uint16x4x2_t;
150
151typedef struct uint16x8x2_t
152{
153 uint16x8_t val[2];
154} uint16x8x2_t;
155
156typedef struct uint32x2x2_t
157{
158 uint32x2_t val[2];
159} uint32x2x2_t;
160
161typedef struct uint32x4x2_t
162{
163 uint32x4_t val[2];
164} uint32x4x2_t;
165
166typedef struct uint64x1x2_t
167{
168 uint64x1_t val[2];
169} uint64x1x2_t;
170
171typedef struct uint64x2x2_t
172{
173 uint64x2_t val[2];
174} uint64x2x2_t;
175
176typedef struct float32x2x2_t
177{
178 float32x2_t val[2];
179} float32x2x2_t;
180
181typedef struct float32x4x2_t
182{
183 float32x4_t val[2];
184} float32x4x2_t;
185
186typedef struct float64x2x2_t
187{
188 float64x2_t val[2];
189} float64x2x2_t;
190
191typedef struct float64x1x2_t
192{
193 float64x1_t val[2];
194} float64x1x2_t;
195
196typedef struct poly8x8x2_t
197{
198 poly8x8_t val[2];
199} poly8x8x2_t;
200
201typedef struct poly8x16x2_t
202{
203 poly8x16_t val[2];
204} poly8x16x2_t;
205
206typedef struct poly16x4x2_t
207{
208 poly16x4_t val[2];
209} poly16x4x2_t;
210
211typedef struct poly16x8x2_t
212{
213 poly16x8_t val[2];
214} poly16x8x2_t;
215
216typedef struct int8x8x3_t
217{
218 int8x8_t val[3];
219} int8x8x3_t;
220
221typedef struct int8x16x3_t
222{
223 int8x16_t val[3];
224} int8x16x3_t;
225
226typedef struct int16x4x3_t
227{
228 int16x4_t val[3];
229} int16x4x3_t;
230
231typedef struct int16x8x3_t
232{
233 int16x8_t val[3];
234} int16x8x3_t;
235
236typedef struct int32x2x3_t
237{
238 int32x2_t val[3];
239} int32x2x3_t;
240
241typedef struct int32x4x3_t
242{
243 int32x4_t val[3];
244} int32x4x3_t;
245
246typedef struct int64x1x3_t
247{
248 int64x1_t val[3];
249} int64x1x3_t;
250
251typedef struct int64x2x3_t
252{
253 int64x2_t val[3];
254} int64x2x3_t;
255
256typedef struct uint8x8x3_t
257{
258 uint8x8_t val[3];
259} uint8x8x3_t;
260
261typedef struct uint8x16x3_t
262{
263 uint8x16_t val[3];
264} uint8x16x3_t;
265
266typedef struct uint16x4x3_t
267{
268 uint16x4_t val[3];
269} uint16x4x3_t;
270
271typedef struct uint16x8x3_t
272{
273 uint16x8_t val[3];
274} uint16x8x3_t;
275
276typedef struct uint32x2x3_t
277{
278 uint32x2_t val[3];
279} uint32x2x3_t;
280
281typedef struct uint32x4x3_t
282{
283 uint32x4_t val[3];
284} uint32x4x3_t;
285
286typedef struct uint64x1x3_t
287{
288 uint64x1_t val[3];
289} uint64x1x3_t;
290
291typedef struct uint64x2x3_t
292{
293 uint64x2_t val[3];
294} uint64x2x3_t;
295
296typedef struct float32x2x3_t
297{
298 float32x2_t val[3];
299} float32x2x3_t;
300
301typedef struct float32x4x3_t
302{
303 float32x4_t val[3];
304} float32x4x3_t;
305
306typedef struct float64x2x3_t
307{
308 float64x2_t val[3];
309} float64x2x3_t;
310
311typedef struct float64x1x3_t
312{
313 float64x1_t val[3];
314} float64x1x3_t;
315
316typedef struct poly8x8x3_t
317{
318 poly8x8_t val[3];
319} poly8x8x3_t;
320
321typedef struct poly8x16x3_t
322{
323 poly8x16_t val[3];
324} poly8x16x3_t;
325
326typedef struct poly16x4x3_t
327{
328 poly16x4_t val[3];
329} poly16x4x3_t;
330
331typedef struct poly16x8x3_t
332{
333 poly16x8_t val[3];
334} poly16x8x3_t;
335
336typedef struct int8x8x4_t
337{
338 int8x8_t val[4];
339} int8x8x4_t;
340
341typedef struct int8x16x4_t
342{
343 int8x16_t val[4];
344} int8x16x4_t;
345
346typedef struct int16x4x4_t
347{
348 int16x4_t val[4];
349} int16x4x4_t;
350
351typedef struct int16x8x4_t
352{
353 int16x8_t val[4];
354} int16x8x4_t;
355
356typedef struct int32x2x4_t
357{
358 int32x2_t val[4];
359} int32x2x4_t;
360
361typedef struct int32x4x4_t
362{
363 int32x4_t val[4];
364} int32x4x4_t;
365
366typedef struct int64x1x4_t
367{
368 int64x1_t val[4];
369} int64x1x4_t;
370
371typedef struct int64x2x4_t
372{
373 int64x2_t val[4];
374} int64x2x4_t;
375
376typedef struct uint8x8x4_t
377{
378 uint8x8_t val[4];
379} uint8x8x4_t;
380
381typedef struct uint8x16x4_t
382{
383 uint8x16_t val[4];
384} uint8x16x4_t;
385
386typedef struct uint16x4x4_t
387{
388 uint16x4_t val[4];
389} uint16x4x4_t;
390
391typedef struct uint16x8x4_t
392{
393 uint16x8_t val[4];
394} uint16x8x4_t;
395
396typedef struct uint32x2x4_t
397{
398 uint32x2_t val[4];
399} uint32x2x4_t;
400
401typedef struct uint32x4x4_t
402{
403 uint32x4_t val[4];
404} uint32x4x4_t;
405
406typedef struct uint64x1x4_t
407{
408 uint64x1_t val[4];
409} uint64x1x4_t;
410
411typedef struct uint64x2x4_t
412{
413 uint64x2_t val[4];
414} uint64x2x4_t;
415
416typedef struct float32x2x4_t
417{
418 float32x2_t val[4];
419} float32x2x4_t;
420
421typedef struct float32x4x4_t
422{
423 float32x4_t val[4];
424} float32x4x4_t;
425
426typedef struct float64x2x4_t
427{
428 float64x2_t val[4];
429} float64x2x4_t;
430
431typedef struct float64x1x4_t
432{
433 float64x1_t val[4];
434} float64x1x4_t;
435
436typedef struct poly8x8x4_t
437{
438 poly8x8_t val[4];
439} poly8x8x4_t;
440
441typedef struct poly8x16x4_t
442{
443 poly8x16_t val[4];
444} poly8x16x4_t;
445
446typedef struct poly16x4x4_t
447{
448 poly16x4_t val[4];
449} poly16x4x4_t;
450
451typedef struct poly16x8x4_t
452{
453 poly16x8_t val[4];
454} poly16x8x4_t;
455
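/* Editorial sketch, not part of the GCC header: the <type>x<N>x<M>_t
   structs above just group M same-typed vectors in a val[M] array.  They
   are used by intrinsics elsewhere in the full header that read or write
   several registers at once (the vld2/vst2 family and friends).  Member
   access is ordinary array indexing; for example, given int8x8_t a, b:

     int8x8x2_t pair;
     pair.val[0] = vadd_s8 (a, b);
     pair.val[1] = vsub_s8 (a, b);  */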
456/* vget_lane internal macros. */
457
458#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
459 (__cast_ret \
460 __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))
461
462#define __aarch64_vget_lane_f32(__a, __b) \
463 __aarch64_vget_lane_any (v2sf, , , __a, __b)
464#define __aarch64_vget_lane_f64(__a, __b) (__a)
465
466#define __aarch64_vget_lane_p8(__a, __b) \
467 __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
468#define __aarch64_vget_lane_p16(__a, __b) \
469 __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
470
471#define __aarch64_vget_lane_s8(__a, __b) \
472 __aarch64_vget_lane_any (v8qi, , ,__a, __b)
473#define __aarch64_vget_lane_s16(__a, __b) \
474 __aarch64_vget_lane_any (v4hi, , ,__a, __b)
475#define __aarch64_vget_lane_s32(__a, __b) \
476 __aarch64_vget_lane_any (v2si, , ,__a, __b)
477#define __aarch64_vget_lane_s64(__a, __b) (__a)
478
479#define __aarch64_vget_lane_u8(__a, __b) \
480 __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
481#define __aarch64_vget_lane_u16(__a, __b) \
482 __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
483#define __aarch64_vget_lane_u32(__a, __b) \
484 __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
485#define __aarch64_vget_lane_u64(__a, __b) (__a)
486
487#define __aarch64_vgetq_lane_f32(__a, __b) \
488 __aarch64_vget_lane_any (v4sf, , , __a, __b)
489#define __aarch64_vgetq_lane_f64(__a, __b) \
490 __aarch64_vget_lane_any (v2df, , , __a, __b)
491
492#define __aarch64_vgetq_lane_p8(__a, __b) \
493 __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
494#define __aarch64_vgetq_lane_p16(__a, __b) \
495 __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
496
497#define __aarch64_vgetq_lane_s8(__a, __b) \
498 __aarch64_vget_lane_any (v16qi, , ,__a, __b)
499#define __aarch64_vgetq_lane_s16(__a, __b) \
500 __aarch64_vget_lane_any (v8hi, , ,__a, __b)
501#define __aarch64_vgetq_lane_s32(__a, __b) \
502 __aarch64_vget_lane_any (v4si, , ,__a, __b)
503#define __aarch64_vgetq_lane_s64(__a, __b) \
504 __aarch64_vget_lane_any (v2di, , ,__a, __b)
505
506#define __aarch64_vgetq_lane_u8(__a, __b) \
507 __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
508#define __aarch64_vgetq_lane_u16(__a, __b) \
509 __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
510#define __aarch64_vgetq_lane_u32(__a, __b) \
511 __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
512#define __aarch64_vgetq_lane_u64(__a, __b) \
513 __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
514
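/* Editorial sketch, not part of the GCC header: the __aarch64_vget_lane_*
   and __aarch64_vgetq_lane_* macros above extract lane __b from vector
   __a through the lane-access builtin, casting to the unsigned or
   polynomial element type where needed; the f64/s64/u64 D-register cases
   are identities because those types are plain scalars here.  User code
   would normally call the public vget_lane_*/vgetq_lane_* wrappers
   defined later in the full header, but the expansion is simply:

     uint16x4_t v = { 10, 20, 30, 40 };
     uint16_t x = __aarch64_vget_lane_u16 (v, 2);    x is 30  */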
515/* __aarch64_vdup_lane internal macros. */
516#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
517 vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))
518
519#define __aarch64_vdup_lane_f32(__a, __b) \
520 __aarch64_vdup_lane_any (f32, , , __a, __b)
521#define __aarch64_vdup_lane_f64(__a, __b) (__a)
522#define __aarch64_vdup_lane_p8(__a, __b) \
523 __aarch64_vdup_lane_any (p8, , , __a, __b)
524#define __aarch64_vdup_lane_p16(__a, __b) \
525 __aarch64_vdup_lane_any (p16, , , __a, __b)
526#define __aarch64_vdup_lane_s8(__a, __b) \
527 __aarch64_vdup_lane_any (s8, , , __a, __b)
528#define __aarch64_vdup_lane_s16(__a, __b) \
529 __aarch64_vdup_lane_any (s16, , , __a, __b)
530#define __aarch64_vdup_lane_s32(__a, __b) \
531 __aarch64_vdup_lane_any (s32, , , __a, __b)
532#define __aarch64_vdup_lane_s64(__a, __b) (__a)
533#define __aarch64_vdup_lane_u8(__a, __b) \
534 __aarch64_vdup_lane_any (u8, , , __a, __b)
535#define __aarch64_vdup_lane_u16(__a, __b) \
536 __aarch64_vdup_lane_any (u16, , , __a, __b)
537#define __aarch64_vdup_lane_u32(__a, __b) \
538 __aarch64_vdup_lane_any (u32, , , __a, __b)
539#define __aarch64_vdup_lane_u64(__a, __b) (__a)
540
541/* __aarch64_vdup_laneq internal macros. */
542#define __aarch64_vdup_laneq_f32(__a, __b) \
543 __aarch64_vdup_lane_any (f32, , q, __a, __b)
544#define __aarch64_vdup_laneq_f64(__a, __b) \
545 __aarch64_vdup_lane_any (f64, , q, __a, __b)
546#define __aarch64_vdup_laneq_p8(__a, __b) \
547 __aarch64_vdup_lane_any (p8, , q, __a, __b)
548#define __aarch64_vdup_laneq_p16(__a, __b) \
549 __aarch64_vdup_lane_any (p16, , q, __a, __b)
550#define __aarch64_vdup_laneq_s8(__a, __b) \
551 __aarch64_vdup_lane_any (s8, , q, __a, __b)
552#define __aarch64_vdup_laneq_s16(__a, __b) \
553 __aarch64_vdup_lane_any (s16, , q, __a, __b)
554#define __aarch64_vdup_laneq_s32(__a, __b) \
555 __aarch64_vdup_lane_any (s32, , q, __a, __b)
556#define __aarch64_vdup_laneq_s64(__a, __b) \
557 __aarch64_vdup_lane_any (s64, , q, __a, __b)
558#define __aarch64_vdup_laneq_u8(__a, __b) \
559 __aarch64_vdup_lane_any (u8, , q, __a, __b)
560#define __aarch64_vdup_laneq_u16(__a, __b) \
561 __aarch64_vdup_lane_any (u16, , q, __a, __b)
562#define __aarch64_vdup_laneq_u32(__a, __b) \
563 __aarch64_vdup_lane_any (u32, , q, __a, __b)
564#define __aarch64_vdup_laneq_u64(__a, __b) \
565 __aarch64_vdup_lane_any (u64, , q, __a, __b)
566
567/* __aarch64_vdupq_lane internal macros. */
568#define __aarch64_vdupq_lane_f32(__a, __b) \
569 __aarch64_vdup_lane_any (f32, q, , __a, __b)
570#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a))
571#define __aarch64_vdupq_lane_p8(__a, __b) \
572 __aarch64_vdup_lane_any (p8, q, , __a, __b)
573#define __aarch64_vdupq_lane_p16(__a, __b) \
574 __aarch64_vdup_lane_any (p16, q, , __a, __b)
575#define __aarch64_vdupq_lane_s8(__a, __b) \
576 __aarch64_vdup_lane_any (s8, q, , __a, __b)
577#define __aarch64_vdupq_lane_s16(__a, __b) \
578 __aarch64_vdup_lane_any (s16, q, , __a, __b)
579#define __aarch64_vdupq_lane_s32(__a, __b) \
580 __aarch64_vdup_lane_any (s32, q, , __a, __b)
581#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a))
582#define __aarch64_vdupq_lane_u8(__a, __b) \
583 __aarch64_vdup_lane_any (u8, q, , __a, __b)
584#define __aarch64_vdupq_lane_u16(__a, __b) \
585 __aarch64_vdup_lane_any (u16, q, , __a, __b)
586#define __aarch64_vdupq_lane_u32(__a, __b) \
587 __aarch64_vdup_lane_any (u32, q, , __a, __b)
588#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a))
589
590/* __aarch64_vdupq_laneq internal macros. */
591#define __aarch64_vdupq_laneq_f32(__a, __b) \
592 __aarch64_vdup_lane_any (f32, q, q, __a, __b)
593#define __aarch64_vdupq_laneq_f64(__a, __b) \
594 __aarch64_vdup_lane_any (f64, q, q, __a, __b)
595#define __aarch64_vdupq_laneq_p8(__a, __b) \
596 __aarch64_vdup_lane_any (p8, q, q, __a, __b)
597#define __aarch64_vdupq_laneq_p16(__a, __b) \
598 __aarch64_vdup_lane_any (p16, q, q, __a, __b)
599#define __aarch64_vdupq_laneq_s8(__a, __b) \
600 __aarch64_vdup_lane_any (s8, q, q, __a, __b)
601#define __aarch64_vdupq_laneq_s16(__a, __b) \
602 __aarch64_vdup_lane_any (s16, q, q, __a, __b)
603#define __aarch64_vdupq_laneq_s32(__a, __b) \
604 __aarch64_vdup_lane_any (s32, q, q, __a, __b)
605#define __aarch64_vdupq_laneq_s64(__a, __b) \
606 __aarch64_vdup_lane_any (s64, q, q, __a, __b)
607#define __aarch64_vdupq_laneq_u8(__a, __b) \
608 __aarch64_vdup_lane_any (u8, q, q, __a, __b)
609#define __aarch64_vdupq_laneq_u16(__a, __b) \
610 __aarch64_vdup_lane_any (u16, q, q, __a, __b)
611#define __aarch64_vdupq_laneq_u32(__a, __b) \
612 __aarch64_vdup_lane_any (u32, q, q, __a, __b)
613#define __aarch64_vdupq_laneq_u64(__a, __b) \
614 __aarch64_vdup_lane_any (u64, q, q, __a, __b)
615
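/* Editorial sketch, not part of the GCC header: the four vdup lane macro
   families above broadcast one lane of the source to every lane of the
   result; the two "q" positions select a 128-bit destination and a
   128-bit source respectively.  Each is built from a vget lane followed
   by a vdup_n, e.g.:

     __aarch64_vdupq_lane_s16 (v, 3)
       expands to vdupq_n_s16 (__aarch64_vget_lane_s16 (v, 3)),
       an int16x8_t with all eight lanes equal to lane 3 of v.  */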
616/* vadd */
617__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
618vadd_s8 (int8x8_t __a, int8x8_t __b)
619{
620 return __a + __b;
621}
622
623__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
624vadd_s16 (int16x4_t __a, int16x4_t __b)
625{
626 return __a + __b;
627}
628
629__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
630vadd_s32 (int32x2_t __a, int32x2_t __b)
631{
632 return __a + __b;
633}
634
635__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
636vadd_f32 (float32x2_t __a, float32x2_t __b)
637{
638 return __a + __b;
639}
640
641__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
642vadd_f64 (float64x1_t __a, float64x1_t __b)
643{
644 return __a + __b;
645}
646
647__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
648vadd_u8 (uint8x8_t __a, uint8x8_t __b)
649{
650 return __a + __b;
651}
652
653__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
654vadd_u16 (uint16x4_t __a, uint16x4_t __b)
655{
656 return __a + __b;
657}
658
659__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
660vadd_u32 (uint32x2_t __a, uint32x2_t __b)
661{
662 return __a + __b;
663}
664
665__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
666vadd_s64 (int64x1_t __a, int64x1_t __b)
667{
668 return __a + __b;
669}
670
671__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
672vadd_u64 (uint64x1_t __a, uint64x1_t __b)
673{
674 return __a + __b;
675}
676
677__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
678vaddq_s8 (int8x16_t __a, int8x16_t __b)
679{
680 return __a + __b;
681}
682
683__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
684vaddq_s16 (int16x8_t __a, int16x8_t __b)
685{
686 return __a + __b;
687}
688
689__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
690vaddq_s32 (int32x4_t __a, int32x4_t __b)
691{
692 return __a + __b;
693}
694
695__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
696vaddq_s64 (int64x2_t __a, int64x2_t __b)
697{
698 return __a + __b;
699}
700
701__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
702vaddq_f32 (float32x4_t __a, float32x4_t __b)
703{
704 return __a + __b;
705}
706
707__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
708vaddq_f64 (float64x2_t __a, float64x2_t __b)
709{
710 return __a + __b;
711}
712
713__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
714vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
715{
716 return __a + __b;
717}
718
719__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
720vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
721{
722 return __a + __b;
723}
724
725__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
726vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
727{
728 return __a + __b;
729}
730
731__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
732vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
733{
734 return __a + __b;
735}
736
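/* Editorial sketch, not part of the GCC header: vadd/vaddq are plain
   lane-wise additions expressed with GCC's vector operators, so integer
   results wrap modulo the element width.  For example:

     uint8x8_t a = { 250, 1, 2, 3, 4, 5, 6, 7 };
     uint8x8_t b = {  10, 1, 1, 1, 1, 1, 1, 1 };
     uint8x8_t c = vadd_u8 (a, b);    lane 0 wraps to 4 (260 mod 256)  */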
737__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
738vaddl_s8 (int8x8_t __a, int8x8_t __b)
739{
740 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
741}
742
743__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
744vaddl_s16 (int16x4_t __a, int16x4_t __b)
745{
746 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
747}
748
749__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
750vaddl_s32 (int32x2_t __a, int32x2_t __b)
751{
752 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
753}
754
755__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
756vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
757{
758 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
759 (int8x8_t) __b);
760}
761
762__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
763vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
764{
765 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
766 (int16x4_t) __b);
767}
768
769__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
770vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
771{
772 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
773 (int32x2_t) __b);
774}
775
776__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
777vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
778{
779 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
780}
781
782__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
783vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
784{
785 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
786}
787
788__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
789vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
790{
791 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
792}
793
794__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
795vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
796{
797 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
798 (int8x16_t) __b);
799}
800
801__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
802vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
803{
804 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
805 (int16x8_t) __b);
806}
807
808__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
809vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
810{
811 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
812 (int32x4_t) __b);
813}
814
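/* Editorial sketch, not part of the GCC header: vaddl_* widen both
   operands before adding, so every sum is exact in the double-width
   result; the vaddl_high_* forms do the same using only the upper halves
   of their 128-bit inputs.  For example:

     int8x8_t a = { 100, 100, 0, 0, 0, 0, 0, 0 };
     int16x8_t w = vaddl_s8 (a, a);    lane 0 is 200, no wrap-around  */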
815__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
816vaddw_s8 (int16x8_t __a, int8x8_t __b)
817{
818 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
819}
820
821__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
822vaddw_s16 (int32x4_t __a, int16x4_t __b)
823{
824 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
825}
826
827__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
828vaddw_s32 (int64x2_t __a, int32x2_t __b)
829{
830 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
831}
832
833__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
834vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
835{
836 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
837 (int8x8_t) __b);
838}
839
840__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
841vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
842{
843 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
844 (int16x4_t) __b);
845}
846
847__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
848vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
849{
850 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
851 (int32x2_t) __b);
852}
853
854__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
855vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
856{
857 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
858}
859
860__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
861vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
862{
863 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
864}
865
866__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
867vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
868{
869 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
870}
871
872__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
873vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
874{
875 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
876 (int8x16_t) __b);
877}
878
879__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
880vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
881{
882 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
883 (int16x8_t) __b);
884}
885
886__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
887vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
888{
889 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
890 (int32x4_t) __b);
891}
892
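/* Editorial sketch, not part of the GCC header: vaddw_* add a narrow
   vector to an already-wide accumulator, widening the narrow operand
   first; vaddw_high_* take the narrow lanes from the upper half of a
   128-bit vector.  For example:

     int16x8_t acc = { 1000, 0, 0, 0, 0, 0, 0, 0 };
     int8x8_t  d   = {  100, 0, 0, 0, 0, 0, 0, 0 };
     acc = vaddw_s8 (acc, d);    lane 0 becomes 1100  */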
893__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
894vhadd_s8 (int8x8_t __a, int8x8_t __b)
895{
896 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
897}
898
899__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
900vhadd_s16 (int16x4_t __a, int16x4_t __b)
901{
902 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
903}
904
905__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
906vhadd_s32 (int32x2_t __a, int32x2_t __b)
907{
908 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
909}
910
911__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
912vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
913{
914 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
915 (int8x8_t) __b);
916}
917
918__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
919vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
920{
921 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
922 (int16x4_t) __b);
923}
924
925__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
926vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
927{
928 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
929 (int32x2_t) __b);
930}
931
932__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
933vhaddq_s8 (int8x16_t __a, int8x16_t __b)
934{
935 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
936}
937
938__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
939vhaddq_s16 (int16x8_t __a, int16x8_t __b)
940{
941 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
942}
943
944__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
945vhaddq_s32 (int32x4_t __a, int32x4_t __b)
946{
947 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
948}
949
950__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
951vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
952{
953 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
954 (int8x16_t) __b);
955}
956
957__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
958vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
959{
960 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
961 (int16x8_t) __b);
962}
963
964__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
965vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
966{
967 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
968 (int32x4_t) __b);
969}
970
971__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
972vrhadd_s8 (int8x8_t __a, int8x8_t __b)
973{
974 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
975}
976
977__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
978vrhadd_s16 (int16x4_t __a, int16x4_t __b)
979{
980 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
981}
982
983__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
984vrhadd_s32 (int32x2_t __a, int32x2_t __b)
985{
986 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
987}
988
989__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
990vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
991{
992 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
993 (int8x8_t) __b);
994}
995
996__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
997vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
998{
999 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1000 (int16x4_t) __b);
1001}
1002
1003__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1004vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1005{
1006 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1007 (int32x2_t) __b);
1008}
1009
1010__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1011vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1012{
1013 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1014}
1015
1016__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1017vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1018{
1019 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1020}
1021
1022__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1023vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1024{
1025 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1026}
1027
1028__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1029vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1030{
1031 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1032 (int8x16_t) __b);
1033}
1034
1035__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1036vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1037{
1038 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1039 (int16x8_t) __b);
1040}
1041
1042__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1043vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1044{
1045 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1046 (int32x4_t) __b);
1047}
1048
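/* Editorial sketch, not part of the GCC header: vhadd_* compute the
   truncating halving add (a + b) >> 1 per lane and vrhadd_* the rounding
   form (a + b + 1) >> 1, both without overflowing an intermediate.
   With a[0] == 5 and b[0] == 2:

     vhadd_u8 (a, b)     lane 0 is 3
     vrhadd_u8 (a, b)    lane 0 is 4  */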
1049__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1050vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1051{
1052 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1053}
1054
1055__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1056vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1057{
1058 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1059}
1060
1061__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1062vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1063{
1064 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1065}
1066
1067__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1068vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1069{
1070 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1071 (int16x8_t) __b);
1072}
1073
1074__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1075vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1076{
1077 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1078 (int32x4_t) __b);
1079}
1080
1081__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1082vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1083{
1084 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1085 (int64x2_t) __b);
1086}
1087
1088__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1089vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1090{
1091 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1092}
1093
1094__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1095vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1096{
1097 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1098}
1099
1100__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1101vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1102{
1103 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1104}
1105
1106__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1107vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1108{
1109 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1110 (int16x8_t) __b);
1111}
1112
1113__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1114vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1115{
1116 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1117 (int32x4_t) __b);
1118}
1119
1120__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1121vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1122{
1123 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1124 (int64x2_t) __b);
1125}
1126
1127__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1128vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1129{
1130 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1131}
1132
1133__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1134vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1135{
1136 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1137}
1138
1139__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1140vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1141{
1142 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1143}
1144
1145__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1146vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1147{
1148 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1149 (int16x8_t) __b,
1150 (int16x8_t) __c);
1151}
1152
1153__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1154vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1155{
1156 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1157 (int32x4_t) __b,
1158 (int32x4_t) __c);
1159}
1160
1161__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1162vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1163{
1164 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1165 (int64x2_t) __b,
1166 (int64x2_t) __c);
1167}
1168
1169__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1170vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1171{
1172 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1173}
1174
1175__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1176vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1177{
1178 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1179}
1180
1181__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1182vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1183{
1184 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1185}
1186
1187__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1188vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1189{
1190 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1191 (int16x8_t) __b,
1192 (int16x8_t) __c);
1193}
1194
1195__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1196vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1197{
1198 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1199 (int32x4_t) __b,
1200 (int32x4_t) __c);
1201}
1202
1203__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1204vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1205{
1206 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1207 (int64x2_t) __b,
1208 (int64x2_t) __c);
1209}
1210
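/* Editorial sketch, not part of the GCC header: vaddhn_* add two wide
   vectors and keep only the most significant half of each sum, narrowing
   the element type; vraddhn_* round by adding half an output unit first,
   and the _high forms pack the narrowed result into the upper half of a
   128-bit destination.  With a[0] == 0x1234 and b[0] == 0x0101:

     vaddhn_u16 (a, b)    lane 0 is 0x13 (high byte of 0x1335)  */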
1211__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1212vdiv_f32 (float32x2_t __a, float32x2_t __b)
1213{
1214 return __a / __b;
1215}
1216
1217__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1218vdiv_f64 (float64x1_t __a, float64x1_t __b)
1219{
1220 return __a / __b;
1221}
1222
1223__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1224vdivq_f32 (float32x4_t __a, float32x4_t __b)
1225{
1226 return __a / __b;
1227}
1228
1229__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1230vdivq_f64 (float64x2_t __a, float64x2_t __b)
1231{
1232 return __a / __b;
1233}
1234
1235__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1236vmul_s8 (int8x8_t __a, int8x8_t __b)
1237{
1238 return __a * __b;
1239}
1240
1241__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1242vmul_s16 (int16x4_t __a, int16x4_t __b)
1243{
1244 return __a * __b;
1245}
1246
1247__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1248vmul_s32 (int32x2_t __a, int32x2_t __b)
1249{
1250 return __a * __b;
1251}
1252
1253__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1254vmul_f32 (float32x2_t __a, float32x2_t __b)
1255{
1256 return __a * __b;
1257}
1258
1259__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1260vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1261{
1262 return __a * __b;
1263}
1264
1265__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1266vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1267{
1268 return __a * __b;
1269}
1270
1271__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1272vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1273{
1274 return __a * __b;
1275}
1276
1277__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1278vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1279{
1280 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1281 (int8x8_t) __b);
1282}
1283
1284__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1285vmulq_s8 (int8x16_t __a, int8x16_t __b)
1286{
1287 return __a * __b;
1288}
1289
1290__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1291vmulq_s16 (int16x8_t __a, int16x8_t __b)
1292{
1293 return __a * __b;
1294}
1295
1296__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1297vmulq_s32 (int32x4_t __a, int32x4_t __b)
1298{
1299 return __a * __b;
1300}
1301
1302__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1303vmulq_f32 (float32x4_t __a, float32x4_t __b)
1304{
1305 return __a * __b;
1306}
1307
1308__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1309vmulq_f64 (float64x2_t __a, float64x2_t __b)
1310{
1311 return __a * __b;
1312}
1313
1314__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1315vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1316{
1317 return __a * __b;
1318}
1319
1320__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1321vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1322{
1323 return __a * __b;
1324}
1325
1326__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1327vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1328{
1329 return __a * __b;
1330}
1331
1332__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1333vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1334{
1335 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1336 (int8x16_t) __b);
1337}
1338
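/* Editorial sketch, not part of the GCC header: vmul/vmulq keep the low
   half of each lane-wise product, while vmul_p8/vmulq_p8 perform a
   carry-less (polynomial, GF(2)) multiply.  With a[0] == 0x03 and
   b[0] == 0x03:

     vmul_u8 (a, b)    lane 0 is 9
     vmul_p8 (a, b)    lane 0 is 5, since (x + 1)^2 = x^2 + 1  */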
1339__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1340vand_s8 (int8x8_t __a, int8x8_t __b)
1341{
1342 return __a & __b;
1343}
1344
1345__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1346vand_s16 (int16x4_t __a, int16x4_t __b)
1347{
1348 return __a & __b;
1349}
1350
1351__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1352vand_s32 (int32x2_t __a, int32x2_t __b)
1353{
1354 return __a & __b;
1355}
1356
1357__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1358vand_u8 (uint8x8_t __a, uint8x8_t __b)
1359{
1360 return __a & __b;
1361}
1362
1363__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1364vand_u16 (uint16x4_t __a, uint16x4_t __b)
1365{
1366 return __a & __b;
1367}
1368
1369__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1370vand_u32 (uint32x2_t __a, uint32x2_t __b)
1371{
1372 return __a & __b;
1373}
1374
1375__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1376vand_s64 (int64x1_t __a, int64x1_t __b)
1377{
1378 return __a & __b;
1379}
1380
1381__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1382vand_u64 (uint64x1_t __a, uint64x1_t __b)
1383{
1384 return __a & __b;
1385}
1386
1387__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1388vandq_s8 (int8x16_t __a, int8x16_t __b)
1389{
1390 return __a & __b;
1391}
1392
1393__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1394vandq_s16 (int16x8_t __a, int16x8_t __b)
1395{
1396 return __a & __b;
1397}
1398
1399__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1400vandq_s32 (int32x4_t __a, int32x4_t __b)
1401{
1402 return __a & __b;
1403}
1404
1405__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1406vandq_s64 (int64x2_t __a, int64x2_t __b)
1407{
1408 return __a & __b;
1409}
1410
1411__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1412vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1413{
1414 return __a & __b;
1415}
1416
1417__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1418vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1419{
1420 return __a & __b;
1421}
1422
1423__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1424vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1425{
1426 return __a & __b;
1427}
1428
1429__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1430vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1431{
1432 return __a & __b;
1433}
1434
1435__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1436vorr_s8 (int8x8_t __a, int8x8_t __b)
1437{
1438 return __a | __b;
1439}
1440
1441__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1442vorr_s16 (int16x4_t __a, int16x4_t __b)
1443{
1444 return __a | __b;
1445}
1446
1447__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1448vorr_s32 (int32x2_t __a, int32x2_t __b)
1449{
1450 return __a | __b;
1451}
1452
1453__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1454vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1455{
1456 return __a | __b;
1457}
1458
1459__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1460vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1461{
1462 return __a | __b;
1463}
1464
1465__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1466vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1467{
1468 return __a | __b;
1469}
1470
1471__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1472vorr_s64 (int64x1_t __a, int64x1_t __b)
1473{
1474 return __a | __b;
1475}
1476
1477__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1478vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1479{
1480 return __a | __b;
1481}
1482
1483__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1484vorrq_s8 (int8x16_t __a, int8x16_t __b)
1485{
1486 return __a | __b;
1487}
1488
1489__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1490vorrq_s16 (int16x8_t __a, int16x8_t __b)
1491{
1492 return __a | __b;
1493}
1494
1495__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1496vorrq_s32 (int32x4_t __a, int32x4_t __b)
1497{
1498 return __a | __b;
1499}
1500
1501__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1502vorrq_s64 (int64x2_t __a, int64x2_t __b)
1503{
1504 return __a | __b;
1505}
1506
1507__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1508vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1509{
1510 return __a | __b;
1511}
1512
1513__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1514vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1515{
1516 return __a | __b;
1517}
1518
1519__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1520vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1521{
1522 return __a | __b;
1523}
1524
1525__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1526vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1527{
1528 return __a | __b;
1529}
1530
1531__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1532veor_s8 (int8x8_t __a, int8x8_t __b)
1533{
1534 return __a ^ __b;
1535}
1536
1537__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1538veor_s16 (int16x4_t __a, int16x4_t __b)
1539{
1540 return __a ^ __b;
1541}
1542
1543__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1544veor_s32 (int32x2_t __a, int32x2_t __b)
1545{
1546 return __a ^ __b;
1547}
1548
1549__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1550veor_u8 (uint8x8_t __a, uint8x8_t __b)
1551{
1552 return __a ^ __b;
1553}
1554
1555__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1556veor_u16 (uint16x4_t __a, uint16x4_t __b)
1557{
1558 return __a ^ __b;
1559}
1560
1561__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1562veor_u32 (uint32x2_t __a, uint32x2_t __b)
1563{
1564 return __a ^ __b;
1565}
1566
1567__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1568veor_s64 (int64x1_t __a, int64x1_t __b)
1569{
1570 return __a ^ __b;
1571}
1572
1573__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1574veor_u64 (uint64x1_t __a, uint64x1_t __b)
1575{
1576 return __a ^ __b;
1577}
1578
1579__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1580veorq_s8 (int8x16_t __a, int8x16_t __b)
1581{
1582 return __a ^ __b;
1583}
1584
1585__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1586veorq_s16 (int16x8_t __a, int16x8_t __b)
1587{
1588 return __a ^ __b;
1589}
1590
1591__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1592veorq_s32 (int32x4_t __a, int32x4_t __b)
1593{
1594 return __a ^ __b;
1595}
1596
1597__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1598veorq_s64 (int64x2_t __a, int64x2_t __b)
1599{
1600 return __a ^ __b;
1601}
1602
1603__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1604veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1605{
1606 return __a ^ __b;
1607}
1608
1609__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1610veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1611{
1612 return __a ^ __b;
1613}
1614
1615__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1616veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1617{
1618 return __a ^ __b;
1619}
1620
1621__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1622veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1623{
1624 return __a ^ __b;
1625}
1626
1627__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1628vbic_s8 (int8x8_t __a, int8x8_t __b)
1629{
1630 return __a & ~__b;
1631}
1632
1633__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1634vbic_s16 (int16x4_t __a, int16x4_t __b)
1635{
1636 return __a & ~__b;
1637}
1638
1639__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1640vbic_s32 (int32x2_t __a, int32x2_t __b)
1641{
1642 return __a & ~__b;
1643}
1644
1645__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1646vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1647{
1648 return __a & ~__b;
1649}
1650
1651__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1652vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1653{
1654 return __a & ~__b;
1655}
1656
1657__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1658vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1659{
1660 return __a & ~__b;
1661}
1662
1663__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1664vbic_s64 (int64x1_t __a, int64x1_t __b)
1665{
1666 return __a & ~__b;
1667}
1668
1669__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1670vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1671{
1672 return __a & ~__b;
1673}
1674
1675__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1676vbicq_s8 (int8x16_t __a, int8x16_t __b)
1677{
1678 return __a & ~__b;
1679}
1680
1681__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1682vbicq_s16 (int16x8_t __a, int16x8_t __b)
1683{
1684 return __a & ~__b;
1685}
1686
1687__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1688vbicq_s32 (int32x4_t __a, int32x4_t __b)
1689{
1690 return __a & ~__b;
1691}
1692
1693__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1694vbicq_s64 (int64x2_t __a, int64x2_t __b)
1695{
1696 return __a & ~__b;
1697}
1698
1699__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1700vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1701{
1702 return __a & ~__b;
1703}
1704
1705__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1706vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1707{
1708 return __a & ~__b;
1709}
1710
1711__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1712vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1713{
1714 return __a & ~__b;
1715}
1716
1717__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1718vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1719{
1720 return __a & ~__b;
1721}
1722
1723__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1724vorn_s8 (int8x8_t __a, int8x8_t __b)
1725{
1726 return __a | ~__b;
1727}
1728
1729__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1730vorn_s16 (int16x4_t __a, int16x4_t __b)
1731{
1732 return __a | ~__b;
1733}
1734
1735__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1736vorn_s32 (int32x2_t __a, int32x2_t __b)
1737{
1738 return __a | ~__b;
1739}
1740
1741__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1742vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1743{
1744 return __a | ~__b;
1745}
1746
1747__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1748vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1749{
1750 return __a | ~__b;
1751}
1752
1753__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1754vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1755{
1756 return __a | ~__b;
1757}
1758
1759__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1760vorn_s64 (int64x1_t __a, int64x1_t __b)
1761{
1762 return __a | ~__b;
1763}
1764
1765__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1766vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1767{
1768 return __a | ~__b;
1769}
1770
1771__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1772vornq_s8 (int8x16_t __a, int8x16_t __b)
1773{
1774 return __a | ~__b;
1775}
1776
1777__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1778vornq_s16 (int16x8_t __a, int16x8_t __b)
1779{
1780 return __a | ~__b;
1781}
1782
1783__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1784vornq_s32 (int32x4_t __a, int32x4_t __b)
1785{
1786 return __a | ~__b;
1787}
1788
1789__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1790vornq_s64 (int64x2_t __a, int64x2_t __b)
1791{
1792 return __a | ~__b;
1793}
1794
1795__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1796vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1797{
1798 return __a | ~__b;
1799}
1800
1801__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1802vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1803{
1804 return __a | ~__b;
1805}
1806
1807__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1808vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1809{
1810 return __a | ~__b;
1811}
1812
1813__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1814vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1815{
1816 return __a | ~__b;
1817}
1818
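/* Editorial sketch, not part of the GCC header: the bitwise family maps
   directly onto C operators: vand -> &, vorr -> |, veor -> ^, while
   vbic computes a & ~b ("bit clear") and vorn computes a | ~b.  Given
   uint8x8_t x and mask:

     vbic_u8 (x, mask)    clears in x every bit that is set in mask
     vorn_u8 (x, mask)    sets in x every bit that is clear in mask  */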
1819__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1820vsub_s8 (int8x8_t __a, int8x8_t __b)
1821{
1822 return __a - __b;
1823}
1824
1825__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1826vsub_s16 (int16x4_t __a, int16x4_t __b)
1827{
1828 return __a - __b;
1829}
1830
1831__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1832vsub_s32 (int32x2_t __a, int32x2_t __b)
1833{
1834 return __a - __b;
1835}
1836
1837__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1838vsub_f32 (float32x2_t __a, float32x2_t __b)
1839{
1840 return __a - __b;
1841}
1842
1843__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1844vsub_f64 (float64x1_t __a, float64x1_t __b)
1845{
1846 return __a - __b;
1847}
1848
1849__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1850vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1851{
1852 return __a - __b;
1853}
1854
1855__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1856vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1857{
1858 return __a - __b;
1859}
1860
1861__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1862vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1863{
1864 return __a - __b;
1865}
1866
1867__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1868vsub_s64 (int64x1_t __a, int64x1_t __b)
1869{
1870 return __a - __b;
1871}
1872
1873__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1874vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1875{
1876 return __a - __b;
1877}
1878
1879__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1880vsubq_s8 (int8x16_t __a, int8x16_t __b)
1881{
1882 return __a - __b;
1883}
1884
1885__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1886vsubq_s16 (int16x8_t __a, int16x8_t __b)
1887{
1888 return __a - __b;
1889}
1890
1891__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1892vsubq_s32 (int32x4_t __a, int32x4_t __b)
1893{
1894 return __a - __b;
1895}
1896
1897__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1898vsubq_s64 (int64x2_t __a, int64x2_t __b)
1899{
1900 return __a - __b;
1901}
1902
1903__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1904vsubq_f32 (float32x4_t __a, float32x4_t __b)
1905{
1906 return __a - __b;
1907}
1908
1909__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1910vsubq_f64 (float64x2_t __a, float64x2_t __b)
1911{
1912 return __a - __b;
1913}
1914
1915__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1916vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1917{
1918 return __a - __b;
1919}
1920
1921__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1922vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1923{
1924 return __a - __b;
1925}
1926
1927__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1928vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1929{
1930 return __a - __b;
1931}
1932
1933__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1934vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1935{
1936 return __a - __b;
1937}
1938
1939__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1940vsubl_s8 (int8x8_t __a, int8x8_t __b)
1941{
1942 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1943}
1944
1945__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1946vsubl_s16 (int16x4_t __a, int16x4_t __b)
1947{
1948 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1949}
1950
1951__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1952vsubl_s32 (int32x2_t __a, int32x2_t __b)
1953{
1954 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1955}
1956
1957__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1958vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1959{
1960 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1961 (int8x8_t) __b);
1962}
1963
1964__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1965vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1966{
1967 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1968 (int16x4_t) __b);
1969}
1970
1971__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1972vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1973{
1974 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1975 (int32x2_t) __b);
1976}
1977
1978__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1979vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1980{
1981 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1982}
1983
1984__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1985vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1986{
1987 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1988}
1989
1990__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1991vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1992{
1993 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1994}
1995
1996__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1997vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1998{
1999 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2000 (int8x16_t) __b);
2001}
2002
2003__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2004vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2005{
2006 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2007 (int16x8_t) __b);
2008}
2009
2010__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2011vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2012{
2013 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2014 (int32x4_t) __b);
2015}
2016
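/* Illustrative sketch: vsubl widens both operands before subtracting, so
   the difference cannot wrap, and vsubl_high does the same using the
   upper halves of the 128-bit inputs.  The helper name is hypothetical.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
__neon_example_widening_sub (int8x8_t __a, int8x8_t __b)
{
  /* 127 - (-128) fits because each lane widens to 16 bits.  */
  return vsubl_s8 (__a, __b);
}
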
2017__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2018vsubw_s8 (int16x8_t __a, int8x8_t __b)
2019{
2020 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2021}
2022
2023__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2024vsubw_s16 (int32x4_t __a, int16x4_t __b)
2025{
2026 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2027}
2028
2029__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2030vsubw_s32 (int64x2_t __a, int32x2_t __b)
2031{
2032 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2033}
2034
2035__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2036vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2037{
2038 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2039 (int8x8_t) __b);
2040}
2041
2042__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2043vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2044{
2045 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2046 (int16x4_t) __b);
2047}
2048
2049__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2050vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2051{
2052 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2053 (int32x2_t) __b);
2054}
2055
2056__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2057vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2058{
2059 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2060}
2061
2062__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2063vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2064{
2065 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2066}
2067
2068__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2069vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2070{
2071 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2072}
2073
2074__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2075vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2076{
2077 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2078 (int8x16_t) __b);
2079}
2080
2081__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2082vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2083{
2084 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2085 (int16x8_t) __b);
2086}
2087
2088__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2089vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2090{
2091 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2092 (int32x4_t) __b);
2093}
2094
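/* Illustrative sketch: vsubw keeps the first operand wide and widens only
   the second, which suits subtracting narrow data from a wide running
   value.  The helper name is hypothetical.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
__neon_example_widening_sub_from_wide (uint16x8_t __acc, uint8x8_t __b)
{
  /* Each 16-bit lane becomes __acc[i] - (uint16_t) __b[i].  */
  return vsubw_u8 (__acc, __b);
}
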
2095__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2096vqadd_s8 (int8x8_t __a, int8x8_t __b)
2097{
2098 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2099}
2100
2101__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2102vqadd_s16 (int16x4_t __a, int16x4_t __b)
2103{
2104 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2105}
2106
2107__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2108vqadd_s32 (int32x2_t __a, int32x2_t __b)
2109{
2110 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2111}
2112
2113__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2114vqadd_s64 (int64x1_t __a, int64x1_t __b)
2115{
2116 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
2117}
2118
2119__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2120vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2121{
2122 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
2123 (int8x8_t) __b);
2124}
2125
2126__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2127vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2128{
2129 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2130 (int16x4_t) __b);
2131}
2132
2133__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2134vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2135{
2136 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2137 (int32x2_t) __b);
2138}
2139
2140__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2141vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2142{
2143 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2144 (int64x1_t) __b);
2145}
2146
2147__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2148vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2149{
2150 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2151}
2152
2153__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2154vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2155{
2156 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2157}
2158
2159__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2160vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2161{
2162 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2163}
2164
2165__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2166vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2167{
2168 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2169}
2170
2171__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2172vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2173{
2174 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2175 (int8x16_t) __b);
2176}
2177
2178__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2179vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2180{
2181 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2182 (int16x8_t) __b);
2183}
2184
2185__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2186vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2187{
2188 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2189 (int32x4_t) __b);
2190}
2191
2192__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2193vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2194{
2195 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2196 (int64x2_t) __b);
2197}
2198
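/* Illustrative sketch: vqadd saturates instead of wrapping, so for int8
   lanes 100 + 100 yields 127 and -100 + -100 yields -128.  The helper
   name is hypothetical.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
__neon_example_saturating_add (int8x8_t __a, int8x8_t __b)
{
  /* Sums clamp to the [-128, 127] range of each lane.  */
  return vqadd_s8 (__a, __b);
}
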
2199__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2200vqsub_s8 (int8x8_t __a, int8x8_t __b)
2201{
2202 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2203}
2204
2205__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2206vqsub_s16 (int16x4_t __a, int16x4_t __b)
2207{
2208 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2209}
2210
2211__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2212vqsub_s32 (int32x2_t __a, int32x2_t __b)
2213{
2214 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2215}
2216
2217__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2218vqsub_s64 (int64x1_t __a, int64x1_t __b)
2219{
2220 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2221}
2222
2223__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2224vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2225{
2226 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2227 (int8x8_t) __b);
2228}
2229
2230__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2231vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2232{
2233 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2234 (int16x4_t) __b);
2235}
2236
2237__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2238vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2239{
2240 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2241 (int32x2_t) __b);
2242}
2243
2244__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2245vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2246{
2247 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2248 (int64x1_t) __b);
2249}
2250
2251__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2252vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2253{
2254 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2255}
2256
2257__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2258vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2259{
2260 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2261}
2262
2263__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2264vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2265{
2266 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2267}
2268
2269__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2270vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2271{
2272 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2273}
2274
2275__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2276vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2277{
2278 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2279 (int8x16_t) __b);
2280}
2281
2282__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2283vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2284{
2285 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2286 (int16x8_t) __b);
2287}
2288
2289__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2290vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2291{
2292 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2293 (int32x4_t) __b);
2294}
2295
2296__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2297vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2298{
2299 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2300 (int64x2_t) __b);
2301}
2302
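/* Illustrative sketch: vqsub clamps on underflow as well, so for unsigned
   lanes 0 - 1 yields 0 rather than wrapping to 255.  The helper name is
   hypothetical.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
__neon_example_saturating_sub (uint8x8_t __a, uint8x8_t __b)
{
  /* Each lane is __a[i] - __b[i] clamped at 0.  */
  return vqsub_u8 (__a, __b);
}
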
2303__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2304vqneg_s8 (int8x8_t __a)
2305{
2306 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2307}
2308
2309__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2310vqneg_s16 (int16x4_t __a)
2311{
2312 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2313}
2314
2315__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2316vqneg_s32 (int32x2_t __a)
2317{
2318 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2319}
2320
2321__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2322vqnegq_s8 (int8x16_t __a)
2323{
2324 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2325}
2326
2327__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2328vqnegq_s16 (int16x8_t __a)
2329{
2330 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2331}
2332
2333__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2334vqnegq_s32 (int32x4_t __a)
2335{
2336 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2337}
2338
2339__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2340vqabs_s8 (int8x8_t __a)
2341{
2342 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2343}
2344
2345__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2346vqabs_s16 (int16x4_t __a)
2347{
2348 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2349}
2350
2351__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2352vqabs_s32 (int32x2_t __a)
2353{
2354 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2355}
2356
2357__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2358vqabsq_s8 (int8x16_t __a)
2359{
2360 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2361}
2362
2363__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2364vqabsq_s16 (int16x8_t __a)
2365{
2366 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2367}
2368
2369__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2370vqabsq_s32 (int32x4_t __a)
2371{
2372 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2373}
2374
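/* Illustrative sketch: vqneg and vqabs exist because plain negation and
   absolute value overflow on the most negative lane value; the saturating
   forms map -128 to 127 for int8 lanes instead of back to -128.  The
   helper name is hypothetical.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
__neon_example_saturating_abs (int8x8_t __a)
{
  /* abs (-128) saturates to 127 instead of wrapping.  */
  return vqabs_s8 (__a);
}
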
2375__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2376vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2377{
2378 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2379}
2380
2381__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2382vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2383{
2384 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2385}
2386
2387__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2388vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2389{
2390 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2391}
2392
2393__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2394vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2395{
2396 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2397}
2398
2399__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2400vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2401{
2402 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2403}
2404
2405__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2406vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2407{
2408 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2409}
2410
2411__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2412vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2413{
2414 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2415}
2416
2417__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2418vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2419{
2420 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2421}
2422
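/* Illustrative sketch: vqdmulh computes the saturating doubling multiply
   returning the high half, i.e. (2 * a * b) >> 16 for int16 lanes, which
   is a Q15 fixed-point multiply; it saturates only when both inputs are
   the most negative value.  vqrdmulh rounds before the shift instead of
   truncating.  The helper name is hypothetical.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
__neon_example_q15_multiply (int16x4_t __a, int16x4_t __b)
{
  /* Rounding doubling multiply, keeping the high half of each product.  */
  return vqrdmulh_s16 (__a, __b);
}
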
2423__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2424vcreate_s8 (uint64_t __a)
2425{
2426 return (int8x8_t) __a;
2427}
2428
2429__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2430vcreate_s16 (uint64_t __a)
2431{
2432 return (int16x4_t) __a;
2433}
2434
2435__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2436vcreate_s32 (uint64_t __a)
2437{
2438 return (int32x2_t) __a;
2439}
2440
2441__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2442vcreate_s64 (uint64_t __a)
2443{
2444 return (int64x1_t) __a;
2445}
2446
2447__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2448vcreate_f32 (uint64_t __a)
2449{
2450 return (float32x2_t) __a;
2451}
2452
2453__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2454vcreate_u8 (uint64_t __a)
2455{
2456 return (uint8x8_t) __a;
2457}
2458
2459__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2460vcreate_u16 (uint64_t __a)
2461{
2462 return (uint16x4_t) __a;
2463}
2464
2465__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2466vcreate_u32 (uint64_t __a)
2467{
2468 return (uint32x2_t) __a;
2469}
2470
2471__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2472vcreate_u64 (uint64_t __a)
2473{
2474 return (uint64x1_t) __a;
2475}
2476
2477__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2478vcreate_f64 (uint64_t __a)
2479{
2480 return (float64x1_t) __builtin_aarch64_createdf (__a);
2481}
2482
2483__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2484vcreate_p8 (uint64_t __a)
2485{
2486 return (poly8x8_t) __a;
2487}
2488
2489__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2490vcreate_p16 (uint64_t __a)
2491{
2492 return (poly16x4_t) __a;
2493}
2494
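/* Illustrative sketch: vcreate builds a 64-bit vector from an integer bit
   pattern.  The helper name is hypothetical.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
__neon_example_create_bytes (void)
{
  /* On a little-endian target, lane 0 holds 0xff and the remaining seven
     lanes hold 0.  */
  return vcreate_u8 (0xff);
}
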
2495/* vget_lane */
2496
2497__extension__ static __inline float32_t __attribute__ ((__always_inline__))
2498vget_lane_f32 (float32x2_t __a, const int __b)
2499{
2500 return __aarch64_vget_lane_f32 (__a, __b);
2501}
2502
2503__extension__ static __inline float64_t __attribute__ ((__always_inline__))
2504vget_lane_f64 (float64x1_t __a, const int __b)
2505{
2506 return __aarch64_vget_lane_f64 (__a, __b);
2507}
2508
2509__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2510vget_lane_p8 (poly8x8_t __a, const int __b)
2511{
2512 return __aarch64_vget_lane_p8 (__a, __b);
2513}
2514
2515__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2516vget_lane_p16 (poly16x4_t __a, const int __b)
2517{
2518 return __aarch64_vget_lane_p16 (__a, __b);
2519}
2520
2521__extension__ static __inline int8_t __attribute__ ((__always_inline__))
2522vget_lane_s8 (int8x8_t __a, const int __b)
2523{
2524 return __aarch64_vget_lane_s8 (__a, __b);
2525}
2526
2527__extension__ static __inline int16_t __attribute__ ((__always_inline__))
2528vget_lane_s16 (int16x4_t __a, const int __b)
2529{
2530 return __aarch64_vget_lane_s16 (__a, __b);
2531}
2532
2533__extension__ static __inline int32_t __attribute__ ((__always_inline__))
2534vget_lane_s32 (int32x2_t __a, const int __b)
2535{
2536 return __aarch64_vget_lane_s32 (__a, __b);
2537}
2538
2539__extension__ static __inline int64_t __attribute__ ((__always_inline__))
2540vget_lane_s64 (int64x1_t __a, const int __b)
2541{
2542 return __aarch64_vget_lane_s64 (__a, __b);
2543}
2544
2545__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2546vget_lane_u8 (uint8x8_t __a, const int __b)
2547{
2548 return __aarch64_vget_lane_u8 (__a, __b);
2549}
2550
2551__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2552vget_lane_u16 (uint16x4_t __a, const int __b)
2553{
2554 return __aarch64_vget_lane_u16 (__a, __b);
2555}
2556
2557__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2558vget_lane_u32 (uint32x2_t __a, const int __b)
2559{
2560 return __aarch64_vget_lane_u32 (__a, __b);
2561}
2562
2563__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2564vget_lane_u64 (uint64x1_t __a, const int __b)
2565{
2566 return __aarch64_vget_lane_u64 (__a, __b);
2567}
2568
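/* Illustrative sketch: vget_lane extracts one element; the lane index
   must be a compile-time constant in range for the vector type (0-7 for
   a byte vector, 0-1 for 32-bit lanes).  The helper name is
   hypothetical.  */
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
__neon_example_first_byte (uint8x8_t __a)
{
  /* Reads lane 0 with a constant index, as required.  */
  return vget_lane_u8 (__a, 0);
}
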
2569/* vgetq_lane */
2570
2571__extension__ static __inline float32_t __attribute__ ((__always_inline__))
2572vgetq_lane_f32 (float32x4_t __a, const int __b)
2573{
2574 return __aarch64_vgetq_lane_f32 (__a, __b);
2575}
2576
2577__extension__ static __inline float64_t __attribute__ ((__always_inline__))
2578vgetq_lane_f64 (float64x2_t __a, const int __b)
2579{
2580 return __aarch64_vgetq_lane_f64 (__a, __b);
2581}
2582
2583__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2584vgetq_lane_p8 (poly8x16_t __a, const int __b)
2585{
2586 return __aarch64_vgetq_lane_p8 (__a, __b);
2587}
2588
2589__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2590vgetq_lane_p16 (poly16x8_t __a, const int __b)
2591{
2592 return __aarch64_vgetq_lane_p16 (__a, __b);
2593}
2594
2595__extension__ static __inline int8_t __attribute__ ((__always_inline__))
2596vgetq_lane_s8 (int8x16_t __a, const int __b)
2597{
2598 return __aarch64_vgetq_lane_s8 (__a, __b);
2599}
2600
2601__extension__ static __inline int16_t __attribute__ ((__always_inline__))
2602vgetq_lane_s16 (int16x8_t __a, const int __b)
2603{
2604 return __aarch64_vgetq_lane_s16 (__a, __b);
2605}
2606
2607__extension__ static __inline int32_t __attribute__ ((__always_inline__))
2608vgetq_lane_s32 (int32x4_t __a, const int __b)
2609{
2610 return __aarch64_vgetq_lane_s32 (__a, __b);
2611}
2612
2613__extension__ static __inline int64_t __attribute__ ((__always_inline__))
2614vgetq_lane_s64 (int64x2_t __a, const int __b)
2615{
2616 return __aarch64_vgetq_lane_s64 (__a, __b);
2617}
2618
2619__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2620vgetq_lane_u8 (uint8x16_t __a, const int __b)
2621{
2622 return __aarch64_vgetq_lane_u8 (__a, __b);
2623}
2624
2625__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2626vgetq_lane_u16 (uint16x8_t __a, const int __b)
2627{
2628 return __aarch64_vgetq_lane_u16 (__a, __b);
2629}
2630
2631__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2632vgetq_lane_u32 (uint32x4_t __a, const int __b)
2633{
2634 return __aarch64_vgetq_lane_u32 (__a, __b);
2635}
2636
2637__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2638vgetq_lane_u64 (uint64x2_t __a, const int __b)
2639{
2640 return __aarch64_vgetq_lane_u64 (__a, __b);
2641}
2642
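/* Illustrative sketch: vgetq_lane is the 128-bit counterpart of
   vget_lane, so the index ranges over the wider vector (0-15 for bytes,
   0-3 for 32-bit lanes).  The helper name is hypothetical.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
__neon_example_last_word (int32x4_t __a)
{
  /* Lane 3 is the highest 32-bit lane of a 128-bit vector.  */
  return vgetq_lane_s32 (__a, 3);
}
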
2643/* vreinterpret */
2644
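/* The vreinterpret intrinsics below change only the type of a vector, not
   its bits: no conversion instruction is generated, and the source and
   destination vectors must have the same overall size (64 or 128 bits).  */
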
2645__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2646vreinterpret_p8_s8 (int8x8_t __a)
2647{
2648 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2649}
2650
2651__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2652vreinterpret_p8_s16 (int16x4_t __a)
2653{
2654 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2655}
2656
2657__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2658vreinterpret_p8_s32 (int32x2_t __a)
2659{
2660 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2661}
2662
2663__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2664vreinterpret_p8_s64 (int64x1_t __a)
2665{
2666 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2667}
2668
2669__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2670vreinterpret_p8_f32 (float32x2_t __a)
2671{
2672 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2673}
2674
2675__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2676vreinterpret_p8_u8 (uint8x8_t __a)
2677{
2678 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2679}
2680
2681__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2682vreinterpret_p8_u16 (uint16x4_t __a)
2683{
2684 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2685}
2686
2687__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2688vreinterpret_p8_u32 (uint32x2_t __a)
2689{
2690 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2691}
2692
2693__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2694vreinterpret_p8_u64 (uint64x1_t __a)
2695{
2696 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2697}
2698
2699__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2700vreinterpret_p8_p16 (poly16x4_t __a)
2701{
2702 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2703}
2704
2705__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2706vreinterpretq_p8_s8 (int8x16_t __a)
2707{
2708 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2709}
2710
2711__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2712vreinterpretq_p8_s16 (int16x8_t __a)
2713{
2714 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2715}
2716
2717__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2718vreinterpretq_p8_s32 (int32x4_t __a)
2719{
2720 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2721}
2722
2723__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2724vreinterpretq_p8_s64 (int64x2_t __a)
2725{
2726 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2727}
2728
2729__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2730vreinterpretq_p8_f32 (float32x4_t __a)
2731{
2732 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2733}
2734
2735__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2736vreinterpretq_p8_u8 (uint8x16_t __a)
2737{
2738 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2739 __a);
2740}
2741
2742__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2743vreinterpretq_p8_u16 (uint16x8_t __a)
2744{
2745 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2746 __a);
2747}
2748
2749__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2750vreinterpretq_p8_u32 (uint32x4_t __a)
2751{
2752 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2753 __a);
2754}
2755
2756__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2757vreinterpretq_p8_u64 (uint64x2_t __a)
2758{
2759 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2760 __a);
2761}
2762
2763__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2764vreinterpretq_p8_p16 (poly16x8_t __a)
2765{
2766 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2767 __a);
2768}
2769
2770__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2771vreinterpret_p16_s8 (int8x8_t __a)
2772{
2773 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2774}
2775
2776__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2777vreinterpret_p16_s16 (int16x4_t __a)
2778{
2779 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2780}
2781
2782__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2783vreinterpret_p16_s32 (int32x2_t __a)
2784{
2785 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2786}
2787
2788__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2789vreinterpret_p16_s64 (int64x1_t __a)
2790{
2791 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2792}
2793
2794__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2795vreinterpret_p16_f32 (float32x2_t __a)
2796{
2797 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2798}
2799
2800__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2801vreinterpret_p16_u8 (uint8x8_t __a)
2802{
2803 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2804}
2805
2806__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2807vreinterpret_p16_u16 (uint16x4_t __a)
2808{
2809 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2810}
2811
2812__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2813vreinterpret_p16_u32 (uint32x2_t __a)
2814{
2815 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2816}
2817
2818__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2819vreinterpret_p16_u64 (uint64x1_t __a)
2820{
2821 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2822}
2823
2824__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2825vreinterpret_p16_p8 (poly8x8_t __a)
2826{
2827 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2828}
2829
2830__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2831vreinterpretq_p16_s8 (int8x16_t __a)
2832{
2833 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2834}
2835
2836__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2837vreinterpretq_p16_s16 (int16x8_t __a)
2838{
2839 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2840}
2841
2842__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2843vreinterpretq_p16_s32 (int32x4_t __a)
2844{
2845 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2846}
2847
2848__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2849vreinterpretq_p16_s64 (int64x2_t __a)
2850{
2851 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2852}
2853
2854__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2855vreinterpretq_p16_f32 (float32x4_t __a)
2856{
2857 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2858}
2859
2860__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2861vreinterpretq_p16_u8 (uint8x16_t __a)
2862{
2863 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2864 __a);
2865}
2866
2867__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2868vreinterpretq_p16_u16 (uint16x8_t __a)
2869{
2870 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2871}
2872
2873__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2874vreinterpretq_p16_u32 (uint32x4_t __a)
2875{
2876 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2877}
2878
2879__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2880vreinterpretq_p16_u64 (uint64x2_t __a)
2881{
2882 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2883}
2884
2885__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2886vreinterpretq_p16_p8 (poly8x16_t __a)
2887{
2888 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2889 __a);
2890}
2891
2892__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2893vreinterpret_f32_s8 (int8x8_t __a)
2894{
2895 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2896}
2897
2898__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2899vreinterpret_f32_s16 (int16x4_t __a)
2900{
2901 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2902}
2903
2904__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2905vreinterpret_f32_s32 (int32x2_t __a)
2906{
2907 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2908}
2909
2910__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2911vreinterpret_f32_s64 (int64x1_t __a)
2912{
2913 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2914}
2915
2916__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2917vreinterpret_f32_u8 (uint8x8_t __a)
2918{
2919 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2920}
2921
2922__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2923vreinterpret_f32_u16 (uint16x4_t __a)
2924{
2925 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2926 __a);
2927}
2928
2929__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2930vreinterpret_f32_u32 (uint32x2_t __a)
2931{
2932 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2933 __a);
2934}
2935
2936__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2937vreinterpret_f32_u64 (uint64x1_t __a)
2938{
2939 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2940}
2941
2942__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2943vreinterpret_f32_p8 (poly8x8_t __a)
2944{
2945 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2946}
2947
2948__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2949vreinterpret_f32_p16 (poly16x4_t __a)
2950{
2951 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2952 __a);
2953}
2954
2955__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2956vreinterpretq_f32_s8 (int8x16_t __a)
2957{
2958 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2959}
2960
2961__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2962vreinterpretq_f32_s16 (int16x8_t __a)
2963{
2964 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2965}
2966
2967__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2968vreinterpretq_f32_s32 (int32x4_t __a)
2969{
2970 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2971}
2972
2973__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2974vreinterpretq_f32_s64 (int64x2_t __a)
2975{
2976 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2977}
2978
2979__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2980vreinterpretq_f32_u8 (uint8x16_t __a)
2981{
2982 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2983 __a);
2984}
2985
2986__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2987vreinterpretq_f32_u16 (uint16x8_t __a)
2988{
2989 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2990 __a);
2991}
2992
2993__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2994vreinterpretq_f32_u32 (uint32x4_t __a)
2995{
2996 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2997 __a);
2998}
2999
3000__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3001vreinterpretq_f32_u64 (uint64x2_t __a)
3002{
3003 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
3004 __a);
3005}
3006
3007__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3008vreinterpretq_f32_p8 (poly8x16_t __a)
3009{
3010 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
3011 __a);
3012}
3013
3014__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3015vreinterpretq_f32_p16 (poly16x8_t __a)
3016{
3017 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
3018 __a);
3019}
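/* Illustrative sketch: reinterpreting an integer bit pattern as float
   lanes preserves the bits, so 0x3f800000 reads back as 1.0f.  The helper
   name is hypothetical.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
__neon_example_bits_to_float (void)
{
  /* Both 32-bit lanes hold 0x3f800000, the IEEE encoding of 1.0f.  */
  return vreinterpret_f32_u32 (vcreate_u32 (0x3f8000003f800000ULL));
}
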
3020
3021__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3022vreinterpret_s64_s8 (int8x8_t __a)
3023{
3024 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3025}
3026
3027__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3028vreinterpret_s64_s16 (int16x4_t __a)
3029{
3030 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3031}
3032
3033__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3034vreinterpret_s64_s32 (int32x2_t __a)
3035{
3036 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3037}
3038
3039__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3040vreinterpret_s64_f32 (float32x2_t __a)
3041{
3042 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3043}
3044
3045__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3046vreinterpret_s64_u8 (uint8x8_t __a)
3047{
3048 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3049}
3050
3051__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3052vreinterpret_s64_u16 (uint16x4_t __a)
3053{
3054 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3055}
3056
3057__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3058vreinterpret_s64_u32 (uint32x2_t __a)
3059{
3060 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3061}
3062
3063__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3064vreinterpret_s64_u64 (uint64x1_t __a)
3065{
3066 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
3067}
3068
3069__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3070vreinterpret_s64_p8 (poly8x8_t __a)
3071{
3072 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3073}
3074
3075__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3076vreinterpret_s64_p16 (poly16x4_t __a)
3077{
3078 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3079}
3080
3081__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3082vreinterpretq_s64_s8 (int8x16_t __a)
3083{
3084 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3085}
3086
3087__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3088vreinterpretq_s64_s16 (int16x8_t __a)
3089{
3090 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3091}
3092
3093__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3094vreinterpretq_s64_s32 (int32x4_t __a)
3095{
3096 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3097}
3098
3099__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3100vreinterpretq_s64_f32 (float32x4_t __a)
3101{
3102 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3103}
3104
3105__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3106vreinterpretq_s64_u8 (uint8x16_t __a)
3107{
3108 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3109}
3110
3111__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3112vreinterpretq_s64_u16 (uint16x8_t __a)
3113{
3114 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3115}
3116
3117__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3118vreinterpretq_s64_u32 (uint32x4_t __a)
3119{
3120 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3121}
3122
3123__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3124vreinterpretq_s64_u64 (uint64x2_t __a)
3125{
3126 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3127}
3128
3129__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3130vreinterpretq_s64_p8 (poly8x16_t __a)
3131{
3132 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3133}
3134
3135__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3136vreinterpretq_s64_p16 (poly16x8_t __a)
3137{
3138 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3139}
3140
3141__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3142vreinterpret_u64_s8 (int8x8_t __a)
3143{
3144 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3145}
3146
3147__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3148vreinterpret_u64_s16 (int16x4_t __a)
3149{
3150 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3151}
3152
3153__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3154vreinterpret_u64_s32 (int32x2_t __a)
3155{
3156 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3157}
3158
3159__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3160vreinterpret_u64_s64 (int64x1_t __a)
3161{
3162 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3163}
3164
3165__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3166vreinterpret_u64_f32 (float32x2_t __a)
3167{
3168 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3169}
3170
3171__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3172vreinterpret_u64_u8 (uint8x8_t __a)
3173{
3174 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3175}
3176
3177__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3178vreinterpret_u64_u16 (uint16x4_t __a)
3179{
3180 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3181}
3182
3183__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3184vreinterpret_u64_u32 (uint32x2_t __a)
3185{
3186 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3187}
3188
3189__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3190vreinterpret_u64_p8 (poly8x8_t __a)
3191{
3192 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3193}
3194
3195__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3196vreinterpret_u64_p16 (poly16x4_t __a)
3197{
3198 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3199}
3200
3201__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3202vreinterpretq_u64_s8 (int8x16_t __a)
3203{
3204 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3205}
3206
3207__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3208vreinterpretq_u64_s16 (int16x8_t __a)
3209{
3210 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3211}
3212
3213__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3214vreinterpretq_u64_s32 (int32x4_t __a)
3215{
3216 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3217}
3218
3219__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3220vreinterpretq_u64_s64 (int64x2_t __a)
3221{
3222 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3223}
3224
3225__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3226vreinterpretq_u64_f32 (float32x4_t __a)
3227{
3228 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3229}
3230
3231__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3232vreinterpretq_u64_u8 (uint8x16_t __a)
3233{
3234 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3235 __a);
3236}
3237
3238__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3239vreinterpretq_u64_u16 (uint16x8_t __a)
3240{
3241 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3242}
3243
3244__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3245vreinterpretq_u64_u32 (uint32x4_t __a)
3246{
3247 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3248}
3249
3250__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3251vreinterpretq_u64_p8 (poly8x16_t __a)
3252{
3253 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3254 __a);
3255}
3256
3257__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3258vreinterpretq_u64_p16 (poly16x8_t __a)
3259{
3260 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3261}
3262
3263__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3264vreinterpret_s8_s16 (int16x4_t __a)
3265{
3266 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3267}
3268
3269__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3270vreinterpret_s8_s32 (int32x2_t __a)
3271{
3272 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3273}
3274
3275__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3276vreinterpret_s8_s64 (int64x1_t __a)
3277{
3278 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3279}
3280
3281__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3282vreinterpret_s8_f32 (float32x2_t __a)
3283{
3284 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3285}
3286
3287__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3288vreinterpret_s8_u8 (uint8x8_t __a)
3289{
3290 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3291}
3292
3293__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3294vreinterpret_s8_u16 (uint16x4_t __a)
3295{
3296 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3297}
3298
3299__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3300vreinterpret_s8_u32 (uint32x2_t __a)
3301{
3302 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3303}
3304
3305__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3306vreinterpret_s8_u64 (uint64x1_t __a)
3307{
3308 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3309}
3310
3311__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3312vreinterpret_s8_p8 (poly8x8_t __a)
3313{
3314 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3315}
3316
3317__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3318vreinterpret_s8_p16 (poly16x4_t __a)
3319{
3320 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3321}
3322
3323__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3324vreinterpretq_s8_s16 (int16x8_t __a)
3325{
3326 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3327}
3328
3329__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3330vreinterpretq_s8_s32 (int32x4_t __a)
3331{
3332 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3333}
3334
3335__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3336vreinterpretq_s8_s64 (int64x2_t __a)
3337{
3338 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3339}
3340
3341__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3342vreinterpretq_s8_f32 (float32x4_t __a)
3343{
3344 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3345}
3346
3347__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3348vreinterpretq_s8_u8 (uint8x16_t __a)
3349{
3350 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3351 __a);
3352}
3353
3354__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3355vreinterpretq_s8_u16 (uint16x8_t __a)
3356{
3357 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3358}
3359
3360__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3361vreinterpretq_s8_u32 (uint32x4_t __a)
3362{
3363 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3364}
3365
3366__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3367vreinterpretq_s8_u64 (uint64x2_t __a)
3368{
3369 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3370}
3371
3372__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3373vreinterpretq_s8_p8 (poly8x16_t __a)
3374{
3375 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3376 __a);
3377}
3378
3379__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3380vreinterpretq_s8_p16 (poly16x8_t __a)
3381{
3382 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3383}
3384
3385__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3386vreinterpret_s16_s8 (int8x8_t __a)
3387{
3388 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3389}
3390
3391__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3392vreinterpret_s16_s32 (int32x2_t __a)
3393{
3394 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3395}
3396
3397__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3398vreinterpret_s16_s64 (int64x1_t __a)
3399{
3400 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3401}
3402
3403__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3404vreinterpret_s16_f32 (float32x2_t __a)
3405{
3406 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3407}
3408
3409__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3410vreinterpret_s16_u8 (uint8x8_t __a)
3411{
3412 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3413}
3414
3415__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3416vreinterpret_s16_u16 (uint16x4_t __a)
3417{
3418 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3419}
3420
3421__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3422vreinterpret_s16_u32 (uint32x2_t __a)
3423{
3424 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3425}
3426
3427__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3428vreinterpret_s16_u64 (uint64x1_t __a)
3429{
3430 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3431}
3432
3433__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3434vreinterpret_s16_p8 (poly8x8_t __a)
3435{
3436 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3437}
3438
3439__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3440vreinterpret_s16_p16 (poly16x4_t __a)
3441{
3442 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3443}
3444
3445__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3446vreinterpretq_s16_s8 (int8x16_t __a)
3447{
3448 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3449}
3450
3451__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3452vreinterpretq_s16_s32 (int32x4_t __a)
3453{
3454 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3455}
3456
3457__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3458vreinterpretq_s16_s64 (int64x2_t __a)
3459{
3460 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3461}
3462
3463__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3464vreinterpretq_s16_f32 (float32x4_t __a)
3465{
3466 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3467}
3468
3469__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3470vreinterpretq_s16_u8 (uint8x16_t __a)
3471{
3472 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3473}
3474
3475__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3476vreinterpretq_s16_u16 (uint16x8_t __a)
3477{
3478 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3479}
3480
3481__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3482vreinterpretq_s16_u32 (uint32x4_t __a)
3483{
3484 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3485}
3486
3487__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3488vreinterpretq_s16_u64 (uint64x2_t __a)
3489{
3490 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3491}
3492
3493__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3494vreinterpretq_s16_p8 (poly8x16_t __a)
3495{
3496 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3497}
3498
3499__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3500vreinterpretq_s16_p16 (poly16x8_t __a)
3501{
3502 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3503}
3504
3505__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3506vreinterpret_s32_s8 (int8x8_t __a)
3507{
3508 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3509}
3510
3511__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3512vreinterpret_s32_s16 (int16x4_t __a)
3513{
3514 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3515}
3516
3517__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3518vreinterpret_s32_s64 (int64x1_t __a)
3519{
3520 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3521}
3522
3523__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3524vreinterpret_s32_f32 (float32x2_t __a)
3525{
3526 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3527}
3528
3529__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3530vreinterpret_s32_u8 (uint8x8_t __a)
3531{
3532 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3533}
3534
3535__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3536vreinterpret_s32_u16 (uint16x4_t __a)
3537{
3538 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3539}
3540
3541__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3542vreinterpret_s32_u32 (uint32x2_t __a)
3543{
3544 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3545}
3546
3547__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3548vreinterpret_s32_u64 (uint64x1_t __a)
3549{
3550 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3551}
3552
3553__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3554vreinterpret_s32_p8 (poly8x8_t __a)
3555{
3556 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3557}
3558
3559__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3560vreinterpret_s32_p16 (poly16x4_t __a)
3561{
3562 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3563}
3564
3565__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3566vreinterpretq_s32_s8 (int8x16_t __a)
3567{
3568 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3569}
3570
3571__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3572vreinterpretq_s32_s16 (int16x8_t __a)
3573{
3574 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3575}
3576
3577__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3578vreinterpretq_s32_s64 (int64x2_t __a)
3579{
3580 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3581}
3582
3583__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3584vreinterpretq_s32_f32 (float32x4_t __a)
3585{
3586 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3587}
3588
3589__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3590vreinterpretq_s32_u8 (uint8x16_t __a)
3591{
3592 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3593}
3594
3595__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3596vreinterpretq_s32_u16 (uint16x8_t __a)
3597{
3598 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3599}
3600
3601__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3602vreinterpretq_s32_u32 (uint32x4_t __a)
3603{
3604 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3605}
3606
3607__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3608vreinterpretq_s32_u64 (uint64x2_t __a)
3609{
3610 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3611}
3612
3613__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3614vreinterpretq_s32_p8 (poly8x16_t __a)
3615{
3616 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3617}
3618
3619__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3620vreinterpretq_s32_p16 (poly16x8_t __a)
3621{
3622 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3623}
3624
3625__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3626vreinterpret_u8_s8 (int8x8_t __a)
3627{
3628 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3629}
3630
3631__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3632vreinterpret_u8_s16 (int16x4_t __a)
3633{
3634 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3635}
3636
3637__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3638vreinterpret_u8_s32 (int32x2_t __a)
3639{
3640 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3641}
3642
3643__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3644vreinterpret_u8_s64 (int64x1_t __a)
3645{
3646 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3647}
3648
3649__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3650vreinterpret_u8_f32 (float32x2_t __a)
3651{
3652 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3653}
3654
3655__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3656vreinterpret_u8_u16 (uint16x4_t __a)
3657{
3658 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3659}
3660
3661__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3662vreinterpret_u8_u32 (uint32x2_t __a)
3663{
3664 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3665}
3666
3667__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3668vreinterpret_u8_u64 (uint64x1_t __a)
3669{
3670 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3671}
3672
3673__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3674vreinterpret_u8_p8 (poly8x8_t __a)
3675{
3676 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3677}
3678
3679__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3680vreinterpret_u8_p16 (poly16x4_t __a)
3681{
3682 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3683}
3684
3685__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3686vreinterpretq_u8_s8 (int8x16_t __a)
3687{
3688 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3689}
3690
3691__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3692vreinterpretq_u8_s16 (int16x8_t __a)
3693{
3694 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3695}
3696
3697__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3698vreinterpretq_u8_s32 (int32x4_t __a)
3699{
3700 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3701}
3702
3703__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3704vreinterpretq_u8_s64 (int64x2_t __a)
3705{
3706 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3707}
3708
3709__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3710vreinterpretq_u8_f32 (float32x4_t __a)
3711{
3712 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3713}
3714
3715__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3716vreinterpretq_u8_u16 (uint16x8_t __a)
3717{
3718 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3719 __a);
3720}
3721
3722__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3723vreinterpretq_u8_u32 (uint32x4_t __a)
3724{
3725 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3726 __a);
3727}
3728
3729__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3730vreinterpretq_u8_u64 (uint64x2_t __a)
3731{
3732 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3733 __a);
3734}
3735
3736__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3737vreinterpretq_u8_p8 (poly8x16_t __a)
3738{
3739 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3740 __a);
3741}
3742
3743__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3744vreinterpretq_u8_p16 (poly16x8_t __a)
3745{
3746 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3747 __a);
3748}
3749
3750__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3751vreinterpret_u16_s8 (int8x8_t __a)
3752{
3753 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3754}
3755
3756__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3757vreinterpret_u16_s16 (int16x4_t __a)
3758{
3759 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3760}
3761
3762__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3763vreinterpret_u16_s32 (int32x2_t __a)
3764{
3765 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3766}
3767
3768__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3769vreinterpret_u16_s64 (int64x1_t __a)
3770{
3771 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3772}
3773
3774__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3775vreinterpret_u16_f32 (float32x2_t __a)
3776{
3777 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3778}
3779
3780__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3781vreinterpret_u16_u8 (uint8x8_t __a)
3782{
3783 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3784}
3785
3786__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3787vreinterpret_u16_u32 (uint32x2_t __a)
3788{
3789 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3790}
3791
3792__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3793vreinterpret_u16_u64 (uint64x1_t __a)
3794{
3795 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3796}
3797
3798__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3799vreinterpret_u16_p8 (poly8x8_t __a)
3800{
3801 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3802}
3803
3804__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3805vreinterpret_u16_p16 (poly16x4_t __a)
3806{
3807 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3808}
3809
3810__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3811vreinterpretq_u16_s8 (int8x16_t __a)
3812{
3813 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3814}
3815
3816__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3817vreinterpretq_u16_s16 (int16x8_t __a)
3818{
3819 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3820}
3821
3822__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3823vreinterpretq_u16_s32 (int32x4_t __a)
3824{
3825 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3826}
3827
3828__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3829vreinterpretq_u16_s64 (int64x2_t __a)
3830{
3831 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3832}
3833
3834__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3835vreinterpretq_u16_f32 (float32x4_t __a)
3836{
3837 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3838}
3839
3840__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3841vreinterpretq_u16_u8 (uint8x16_t __a)
3842{
3843 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3844 __a);
3845}
3846
3847__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3848vreinterpretq_u16_u32 (uint32x4_t __a)
3849{
3850 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3851}
3852
3853__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3854vreinterpretq_u16_u64 (uint64x2_t __a)
3855{
3856 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3857}
3858
3859__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3860vreinterpretq_u16_p8 (poly8x16_t __a)
3861{
3862 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3863 __a);
3864}
3865
3866__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3867vreinterpretq_u16_p16 (poly16x8_t __a)
3868{
3869 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3870}
3871
3872__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3873vreinterpret_u32_s8 (int8x8_t __a)
3874{
3875 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3876}
3877
3878__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3879vreinterpret_u32_s16 (int16x4_t __a)
3880{
3881 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3882}
3883
3884__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3885vreinterpret_u32_s32 (int32x2_t __a)
3886{
3887 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3888}
3889
3890__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3891vreinterpret_u32_s64 (int64x1_t __a)
3892{
3893 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3894}
3895
3896__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3897vreinterpret_u32_f32 (float32x2_t __a)
3898{
3899 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3900}
3901
3902__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3903vreinterpret_u32_u8 (uint8x8_t __a)
3904{
3905 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3906}
3907
3908__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3909vreinterpret_u32_u16 (uint16x4_t __a)
3910{
3911 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3912}
3913
3914__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3915vreinterpret_u32_u64 (uint64x1_t __a)
3916{
3917 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3918}
3919
3920__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3921vreinterpret_u32_p8 (poly8x8_t __a)
3922{
3923 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3924}
3925
3926__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3927vreinterpret_u32_p16 (poly16x4_t __a)
3928{
3929 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3930}
3931
3932__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3933vreinterpretq_u32_s8 (int8x16_t __a)
3934{
3935 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3936}
3937
3938__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3939vreinterpretq_u32_s16 (int16x8_t __a)
3940{
3941 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3942}
3943
3944__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3945vreinterpretq_u32_s32 (int32x4_t __a)
3946{
3947 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3948}
3949
3950__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3951vreinterpretq_u32_s64 (int64x2_t __a)
3952{
3953 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3954}
3955
3956__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3957vreinterpretq_u32_f32 (float32x4_t __a)
3958{
3959 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3960}
3961
3962__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3963vreinterpretq_u32_u8 (uint8x16_t __a)
3964{
3965 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3966 __a);
3967}
3968
3969__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3970vreinterpretq_u32_u16 (uint16x8_t __a)
3971{
3972 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3973}
3974
3975__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3976vreinterpretq_u32_u64 (uint64x2_t __a)
3977{
3978 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3979}
3980
3981__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3982vreinterpretq_u32_p8 (poly8x16_t __a)
3983{
3984 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3985 __a);
3986}
3987
3988__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3989vreinterpretq_u32_p16 (poly16x8_t __a)
3990{
3991 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3992}
3993
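/* The vreinterpret/vreinterpretq families above only relabel the element
   type of a 64-bit or 128-bit register: the bit pattern is left untouched
   and no value conversion is performed.  Illustrative round trip (variable
   names are placeholders):

     uint32x4_t __bits = vreinterpretq_u32_f32 (__v);
     float32x4_t __back = vreinterpretq_f32_u32 (__bits);

   __back holds exactly the values of __v; vreinterpretq_f32_u32 is defined
   elsewhere in this file.  */
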
3994#define __GET_LOW(__TYPE) \
3995 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
3996 uint64_t lo = vgetq_lane_u64 (tmp, 0); \
3997 return vreinterpret_##__TYPE##_u64 (lo);
3998
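/* __GET_LOW builds each vget_low_<type> from intrinsics defined above: the
   128-bit input is viewed as uint64x2_t, lane 0 (the low 64 bits) is pulled
   out with vgetq_lane_u64, and that value is viewed back as the requested
   64-bit vector type.  For f32 the macro expands to the equivalent of:

     uint64x2_t tmp = vreinterpretq_u64_f32 (__a);
     uint64_t lo = vgetq_lane_u64 (tmp, 0);
     return vreinterpret_f32_u64 (lo);  */
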
3999__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4000vget_low_f32 (float32x4_t __a)
4001{
4002 __GET_LOW (f32);
4003}
4004
4005__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4006vget_low_f64 (float64x2_t __a)
4007{
4008 return vgetq_lane_f64 (__a, 0);
4009}
4010
4011__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4012vget_low_p8 (poly8x16_t __a)
4013{
4014 __GET_LOW (p8);
4015}
4016
4017__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4018vget_low_p16 (poly16x8_t __a)
4019{
4020 __GET_LOW (p16);
4021}
4022
4023__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4024vget_low_s8 (int8x16_t __a)
4025{
4026 __GET_LOW (s8);
4027}
4028
4029__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4030vget_low_s16 (int16x8_t __a)
4031{
4032 __GET_LOW (s16);
4033}
4034
4035__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4036vget_low_s32 (int32x4_t __a)
4037{
4038 __GET_LOW (s32);
4039}
4040
4041__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4042vget_low_s64 (int64x2_t __a)
4043{
4044 return vgetq_lane_s64 (__a, 0);
4045}
4046
4047__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4048vget_low_u8 (uint8x16_t __a)
4049{
4050 __GET_LOW (u8);
4051}
4052
4053__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4054vget_low_u16 (uint16x8_t __a)
4055{
4056 __GET_LOW (u16);
4057}
4058
4059__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4060vget_low_u32 (uint32x4_t __a)
4061{
4062 __GET_LOW (u32);
4063}
4064
4065__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4066vget_low_u64 (uint64x2_t __a)
4067{
4068 return vgetq_lane_u64 (__a, 0);
4069}
4070
4071#undef __GET_LOW
4072
4073__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4074vcombine_s8 (int8x8_t __a, int8x8_t __b)
4075{
4076 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4077}
4078
4079__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4080vcombine_s16 (int16x4_t __a, int16x4_t __b)
4081{
4082 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4083}
4084
4085__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4086vcombine_s32 (int32x2_t __a, int32x2_t __b)
4087{
4088 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4089}
4090
4091__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4092vcombine_s64 (int64x1_t __a, int64x1_t __b)
4093{
4094 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
4095}
4096
4097__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4098vcombine_f32 (float32x2_t __a, float32x2_t __b)
4099{
4100 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4101}
4102
4103__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4104vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4105{
4106 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4107 (int8x8_t) __b);
4108}
4109
4110__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4111vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4112{
4113 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4114 (int16x4_t) __b);
4115}
4116
4117__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4118vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4119{
4120 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4121 (int32x2_t) __b);
4122}
4123
4124__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4125vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4126{
4127 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
4128 (int64x1_t) __b);
4129}
4130
4131__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4132vcombine_f64 (float64x1_t __a, float64x1_t __b)
4133{
4134 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
4135}
4136
4137__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4138vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4139{
4140 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4141 (int8x8_t) __b);
4142}
4143
4144__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4145vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4146{
4147 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4148 (int16x4_t) __b);
4149}
4150
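/* vcombine_<type> concatenates two 64-bit vectors into one 128-bit vector,
   the first argument supplying the low half and the second the high half.
   Together with vget_low_<type> and vget_high_<type> (the latter defined
   elsewhere in this file) this gives a lossless round trip, e.g.:

     int16x8_t __q = vcombine_s16 (vget_low_s16 (__x), vget_high_s16 (__x));

   which leaves __q equal to __x.  */
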
4151/* Start of temporary inline asm implementations. */
4152
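/* These interim implementations share one inline asm pattern: "=w" requests
   an Advanced SIMD register for the result, "w" places each input in a SIMD
   register, and a "0" constraint ties an input to operand 0 so that
   accumulating instructions (saba, sabal, fmla, ...) read and write the
   same register.  No clobber list is needed because only the output
   register is written.  */
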
4153__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4154vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4155{
4156 int8x8_t result;
4157 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4158 : "=w"(result)
4159 : "0"(a), "w"(b), "w"(c)
4160 : /* No clobbers */);
4161 return result;
4162}
4163
4164__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4165vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4166{
4167 int16x4_t result;
4168 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4169 : "=w"(result)
4170 : "0"(a), "w"(b), "w"(c)
4171 : /* No clobbers */);
4172 return result;
4173}
4174
4175__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4176vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4177{
4178 int32x2_t result;
4179 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4180 : "=w"(result)
4181 : "0"(a), "w"(b), "w"(c)
4182 : /* No clobbers */);
4183 return result;
4184}
4185
4186__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4187vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4188{
4189 uint8x8_t result;
4190 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4191 : "=w"(result)
4192 : "0"(a), "w"(b), "w"(c)
4193 : /* No clobbers */);
4194 return result;
4195}
4196
4197__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4198vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4199{
4200 uint16x4_t result;
4201 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4202 : "=w"(result)
4203 : "0"(a), "w"(b), "w"(c)
4204 : /* No clobbers */);
4205 return result;
4206}
4207
4208__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4209vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4210{
4211 uint32x2_t result;
4212 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4213 : "=w"(result)
4214 : "0"(a), "w"(b), "w"(c)
4215 : /* No clobbers */);
4216 return result;
4217}
4218
4219__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4220vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4221{
4222 int16x8_t result;
4223 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4224 : "=w"(result)
4225 : "0"(a), "w"(b), "w"(c)
4226 : /* No clobbers */);
4227 return result;
4228}
4229
4230__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4231vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4232{
4233 int32x4_t result;
4234 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4235 : "=w"(result)
4236 : "0"(a), "w"(b), "w"(c)
4237 : /* No clobbers */);
4238 return result;
4239}
4240
4241__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4242vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4243{
4244 int64x2_t result;
4245 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4246 : "=w"(result)
4247 : "0"(a), "w"(b), "w"(c)
4248 : /* No clobbers */);
4249 return result;
4250}
4251
4252__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4253vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4254{
4255 uint16x8_t result;
4256 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4257 : "=w"(result)
4258 : "0"(a), "w"(b), "w"(c)
4259 : /* No clobbers */);
4260 return result;
4261}
4262
4263__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4264vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4265{
4266 uint32x4_t result;
4267 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4268 : "=w"(result)
4269 : "0"(a), "w"(b), "w"(c)
4270 : /* No clobbers */);
4271 return result;
4272}
4273
4274__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4275vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4276{
4277 uint64x2_t result;
4278 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4279 : "=w"(result)
4280 : "0"(a), "w"(b), "w"(c)
4281 : /* No clobbers */);
4282 return result;
4283}
4284
4285__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4286vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4287{
4288 int16x8_t result;
4289 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4290 : "=w"(result)
4291 : "0"(a), "w"(b), "w"(c)
4292 : /* No clobbers */);
4293 return result;
4294}
4295
4296__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4297vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4298{
4299 int32x4_t result;
4300 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4301 : "=w"(result)
4302 : "0"(a), "w"(b), "w"(c)
4303 : /* No clobbers */);
4304 return result;
4305}
4306
4307__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4308vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4309{
4310 int64x2_t result;
4311 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4312 : "=w"(result)
4313 : "0"(a), "w"(b), "w"(c)
4314 : /* No clobbers */);
4315 return result;
4316}
4317
4318__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4319vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4320{
4321 uint16x8_t result;
4322 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4323 : "=w"(result)
4324 : "0"(a), "w"(b), "w"(c)
4325 : /* No clobbers */);
4326 return result;
4327}
4328
4329__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4330vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4331{
4332 uint32x4_t result;
4333 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4334 : "=w"(result)
4335 : "0"(a), "w"(b), "w"(c)
4336 : /* No clobbers */);
4337 return result;
4338}
4339
4340__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4341vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4342{
4343 uint64x2_t result;
4344 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4345 : "=w"(result)
4346 : "0"(a), "w"(b), "w"(c)
4347 : /* No clobbers */);
4348 return result;
4349}
4350
4351__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4352vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4353{
4354 int8x16_t result;
4355 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4356 : "=w"(result)
4357 : "0"(a), "w"(b), "w"(c)
4358 : /* No clobbers */);
4359 return result;
4360}
4361
4362__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4363vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4364{
4365 int16x8_t result;
4366 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4367 : "=w"(result)
4368 : "0"(a), "w"(b), "w"(c)
4369 : /* No clobbers */);
4370 return result;
4371}
4372
4373__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4374vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4375{
4376 int32x4_t result;
4377 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4378 : "=w"(result)
4379 : "0"(a), "w"(b), "w"(c)
4380 : /* No clobbers */);
4381 return result;
4382}
4383
4384__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4385vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4386{
4387 uint8x16_t result;
4388 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4389 : "=w"(result)
4390 : "0"(a), "w"(b), "w"(c)
4391 : /* No clobbers */);
4392 return result;
4393}
4394
4395__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4396vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4397{
4398 uint16x8_t result;
4399 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4400 : "=w"(result)
4401 : "0"(a), "w"(b), "w"(c)
4402 : /* No clobbers */);
4403 return result;
4404}
4405
4406__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4407vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4408{
4409 uint32x4_t result;
4410 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4411 : "=w"(result)
4412 : "0"(a), "w"(b), "w"(c)
4413 : /* No clobbers */);
4414 return result;
4415}
4416
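/* vaba/vabaq accumulate an absolute difference: each lane of the result is
   a + |b - c|.  The vabal forms widen the element size and the *_high forms
   consume the upper halves of their 128-bit inputs.  Illustrative
   sum-of-absolute-differences step (names are placeholders):

     uint16x8_t __sad = vdupq_n_u16 (0);
     __sad = vabal_u8 (__sad, __p, __q);

   which adds |__p[i] - __q[i]| into the corresponding 16-bit lane of __sad;
   vdupq_n_u16 is defined elsewhere in this file.  */
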
4417__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4418vabd_f32 (float32x2_t a, float32x2_t b)
4419{
4420 float32x2_t result;
4421 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4422 : "=w"(result)
4423 : "w"(a), "w"(b)
4424 : /* No clobbers */);
4425 return result;
4426}
4427
4428__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4429vabd_s8 (int8x8_t a, int8x8_t b)
4430{
4431 int8x8_t result;
4432 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4433 : "=w"(result)
4434 : "w"(a), "w"(b)
4435 : /* No clobbers */);
4436 return result;
4437}
4438
4439__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4440vabd_s16 (int16x4_t a, int16x4_t b)
4441{
4442 int16x4_t result;
4443 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4444 : "=w"(result)
4445 : "w"(a), "w"(b)
4446 : /* No clobbers */);
4447 return result;
4448}
4449
4450__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4451vabd_s32 (int32x2_t a, int32x2_t b)
4452{
4453 int32x2_t result;
4454 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4455 : "=w"(result)
4456 : "w"(a), "w"(b)
4457 : /* No clobbers */);
4458 return result;
4459}
4460
4461__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4462vabd_u8 (uint8x8_t a, uint8x8_t b)
4463{
4464 uint8x8_t result;
4465 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4466 : "=w"(result)
4467 : "w"(a), "w"(b)
4468 : /* No clobbers */);
4469 return result;
4470}
4471
4472__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4473vabd_u16 (uint16x4_t a, uint16x4_t b)
4474{
4475 uint16x4_t result;
4476 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4477 : "=w"(result)
4478 : "w"(a), "w"(b)
4479 : /* No clobbers */);
4480 return result;
4481}
4482
4483__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4484vabd_u32 (uint32x2_t a, uint32x2_t b)
4485{
4486 uint32x2_t result;
4487 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4488 : "=w"(result)
4489 : "w"(a), "w"(b)
4490 : /* No clobbers */);
4491 return result;
4492}
4493
4494__extension__ static __inline float64_t __attribute__ ((__always_inline__))
4495vabdd_f64 (float64_t a, float64_t b)
4496{
4497 float64_t result;
4498 __asm__ ("fabd %d0, %d1, %d2"
4499 : "=w"(result)
4500 : "w"(a), "w"(b)
4501 : /* No clobbers */);
4502 return result;
4503}
4504
4505__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4506vabdl_high_s8 (int8x16_t a, int8x16_t b)
4507{
4508 int16x8_t result;
4509 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4510 : "=w"(result)
4511 : "w"(a), "w"(b)
4512 : /* No clobbers */);
4513 return result;
4514}
4515
4516__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4517vabdl_high_s16 (int16x8_t a, int16x8_t b)
4518{
4519 int32x4_t result;
4520 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4521 : "=w"(result)
4522 : "w"(a), "w"(b)
4523 : /* No clobbers */);
4524 return result;
4525}
4526
4527__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4528vabdl_high_s32 (int32x4_t a, int32x4_t b)
4529{
4530 int64x2_t result;
4531 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4532 : "=w"(result)
4533 : "w"(a), "w"(b)
4534 : /* No clobbers */);
4535 return result;
4536}
4537
4538__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4539vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4540{
4541 uint16x8_t result;
4542 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4543 : "=w"(result)
4544 : "w"(a), "w"(b)
4545 : /* No clobbers */);
4546 return result;
4547}
4548
4549__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4550vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4551{
4552 uint32x4_t result;
4553 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4554 : "=w"(result)
4555 : "w"(a), "w"(b)
4556 : /* No clobbers */);
4557 return result;
4558}
4559
4560__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4561vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4562{
4563 uint64x2_t result;
4564 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4565 : "=w"(result)
4566 : "w"(a), "w"(b)
4567 : /* No clobbers */);
4568 return result;
4569}
4570
4571__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4572vabdl_s8 (int8x8_t a, int8x8_t b)
4573{
4574 int16x8_t result;
4575 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4576 : "=w"(result)
4577 : "w"(a), "w"(b)
4578 : /* No clobbers */);
4579 return result;
4580}
4581
4582__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4583vabdl_s16 (int16x4_t a, int16x4_t b)
4584{
4585 int32x4_t result;
4586 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4587 : "=w"(result)
4588 : "w"(a), "w"(b)
4589 : /* No clobbers */);
4590 return result;
4591}
4592
4593__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4594vabdl_s32 (int32x2_t a, int32x2_t b)
4595{
4596 int64x2_t result;
4597 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4598 : "=w"(result)
4599 : "w"(a), "w"(b)
4600 : /* No clobbers */);
4601 return result;
4602}
4603
4604__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4605vabdl_u8 (uint8x8_t a, uint8x8_t b)
4606{
4607 uint16x8_t result;
4608 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4609 : "=w"(result)
4610 : "w"(a), "w"(b)
4611 : /* No clobbers */);
4612 return result;
4613}
4614
4615__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4616vabdl_u16 (uint16x4_t a, uint16x4_t b)
4617{
4618 uint32x4_t result;
4619 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4620 : "=w"(result)
4621 : "w"(a), "w"(b)
4622 : /* No clobbers */);
4623 return result;
4624}
4625
4626__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4627vabdl_u32 (uint32x2_t a, uint32x2_t b)
4628{
4629 uint64x2_t result;
4630 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4631 : "=w"(result)
4632 : "w"(a), "w"(b)
4633 : /* No clobbers */);
4634 return result;
4635}
4636
4637__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4638vabdq_f32 (float32x4_t a, float32x4_t b)
4639{
4640 float32x4_t result;
4641 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4642 : "=w"(result)
4643 : "w"(a), "w"(b)
4644 : /* No clobbers */);
4645 return result;
4646}
4647
4648__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4649vabdq_f64 (float64x2_t a, float64x2_t b)
4650{
4651 float64x2_t result;
4652 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4653 : "=w"(result)
4654 : "w"(a), "w"(b)
4655 : /* No clobbers */);
4656 return result;
4657}
4658
4659__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4660vabdq_s8 (int8x16_t a, int8x16_t b)
4661{
4662 int8x16_t result;
4663 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4664 : "=w"(result)
4665 : "w"(a), "w"(b)
4666 : /* No clobbers */);
4667 return result;
4668}
4669
4670__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4671vabdq_s16 (int16x8_t a, int16x8_t b)
4672{
4673 int16x8_t result;
4674 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4675 : "=w"(result)
4676 : "w"(a), "w"(b)
4677 : /* No clobbers */);
4678 return result;
4679}
4680
4681__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4682vabdq_s32 (int32x4_t a, int32x4_t b)
4683{
4684 int32x4_t result;
4685 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4686 : "=w"(result)
4687 : "w"(a), "w"(b)
4688 : /* No clobbers */);
4689 return result;
4690}
4691
4692__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4693vabdq_u8 (uint8x16_t a, uint8x16_t b)
4694{
4695 uint8x16_t result;
4696 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4697 : "=w"(result)
4698 : "w"(a), "w"(b)
4699 : /* No clobbers */);
4700 return result;
4701}
4702
4703__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4704vabdq_u16 (uint16x8_t a, uint16x8_t b)
4705{
4706 uint16x8_t result;
4707 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4708 : "=w"(result)
4709 : "w"(a), "w"(b)
4710 : /* No clobbers */);
4711 return result;
4712}
4713
4714__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4715vabdq_u32 (uint32x4_t a, uint32x4_t b)
4716{
4717 uint32x4_t result;
4718 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4719 : "=w"(result)
4720 : "w"(a), "w"(b)
4721 : /* No clobbers */);
4722 return result;
4723}
4724
4725__extension__ static __inline float32_t __attribute__ ((__always_inline__))
4726vabds_f32 (float32_t a, float32_t b)
4727{
4728 float32_t result;
4729 __asm__ ("fabd %s0, %s1, %s2"
4730 : "=w"(result)
4731 : "w"(a), "w"(b)
4732 : /* No clobbers */);
4733 return result;
4734}
4735
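/* The vabd family is the non-accumulating counterpart of vaba: each lane is
   |a - b|, the vabdl/vabdl_high forms widen the result, and vabdd_f64 and
   vabds_f32 operate on scalar double and float values.  A minimal sketch:

     float32x4_t __err = vabdq_f32 (__approx, __exact);

   gives the per-lane absolute difference between two float vectors.  */
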
4736__extension__ static __inline int16_t __attribute__ ((__always_inline__))
4737vaddlv_s8 (int8x8_t a)
4738{
4739 int16_t result;
4740 __asm__ ("saddlv %h0,%1.8b"
4741 : "=w"(result)
4742 : "w"(a)
4743 : /* No clobbers */);
4744 return result;
4745}
4746
4747__extension__ static __inline int32_t __attribute__ ((__always_inline__))
4748vaddlv_s16 (int16x4_t a)
4749{
4750 int32_t result;
4751 __asm__ ("saddlv %s0,%1.4h"
4752 : "=w"(result)
4753 : "w"(a)
4754 : /* No clobbers */);
4755 return result;
4756}
4757
4758__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4759vaddlv_u8 (uint8x8_t a)
4760{
4761 uint16_t result;
4762 __asm__ ("uaddlv %h0,%1.8b"
4763 : "=w"(result)
4764 : "w"(a)
4765 : /* No clobbers */);
4766 return result;
4767}
4768
4769__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4770vaddlv_u16 (uint16x4_t a)
4771{
4772 uint32_t result;
4773 __asm__ ("uaddlv %s0,%1.4h"
4774 : "=w"(result)
4775 : "w"(a)
4776 : /* No clobbers */);
4777 return result;
4778}
4779
4780__extension__ static __inline int16_t __attribute__ ((__always_inline__))
4781vaddlvq_s8 (int8x16_t a)
4782{
4783 int16_t result;
4784 __asm__ ("saddlv %h0,%1.16b"
4785 : "=w"(result)
4786 : "w"(a)
4787 : /* No clobbers */);
4788 return result;
4789}
4790
4791__extension__ static __inline int32_t __attribute__ ((__always_inline__))
4792vaddlvq_s16 (int16x8_t a)
4793{
4794 int32_t result;
4795 __asm__ ("saddlv %s0,%1.8h"
4796 : "=w"(result)
4797 : "w"(a)
4798 : /* No clobbers */);
4799 return result;
4800}
4801
4802__extension__ static __inline int64_t __attribute__ ((__always_inline__))
4803vaddlvq_s32 (int32x4_t a)
4804{
4805 int64_t result;
4806 __asm__ ("saddlv %d0,%1.4s"
4807 : "=w"(result)
4808 : "w"(a)
4809 : /* No clobbers */);
4810 return result;
4811}
4812
4813__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4814vaddlvq_u8 (uint8x16_t a)
4815{
4816 uint16_t result;
4817 __asm__ ("uaddlv %h0,%1.16b"
4818 : "=w"(result)
4819 : "w"(a)
4820 : /* No clobbers */);
4821 return result;
4822}
4823
4824__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4825vaddlvq_u16 (uint16x8_t a)
4826{
4827 uint32_t result;
4828 __asm__ ("uaddlv %s0,%1.8h"
4829 : "=w"(result)
4830 : "w"(a)
4831 : /* No clobbers */);
4832 return result;
4833}
4834
4835__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4836vaddlvq_u32 (uint32x4_t a)
4837{
4838 uint64_t result;
4839 __asm__ ("uaddlv %d0,%1.4s"
4840 : "=w"(result)
4841 : "w"(a)
4842 : /* No clobbers */);
4843 return result;
4844}
4845
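/* vaddlv/vaddlvq reduce across all lanes with a widening sum and return a
   scalar: 8-bit lanes sum into a 16-bit result, 16-bit lanes into 32 bits,
   and (quad forms only) 32-bit lanes into 64 bits.  For example:

     uint32_t __total = vaddlvq_u16 (__counts);

   sums the eight 16-bit lanes of __counts without risk of 16-bit overflow.  */
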
4846__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4847vcls_s8 (int8x8_t a)
4848{
4849 int8x8_t result;
4850 __asm__ ("cls %0.8b,%1.8b"
4851 : "=w"(result)
4852 : "w"(a)
4853 : /* No clobbers */);
4854 return result;
4855}
4856
4857__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4858vcls_s16 (int16x4_t a)
4859{
4860 int16x4_t result;
4861 __asm__ ("cls %0.4h,%1.4h"
4862 : "=w"(result)
4863 : "w"(a)
4864 : /* No clobbers */);
4865 return result;
4866}
4867
4868__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4869vcls_s32 (int32x2_t a)
4870{
4871 int32x2_t result;
4872 __asm__ ("cls %0.2s,%1.2s"
4873 : "=w"(result)
4874 : "w"(a)
4875 : /* No clobbers */);
4876 return result;
4877}
4878
4879__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4880vclsq_s8 (int8x16_t a)
4881{
4882 int8x16_t result;
4883 __asm__ ("cls %0.16b,%1.16b"
4884 : "=w"(result)
4885 : "w"(a)
4886 : /* No clobbers */);
4887 return result;
4888}
4889
4890__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4891vclsq_s16 (int16x8_t a)
4892{
4893 int16x8_t result;
4894 __asm__ ("cls %0.8h,%1.8h"
4895 : "=w"(result)
4896 : "w"(a)
4897 : /* No clobbers */);
4898 return result;
4899}
4900
4901__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4902vclsq_s32 (int32x4_t a)
4903{
4904 int32x4_t result;
4905 __asm__ ("cls %0.4s,%1.4s"
4906 : "=w"(result)
4907 : "w"(a)
4908 : /* No clobbers */);
4909 return result;
4910}
4911
4912__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4913vcnt_p8 (poly8x8_t a)
4914{
4915 poly8x8_t result;
4916 __asm__ ("cnt %0.8b,%1.8b"
4917 : "=w"(result)
4918 : "w"(a)
4919 : /* No clobbers */);
4920 return result;
4921}
4922
4923__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4924vcnt_s8 (int8x8_t a)
4925{
4926 int8x8_t result;
4927 __asm__ ("cnt %0.8b,%1.8b"
4928 : "=w"(result)
4929 : "w"(a)
4930 : /* No clobbers */);
4931 return result;
4932}
4933
4934__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4935vcnt_u8 (uint8x8_t a)
4936{
4937 uint8x8_t result;
4938 __asm__ ("cnt %0.8b,%1.8b"
4939 : "=w"(result)
4940 : "w"(a)
4941 : /* No clobbers */);
4942 return result;
4943}
4944
4945__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4946vcntq_p8 (poly8x16_t a)
4947{
4948 poly8x16_t result;
4949 __asm__ ("cnt %0.16b,%1.16b"
4950 : "=w"(result)
4951 : "w"(a)
4952 : /* No clobbers */);
4953 return result;
4954}
4955
4956__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4957vcntq_s8 (int8x16_t a)
4958{
4959 int8x16_t result;
4960 __asm__ ("cnt %0.16b,%1.16b"
4961 : "=w"(result)
4962 : "w"(a)
4963 : /* No clobbers */);
4964 return result;
4965}
4966
4967__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4968vcntq_u8 (uint8x16_t a)
4969{
4970 uint8x16_t result;
4971 __asm__ ("cnt %0.16b,%1.16b"
4972 : "=w"(result)
4973 : "w"(a)
4974 : /* No clobbers */);
4975 return result;
4976}
4977
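/* vcls returns, per lane, the number of consecutive bits following the sign
   bit that are equal to it (the sign bit itself is not counted), and vcnt
   returns the population count of each byte.  Illustrative use:

     int8x8_t __lead = vcls_s8 (__v);
     uint8x8_t __ones = vcnt_u8 (__bytes);  */
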
4978#define vcopyq_lane_f32(a, b, c, d) \
4979 __extension__ \
4980 ({ \
4981 float32x4_t c_ = (c); \
4982 float32x4_t a_ = (a); \
4983 float32x4_t result; \
4984 __asm__ ("ins %0.s[%2], %3.s[%4]" \
4985 : "=w"(result) \
4986 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
4987 : /* No clobbers */); \
4988 result; \
4989 })
4990
4991#define vcopyq_lane_f64(a, b, c, d) \
4992 __extension__ \
4993 ({ \
4994 float64x2_t c_ = (c); \
4995 float64x2_t a_ = (a); \
4996 float64x2_t result; \
4997 __asm__ ("ins %0.d[%2], %3.d[%4]" \
4998 : "=w"(result) \
4999 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5000 : /* No clobbers */); \
5001 result; \
5002 })
5003
5004#define vcopyq_lane_p8(a, b, c, d) \
5005 __extension__ \
5006 ({ \
5007 poly8x16_t c_ = (c); \
5008 poly8x16_t a_ = (a); \
5009 poly8x16_t result; \
5010 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5011 : "=w"(result) \
5012 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5013 : /* No clobbers */); \
5014 result; \
5015 })
5016
5017#define vcopyq_lane_p16(a, b, c, d) \
5018 __extension__ \
5019 ({ \
5020 poly16x8_t c_ = (c); \
5021 poly16x8_t a_ = (a); \
5022 poly16x8_t result; \
5023 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5024 : "=w"(result) \
5025 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5026 : /* No clobbers */); \
5027 result; \
5028 })
5029
5030#define vcopyq_lane_s8(a, b, c, d) \
5031 __extension__ \
5032 ({ \
5033 int8x16_t c_ = (c); \
5034 int8x16_t a_ = (a); \
5035 int8x16_t result; \
5036 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5037 : "=w"(result) \
5038 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5039 : /* No clobbers */); \
5040 result; \
5041 })
5042
5043#define vcopyq_lane_s16(a, b, c, d) \
5044 __extension__ \
5045 ({ \
5046 int16x8_t c_ = (c); \
5047 int16x8_t a_ = (a); \
5048 int16x8_t result; \
5049 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5050 : "=w"(result) \
5051 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5052 : /* No clobbers */); \
5053 result; \
5054 })
5055
5056#define vcopyq_lane_s32(a, b, c, d) \
5057 __extension__ \
5058 ({ \
5059 int32x4_t c_ = (c); \
5060 int32x4_t a_ = (a); \
5061 int32x4_t result; \
5062 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5063 : "=w"(result) \
5064 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5065 : /* No clobbers */); \
5066 result; \
5067 })
5068
5069#define vcopyq_lane_s64(a, b, c, d) \
5070 __extension__ \
5071 ({ \
5072 int64x2_t c_ = (c); \
5073 int64x2_t a_ = (a); \
5074 int64x2_t result; \
5075 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5076 : "=w"(result) \
5077 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5078 : /* No clobbers */); \
5079 result; \
5080 })
5081
5082#define vcopyq_lane_u8(a, b, c, d) \
5083 __extension__ \
5084 ({ \
5085 uint8x16_t c_ = (c); \
5086 uint8x16_t a_ = (a); \
5087 uint8x16_t result; \
5088 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5089 : "=w"(result) \
5090 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5091 : /* No clobbers */); \
5092 result; \
5093 })
5094
5095#define vcopyq_lane_u16(a, b, c, d) \
5096 __extension__ \
5097 ({ \
5098 uint16x8_t c_ = (c); \
5099 uint16x8_t a_ = (a); \
5100 uint16x8_t result; \
5101 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5102 : "=w"(result) \
5103 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5104 : /* No clobbers */); \
5105 result; \
5106 })
5107
5108#define vcopyq_lane_u32(a, b, c, d) \
5109 __extension__ \
5110 ({ \
5111 uint32x4_t c_ = (c); \
5112 uint32x4_t a_ = (a); \
5113 uint32x4_t result; \
5114 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5115 : "=w"(result) \
5116 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5117 : /* No clobbers */); \
5118 result; \
5119 })
5120
5121#define vcopyq_lane_u64(a, b, c, d) \
5122 __extension__ \
5123 ({ \
5124 uint64x2_t c_ = (c); \
5125 uint64x2_t a_ = (a); \
5126 uint64x2_t result; \
5127 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5128 : "=w"(result) \
5129 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5130 : /* No clobbers */); \
5131 result; \
5132 })
5133
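/* vcopyq_lane_<type> (a, b, c, d) inserts lane d of vector c into lane b of
   vector a and leaves the remaining lanes of a unchanged.  These are macros
   rather than functions because the INS instruction encodes both lane
   numbers as immediates, so b and d must be compile-time constants, e.g.:

     __r = vcopyq_lane_s32 (__r, 3, __src, 0);

   copies __src lane 0 into __r lane 3.  */
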
5134/* vcvt_f16_f32 not supported */
5135
5136/* vcvt_f32_f16 not supported */
5137
5138/* vcvt_high_f16_f32 not supported */
5139
5140/* vcvt_high_f32_f16 not supported */
5141
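/* Forward declaration; the definition of vdup_n_f32 appears later in this
   file.  */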
5142static float32x2_t vdup_n_f32 (float32_t);
5143
5144#define vcvt_n_f32_s32(a, b) \
5145 __extension__ \
5146 ({ \
5147 int32x2_t a_ = (a); \
5148 float32x2_t result; \
5149 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5150 : "=w"(result) \
5151 : "w"(a_), "i"(b) \
5152 : /* No clobbers */); \
5153 result; \
5154 })
5155
5156#define vcvt_n_f32_u32(a, b) \
5157 __extension__ \
5158 ({ \
5159 uint32x2_t a_ = (a); \
5160 float32x2_t result; \
5161 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5162 : "=w"(result) \
5163 : "w"(a_), "i"(b) \
5164 : /* No clobbers */); \
5165 result; \
5166 })
5167
5168#define vcvt_n_s32_f32(a, b) \
5169 __extension__ \
5170 ({ \
5171 float32x2_t a_ = (a); \
5172 int32x2_t result; \
5173 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
5174 : "=w"(result) \
5175 : "w"(a_), "i"(b) \
5176 : /* No clobbers */); \
5177 result; \
5178 })
5179
5180#define vcvt_n_u32_f32(a, b) \
5181 __extension__ \
5182 ({ \
5183 float32x2_t a_ = (a); \
5184 uint32x2_t result; \
5185 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
5186 : "=w"(result) \
5187 : "w"(a_), "i"(b) \
5188 : /* No clobbers */); \
5189 result; \
5190 })
5191
5192#define vcvtd_n_f64_s64(a, b) \
5193 __extension__ \
5194 ({ \
5195 int64_t a_ = (a); \
5196 float64_t result; \
5197 __asm__ ("scvtf %d0,%d1,%2" \
5198 : "=w"(result) \
5199 : "w"(a_), "i"(b) \
5200 : /* No clobbers */); \
5201 result; \
5202 })
5203
5204#define vcvtd_n_f64_u64(a, b) \
5205 __extension__ \
5206 ({ \
5207 uint64_t a_ = (a); \
5208 float64_t result; \
5209 __asm__ ("ucvtf %d0,%d1,%2" \
5210 : "=w"(result) \
5211 : "w"(a_), "i"(b) \
5212 : /* No clobbers */); \
5213 result; \
5214 })
5215
5216#define vcvtd_n_s64_f64(a, b) \
5217 __extension__ \
5218 ({ \
5219 float64_t a_ = (a); \
5220 int64_t result; \
5221 __asm__ ("fcvtzs %d0,%d1,%2" \
5222 : "=w"(result) \
5223 : "w"(a_), "i"(b) \
5224 : /* No clobbers */); \
5225 result; \
5226 })
5227
5228#define vcvtd_n_u64_f64(a, b) \
5229 __extension__ \
5230 ({ \
5231 float64_t a_ = (a); \
5232 uint64_t result; \
5233 __asm__ ("fcvtzu %d0,%d1,%2" \
5234 : "=w"(result) \
5235 : "w"(a_), "i"(b) \
5236 : /* No clobbers */); \
5237 result; \
5238 })
5239
5240#define vcvtq_n_f32_s32(a, b) \
5241 __extension__ \
5242 ({ \
5243 int32x4_t a_ = (a); \
5244 float32x4_t result; \
5245 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
5246 : "=w"(result) \
5247 : "w"(a_), "i"(b) \
5248 : /* No clobbers */); \
5249 result; \
5250 })
5251
5252#define vcvtq_n_f32_u32(a, b) \
5253 __extension__ \
5254 ({ \
5255 uint32x4_t a_ = (a); \
5256 float32x4_t result; \
5257 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
5258 : "=w"(result) \
5259 : "w"(a_), "i"(b) \
5260 : /* No clobbers */); \
5261 result; \
5262 })
5263
5264#define vcvtq_n_f64_s64(a, b) \
5265 __extension__ \
5266 ({ \
5267 int64x2_t a_ = (a); \
5268 float64x2_t result; \
5269 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
5270 : "=w"(result) \
5271 : "w"(a_), "i"(b) \
5272 : /* No clobbers */); \
5273 result; \
5274 })
5275
5276#define vcvtq_n_f64_u64(a, b) \
5277 __extension__ \
5278 ({ \
5279 uint64x2_t a_ = (a); \
5280 float64x2_t result; \
5281 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
5282 : "=w"(result) \
5283 : "w"(a_), "i"(b) \
5284 : /* No clobbers */); \
5285 result; \
5286 })
5287
5288#define vcvtq_n_s32_f32(a, b) \
5289 __extension__ \
5290 ({ \
5291 float32x4_t a_ = (a); \
5292 int32x4_t result; \
5293 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
5294 : "=w"(result) \
5295 : "w"(a_), "i"(b) \
5296 : /* No clobbers */); \
5297 result; \
5298 })
5299
5300#define vcvtq_n_s64_f64(a, b) \
5301 __extension__ \
5302 ({ \
5303 float64x2_t a_ = (a); \
5304 int64x2_t result; \
5305 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
5306 : "=w"(result) \
5307 : "w"(a_), "i"(b) \
5308 : /* No clobbers */); \
5309 result; \
5310 })
5311
5312#define vcvtq_n_u32_f32(a, b) \
5313 __extension__ \
5314 ({ \
5315 float32x4_t a_ = (a); \
5316 uint32x4_t result; \
5317 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
5318 : "=w"(result) \
5319 : "w"(a_), "i"(b) \
5320 : /* No clobbers */); \
5321 result; \
5322 })
5323
5324#define vcvtq_n_u64_f64(a, b) \
5325 __extension__ \
5326 ({ \
5327 float64x2_t a_ = (a); \
5328 uint64x2_t result; \
5329 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
5330 : "=w"(result) \
5331 : "w"(a_), "i"(b) \
5332 : /* No clobbers */); \
5333 result; \
5334 })
5335
5336#define vcvts_n_f32_s32(a, b) \
5337 __extension__ \
5338 ({ \
5339 int32_t a_ = (a); \
5340 float32_t result; \
5341 __asm__ ("scvtf %s0,%s1,%2" \
5342 : "=w"(result) \
5343 : "w"(a_), "i"(b) \
5344 : /* No clobbers */); \
5345 result; \
5346 })
5347
5348#define vcvts_n_f32_u32(a, b) \
5349 __extension__ \
5350 ({ \
5351 uint32_t a_ = (a); \
5352 float32_t result; \
5353 __asm__ ("ucvtf %s0,%s1,%2" \
5354 : "=w"(result) \
5355 : "w"(a_), "i"(b) \
5356 : /* No clobbers */); \
5357 result; \
5358 })
5359
5360#define vcvts_n_s32_f32(a, b) \
5361 __extension__ \
5362 ({ \
5363 float32_t a_ = (a); \
5364 int32_t result; \
5365 __asm__ ("fcvtzs %s0,%s1,%2" \
5366 : "=w"(result) \
5367 : "w"(a_), "i"(b) \
5368 : /* No clobbers */); \
5369 result; \
5370 })
5371
5372#define vcvts_n_u32_f32(a, b) \
5373 __extension__ \
5374 ({ \
5375 float32_t a_ = (a); \
5376 uint32_t result; \
5377 __asm__ ("fcvtzu %s0,%s1,%2" \
5378 : "=w"(result) \
5379 : "w"(a_), "i"(b) \
5380 : /* No clobbers */); \
5381 result; \
5382 })
5383
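/* The vcvt*_n_* macros convert between floating point and fixed-point
   values with b fraction bits; b must be a compile-time constant because it
   is encoded as an immediate in the scvtf/ucvtf/fcvtzs/fcvtzu instruction.
   Illustrative Q16.16 round trip:

     int32x4_t __fx = vcvtq_n_s32_f32 (__f, 16);
     float32x4_t __fl = vcvtq_n_f32_s32 (__fx, 16);  */
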
5384__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5385vcvtx_f32_f64 (float64x2_t a)
5386{
5387 float32x2_t result;
5388 __asm__ ("fcvtxn %0.2s,%1.2d"
5389 : "=w"(result)
5390 : "w"(a)
5391 : /* No clobbers */);
5392 return result;
5393}
5394
5395__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5396vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5397{
5398 float32x4_t result;
5399 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5400 : "=w"(result)
5401 : "w" (b), "0"(a)
5402 : /* No clobbers */);
5403 return result;
5404}
5405
5406__extension__ static __inline float32_t __attribute__ ((__always_inline__))
5407vcvtxd_f32_f64 (float64_t a)
5408{
5409 float32_t result;
5410 __asm__ ("fcvtxn %s0,%d1"
5411 : "=w"(result)
5412 : "w"(a)
5413 : /* No clobbers */);
5414 return result;
5415}
5416
5417#define vext_f32(a, b, c) \
5418 __extension__ \
5419 ({ \
5420 float32x2_t b_ = (b); \
5421 float32x2_t a_ = (a); \
5422 float32x2_t result; \
5423 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5424 : "=w"(result) \
5425 : "w"(a_), "w"(b_), "i"(c) \
5426 : /* No clobbers */); \
5427 result; \
5428 })
5429
5430#define vext_f64(a, b, c) \
5431 __extension__ \
5432 ({ \
5433 float64x1_t b_ = (b); \
5434 float64x1_t a_ = (a); \
5435 float64x1_t result; \
5436 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5437 : "=w"(result) \
5438 : "w"(a_), "w"(b_), "i"(c) \
5439 : /* No clobbers */); \
5440 result; \
5441 })
5442
5443#define vext_p8(a, b, c) \
5444 __extension__ \
5445 ({ \
5446 poly8x8_t b_ = (b); \
5447 poly8x8_t a_ = (a); \
5448 poly8x8_t result; \
5449 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5450 : "=w"(result) \
5451 : "w"(a_), "w"(b_), "i"(c) \
5452 : /* No clobbers */); \
5453 result; \
5454 })
5455
5456#define vext_p16(a, b, c) \
5457 __extension__ \
5458 ({ \
5459 poly16x4_t b_ = (b); \
5460 poly16x4_t a_ = (a); \
5461 poly16x4_t result; \
5462 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5463 : "=w"(result) \
5464 : "w"(a_), "w"(b_), "i"(c) \
5465 : /* No clobbers */); \
5466 result; \
5467 })
5468
5469#define vext_s8(a, b, c) \
5470 __extension__ \
5471 ({ \
5472 int8x8_t b_ = (b); \
5473 int8x8_t a_ = (a); \
5474 int8x8_t result; \
5475 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5476 : "=w"(result) \
5477 : "w"(a_), "w"(b_), "i"(c) \
5478 : /* No clobbers */); \
5479 result; \
5480 })
5481
5482#define vext_s16(a, b, c) \
5483 __extension__ \
5484 ({ \
5485 int16x4_t b_ = (b); \
5486 int16x4_t a_ = (a); \
5487 int16x4_t result; \
5488 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5489 : "=w"(result) \
5490 : "w"(a_), "w"(b_), "i"(c) \
5491 : /* No clobbers */); \
5492 result; \
5493 })
5494
5495#define vext_s32(a, b, c) \
5496 __extension__ \
5497 ({ \
5498 int32x2_t b_ = (b); \
5499 int32x2_t a_ = (a); \
5500 int32x2_t result; \
5501 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5502 : "=w"(result) \
5503 : "w"(a_), "w"(b_), "i"(c) \
5504 : /* No clobbers */); \
5505 result; \
5506 })
5507
5508#define vext_s64(a, b, c) \
5509 __extension__ \
5510 ({ \
5511 int64x1_t b_ = (b); \
5512 int64x1_t a_ = (a); \
5513 int64x1_t result; \
5514 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5515 : "=w"(result) \
5516 : "w"(a_), "w"(b_), "i"(c) \
5517 : /* No clobbers */); \
5518 result; \
5519 })
5520
5521#define vext_u8(a, b, c) \
5522 __extension__ \
5523 ({ \
5524 uint8x8_t b_ = (b); \
5525 uint8x8_t a_ = (a); \
5526 uint8x8_t result; \
5527 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
5528 : "=w"(result) \
5529 : "w"(a_), "w"(b_), "i"(c) \
5530 : /* No clobbers */); \
5531 result; \
5532 })
5533
5534#define vext_u16(a, b, c) \
5535 __extension__ \
5536 ({ \
5537 uint16x4_t b_ = (b); \
5538 uint16x4_t a_ = (a); \
5539 uint16x4_t result; \
5540 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
5541 : "=w"(result) \
5542 : "w"(a_), "w"(b_), "i"(c) \
5543 : /* No clobbers */); \
5544 result; \
5545 })
5546
5547#define vext_u32(a, b, c) \
5548 __extension__ \
5549 ({ \
5550 uint32x2_t b_ = (b); \
5551 uint32x2_t a_ = (a); \
5552 uint32x2_t result; \
5553 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
5554 : "=w"(result) \
5555 : "w"(a_), "w"(b_), "i"(c) \
5556 : /* No clobbers */); \
5557 result; \
5558 })
5559
5560#define vext_u64(a, b, c) \
5561 __extension__ \
5562 ({ \
5563 uint64x1_t b_ = (b); \
5564 uint64x1_t a_ = (a); \
5565 uint64x1_t result; \
5566 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
5567 : "=w"(result) \
5568 : "w"(a_), "w"(b_), "i"(c) \
5569 : /* No clobbers */); \
5570 result; \
5571 })
5572
5573#define vextq_f32(a, b, c) \
5574 __extension__ \
5575 ({ \
5576 float32x4_t b_ = (b); \
5577 float32x4_t a_ = (a); \
5578 float32x4_t result; \
5579 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5580 : "=w"(result) \
5581 : "w"(a_), "w"(b_), "i"(c) \
5582 : /* No clobbers */); \
5583 result; \
5584 })
5585
5586#define vextq_f64(a, b, c) \
5587 __extension__ \
5588 ({ \
5589 float64x2_t b_ = (b); \
5590 float64x2_t a_ = (a); \
5591 float64x2_t result; \
5592 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5593 : "=w"(result) \
5594 : "w"(a_), "w"(b_), "i"(c) \
5595 : /* No clobbers */); \
5596 result; \
5597 })
5598
5599#define vextq_p8(a, b, c) \
5600 __extension__ \
5601 ({ \
5602 poly8x16_t b_ = (b); \
5603 poly8x16_t a_ = (a); \
5604 poly8x16_t result; \
5605 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5606 : "=w"(result) \
5607 : "w"(a_), "w"(b_), "i"(c) \
5608 : /* No clobbers */); \
5609 result; \
5610 })
5611
5612#define vextq_p16(a, b, c) \
5613 __extension__ \
5614 ({ \
5615 poly16x8_t b_ = (b); \
5616 poly16x8_t a_ = (a); \
5617 poly16x8_t result; \
5618 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5619 : "=w"(result) \
5620 : "w"(a_), "w"(b_), "i"(c) \
5621 : /* No clobbers */); \
5622 result; \
5623 })
5624
5625#define vextq_s8(a, b, c) \
5626 __extension__ \
5627 ({ \
5628 int8x16_t b_ = (b); \
5629 int8x16_t a_ = (a); \
5630 int8x16_t result; \
5631 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5632 : "=w"(result) \
5633 : "w"(a_), "w"(b_), "i"(c) \
5634 : /* No clobbers */); \
5635 result; \
5636 })
5637
5638#define vextq_s16(a, b, c) \
5639 __extension__ \
5640 ({ \
5641 int16x8_t b_ = (b); \
5642 int16x8_t a_ = (a); \
5643 int16x8_t result; \
5644 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5645 : "=w"(result) \
5646 : "w"(a_), "w"(b_), "i"(c) \
5647 : /* No clobbers */); \
5648 result; \
5649 })
5650
5651#define vextq_s32(a, b, c) \
5652 __extension__ \
5653 ({ \
5654 int32x4_t b_ = (b); \
5655 int32x4_t a_ = (a); \
5656 int32x4_t result; \
5657 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5658 : "=w"(result) \
5659 : "w"(a_), "w"(b_), "i"(c) \
5660 : /* No clobbers */); \
5661 result; \
5662 })
5663
5664#define vextq_s64(a, b, c) \
5665 __extension__ \
5666 ({ \
5667 int64x2_t b_ = (b); \
5668 int64x2_t a_ = (a); \
5669 int64x2_t result; \
5670 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5671 : "=w"(result) \
5672 : "w"(a_), "w"(b_), "i"(c) \
5673 : /* No clobbers */); \
5674 result; \
5675 })
5676
5677#define vextq_u8(a, b, c) \
5678 __extension__ \
5679 ({ \
5680 uint8x16_t b_ = (b); \
5681 uint8x16_t a_ = (a); \
5682 uint8x16_t result; \
5683 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
5684 : "=w"(result) \
5685 : "w"(a_), "w"(b_), "i"(c) \
5686 : /* No clobbers */); \
5687 result; \
5688 })
5689
5690#define vextq_u16(a, b, c) \
5691 __extension__ \
5692 ({ \
5693 uint16x8_t b_ = (b); \
5694 uint16x8_t a_ = (a); \
5695 uint16x8_t result; \
5696 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
5697 : "=w"(result) \
5698 : "w"(a_), "w"(b_), "i"(c) \
5699 : /* No clobbers */); \
5700 result; \
5701 })
5702
5703#define vextq_u32(a, b, c) \
5704 __extension__ \
5705 ({ \
5706 uint32x4_t b_ = (b); \
5707 uint32x4_t a_ = (a); \
5708 uint32x4_t result; \
5709 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
5710 : "=w"(result) \
5711 : "w"(a_), "w"(b_), "i"(c) \
5712 : /* No clobbers */); \
5713 result; \
5714 })
5715
5716#define vextq_u64(a, b, c) \
5717 __extension__ \
5718 ({ \
5719 uint64x2_t b_ = (b); \
5720 uint64x2_t a_ = (a); \
5721 uint64x2_t result; \
5722 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
5723 : "=w"(result) \
5724 : "w"(a_), "w"(b_), "i"(c) \
5725 : /* No clobbers */); \
5726 result; \
5727 })
5728
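/* vfma*: fused multiply-accumulate, a + b * c with a single rounding (FMLA).
   The _n forms broadcast a scalar multiplier via lane 0 of an FP register.  */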
5729__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5730vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5731{
5732 float32x2_t result;
5733 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5734 : "=w"(result)
5735 : "0"(a), "w"(b), "w"(c)
5736 : /* No clobbers */);
5737 return result;
5738}
5739
5740__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5741vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5742{
5743 float32x4_t result;
5744 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5745 : "=w"(result)
5746 : "0"(a), "w"(b), "w"(c)
5747 : /* No clobbers */);
5748 return result;
5749}
5750
5751__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5752vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5753{
5754 float64x2_t result;
5755 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5756 : "=w"(result)
5757 : "0"(a), "w"(b), "w"(c)
5758 : /* No clobbers */);
5759 return result;
5760}
5761
5762__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5763vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5764{
5765 float32x2_t result;
5766 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5767 : "=w"(result)
5768 : "0"(a), "w"(b), "w"(c)
5769 : /* No clobbers */);
5770 return result;
5771}
5772
5773__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5774vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5775{
5776 float32x4_t result;
5777 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5778 : "=w"(result)
5779 : "0"(a), "w"(b), "w"(c)
5780 : /* No clobbers */);
5781 return result;
5782}
5783
5784__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5785vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5786{
5787 float64x2_t result;
5788 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5789 : "=w"(result)
5790 : "0"(a), "w"(b), "w"(c)
5791 : /* No clobbers */);
5792 return result;
5793}
5794
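/* vfms*: fused multiply-subtract, a - b * c (FMLS).  */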
5795__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5796vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5797{
5798 float32x2_t result;
5799 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5800 : "=w"(result)
5801 : "0"(a), "w"(b), "w"(c)
5802 : /* No clobbers */);
5803 return result;
5804}
5805
5806__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5807vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5808{
5809 float32x4_t result;
5810 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5811 : "=w"(result)
5812 : "0"(a), "w"(b), "w"(c)
5813 : /* No clobbers */);
5814 return result;
5815}
5816
5817__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5818vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5819{
5820 float64x2_t result;
5821 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5822 : "=w"(result)
5823 : "0"(a), "w"(b), "w"(c)
5824 : /* No clobbers */);
5825 return result;
5826}
5827
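/* vget_high_*: extract the upper 64-bit half of a 128-bit vector by moving
   element d[1] of the source into d[0] of the result (INS).  */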
5828__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5829vget_high_f32 (float32x4_t a)
5830{
5831 float32x2_t result;
5832 __asm__ ("ins %0.d[0], %1.d[1]"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5837}
5838
5839__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5840vget_high_f64 (float64x2_t a)
5841{
5842 float64x1_t result;
5843 __asm__ ("ins %0.d[0], %1.d[1]"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5848}
5849
5850__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5851vget_high_p8 (poly8x16_t a)
5852{
5853 poly8x8_t result;
5854 __asm__ ("ins %0.d[0], %1.d[1]"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5859}
5860
5861__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5862vget_high_p16 (poly16x8_t a)
5863{
5864 poly16x4_t result;
5865 __asm__ ("ins %0.d[0], %1.d[1]"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
5870}
5871
5872__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5873vget_high_s8 (int8x16_t a)
5874{
5875 int8x8_t result;
5876 __asm__ ("ins %0.d[0], %1.d[1]"
5877 : "=w"(result)
5878 : "w"(a)
5879 : /* No clobbers */);
5880 return result;
5881}
5882
5883__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5884vget_high_s16 (int16x8_t a)
5885{
5886 int16x4_t result;
5887 __asm__ ("ins %0.d[0], %1.d[1]"
5888 : "=w"(result)
5889 : "w"(a)
5890 : /* No clobbers */);
5891 return result;
5892}
5893
5894__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5895vget_high_s32 (int32x4_t a)
5896{
5897 int32x2_t result;
5898 __asm__ ("ins %0.d[0], %1.d[1]"
5899 : "=w"(result)
5900 : "w"(a)
5901 : /* No clobbers */);
5902 return result;
5903}
5904
5905__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5906vget_high_s64 (int64x2_t a)
5907{
5908 int64x1_t result;
5909 __asm__ ("ins %0.d[0], %1.d[1]"
5910 : "=w"(result)
5911 : "w"(a)
5912 : /* No clobbers */);
5913 return result;
5914}
5915
5916__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5917vget_high_u8 (uint8x16_t a)
5918{
5919 uint8x8_t result;
5920 __asm__ ("ins %0.d[0], %1.d[1]"
5921 : "=w"(result)
5922 : "w"(a)
5923 : /* No clobbers */);
5924 return result;
5925}
5926
5927__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5928vget_high_u16 (uint16x8_t a)
5929{
5930 uint16x4_t result;
5931 __asm__ ("ins %0.d[0], %1.d[1]"
5932 : "=w"(result)
5933 : "w"(a)
5934 : /* No clobbers */);
5935 return result;
5936}
5937
5938__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5939vget_high_u32 (uint32x4_t a)
5940{
5941 uint32x2_t result;
5942 __asm__ ("ins %0.d[0], %1.d[1]"
5943 : "=w"(result)
5944 : "w"(a)
5945 : /* No clobbers */);
5946 return result;
5947}
5948
5949__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5950vget_high_u64 (uint64x2_t a)
5951{
5952 uint64x1_t result;
5953 __asm__ ("ins %0.d[0], %1.d[1]"
5954 : "=w"(result)
5955 : "w"(a)
5956 : /* No clobbers */);
5957 return result;
5958}
5959
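/* vhsub*: halving subtract, (a - b) >> 1 per element, computed without
   intermediate overflow (SHSUB for signed, UHSUB for unsigned types).  */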
5960__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5961vhsub_s8 (int8x8_t a, int8x8_t b)
5962{
5963 int8x8_t result;
5964 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5965 : "=w"(result)
5966 : "w"(a), "w"(b)
5967 : /* No clobbers */);
5968 return result;
5969}
5970
5971__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5972vhsub_s16 (int16x4_t a, int16x4_t b)
5973{
5974 int16x4_t result;
5975 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5976 : "=w"(result)
5977 : "w"(a), "w"(b)
5978 : /* No clobbers */);
5979 return result;
5980}
5981
5982__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5983vhsub_s32 (int32x2_t a, int32x2_t b)
5984{
5985 int32x2_t result;
5986 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
5987 : "=w"(result)
5988 : "w"(a), "w"(b)
5989 : /* No clobbers */);
5990 return result;
5991}
5992
5993__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5994vhsub_u8 (uint8x8_t a, uint8x8_t b)
5995{
5996 uint8x8_t result;
5997 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
5998 : "=w"(result)
5999 : "w"(a), "w"(b)
6000 : /* No clobbers */);
6001 return result;
6002}
6003
6004__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6005vhsub_u16 (uint16x4_t a, uint16x4_t b)
6006{
6007 uint16x4_t result;
6008 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6009 : "=w"(result)
6010 : "w"(a), "w"(b)
6011 : /* No clobbers */);
6012 return result;
6013}
6014
6015__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6016vhsub_u32 (uint32x2_t a, uint32x2_t b)
6017{
6018 uint32x2_t result;
6019 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6020 : "=w"(result)
6021 : "w"(a), "w"(b)
6022 : /* No clobbers */);
6023 return result;
6024}
6025
6026__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6027vhsubq_s8 (int8x16_t a, int8x16_t b)
6028{
6029 int8x16_t result;
6030 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6031 : "=w"(result)
6032 : "w"(a), "w"(b)
6033 : /* No clobbers */);
6034 return result;
6035}
6036
6037__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6038vhsubq_s16 (int16x8_t a, int16x8_t b)
6039{
6040 int16x8_t result;
6041 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6042 : "=w"(result)
6043 : "w"(a), "w"(b)
6044 : /* No clobbers */);
6045 return result;
6046}
6047
6048__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6049vhsubq_s32 (int32x4_t a, int32x4_t b)
6050{
6051 int32x4_t result;
6052 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6053 : "=w"(result)
6054 : "w"(a), "w"(b)
6055 : /* No clobbers */);
6056 return result;
6057}
6058
6059__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6060vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6061{
6062 uint8x16_t result;
6063 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6064 : "=w"(result)
6065 : "w"(a), "w"(b)
6066 : /* No clobbers */);
6067 return result;
6068}
6069
6070__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6071vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6072{
6073 uint16x8_t result;
6074 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6075 : "=w"(result)
6076 : "w"(a), "w"(b)
6077 : /* No clobbers */);
6078 return result;
6079}
6080
6081__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6082vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6083{
6084 uint32x4_t result;
6085 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6086 : "=w"(result)
6087 : "w"(a), "w"(b)
6088 : /* No clobbers */);
6089 return result;
6090}
6091
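/* vld1_dup_*: load a single element and replicate it into every lane of a
   64-bit vector (LD1R).  */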
6092__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6093vld1_dup_f32 (const float32_t * a)
6094{
6095 float32x2_t result;
6096 __asm__ ("ld1r {%0.2s}, %1"
6097 : "=w"(result)
6098 : "Utv"(*a)
6099 : /* No clobbers */);
6100 return result;
6101}
6102
6103__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6104vld1_dup_f64 (const float64_t * a)
6105{
6106 float64x1_t result;
6107 __asm__ ("ld1r {%0.1d}, %1"
6108 : "=w"(result)
6109 : "Utv"(*a)
6110 : /* No clobbers */);
6111 return result;
6112}
6113
6114__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6115vld1_dup_p8 (const poly8_t * a)
6116{
6117 poly8x8_t result;
6118 __asm__ ("ld1r {%0.8b}, %1"
6119 : "=w"(result)
6120 : "Utv"(*a)
6121 : /* No clobbers */);
6122 return result;
6123}
6124
6125__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6126vld1_dup_p16 (const poly16_t * a)
6127{
6128 poly16x4_t result;
6129 __asm__ ("ld1r {%0.4h}, %1"
6130 : "=w"(result)
6131 : "Utv"(*a)
6132 : /* No clobbers */);
6133 return result;
6134}
6135
6136__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6137vld1_dup_s8 (const int8_t * a)
6138{
6139 int8x8_t result;
6140 __asm__ ("ld1r {%0.8b}, %1"
6141 : "=w"(result)
6142 : "Utv"(*a)
6143 : /* No clobbers */);
6144 return result;
6145}
6146
6147__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6148vld1_dup_s16 (const int16_t * a)
6149{
6150 int16x4_t result;
6151 __asm__ ("ld1r {%0.4h}, %1"
6152 : "=w"(result)
6153 : "Utv"(*a)
6154 : /* No clobbers */);
6155 return result;
6156}
6157
6158__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6159vld1_dup_s32 (const int32_t * a)
6160{
6161 int32x2_t result;
6162 __asm__ ("ld1r {%0.2s}, %1"
6163 : "=w"(result)
6164 : "Utv"(*a)
6165 : /* No clobbers */);
6166 return result;
6167}
6168
6169__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6170vld1_dup_s64 (const int64_t * a)
6171{
6172 int64x1_t result;
6173 __asm__ ("ld1r {%0.1d}, %1"
6174 : "=w"(result)
6175 : "Utv"(*a)
6176 : /* No clobbers */);
6177 return result;
6178}
6179
6180__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6181vld1_dup_u8 (const uint8_t * a)
6182{
6183 uint8x8_t result;
6184 __asm__ ("ld1r {%0.8b}, %1"
6185 : "=w"(result)
6186 : "Utv"(*a)
6187 : /* No clobbers */);
6188 return result;
6189}
6190
6191__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6192vld1_dup_u16 (const uint16_t * a)
6193{
6194 uint16x4_t result;
6195 __asm__ ("ld1r {%0.4h}, %1"
6196 : "=w"(result)
6197 : "Utv"(*a)
6198 : /* No clobbers */);
6199 return result;
6200}
6201
6202__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6203vld1_dup_u32 (const uint32_t * a)
6204{
6205 uint32x2_t result;
6206 __asm__ ("ld1r {%0.2s}, %1"
6207 : "=w"(result)
6208 : "Utv"(*a)
6209 : /* No clobbers */);
6210 return result;
6211}
6212
6213__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6214vld1_dup_u64 (const uint64_t * a)
6215{
6216 uint64x1_t result;
6217 __asm__ ("ld1r {%0.1d}, %1"
6218 : "=w"(result)
6219 : "Utv"(*a)
6220 : /* No clobbers */);
6221 return result;
6222}
6223
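/* vld1_lane_*: load one element from memory into lane c of vector b, leaving
   the remaining lanes unchanged.  */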
6224#define vld1_lane_f32(a, b, c) \
6225 __extension__ \
6226 ({ \
6227 float32x2_t b_ = (b); \
6228 const float32_t * a_ = (a); \
6229 float32x2_t result; \
6230 __asm__ ("ld1 {%0.s}[%1], %2" \
6231 : "=w"(result) \
6232 : "i" (c), "Utv"(*a_), "0"(b_) \
6233 : /* No clobbers */); \
6234 result; \
6235 })
6236
6237#define vld1_lane_f64(a, b, c) \
6238 __extension__ \
6239 ({ \
6240 float64x1_t b_ = (b); \
6241 const float64_t * a_ = (a); \
6242 float64x1_t result; \
6243 __asm__ ("ld1 {%0.d}[%1], %2" \
6244 : "=w"(result) \
6245 : "i" (c), "Utv"(*a_), "0"(b_) \
6246 : /* No clobbers */); \
6247 result; \
6248 })
6249
6250#define vld1_lane_p8(a, b, c) \
6251 __extension__ \
6252 ({ \
6253 poly8x8_t b_ = (b); \
6254 const poly8_t * a_ = (a); \
6255 poly8x8_t result; \
6256 __asm__ ("ld1 {%0.b}[%1], %2" \
6257 : "=w"(result) \
6258 : "i" (c), "Utv"(*a_), "0"(b_) \
6259 : /* No clobbers */); \
6260 result; \
6261 })
6262
6263#define vld1_lane_p16(a, b, c) \
6264 __extension__ \
6265 ({ \
6266 poly16x4_t b_ = (b); \
6267 const poly16_t * a_ = (a); \
6268 poly16x4_t result; \
6269 __asm__ ("ld1 {%0.h}[%1], %2" \
6270 : "=w"(result) \
6271 : "i" (c), "Utv"(*a_), "0"(b_) \
6272 : /* No clobbers */); \
6273 result; \
6274 })
6275
6276#define vld1_lane_s8(a, b, c) \
6277 __extension__ \
6278 ({ \
6279 int8x8_t b_ = (b); \
6280 const int8_t * a_ = (a); \
6281 int8x8_t result; \
6282 __asm__ ("ld1 {%0.b}[%1], %2" \
6283 : "=w"(result) \
6284 : "i" (c), "Utv"(*a_), "0"(b_) \
6285 : /* No clobbers */); \
6286 result; \
6287 })
6288
6289#define vld1_lane_s16(a, b, c) \
6290 __extension__ \
6291 ({ \
6292 int16x4_t b_ = (b); \
6293 const int16_t * a_ = (a); \
6294 int16x4_t result; \
6295 __asm__ ("ld1 {%0.h}[%1], %2" \
6296 : "=w"(result) \
6297 : "i" (c), "Utv"(*a_), "0"(b_) \
6298 : /* No clobbers */); \
6299 result; \
6300 })
6301
6302#define vld1_lane_s32(a, b, c) \
6303 __extension__ \
6304 ({ \
6305 int32x2_t b_ = (b); \
6306 const int32_t * a_ = (a); \
6307 int32x2_t result; \
6308 __asm__ ("ld1 {%0.s}[%1], %2" \
6309 : "=w"(result) \
6310 : "i" (c), "Utv"(*a_), "0"(b_) \
6311 : /* No clobbers */); \
6312 result; \
6313 })
6314
6315#define vld1_lane_s64(a, b, c) \
6316 __extension__ \
6317 ({ \
6318 int64x1_t b_ = (b); \
6319 const int64_t * a_ = (a); \
6320 int64x1_t result; \
6321 __asm__ ("ld1 {%0.d}[%1], %2" \
6322 : "=w"(result) \
6323 : "i" (c), "Utv"(*a_), "0"(b_) \
6324 : /* No clobbers */); \
6325 result; \
6326 })
6327
6328#define vld1_lane_u8(a, b, c) \
6329 __extension__ \
6330 ({ \
6331 uint8x8_t b_ = (b); \
6332 const uint8_t * a_ = (a); \
6333 uint8x8_t result; \
6334 __asm__ ("ld1 {%0.b}[%1], %2" \
6335 : "=w"(result) \
6336 : "i" (c), "Utv"(*a_), "0"(b_) \
6337 : /* No clobbers */); \
6338 result; \
6339 })
6340
6341#define vld1_lane_u16(a, b, c) \
6342 __extension__ \
6343 ({ \
6344 uint16x4_t b_ = (b); \
6345 const uint16_t * a_ = (a); \
6346 uint16x4_t result; \
6347 __asm__ ("ld1 {%0.h}[%1], %2" \
6348 : "=w"(result) \
6349 : "i" (c), "Utv"(*a_), "0"(b_) \
6350 : /* No clobbers */); \
6351 result; \
6352 })
6353
6354#define vld1_lane_u32(a, b, c) \
6355 __extension__ \
6356 ({ \
6357 uint32x2_t b_ = (b); \
6358 const uint32_t * a_ = (a); \
6359 uint32x2_t result; \
6360 __asm__ ("ld1 {%0.s}[%1], %2" \
6361 : "=w"(result) \
6362 : "i" (c), "Utv"(*a_), "0"(b_) \
6363 : /* No clobbers */); \
6364 result; \
6365 })
6366
6367#define vld1_lane_u64(a, b, c) \
6368 __extension__ \
6369 ({ \
6370 uint64x1_t b_ = (b); \
6371 const uint64_t * a_ = (a); \
6372 uint64x1_t result; \
6373 __asm__ ("ld1 {%0.d}[%1], %2" \
6374 : "=w"(result) \
6375 : "i" (c), "Utv"(*a_), "0"(b_) \
6376 : /* No clobbers */); \
6377 result; \
6378 })
6379
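/* vld1q_dup_*: 128-bit variants of the replicating single-element load.  */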
6380__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6381vld1q_dup_f32 (const float32_t * a)
6382{
6383 float32x4_t result;
6384 __asm__ ("ld1r {%0.4s}, %1"
6385 : "=w"(result)
6386 : "Utv"(*a)
6387 : /* No clobbers */);
6388 return result;
6389}
6390
6391__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6392vld1q_dup_f64 (const float64_t * a)
6393{
6394 float64x2_t result;
6395 __asm__ ("ld1r {%0.2d}, %1"
6396 : "=w"(result)
6397 : "Utv"(*a)
6398 : /* No clobbers */);
6399 return result;
6400}
6401
6402__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6403vld1q_dup_p8 (const poly8_t * a)
6404{
6405 poly8x16_t result;
6406 __asm__ ("ld1r {%0.16b}, %1"
6407 : "=w"(result)
6408 : "Utv"(*a)
6409 : /* No clobbers */);
6410 return result;
6411}
6412
6413__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6414vld1q_dup_p16 (const poly16_t * a)
6415{
6416 poly16x8_t result;
6417 __asm__ ("ld1r {%0.8h}, %1"
6418 : "=w"(result)
6419 : "Utv"(*a)
6420 : /* No clobbers */);
6421 return result;
6422}
6423
6424__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6425vld1q_dup_s8 (const int8_t * a)
6426{
6427 int8x16_t result;
6428 __asm__ ("ld1r {%0.16b}, %1"
6429 : "=w"(result)
6430 : "Utv"(*a)
6431 : /* No clobbers */);
6432 return result;
6433}
6434
6435__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6436vld1q_dup_s16 (const int16_t * a)
6437{
6438 int16x8_t result;
6439 __asm__ ("ld1r {%0.8h}, %1"
6440 : "=w"(result)
6441 : "Utv"(*a)
6442 : /* No clobbers */);
6443 return result;
6444}
6445
6446__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6447vld1q_dup_s32 (const int32_t * a)
6448{
6449 int32x4_t result;
6450 __asm__ ("ld1r {%0.4s}, %1"
6451 : "=w"(result)
6452 : "Utv"(*a)
6453 : /* No clobbers */);
6454 return result;
6455}
6456
6457__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6458vld1q_dup_s64 (const int64_t * a)
6459{
6460 int64x2_t result;
6461 __asm__ ("ld1r {%0.2d}, %1"
6462 : "=w"(result)
6463 : "Utv"(*a)
6464 : /* No clobbers */);
6465 return result;
6466}
6467
6468__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6469vld1q_dup_u8 (const uint8_t * a)
6470{
6471 uint8x16_t result;
6472 __asm__ ("ld1r {%0.16b}, %1"
6473 : "=w"(result)
6474 : "Utv"(*a)
6475 : /* No clobbers */);
6476 return result;
6477}
6478
6479__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6480vld1q_dup_u16 (const uint16_t * a)
6481{
6482 uint16x8_t result;
6483 __asm__ ("ld1r {%0.8h}, %1"
6484 : "=w"(result)
6485 : "Utv"(*a)
6486 : /* No clobbers */);
6487 return result;
6488}
6489
6490__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6491vld1q_dup_u32 (const uint32_t * a)
6492{
6493 uint32x4_t result;
6494 __asm__ ("ld1r {%0.4s}, %1"
6495 : "=w"(result)
6496 : "Utv"(*a)
6497 : /* No clobbers */);
6498 return result;
6499}
6500
6501__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6502vld1q_dup_u64 (const uint64_t * a)
6503{
6504 uint64x2_t result;
6505 __asm__ ("ld1r {%0.2d}, %1"
6506 : "=w"(result)
6507 : "Utv"(*a)
6508 : /* No clobbers */);
6509 return result;
6510}
6511
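/* vld1q_lane_*: 128-bit variants of the single-lane load.  */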
6512#define vld1q_lane_f32(a, b, c) \
6513 __extension__ \
6514 ({ \
6515 float32x4_t b_ = (b); \
6516 const float32_t * a_ = (a); \
6517 float32x4_t result; \
6518 __asm__ ("ld1 {%0.s}[%1], %2" \
6519 : "=w"(result) \
6520 : "i"(c), "Utv"(*a_), "0"(b_) \
6521 : /* No clobbers */); \
6522 result; \
6523 })
6524
6525#define vld1q_lane_f64(a, b, c) \
6526 __extension__ \
6527 ({ \
6528 float64x2_t b_ = (b); \
6529 const float64_t * a_ = (a); \
6530 float64x2_t result; \
6531 __asm__ ("ld1 {%0.d}[%1], %2" \
6532 : "=w"(result) \
6533 : "i"(c), "Utv"(*a_), "0"(b_) \
6534 : /* No clobbers */); \
6535 result; \
6536 })
6537
6538#define vld1q_lane_p8(a, b, c) \
6539 __extension__ \
6540 ({ \
6541 poly8x16_t b_ = (b); \
6542 const poly8_t * a_ = (a); \
6543 poly8x16_t result; \
6544 __asm__ ("ld1 {%0.b}[%1], %2" \
6545 : "=w"(result) \
6546 : "i"(c), "Utv"(*a_), "0"(b_) \
6547 : /* No clobbers */); \
6548 result; \
6549 })
6550
6551#define vld1q_lane_p16(a, b, c) \
6552 __extension__ \
6553 ({ \
6554 poly16x8_t b_ = (b); \
6555 const poly16_t * a_ = (a); \
6556 poly16x8_t result; \
6557 __asm__ ("ld1 {%0.h}[%1], %2" \
6558 : "=w"(result) \
6559 : "i"(c), "Utv"(*a_), "0"(b_) \
6560 : /* No clobbers */); \
6561 result; \
6562 })
6563
6564#define vld1q_lane_s8(a, b, c) \
6565 __extension__ \
6566 ({ \
6567 int8x16_t b_ = (b); \
6568 const int8_t * a_ = (a); \
6569 int8x16_t result; \
6570 __asm__ ("ld1 {%0.b}[%1], %2" \
6571 : "=w"(result) \
6572 : "i"(c), "Utv"(*a_), "0"(b_) \
6573 : /* No clobbers */); \
6574 result; \
6575 })
6576
6577#define vld1q_lane_s16(a, b, c) \
6578 __extension__ \
6579 ({ \
6580 int16x8_t b_ = (b); \
6581 const int16_t * a_ = (a); \
6582 int16x8_t result; \
6583 __asm__ ("ld1 {%0.h}[%1], %2" \
6584 : "=w"(result) \
6585 : "i"(c), "Utv"(*a_), "0"(b_) \
6586 : /* No clobbers */); \
6587 result; \
6588 })
6589
6590#define vld1q_lane_s32(a, b, c) \
6591 __extension__ \
6592 ({ \
6593 int32x4_t b_ = (b); \
6594 const int32_t * a_ = (a); \
6595 int32x4_t result; \
6596 __asm__ ("ld1 {%0.s}[%1], %2" \
6597 : "=w"(result) \
6598 : "i"(c), "Utv"(*a_), "0"(b_) \
6599 : /* No clobbers */); \
6600 result; \
6601 })
6602
6603#define vld1q_lane_s64(a, b, c) \
6604 __extension__ \
6605 ({ \
6606 int64x2_t b_ = (b); \
6607 const int64_t * a_ = (a); \
6608 int64x2_t result; \
6609 __asm__ ("ld1 {%0.d}[%1], %2" \
6610 : "=w"(result) \
6611 : "i"(c), "Utv"(*a_), "0"(b_) \
6612 : /* No clobbers */); \
6613 result; \
6614 })
6615
6616#define vld1q_lane_u8(a, b, c) \
6617 __extension__ \
6618 ({ \
6619 uint8x16_t b_ = (b); \
6620 const uint8_t * a_ = (a); \
6621 uint8x16_t result; \
6622 __asm__ ("ld1 {%0.b}[%1], %2" \
6623 : "=w"(result) \
6624 : "i"(c), "Utv"(*a_), "0"(b_) \
6625 : /* No clobbers */); \
6626 result; \
6627 })
6628
6629#define vld1q_lane_u16(a, b, c) \
6630 __extension__ \
6631 ({ \
6632 uint16x8_t b_ = (b); \
6633 const uint16_t * a_ = (a); \
6634 uint16x8_t result; \
6635 __asm__ ("ld1 {%0.h}[%1], %2" \
6636 : "=w"(result) \
6637 : "i"(c), "Utv"(*a_), "0"(b_) \
6638 : /* No clobbers */); \
6639 result; \
6640 })
6641
6642#define vld1q_lane_u32(a, b, c) \
6643 __extension__ \
6644 ({ \
6645 uint32x4_t b_ = (b); \
6646 const uint32_t * a_ = (a); \
6647 uint32x4_t result; \
6648 __asm__ ("ld1 {%0.s}[%1], %2" \
6649 : "=w"(result) \
6650 : "i"(c), "Utv"(*a_), "0"(b_) \
6651 : /* No clobbers */); \
6652 result; \
6653 })
6654
6655#define vld1q_lane_u64(a, b, c) \
6656 __extension__ \
6657 ({ \
6658 uint64x2_t b_ = (b); \
6659 const uint64_t * a_ = (a); \
6660 uint64x2_t result; \
6661 __asm__ ("ld1 {%0.d}[%1], %2" \
6662 : "=w"(result) \
6663 : "i"(c), "Utv"(*a_), "0"(b_) \
6664 : /* No clobbers */); \
6665 result; \
6666 })
6667
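/* vmla*: (non-fused) multiply-accumulate, a + b * c.  Integer forms map to
   MLA; the floating-point _n forms use a separate FMUL followed by FADD.  */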
6668__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6669vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6670{
6671 float32x2_t result;
6672 float32x2_t t1;
6673 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6674 : "=w"(result), "=w"(t1)
6675 : "0"(a), "w"(b), "w"(c)
6676 : /* No clobbers */);
6677 return result;
6678}
6679
6680__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6681vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6682{
6683 int16x4_t result;
6684 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6685 : "=w"(result)
6686 : "0"(a), "w"(b), "x"(c)
6687 : /* No clobbers */);
6688 return result;
6689}
6690
6691__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6692vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6693{
6694 int32x2_t result;
6695 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6696 : "=w"(result)
6697 : "0"(a), "w"(b), "w"(c)
6698 : /* No clobbers */);
6699 return result;
6700}
6701
6702__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6703vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6704{
6705 uint16x4_t result;
6706 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6707 : "=w"(result)
6708 : "0"(a), "w"(b), "x"(c)
6709 : /* No clobbers */);
6710 return result;
6711}
6712
6713__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6714vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6715{
6716 uint32x2_t result;
6717 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6718 : "=w"(result)
6719 : "0"(a), "w"(b), "w"(c)
6720 : /* No clobbers */);
6721 return result;
6722}
6723
6724__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6725vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6726{
6727 int8x8_t result;
6728 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6729 : "=w"(result)
6730 : "0"(a), "w"(b), "w"(c)
6731 : /* No clobbers */);
6732 return result;
6733}
6734
6735__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6736vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6737{
6738 int16x4_t result;
6739 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6740 : "=w"(result)
6741 : "0"(a), "w"(b), "w"(c)
6742 : /* No clobbers */);
6743 return result;
6744}
6745
6746__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6747vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6748{
6749 int32x2_t result;
6750 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6751 : "=w"(result)
6752 : "0"(a), "w"(b), "w"(c)
6753 : /* No clobbers */);
6754 return result;
6755}
6756
6757__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6758vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6759{
6760 uint8x8_t result;
6761 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6762 : "=w"(result)
6763 : "0"(a), "w"(b), "w"(c)
6764 : /* No clobbers */);
6765 return result;
6766}
6767
6768__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6769vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6770{
6771 uint16x4_t result;
6772 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6773 : "=w"(result)
6774 : "0"(a), "w"(b), "w"(c)
6775 : /* No clobbers */);
6776 return result;
6777}
6778
6779__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6780vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6781{
6782 uint32x2_t result;
6783 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6784 : "=w"(result)
6785 : "0"(a), "w"(b), "w"(c)
6786 : /* No clobbers */);
6787 return result;
6788}
6789
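/* vmlal_high_*: widening multiply-accumulate using the upper halves of the
   128-bit sources (SMLAL2/UMLAL2); the lane forms multiply by a selected
   lane of the third operand.  */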
6790#define vmlal_high_lane_s16(a, b, c, d) \
6791 __extension__ \
6792 ({ \
6793 int16x8_t c_ = (c); \
6794 int16x8_t b_ = (b); \
6795 int32x4_t a_ = (a); \
6796 int32x4_t result; \
6797 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
6798 : "=w"(result) \
6799 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6800 : /* No clobbers */); \
6801 result; \
6802 })
6803
6804#define vmlal_high_lane_s32(a, b, c, d) \
6805 __extension__ \
6806 ({ \
6807 int32x4_t c_ = (c); \
6808 int32x4_t b_ = (b); \
6809 int64x2_t a_ = (a); \
6810 int64x2_t result; \
6811 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6812 : "=w"(result) \
6813 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6814 : /* No clobbers */); \
6815 result; \
6816 })
6817
6818#define vmlal_high_lane_u16(a, b, c, d) \
6819 __extension__ \
6820 ({ \
6821 uint16x8_t c_ = (c); \
6822 uint16x8_t b_ = (b); \
6823 uint32x4_t a_ = (a); \
6824 uint32x4_t result; \
6825 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6826 : "=w"(result) \
6827 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6828 : /* No clobbers */); \
6829 result; \
6830 })
6831
6832#define vmlal_high_lane_u32(a, b, c, d) \
6833 __extension__ \
6834 ({ \
6835 uint32x4_t c_ = (c); \
6836 uint32x4_t b_ = (b); \
6837 uint64x2_t a_ = (a); \
6838 uint64x2_t result; \
6839 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6840 : "=w"(result) \
6841 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6842 : /* No clobbers */); \
6843 result; \
6844 })
6845
6846#define vmlal_high_laneq_s16(a, b, c, d) \
6847 __extension__ \
6848 ({ \
6849 int16x8_t c_ = (c); \
6850 int16x8_t b_ = (b); \
6851 int32x4_t a_ = (a); \
6852 int32x4_t result; \
6853 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
6854 : "=w"(result) \
6855 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6856 : /* No clobbers */); \
6857 result; \
6858 })
6859
6860#define vmlal_high_laneq_s32(a, b, c, d) \
6861 __extension__ \
6862 ({ \
6863 int32x4_t c_ = (c); \
6864 int32x4_t b_ = (b); \
6865 int64x2_t a_ = (a); \
6866 int64x2_t result; \
6867 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6868 : "=w"(result) \
6869 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6870 : /* No clobbers */); \
6871 result; \
6872 })
6873
6874#define vmlal_high_laneq_u16(a, b, c, d) \
6875 __extension__ \
6876 ({ \
6877 uint16x8_t c_ = (c); \
6878 uint16x8_t b_ = (b); \
6879 uint32x4_t a_ = (a); \
6880 uint32x4_t result; \
6881 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6882 : "=w"(result) \
6883 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6884 : /* No clobbers */); \
6885 result; \
6886 })
6887
6888#define vmlal_high_laneq_u32(a, b, c, d) \
6889 __extension__ \
6890 ({ \
6891 uint32x4_t c_ = (c); \
6892 uint32x4_t b_ = (b); \
6893 uint64x2_t a_ = (a); \
6894 uint64x2_t result; \
6895 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6896 : "=w"(result) \
6897 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6898 : /* No clobbers */); \
6899 result; \
6900 })
6901
6902__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6903vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6904{
6905 int32x4_t result;
6906 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6907 : "=w"(result)
6908 : "0"(a), "w"(b), "x"(c)
6909 : /* No clobbers */);
6910 return result;
6911}
6912
6913__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6914vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6915{
6916 int64x2_t result;
6917 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6918 : "=w"(result)
6919 : "0"(a), "w"(b), "w"(c)
6920 : /* No clobbers */);
6921 return result;
6922}
6923
6924__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6925vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6926{
6927 uint32x4_t result;
6928 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6929 : "=w"(result)
6930 : "0"(a), "w"(b), "x"(c)
6931 : /* No clobbers */);
6932 return result;
6933}
6934
6935__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6936vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6937{
6938 uint64x2_t result;
6939 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6940 : "=w"(result)
6941 : "0"(a), "w"(b), "w"(c)
6942 : /* No clobbers */);
6943 return result;
6944}
6945
6946__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6947vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6948{
6949 int16x8_t result;
6950 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6951 : "=w"(result)
6952 : "0"(a), "w"(b), "w"(c)
6953 : /* No clobbers */);
6954 return result;
6955}
6956
6957__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6958vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6959{
6960 int32x4_t result;
6961 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6962 : "=w"(result)
6963 : "0"(a), "w"(b), "w"(c)
6964 : /* No clobbers */);
6965 return result;
6966}
6967
6968__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6969vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6970{
6971 int64x2_t result;
6972 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6973 : "=w"(result)
6974 : "0"(a), "w"(b), "w"(c)
6975 : /* No clobbers */);
6976 return result;
6977}
6978
6979__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6980vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6981{
6982 uint16x8_t result;
6983 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6984 : "=w"(result)
6985 : "0"(a), "w"(b), "w"(c)
6986 : /* No clobbers */);
6987 return result;
6988}
6989
6990__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6991vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6992{
6993 uint32x4_t result;
6994 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6995 : "=w"(result)
6996 : "0"(a), "w"(b), "w"(c)
6997 : /* No clobbers */);
6998 return result;
6999}
7000
7001__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7002vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7003{
7004 uint64x2_t result;
7005 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
7006 : "=w"(result)
7007 : "0"(a), "w"(b), "w"(c)
7008 : /* No clobbers */);
7009 return result;
7010}
7011
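/* vmlal_*: widening multiply-accumulate of 64-bit sources into a wider
   accumulator (SMLAL/UMLAL).  */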
7012#define vmlal_lane_s16(a, b, c, d) \
7013 __extension__ \
7014 ({ \
7015 int16x4_t c_ = (c); \
7016 int16x4_t b_ = (b); \
7017 int32x4_t a_ = (a); \
7018 int32x4_t result; \
7019 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
7020 : "=w"(result) \
7021 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7022 : /* No clobbers */); \
7023 result; \
7024 })
7025
7026#define vmlal_lane_s32(a, b, c, d) \
7027 __extension__ \
7028 ({ \
7029 int32x2_t c_ = (c); \
7030 int32x2_t b_ = (b); \
7031 int64x2_t a_ = (a); \
7032 int64x2_t result; \
7033 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
7034 : "=w"(result) \
7035 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7036 : /* No clobbers */); \
7037 result; \
7038 })
7039
7040#define vmlal_lane_u16(a, b, c, d) \
7041 __extension__ \
7042 ({ \
7043 uint16x4_t c_ = (c); \
7044 uint16x4_t b_ = (b); \
7045 uint32x4_t a_ = (a); \
7046 uint32x4_t result; \
7047 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
7048 : "=w"(result) \
7049 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7050 : /* No clobbers */); \
7051 result; \
7052 })
7053
7054#define vmlal_lane_u32(a, b, c, d) \
7055 __extension__ \
7056 ({ \
7057 uint32x2_t c_ = (c); \
7058 uint32x2_t b_ = (b); \
7059 uint64x2_t a_ = (a); \
7060 uint64x2_t result; \
7061 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
7062 : "=w"(result) \
7063 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7064 : /* No clobbers */); \
7065 result; \
7066 })
7067
7068#define vmlal_laneq_s16(a, b, c, d) \
7069 __extension__ \
7070 ({ \
7071 int16x8_t c_ = (c); \
7072 int16x4_t b_ = (b); \
7073 int32x4_t a_ = (a); \
7074 int32x4_t result; \
7075 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
7076 : "=w"(result) \
7077 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7078 : /* No clobbers */); \
7079 result; \
7080 })
7081
7082#define vmlal_laneq_s32(a, b, c, d) \
7083 __extension__ \
7084 ({ \
7085 int32x4_t c_ = (c); \
7086 int32x2_t b_ = (b); \
7087 int64x2_t a_ = (a); \
7088 int64x2_t result; \
7089 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
7090 : "=w"(result) \
7091 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7092 : /* No clobbers */); \
7093 result; \
7094 })
7095
7096#define vmlal_laneq_u16(a, b, c, d) \
7097 __extension__ \
7098 ({ \
7099 uint16x8_t c_ = (c); \
7100 uint16x4_t b_ = (b); \
7101 uint32x4_t a_ = (a); \
7102 uint32x4_t result; \
7103 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
7104 : "=w"(result) \
7105 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7106 : /* No clobbers */); \
7107 result; \
7108 })
7109
7110#define vmlal_laneq_u32(a, b, c, d) \
7111 __extension__ \
7112 ({ \
7113 uint32x4_t c_ = (c); \
7114 uint32x2_t b_ = (b); \
7115 uint64x2_t a_ = (a); \
7116 uint64x2_t result; \
7117 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
7118 : "=w"(result) \
7119 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7120 : /* No clobbers */); \
7121 result; \
7122 })
7123
7124__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7125vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7126{
7127 int32x4_t result;
7128 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
7129 : "=w"(result)
7130 : "0"(a), "w"(b), "x"(c)
7131 : /* No clobbers */);
7132 return result;
7133}
7134
7135__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7136vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7137{
7138 int64x2_t result;
7139 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
7140 : "=w"(result)
7141 : "0"(a), "w"(b), "w"(c)
7142 : /* No clobbers */);
7143 return result;
7144}
7145
7146__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7147vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7148{
7149 uint32x4_t result;
7150 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
7151 : "=w"(result)
7152 : "0"(a), "w"(b), "x"(c)
7153 : /* No clobbers */);
7154 return result;
7155}
7156
7157__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7158vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7159{
7160 uint64x2_t result;
7161 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
7162 : "=w"(result)
7163 : "0"(a), "w"(b), "w"(c)
7164 : /* No clobbers */);
7165 return result;
7166}
7167
7168__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7169vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7170{
7171 int16x8_t result;
7172 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
7173 : "=w"(result)
7174 : "0"(a), "w"(b), "w"(c)
7175 : /* No clobbers */);
7176 return result;
7177}
7178
7179__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7180vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7181{
7182 int32x4_t result;
7183 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
7184 : "=w"(result)
7185 : "0"(a), "w"(b), "w"(c)
7186 : /* No clobbers */);
7187 return result;
7188}
7189
7190__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7191vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7192{
7193 int64x2_t result;
7194 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
7195 : "=w"(result)
7196 : "0"(a), "w"(b), "w"(c)
7197 : /* No clobbers */);
7198 return result;
7199}
7200
7201__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7202vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7203{
7204 uint16x8_t result;
7205 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
7206 : "=w"(result)
7207 : "0"(a), "w"(b), "w"(c)
7208 : /* No clobbers */);
7209 return result;
7210}
7211
7212__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7213vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7214{
7215 uint32x4_t result;
7216 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
7217 : "=w"(result)
7218 : "0"(a), "w"(b), "w"(c)
7219 : /* No clobbers */);
7220 return result;
7221}
7222
7223__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7224vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7225{
7226 uint64x2_t result;
7227 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
7228 : "=w"(result)
7229 : "0"(a), "w"(b), "w"(c)
7230 : /* No clobbers */);
7231 return result;
7232}
7233
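/* vmlaq*: 128-bit multiply-accumulate; the floating-point _n forms again use
   FMUL followed by FADD rather than a fused operation.  */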
7234__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7235vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7236{
7237 float32x4_t result;
7238 float32x4_t t1;
7239 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
7240 : "=w"(result), "=w"(t1)
7241 : "0"(a), "w"(b), "w"(c)
7242 : /* No clobbers */);
7243 return result;
7244}
7245
7246__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7247vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7248{
7249 float64x2_t result;
7250 float64x2_t t1;
7251 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
7252 : "=w"(result), "=w"(t1)
7253 : "0"(a), "w"(b), "w"(c)
7254 : /* No clobbers */);
7255 return result;
7256}
7257
7258__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7259vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7260{
7261 int16x8_t result;
7262 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7263 : "=w"(result)
7264 : "0"(a), "w"(b), "x"(c)
7265 : /* No clobbers */);
7266 return result;
7267}
7268
7269__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7270vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7271{
7272 int32x4_t result;
7273 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7274 : "=w"(result)
7275 : "0"(a), "w"(b), "w"(c)
7276 : /* No clobbers */);
7277 return result;
7278}
7279
7280__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7281vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7282{
7283 uint16x8_t result;
7284 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
7285 : "=w"(result)
7286 : "0"(a), "w"(b), "x"(c)
7287 : /* No clobbers */);
7288 return result;
7289}
7290
7291__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7292vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7293{
7294 uint32x4_t result;
7295 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
7296 : "=w"(result)
7297 : "0"(a), "w"(b), "w"(c)
7298 : /* No clobbers */);
7299 return result;
7300}
7301
7302__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7303vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7304{
7305 int8x16_t result;
7306 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7307 : "=w"(result)
7308 : "0"(a), "w"(b), "w"(c)
7309 : /* No clobbers */);
7310 return result;
7311}
7312
7313__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7314vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7315{
7316 int16x8_t result;
7317 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7318 : "=w"(result)
7319 : "0"(a), "w"(b), "w"(c)
7320 : /* No clobbers */);
7321 return result;
7322}
7323
7324__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7325vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7326{
7327 int32x4_t result;
7328 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7329 : "=w"(result)
7330 : "0"(a), "w"(b), "w"(c)
7331 : /* No clobbers */);
7332 return result;
7333}
7334
7335__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7336vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7337{
7338 uint8x16_t result;
7339 __asm__ ("mla %0.16b, %2.16b, %3.16b"
7340 : "=w"(result)
7341 : "0"(a), "w"(b), "w"(c)
7342 : /* No clobbers */);
7343 return result;
7344}
7345
7346__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7347vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7348{
7349 uint16x8_t result;
7350 __asm__ ("mla %0.8h, %2.8h, %3.8h"
7351 : "=w"(result)
7352 : "0"(a), "w"(b), "w"(c)
7353 : /* No clobbers */);
7354 return result;
7355}
7356
7357__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7358vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7359{
7360 uint32x4_t result;
7361 __asm__ ("mla %0.4s, %2.4s, %3.4s"
7362 : "=w"(result)
7363 : "0"(a), "w"(b), "w"(c)
7364 : /* No clobbers */);
7365 return result;
7366}
7367
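/* vmls*: multiply-subtract, a - b * c.  Integer forms map to MLS; the
   floating-point _n forms use FMUL followed by FSUB.  */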
7368__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7369vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7370{
7371 float32x2_t result;
7372 float32x2_t t1;
7373 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7374 : "=w"(result), "=w"(t1)
7375 : "0"(a), "w"(b), "w"(c)
7376 : /* No clobbers */);
7377 return result;
7378}
7379
7380__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7381vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7382{
7383 int16x4_t result;
7384 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7385 : "=w"(result)
7386 : "0"(a), "w"(b), "x"(c)
7387 : /* No clobbers */);
7388 return result;
7389}
7390
7391__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7392vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7393{
7394 int32x2_t result;
7395 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7396 : "=w"(result)
7397 : "0"(a), "w"(b), "w"(c)
7398 : /* No clobbers */);
7399 return result;
7400}
7401
7402__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7403vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7404{
7405 uint16x4_t result;
7406 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7407 : "=w"(result)
7408 : "0"(a), "w"(b), "x"(c)
7409 : /* No clobbers */);
7410 return result;
7411}
7412
7413__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7414vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7415{
7416 uint32x2_t result;
7417 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7418 : "=w"(result)
7419 : "0"(a), "w"(b), "w"(c)
7420 : /* No clobbers */);
7421 return result;
7422}
7423
7424__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7425vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7426{
7427 int8x8_t result;
7428 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7429 : "=w"(result)
7430 : "0"(a), "w"(b), "w"(c)
7431 : /* No clobbers */);
7432 return result;
7433}
7434
7435__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7436vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7437{
7438 int16x4_t result;
7439 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7440 : "=w"(result)
7441 : "0"(a), "w"(b), "w"(c)
7442 : /* No clobbers */);
7443 return result;
7444}
7445
7446__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7447vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7448{
7449 int32x2_t result;
7450 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7451 : "=w"(result)
7452 : "0"(a), "w"(b), "w"(c)
7453 : /* No clobbers */);
7454 return result;
7455}
7456
7457__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7458vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7459{
7460 uint8x8_t result;
7461 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7462 : "=w"(result)
7463 : "0"(a), "w"(b), "w"(c)
7464 : /* No clobbers */);
7465 return result;
7466}
7467
7468__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7469vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7470{
7471 uint16x4_t result;
7472 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7473 : "=w"(result)
7474 : "0"(a), "w"(b), "w"(c)
7475 : /* No clobbers */);
7476 return result;
7477}
7478
7479__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7480vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7481{
7482 uint32x2_t result;
7483 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7484 : "=w"(result)
7485 : "0"(a), "w"(b), "w"(c)
7486 : /* No clobbers */);
7487 return result;
7488}
7489
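/* vmlsl_high_*: widening multiply-subtract using the upper halves of the
   128-bit sources (SMLSL2/UMLSL2).  */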
7490#define vmlsl_high_lane_s16(a, b, c, d) \
7491 __extension__ \
7492 ({ \
7493 int16x8_t c_ = (c); \
7494 int16x8_t b_ = (b); \
7495 int32x4_t a_ = (a); \
7496 int32x4_t result; \
7497 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
7498 : "=w"(result) \
7499 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7500 : /* No clobbers */); \
7501 result; \
7502 })
7503
7504#define vmlsl_high_lane_s32(a, b, c, d) \
7505 __extension__ \
7506 ({ \
7507 int32x4_t c_ = (c); \
7508 int32x4_t b_ = (b); \
7509 int64x2_t a_ = (a); \
7510 int64x2_t result; \
7511 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
7512 : "=w"(result) \
7513 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7514 : /* No clobbers */); \
7515 result; \
7516 })
7517
7518#define vmlsl_high_lane_u16(a, b, c, d) \
7519 __extension__ \
7520 ({ \
7521 uint16x8_t c_ = (c); \
7522 uint16x8_t b_ = (b); \
7523 uint32x4_t a_ = (a); \
7524 uint32x4_t result; \
7525 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
7526 : "=w"(result) \
7527 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7528 : /* No clobbers */); \
7529 result; \
7530 })
7531
7532#define vmlsl_high_lane_u32(a, b, c, d) \
7533 __extension__ \
7534 ({ \
7535 uint32x4_t c_ = (c); \
7536 uint32x4_t b_ = (b); \
7537 uint64x2_t a_ = (a); \
7538 uint64x2_t result; \
7539 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
7540 : "=w"(result) \
7541 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7542 : /* No clobbers */); \
7543 result; \
7544 })
7545
7546#define vmlsl_high_laneq_s16(a, b, c, d) \
7547 __extension__ \
7548 ({ \
7549 int16x8_t c_ = (c); \
7550 int16x8_t b_ = (b); \
7551 int32x4_t a_ = (a); \
7552 int32x4_t result; \
7553 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
7554 : "=w"(result) \
7555 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7556 : /* No clobbers */); \
7557 result; \
7558 })
7559
7560#define vmlsl_high_laneq_s32(a, b, c, d) \
7561 __extension__ \
7562 ({ \
7563 int32x4_t c_ = (c); \
7564 int32x4_t b_ = (b); \
7565 int64x2_t a_ = (a); \
7566 int64x2_t result; \
7567 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
7568 : "=w"(result) \
7569 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7570 : /* No clobbers */); \
7571 result; \
7572 })
7573
7574#define vmlsl_high_laneq_u16(a, b, c, d) \
7575 __extension__ \
7576 ({ \
7577 uint16x8_t c_ = (c); \
7578 uint16x8_t b_ = (b); \
7579 uint32x4_t a_ = (a); \
7580 uint32x4_t result; \
7581 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
7582 : "=w"(result) \
7583 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7584 : /* No clobbers */); \
7585 result; \
7586 })
7587
7588#define vmlsl_high_laneq_u32(a, b, c, d) \
7589 __extension__ \
7590 ({ \
7591 uint32x4_t c_ = (c); \
7592 uint32x4_t b_ = (b); \
7593 uint64x2_t a_ = (a); \
7594 uint64x2_t result; \
7595 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
7596 : "=w"(result) \
7597 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7598 : /* No clobbers */); \
7599 result; \
7600 })
7601
7602__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7603vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7604{
7605 int32x4_t result;
7606 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7607 : "=w"(result)
7608 : "0"(a), "w"(b), "x"(c)
7609 : /* No clobbers */);
7610 return result;
7611}
7612
7613__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7614vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7615{
7616 int64x2_t result;
7617 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7618 : "=w"(result)
7619 : "0"(a), "w"(b), "w"(c)
7620 : /* No clobbers */);
7621 return result;
7622}
7623
7624__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7625vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7626{
7627 uint32x4_t result;
7628 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7629 : "=w"(result)
7630 : "0"(a), "w"(b), "x"(c)
7631 : /* No clobbers */);
7632 return result;
7633}
7634
7635__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7636vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7637{
7638 uint64x2_t result;
7639 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7640 : "=w"(result)
7641 : "0"(a), "w"(b), "w"(c)
7642 : /* No clobbers */);
7643 return result;
7644}
7645
7646__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7647vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7648{
7649 int16x8_t result;
7650 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7651 : "=w"(result)
7652 : "0"(a), "w"(b), "w"(c)
7653 : /* No clobbers */);
7654 return result;
7655}
7656
7657__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7658vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7659{
7660 int32x4_t result;
7661 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7662 : "=w"(result)
7663 : "0"(a), "w"(b), "w"(c)
7664 : /* No clobbers */);
7665 return result;
7666}
7667
7668__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7669vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7670{
7671 int64x2_t result;
7672 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7673 : "=w"(result)
7674 : "0"(a), "w"(b), "w"(c)
7675 : /* No clobbers */);
7676 return result;
7677}
7678
7679__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7680vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7681{
7682 uint16x8_t result;
7683 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7684 : "=w"(result)
7685 : "0"(a), "w"(b), "w"(c)
7686 : /* No clobbers */);
7687 return result;
7688}
7689
7690__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7691vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7692{
7693 uint32x4_t result;
7694 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7695 : "=w"(result)
7696 : "0"(a), "w"(b), "w"(c)
7697 : /* No clobbers */);
7698 return result;
7699}
7700
7701__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7702vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7703{
7704 uint64x2_t result;
7705 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7706 : "=w"(result)
7707 : "0"(a), "w"(b), "w"(c)
7708 : /* No clobbers */);
7709 return result;
7710}
7711
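/* vmlsl_*: widening multiply-subtract of 64-bit sources (SMLSL/UMLSL).  */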
7712#define vmlsl_lane_s16(a, b, c, d) \
7713 __extension__ \
7714 ({ \
7715 int16x4_t c_ = (c); \
7716 int16x4_t b_ = (b); \
7717 int32x4_t a_ = (a); \
7718 int32x4_t result; \
7719 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7720 : "=w"(result) \
7721 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7722 : /* No clobbers */); \
7723 result; \
7724 })
7725
7726#define vmlsl_lane_s32(a, b, c, d) \
7727 __extension__ \
7728 ({ \
7729 int32x2_t c_ = (c); \
7730 int32x2_t b_ = (b); \
7731 int64x2_t a_ = (a); \
7732 int64x2_t result; \
7733 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7734 : "=w"(result) \
7735 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7736 : /* No clobbers */); \
7737 result; \
7738 })
7739
7740#define vmlsl_lane_u16(a, b, c, d) \
7741 __extension__ \
7742 ({ \
7743 uint16x4_t c_ = (c); \
7744 uint16x4_t b_ = (b); \
7745 uint32x4_t a_ = (a); \
7746 uint32x4_t result; \
7747 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7748 : "=w"(result) \
7749 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7750 : /* No clobbers */); \
7751 result; \
7752 })
7753
7754#define vmlsl_lane_u32(a, b, c, d) \
7755 __extension__ \
7756 ({ \
7757 uint32x2_t c_ = (c); \
7758 uint32x2_t b_ = (b); \
7759 uint64x2_t a_ = (a); \
7760 uint64x2_t result; \
7761 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7762 : "=w"(result) \
7763 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7764 : /* No clobbers */); \
7765 result; \
7766 })
7767
7768#define vmlsl_laneq_s16(a, b, c, d) \
7769 __extension__ \
7770 ({ \
7771 int16x8_t c_ = (c); \
7772 int16x4_t b_ = (b); \
7773 int32x4_t a_ = (a); \
7774 int32x4_t result; \
7775 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7776 : "=w"(result) \
7777 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7778 : /* No clobbers */); \
7779 result; \
7780 })
7781
7782#define vmlsl_laneq_s32(a, b, c, d) \
7783 __extension__ \
7784 ({ \
7785 int32x4_t c_ = (c); \
7786 int32x2_t b_ = (b); \
7787 int64x2_t a_ = (a); \
7788 int64x2_t result; \
7789 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7790 : "=w"(result) \
7791 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7792 : /* No clobbers */); \
7793 result; \
7794 })
7795
7796#define vmlsl_laneq_u16(a, b, c, d) \
7797 __extension__ \
7798 ({ \
7799 uint16x8_t c_ = (c); \
7800 uint16x4_t b_ = (b); \
7801 uint32x4_t a_ = (a); \
7802 uint32x4_t result; \
7803 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7804 : "=w"(result) \
7805 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7806 : /* No clobbers */); \
7807 result; \
7808 })
7809
7810#define vmlsl_laneq_u32(a, b, c, d) \
7811 __extension__ \
7812 ({ \
7813 uint32x4_t c_ = (c); \
7814 uint32x2_t b_ = (b); \
7815 uint64x2_t a_ = (a); \
7816 uint64x2_t result; \
7817 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7818 : "=w"(result) \
7819 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7820 : /* No clobbers */); \
7821 result; \
7822 })
7823
7824__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7825vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7826{
7827 int32x4_t result;
7828 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7829 : "=w"(result)
7830 : "0"(a), "w"(b), "x"(c)
7831 : /* No clobbers */);
7832 return result;
7833}
7834
7835__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7836vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7837{
7838 int64x2_t result;
7839 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7840 : "=w"(result)
7841 : "0"(a), "w"(b), "w"(c)
7842 : /* No clobbers */);
7843 return result;
7844}
7845
7846__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7847vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7848{
7849 uint32x4_t result;
7850 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7851 : "=w"(result)
7852 : "0"(a), "w"(b), "x"(c)
7853 : /* No clobbers */);
7854 return result;
7855}
7856
7857__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7858vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7859{
7860 uint64x2_t result;
7861 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7862 : "=w"(result)
7863 : "0"(a), "w"(b), "w"(c)
7864 : /* No clobbers */);
7865 return result;
7866}
7867
7868__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7869vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7870{
7871 int16x8_t result;
7872 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7873 : "=w"(result)
7874 : "0"(a), "w"(b), "w"(c)
7875 : /* No clobbers */);
7876 return result;
7877}
7878
7879__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7880vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7881{
7882 int32x4_t result;
7883 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7884 : "=w"(result)
7885 : "0"(a), "w"(b), "w"(c)
7886 : /* No clobbers */);
7887 return result;
7888}
7889
7890__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7891vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7892{
7893 int64x2_t result;
7894 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7895 : "=w"(result)
7896 : "0"(a), "w"(b), "w"(c)
7897 : /* No clobbers */);
7898 return result;
7899}
7900
7901__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7902vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7903{
7904 uint16x8_t result;
7905 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7906 : "=w"(result)
7907 : "0"(a), "w"(b), "w"(c)
7908 : /* No clobbers */);
7909 return result;
7910}
7911
7912__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7913vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7914{
7915 uint32x4_t result;
7916 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7917 : "=w"(result)
7918 : "0"(a), "w"(b), "w"(c)
7919 : /* No clobbers */);
7920 return result;
7921}
7922
7923__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7924vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7925{
7926 uint64x2_t result;
7927 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7928 : "=w"(result)
7929 : "0"(a), "w"(b), "w"(c)
7930 : /* No clobbers */);
7931 return result;
7932}
7933
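/* vmlsq_n_* and vmlsq_*: 128-bit multiply-subtract.  The floating-point
   scalar forms are composed from separate FMUL and FSUB instructions (so the
   intermediate product is rounded), while the integer forms map to MLS.  */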
7934__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7935vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7936{
7937 float32x4_t result;
7938 float32x4_t t1;
7939 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7940 : "=w"(result), "=w"(t1)
7941 : "0"(a), "w"(b), "w"(c)
7942 : /* No clobbers */);
7943 return result;
7944}
7945
7946__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7947vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7948{
7949 float64x2_t result;
7950 float64x2_t t1;
7951 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
7952 : "=w"(result), "=w"(t1)
7953 : "0"(a), "w"(b), "x"(c)
7954 : /* No clobbers */);
7955 return result;
7956}
7957
7958__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7959vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7960{
7961 int16x8_t result;
7962 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7963 : "=w"(result)
7964 : "0"(a), "w"(b), "x"(c)
7965 : /* No clobbers */);
7966 return result;
7967}
7968
7969__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7970vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7971{
7972 int32x4_t result;
7973 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7974 : "=w"(result)
7975 : "0"(a), "w"(b), "w"(c)
7976 : /* No clobbers */);
7977 return result;
7978}
7979
7980__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7981vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7982{
7983 uint16x8_t result;
7984 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7985 : "=w"(result)
7986 : "0"(a), "w"(b), "x"(c)
7987 : /* No clobbers */);
7988 return result;
7989}
7990
7991__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7992vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7993{
7994 uint32x4_t result;
7995 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7996 : "=w"(result)
7997 : "0"(a), "w"(b), "w"(c)
7998 : /* No clobbers */);
7999 return result;
8000}
8001
8002__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8003vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8004{
8005 int8x16_t result;
8006 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8007 : "=w"(result)
8008 : "0"(a), "w"(b), "w"(c)
8009 : /* No clobbers */);
8010 return result;
8011}
8012
8013__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8014vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8015{
8016 int16x8_t result;
8017 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8018 : "=w"(result)
8019 : "0"(a), "w"(b), "w"(c)
8020 : /* No clobbers */);
8021 return result;
8022}
8023
8024__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8025vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8026{
8027 int32x4_t result;
8028 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8029 : "=w"(result)
8030 : "0"(a), "w"(b), "w"(c)
8031 : /* No clobbers */);
8032 return result;
8033}
8034
8035__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8036vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8037{
8038 uint8x16_t result;
8039 __asm__ ("mls %0.16b,%2.16b,%3.16b"
8040 : "=w"(result)
8041 : "0"(a), "w"(b), "w"(c)
8042 : /* No clobbers */);
8043 return result;
8044}
8045
8046__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8047vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8048{
8049 uint16x8_t result;
8050 __asm__ ("mls %0.8h,%2.8h,%3.8h"
8051 : "=w"(result)
8052 : "0"(a), "w"(b), "w"(c)
8053 : /* No clobbers */);
8054 return result;
8055}
8056
8057__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8058vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8059{
8060 uint32x4_t result;
8061 __asm__ ("mls %0.4s,%2.4s,%3.4s"
8062 : "=w"(result)
8063 : "0"(a), "w"(b), "w"(c)
8064 : /* No clobbers */);
8065 return result;
8066}
8067
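/* vmovl_high_* and vmovl_*: widen to double-width elements, implemented as a
   shift-left-long by zero (SSHLL2/USHLL2 for the upper half, SSHLL/USHLL for
   the lower 64 bits).  */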
8068__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8069vmovl_high_s8 (int8x16_t a)
8070{
8071 int16x8_t result;
8072 __asm__ ("sshll2 %0.8h,%1.16b,#0"
8073 : "=w"(result)
8074 : "w"(a)
8075 : /* No clobbers */);
8076 return result;
8077}
8078
8079__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8080vmovl_high_s16 (int16x8_t a)
8081{
8082 int32x4_t result;
8083 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8084 : "=w"(result)
8085 : "w"(a)
8086 : /* No clobbers */);
8087 return result;
8088}
8089
8090__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8091vmovl_high_s32 (int32x4_t a)
8092{
8093 int64x2_t result;
8094 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8095 : "=w"(result)
8096 : "w"(a)
8097 : /* No clobbers */);
8098 return result;
8099}
8100
8101__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8102vmovl_high_u8 (uint8x16_t a)
8103{
8104 uint16x8_t result;
8105 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8106 : "=w"(result)
8107 : "w"(a)
8108 : /* No clobbers */);
8109 return result;
8110}
8111
8112__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8113vmovl_high_u16 (uint16x8_t a)
8114{
8115 uint32x4_t result;
8116 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8117 : "=w"(result)
8118 : "w"(a)
8119 : /* No clobbers */);
8120 return result;
8121}
8122
8123__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8124vmovl_high_u32 (uint32x4_t a)
8125{
8126 uint64x2_t result;
8127 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8128 : "=w"(result)
8129 : "w"(a)
8130 : /* No clobbers */);
8131 return result;
8132}
8133
8134__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8135vmovl_s8 (int8x8_t a)
8136{
8137 int16x8_t result;
8138 __asm__ ("sshll %0.8h,%1.8b,#0"
8139 : "=w"(result)
8140 : "w"(a)
8141 : /* No clobbers */);
8142 return result;
8143}
8144
8145__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8146vmovl_s16 (int16x4_t a)
8147{
8148 int32x4_t result;
8149 __asm__ ("sshll %0.4s,%1.4h,#0"
8150 : "=w"(result)
8151 : "w"(a)
8152 : /* No clobbers */);
8153 return result;
8154}
8155
8156__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8157vmovl_s32 (int32x2_t a)
8158{
8159 int64x2_t result;
8160 __asm__ ("sshll %0.2d,%1.2s,#0"
8161 : "=w"(result)
8162 : "w"(a)
8163 : /* No clobbers */);
8164 return result;
8165}
8166
8167__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8168vmovl_u8 (uint8x8_t a)
8169{
8170 uint16x8_t result;
8171 __asm__ ("ushll %0.8h,%1.8b,#0"
8172 : "=w"(result)
8173 : "w"(a)
8174 : /* No clobbers */);
8175 return result;
8176}
8177
8178__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8179vmovl_u16 (uint16x4_t a)
8180{
8181 uint32x4_t result;
8182 __asm__ ("ushll %0.4s,%1.4h,#0"
8183 : "=w"(result)
8184 : "w"(a)
8185 : /* No clobbers */);
8186 return result;
8187}
8188
8189__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8190vmovl_u32 (uint32x2_t a)
8191{
8192 uint64x2_t result;
8193 __asm__ ("ushll %0.2d,%1.2s,#0"
8194 : "=w"(result)
8195 : "w"(a)
8196 : /* No clobbers */);
8197 return result;
8198}
8199
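/* vmovn_high_*: truncating narrow of B into the upper half of the result
   (XTN2).  The result is seeded with vcombine_* (a, 0) so the whole register
   is initialised before the read-modify-write asm.  Illustrative sketch only:
     uint8x16_t both = vmovn_high_u16 (vmovn_u16 (lo), hi);  */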
8200__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8201vmovn_high_s16 (int8x8_t a, int16x8_t b)
8202{
8203 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8204 __asm__ ("xtn2 %0.16b,%1.8h"
8205 : "+w"(result)
8206 : "w"(b)
8207 : /* No clobbers */);
8208 return result;
8209}
8210
8211__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8212vmovn_high_s32 (int16x4_t a, int32x4_t b)
8213{
8214 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8215 __asm__ ("xtn2 %0.8h,%1.4s"
8216 : "+w"(result)
8217 : "w"(b)
8218 : /* No clobbers */);
8219 return result;
8220}
8221
8222__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8223vmovn_high_s64 (int32x2_t a, int64x2_t b)
8224{
8225 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8226 __asm__ ("xtn2 %0.4s,%1.2d"
8227 : "+w"(result)
8228 : "w"(b)
8229 : /* No clobbers */);
8230 return result;
8231}
8232
8233__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8234vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8235{
8236 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8237 __asm__ ("xtn2 %0.16b,%1.8h"
8238 : "+w"(result)
8239 : "w"(b)
8240 : /* No clobbers */);
8241 return result;
8242}
8243
8244__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8245vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8246{
8247 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8248 __asm__ ("xtn2 %0.8h,%1.4s"
8249 : "+w"(result)
8250 : "w"(b)
8251 : /* No clobbers */);
8252 return result;
8253}
8254
8255__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8256vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8257{
8258 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8259 __asm__ ("xtn2 %0.4s,%1.2d"
8260 : "+w"(result)
8261 : "w"(b)
8262 : /* No clobbers */);
8263 return result;
8264}
8265
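/* vmovn_*: truncating narrow to half-width elements (XTN).  */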
8266__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8267vmovn_s16 (int16x8_t a)
8268{
8269 int8x8_t result;
8270 __asm__ ("xtn %0.8b,%1.8h"
8271 : "=w"(result)
8272 : "w"(a)
8273 : /* No clobbers */);
8274 return result;
8275}
8276
8277__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8278vmovn_s32 (int32x4_t a)
8279{
8280 int16x4_t result;
8281 __asm__ ("xtn %0.4h,%1.4s"
8282 : "=w"(result)
8283 : "w"(a)
8284 : /* No clobbers */);
8285 return result;
8286}
8287
8288__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8289vmovn_s64 (int64x2_t a)
8290{
8291 int32x2_t result;
8292 __asm__ ("xtn %0.2s,%1.2d"
8293 : "=w"(result)
8294 : "w"(a)
8295 : /* No clobbers */);
8296 return result;
8297}
8298
8299__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8300vmovn_u16 (uint16x8_t a)
8301{
8302 uint8x8_t result;
8303 __asm__ ("xtn %0.8b,%1.8h"
8304 : "=w"(result)
8305 : "w"(a)
8306 : /* No clobbers */);
8307 return result;
8308}
8309
8310__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8311vmovn_u32 (uint32x4_t a)
8312{
8313 uint16x4_t result;
8314 __asm__ ("xtn %0.4h,%1.4s"
8315 : "=w"(result)
8316 : "w"(a)
8317 : /* No clobbers */);
8318 return result;
8319}
8320
8321__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8322vmovn_u64 (uint64x2_t a)
8323{
8324 uint32x2_t result;
8325 __asm__ ("xtn %0.2s,%1.2d"
8326 : "=w"(result)
8327 : "w"(a)
8328 : /* No clobbers */);
8329 return result;
8330}
8331
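/* vmul_n_*: multiply by a scalar broadcast through element 0 (MUL/FMUL by
   element); the 16-bit integer forms need the "x" constraint (v0-v15).  */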
8332__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8333vmul_n_f32 (float32x2_t a, float32_t b)
8334{
8335 float32x2_t result;
8336 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8337 : "=w"(result)
8338 : "w"(a), "w"(b)
8339 : /* No clobbers */);
8340 return result;
8341}
8342
8343__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8344vmul_n_s16 (int16x4_t a, int16_t b)
8345{
8346 int16x4_t result;
8347 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8348 : "=w"(result)
8349 : "w"(a), "x"(b)
8350 : /* No clobbers */);
8351 return result;
8352}
8353
8354__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8355vmul_n_s32 (int32x2_t a, int32_t b)
8356{
8357 int32x2_t result;
8358 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8359 : "=w"(result)
8360 : "w"(a), "w"(b)
8361 : /* No clobbers */);
8362 return result;
8363}
8364
8365__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8366vmul_n_u16 (uint16x4_t a, uint16_t b)
8367{
8368 uint16x4_t result;
8369 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8370 : "=w"(result)
8371 : "w"(a), "x"(b)
8372 : /* No clobbers */);
8373 return result;
8374}
8375
8376__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8377vmul_n_u32 (uint32x2_t a, uint32_t b)
8378{
8379 uint32x2_t result;
8380 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8381 : "=w"(result)
8382 : "w"(a), "w"(b)
8383 : /* No clobbers */);
8384 return result;
8385}
8386
8387#define vmuld_lane_f64(a, b, c) \
8388 __extension__ \
8389 ({ \
8390 float64x2_t b_ = (b); \
8391 float64_t a_ = (a); \
8392 float64_t result; \
8393 __asm__ ("fmul %d0,%d1,%2.d[%3]" \
8394 : "=w"(result) \
8395 : "w"(a_), "w"(b_), "i"(c) \
8396 : /* No clobbers */); \
8397 result; \
8398 })
8399
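/* vmull_high_lane/_laneq and vmull_high_n_*: widening multiply of the upper
   half by a selected element or a broadcast scalar (SMULL2/UMULL2 by
   element); the 16-bit element forms are limited to v0-v15, hence "x".  */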
8400#define vmull_high_lane_s16(a, b, c) \
8401 __extension__ \
8402 ({ \
8403       int16x4_t b_ = (b);                                          \
8404 int16x8_t a_ = (a); \
8405 int32x4_t result; \
8406 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8407 : "=w"(result) \
8408 : "w"(a_), "x"(b_), "i"(c) \
8409 : /* No clobbers */); \
8410 result; \
8411 })
8412
8413#define vmull_high_lane_s32(a, b, c) \
8414 __extension__ \
8415 ({ \
8416       int32x2_t b_ = (b);                                          \
8417 int32x4_t a_ = (a); \
8418 int64x2_t result; \
8419 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8420 : "=w"(result) \
8421 : "w"(a_), "w"(b_), "i"(c) \
8422 : /* No clobbers */); \
8423 result; \
8424 })
8425
8426#define vmull_high_lane_u16(a, b, c) \
8427 __extension__ \
8428 ({ \
8429       uint16x4_t b_ = (b);                                         \
8430 uint16x8_t a_ = (a); \
8431 uint32x4_t result; \
8432 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8433 : "=w"(result) \
8434 : "w"(a_), "x"(b_), "i"(c) \
8435 : /* No clobbers */); \
8436 result; \
8437 })
8438
8439#define vmull_high_lane_u32(a, b, c) \
8440 __extension__ \
8441 ({ \
8442       uint32x2_t b_ = (b);                                         \
8443 uint32x4_t a_ = (a); \
8444 uint64x2_t result; \
8445 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8446 : "=w"(result) \
8447 : "w"(a_), "w"(b_), "i"(c) \
8448 : /* No clobbers */); \
8449 result; \
8450 })
8451
8452#define vmull_high_laneq_s16(a, b, c) \
8453 __extension__ \
8454 ({ \
8455 int16x8_t b_ = (b); \
8456 int16x8_t a_ = (a); \
8457 int32x4_t result; \
8458 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8459 : "=w"(result) \
8460 : "w"(a_), "x"(b_), "i"(c) \
8461 : /* No clobbers */); \
8462 result; \
8463 })
8464
8465#define vmull_high_laneq_s32(a, b, c) \
8466 __extension__ \
8467 ({ \
8468 int32x4_t b_ = (b); \
8469 int32x4_t a_ = (a); \
8470 int64x2_t result; \
8471 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8472 : "=w"(result) \
8473 : "w"(a_), "w"(b_), "i"(c) \
8474 : /* No clobbers */); \
8475 result; \
8476 })
8477
8478#define vmull_high_laneq_u16(a, b, c) \
8479 __extension__ \
8480 ({ \
8481 uint16x8_t b_ = (b); \
8482 uint16x8_t a_ = (a); \
8483 uint32x4_t result; \
8484 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8485 : "=w"(result) \
8486 : "w"(a_), "x"(b_), "i"(c) \
8487 : /* No clobbers */); \
8488 result; \
8489 })
8490
8491#define vmull_high_laneq_u32(a, b, c) \
8492 __extension__ \
8493 ({ \
8494 uint32x4_t b_ = (b); \
8495 uint32x4_t a_ = (a); \
8496 uint64x2_t result; \
8497 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8498 : "=w"(result) \
8499 : "w"(a_), "w"(b_), "i"(c) \
8500 : /* No clobbers */); \
8501 result; \
8502 })
8503
8504__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8505vmull_high_n_s16 (int16x8_t a, int16_t b)
8506{
8507 int32x4_t result;
8508 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8509 : "=w"(result)
8510 : "w"(a), "x"(b)
8511 : /* No clobbers */);
8512 return result;
8513}
8514
8515__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8516vmull_high_n_s32 (int32x4_t a, int32_t b)
8517{
8518 int64x2_t result;
8519 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8520 : "=w"(result)
8521 : "w"(a), "w"(b)
8522 : /* No clobbers */);
8523 return result;
8524}
8525
8526__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8527vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8528{
8529 uint32x4_t result;
8530 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8531 : "=w"(result)
8532 : "w"(a), "x"(b)
8533 : /* No clobbers */);
8534 return result;
8535}
8536
8537__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8538vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8539{
8540 uint64x2_t result;
8541 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8542 : "=w"(result)
8543 : "w"(a), "w"(b)
8544 : /* No clobbers */);
8545 return result;
8546}
8547
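/* vmull_high_*: full-vector widening multiply of the upper halves, including
   the polynomial PMULL2 form for poly8x16_t.  */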
8548__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8549vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8550{
8551 poly16x8_t result;
8552 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8553 : "=w"(result)
8554 : "w"(a), "w"(b)
8555 : /* No clobbers */);
8556 return result;
8557}
8558
8559__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8560vmull_high_s8 (int8x16_t a, int8x16_t b)
8561{
8562 int16x8_t result;
8563 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8564 : "=w"(result)
8565 : "w"(a), "w"(b)
8566 : /* No clobbers */);
8567 return result;
8568}
8569
8570__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8571vmull_high_s16 (int16x8_t a, int16x8_t b)
8572{
8573 int32x4_t result;
8574 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8575 : "=w"(result)
8576 : "w"(a), "w"(b)
8577 : /* No clobbers */);
8578 return result;
8579}
8580
8581__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8582vmull_high_s32 (int32x4_t a, int32x4_t b)
8583{
8584 int64x2_t result;
8585 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8586 : "=w"(result)
8587 : "w"(a), "w"(b)
8588 : /* No clobbers */);
8589 return result;
8590}
8591
8592__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8593vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8594{
8595 uint16x8_t result;
8596 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8597 : "=w"(result)
8598 : "w"(a), "w"(b)
8599 : /* No clobbers */);
8600 return result;
8601}
8602
8603__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8604vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8605{
8606 uint32x4_t result;
8607 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8608 : "=w"(result)
8609 : "w"(a), "w"(b)
8610 : /* No clobbers */);
8611 return result;
8612}
8613
8614__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8615vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8616{
8617 uint64x2_t result;
8618 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8619 : "=w"(result)
8620 : "w"(a), "w"(b)
8621 : /* No clobbers */);
8622 return result;
8623}
8624
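/* vmull_lane/_laneq and vmull_n_*: widening multiply of a 64-bit vector by a
   selected element or a broadcast scalar (SMULL/UMULL by element).  */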
8625#define vmull_lane_s16(a, b, c) \
8626 __extension__ \
8627 ({ \
8628 int16x4_t b_ = (b); \
8629 int16x4_t a_ = (a); \
8630 int32x4_t result; \
8631 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
8632 : "=w"(result) \
8633 : "w"(a_), "x"(b_), "i"(c) \
8634 : /* No clobbers */); \
8635 result; \
8636 })
8637
8638#define vmull_lane_s32(a, b, c) \
8639 __extension__ \
8640 ({ \
8641 int32x2_t b_ = (b); \
8642 int32x2_t a_ = (a); \
8643 int64x2_t result; \
8644 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
8645 : "=w"(result) \
8646 : "w"(a_), "w"(b_), "i"(c) \
8647 : /* No clobbers */); \
8648 result; \
8649 })
8650
8651#define vmull_lane_u16(a, b, c) \
8652 __extension__ \
8653 ({ \
8654 uint16x4_t b_ = (b); \
8655 uint16x4_t a_ = (a); \
8656 uint32x4_t result; \
8657 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
8658 : "=w"(result) \
8659 : "w"(a_), "x"(b_), "i"(c) \
8660 : /* No clobbers */); \
8661 result; \
8662 })
8663
8664#define vmull_lane_u32(a, b, c) \
8665 __extension__ \
8666 ({ \
8667 uint32x2_t b_ = (b); \
8668 uint32x2_t a_ = (a); \
8669 uint64x2_t result; \
8670 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8671 : "=w"(result) \
8672 : "w"(a_), "w"(b_), "i"(c) \
8673 : /* No clobbers */); \
8674 result; \
8675 })
8676
8677#define vmull_laneq_s16(a, b, c) \
8678 __extension__ \
8679 ({ \
8680 int16x8_t b_ = (b); \
8681 int16x4_t a_ = (a); \
8682 int32x4_t result; \
8683 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
8684 : "=w"(result) \
8685 : "w"(a_), "x"(b_), "i"(c) \
8686 : /* No clobbers */); \
8687 result; \
8688 })
8689
8690#define vmull_laneq_s32(a, b, c) \
8691 __extension__ \
8692 ({ \
8693 int32x4_t b_ = (b); \
8694 int32x2_t a_ = (a); \
8695 int64x2_t result; \
8696 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
8697 : "=w"(result) \
8698 : "w"(a_), "w"(b_), "i"(c) \
8699 : /* No clobbers */); \
8700 result; \
8701 })
8702
8703#define vmull_laneq_u16(a, b, c) \
8704 __extension__ \
8705 ({ \
8706 uint16x8_t b_ = (b); \
8707 uint16x4_t a_ = (a); \
8708 uint32x4_t result; \
8709 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
8710 : "=w"(result) \
8711 : "w"(a_), "x"(b_), "i"(c) \
8712 : /* No clobbers */); \
8713 result; \
8714 })
8715
8716#define vmull_laneq_u32(a, b, c) \
8717 __extension__ \
8718 ({ \
8719 uint32x4_t b_ = (b); \
8720 uint32x2_t a_ = (a); \
8721 uint64x2_t result; \
8722 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8723 : "=w"(result) \
8724 : "w"(a_), "w"(b_), "i"(c) \
8725 : /* No clobbers */); \
8726 result; \
8727 })
8728
8729__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8730vmull_n_s16 (int16x4_t a, int16_t b)
8731{
8732 int32x4_t result;
8733 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8734 : "=w"(result)
8735 : "w"(a), "x"(b)
8736 : /* No clobbers */);
8737 return result;
8738}
8739
8740__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8741vmull_n_s32 (int32x2_t a, int32_t b)
8742{
8743 int64x2_t result;
8744 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8745 : "=w"(result)
8746 : "w"(a), "w"(b)
8747 : /* No clobbers */);
8748 return result;
8749}
8750
8751__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8752vmull_n_u16 (uint16x4_t a, uint16_t b)
8753{
8754 uint32x4_t result;
8755 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8756 : "=w"(result)
8757 : "w"(a), "x"(b)
8758 : /* No clobbers */);
8759 return result;
8760}
8761
8762__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8763vmull_n_u32 (uint32x2_t a, uint32_t b)
8764{
8765 uint64x2_t result;
8766 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8767 : "=w"(result)
8768 : "w"(a), "w"(b)
8769 : /* No clobbers */);
8770 return result;
8771}
8772
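/* vmull_*: full-vector widening multiply of 64-bit inputs (PMULL for poly8,
   SMULL/UMULL otherwise).  */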
8773__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8774vmull_p8 (poly8x8_t a, poly8x8_t b)
8775{
8776 poly16x8_t result;
8777 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8778 : "=w"(result)
8779 : "w"(a), "w"(b)
8780 : /* No clobbers */);
8781 return result;
8782}
8783
8784__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8785vmull_s8 (int8x8_t a, int8x8_t b)
8786{
8787 int16x8_t result;
8788 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8789 : "=w"(result)
8790 : "w"(a), "w"(b)
8791 : /* No clobbers */);
8792 return result;
8793}
8794
8795__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8796vmull_s16 (int16x4_t a, int16x4_t b)
8797{
8798 int32x4_t result;
8799 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8800 : "=w"(result)
8801 : "w"(a), "w"(b)
8802 : /* No clobbers */);
8803 return result;
8804}
8805
8806__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8807vmull_s32 (int32x2_t a, int32x2_t b)
8808{
8809 int64x2_t result;
8810 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8811 : "=w"(result)
8812 : "w"(a), "w"(b)
8813 : /* No clobbers */);
8814 return result;
8815}
8816
8817__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8818vmull_u8 (uint8x8_t a, uint8x8_t b)
8819{
8820 uint16x8_t result;
8821 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8822 : "=w"(result)
8823 : "w"(a), "w"(b)
8824 : /* No clobbers */);
8825 return result;
8826}
8827
8828__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8829vmull_u16 (uint16x4_t a, uint16x4_t b)
8830{
8831 uint32x4_t result;
8832 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8833 : "=w"(result)
8834 : "w"(a), "w"(b)
8835 : /* No clobbers */);
8836 return result;
8837}
8838
8839__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8840vmull_u32 (uint32x2_t a, uint32x2_t b)
8841{
8842 uint64x2_t result;
8843 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8844 : "=w"(result)
8845 : "w"(a), "w"(b)
8846 : /* No clobbers */);
8847 return result;
8848}
8849
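/* vmulq_n_*: 128-bit multiply by a scalar broadcast through element 0; the
   scalar by-element form vmuls_lane_f32 follows.  */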
8850__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8851vmulq_n_f32 (float32x4_t a, float32_t b)
8852{
8853 float32x4_t result;
8854 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8855 : "=w"(result)
8856 : "w"(a), "w"(b)
8857 : /* No clobbers */);
8858 return result;
8859}
8860
8861__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8862vmulq_n_f64 (float64x2_t a, float64_t b)
8863{
8864 float64x2_t result;
8865 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8866 : "=w"(result)
8867 : "w"(a), "w"(b)
8868 : /* No clobbers */);
8869 return result;
8870}
8871
8872__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8873vmulq_n_s16 (int16x8_t a, int16_t b)
8874{
8875 int16x8_t result;
8876 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8877 : "=w"(result)
8878 : "w"(a), "x"(b)
8879 : /* No clobbers */);
8880 return result;
8881}
8882
8883__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8884vmulq_n_s32 (int32x4_t a, int32_t b)
8885{
8886 int32x4_t result;
8887 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8888 : "=w"(result)
8889 : "w"(a), "w"(b)
8890 : /* No clobbers */);
8891 return result;
8892}
8893
8894__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8895vmulq_n_u16 (uint16x8_t a, uint16_t b)
8896{
8897 uint16x8_t result;
8898 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8899 : "=w"(result)
8900 : "w"(a), "x"(b)
8901 : /* No clobbers */);
8902 return result;
8903}
8904
8905__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8906vmulq_n_u32 (uint32x4_t a, uint32_t b)
8907{
8908 uint32x4_t result;
8909 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8910 : "=w"(result)
8911 : "w"(a), "w"(b)
8912 : /* No clobbers */);
8913 return result;
8914}
8915
8916#define vmuls_lane_f32(a, b, c) \
8917 __extension__ \
8918 ({ \
8919 float32x4_t b_ = (b); \
8920 float32_t a_ = (a); \
8921 float32_t result; \
8922 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
8923 : "=w"(result) \
8924 : "w"(a_), "w"(b_), "i"(c) \
8925 : /* No clobbers */); \
8926 result; \
8927 })
8928
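/* vmulx family: multiply-extended (FMULX), which returns +/-2.0 instead of
   the default NaN when one operand is zero and the other infinity.  */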
8929__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8930vmulx_f32 (float32x2_t a, float32x2_t b)
8931{
8932 float32x2_t result;
8933 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8934 : "=w"(result)
8935 : "w"(a), "w"(b)
8936 : /* No clobbers */);
8937 return result;
8938}
8939
8940#define vmulx_lane_f32(a, b, c) \
8941 __extension__ \
8942 ({ \
8943 float32x4_t b_ = (b); \
8944 float32x2_t a_ = (a); \
8945 float32x2_t result; \
8946 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8947 : "=w"(result) \
8948 : "w"(a_), "w"(b_), "i"(c) \
8949 : /* No clobbers */); \
8950 result; \
8951 })
8952
8953__extension__ static __inline float64_t __attribute__ ((__always_inline__))
8954vmulxd_f64 (float64_t a, float64_t b)
8955{
8956 float64_t result;
8957 __asm__ ("fmulx %d0, %d1, %d2"
8958 : "=w"(result)
8959 : "w"(a), "w"(b)
8960 : /* No clobbers */);
8961 return result;
8962}
8963
8964__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8965vmulxq_f32 (float32x4_t a, float32x4_t b)
8966{
8967 float32x4_t result;
8968 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8969 : "=w"(result)
8970 : "w"(a), "w"(b)
8971 : /* No clobbers */);
8972 return result;
8973}
8974
8975__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8976vmulxq_f64 (float64x2_t a, float64x2_t b)
8977{
8978 float64x2_t result;
8979 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8980 : "=w"(result)
8981 : "w"(a), "w"(b)
8982 : /* No clobbers */);
8983 return result;
8984}
8985
8986#define vmulxq_lane_f32(a, b, c) \
8987 __extension__ \
8988 ({ \
8989 float32x4_t b_ = (b); \
8990 float32x4_t a_ = (a); \
8991 float32x4_t result; \
8992 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8993 : "=w"(result) \
8994 : "w"(a_), "w"(b_), "i"(c) \
8995 : /* No clobbers */); \
8996 result; \
8997 })
8998
8999#define vmulxq_lane_f64(a, b, c) \
9000 __extension__ \
9001 ({ \
9002 float64x2_t b_ = (b); \
9003 float64x2_t a_ = (a); \
9004 float64x2_t result; \
9005 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
9006 : "=w"(result) \
9007 : "w"(a_), "w"(b_), "i"(c) \
9008 : /* No clobbers */); \
9009 result; \
9010 })
9011
9012__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9013vmulxs_f32 (float32_t a, float32_t b)
9014{
9015 float32_t result;
9016 __asm__ ("fmulx %s0, %s1, %s2"
9017 : "=w"(result)
9018 : "w"(a), "w"(b)
9019 : /* No clobbers */);
9020 return result;
9021}
9022
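/* vmvn_* / vmvnq_*: bitwise NOT.  MVN operates on bytes, so the .8b/.16b
   arrangement is used regardless of the element type.  */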
9023__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9024vmvn_p8 (poly8x8_t a)
9025{
9026 poly8x8_t result;
9027 __asm__ ("mvn %0.8b,%1.8b"
9028 : "=w"(result)
9029 : "w"(a)
9030 : /* No clobbers */);
9031 return result;
9032}
9033
9034__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9035vmvn_s8 (int8x8_t a)
9036{
9037 int8x8_t result;
9038 __asm__ ("mvn %0.8b,%1.8b"
9039 : "=w"(result)
9040 : "w"(a)
9041 : /* No clobbers */);
9042 return result;
9043}
9044
9045__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9046vmvn_s16 (int16x4_t a)
9047{
9048 int16x4_t result;
9049 __asm__ ("mvn %0.8b,%1.8b"
9050 : "=w"(result)
9051 : "w"(a)
9052 : /* No clobbers */);
9053 return result;
9054}
9055
9056__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9057vmvn_s32 (int32x2_t a)
9058{
9059 int32x2_t result;
9060 __asm__ ("mvn %0.8b,%1.8b"
9061 : "=w"(result)
9062 : "w"(a)
9063 : /* No clobbers */);
9064 return result;
9065}
9066
9067__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9068vmvn_u8 (uint8x8_t a)
9069{
9070 uint8x8_t result;
9071 __asm__ ("mvn %0.8b,%1.8b"
9072 : "=w"(result)
9073 : "w"(a)
9074 : /* No clobbers */);
9075 return result;
9076}
9077
9078__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9079vmvn_u16 (uint16x4_t a)
9080{
9081 uint16x4_t result;
9082 __asm__ ("mvn %0.8b,%1.8b"
9083 : "=w"(result)
9084 : "w"(a)
9085 : /* No clobbers */);
9086 return result;
9087}
9088
9089__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9090vmvn_u32 (uint32x2_t a)
9091{
9092 uint32x2_t result;
9093 __asm__ ("mvn %0.8b,%1.8b"
9094 : "=w"(result)
9095 : "w"(a)
9096 : /* No clobbers */);
9097 return result;
9098}
9099
9100__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9101vmvnq_p8 (poly8x16_t a)
9102{
9103 poly8x16_t result;
9104 __asm__ ("mvn %0.16b,%1.16b"
9105 : "=w"(result)
9106 : "w"(a)
9107 : /* No clobbers */);
9108 return result;
9109}
9110
9111__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9112vmvnq_s8 (int8x16_t a)
9113{
9114 int8x16_t result;
9115 __asm__ ("mvn %0.16b,%1.16b"
9116 : "=w"(result)
9117 : "w"(a)
9118 : /* No clobbers */);
9119 return result;
9120}
9121
9122__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9123vmvnq_s16 (int16x8_t a)
9124{
9125 int16x8_t result;
9126 __asm__ ("mvn %0.16b,%1.16b"
9127 : "=w"(result)
9128 : "w"(a)
9129 : /* No clobbers */);
9130 return result;
9131}
9132
9133__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9134vmvnq_s32 (int32x4_t a)
9135{
9136 int32x4_t result;
9137 __asm__ ("mvn %0.16b,%1.16b"
9138 : "=w"(result)
9139 : "w"(a)
9140 : /* No clobbers */);
9141 return result;
9142}
9143
9144__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9145vmvnq_u8 (uint8x16_t a)
9146{
9147 uint8x16_t result;
9148 __asm__ ("mvn %0.16b,%1.16b"
9149 : "=w"(result)
9150 : "w"(a)
9151 : /* No clobbers */);
9152 return result;
9153}
9154
9155__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9156vmvnq_u16 (uint16x8_t a)
9157{
9158 uint16x8_t result;
9159 __asm__ ("mvn %0.16b,%1.16b"
9160 : "=w"(result)
9161 : "w"(a)
9162 : /* No clobbers */);
9163 return result;
9164}
9165
9166__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9167vmvnq_u32 (uint32x4_t a)
9168{
9169 uint32x4_t result;
9170 __asm__ ("mvn %0.16b,%1.16b"
9171 : "=w"(result)
9172 : "w"(a)
9173 : /* No clobbers */);
9174 return result;
9175}
9176
9177
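/* vpadal_* / vpadalq_*: pairwise add long and accumulate (SADALP/UADALP);
   the accumulator is tied to the asm output with the "0" constraint.  */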
9178__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9179vpadal_s8 (int16x4_t a, int8x8_t b)
9180{
9181 int16x4_t result;
9182 __asm__ ("sadalp %0.4h,%2.8b"
9183 : "=w"(result)
9184 : "0"(a), "w"(b)
9185 : /* No clobbers */);
9186 return result;
9187}
9188
9189__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9190vpadal_s16 (int32x2_t a, int16x4_t b)
9191{
9192 int32x2_t result;
9193 __asm__ ("sadalp %0.2s,%2.4h"
9194 : "=w"(result)
9195 : "0"(a), "w"(b)
9196 : /* No clobbers */);
9197 return result;
9198}
9199
9200__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9201vpadal_s32 (int64x1_t a, int32x2_t b)
9202{
9203 int64x1_t result;
9204 __asm__ ("sadalp %0.1d,%2.2s"
9205 : "=w"(result)
9206 : "0"(a), "w"(b)
9207 : /* No clobbers */);
9208 return result;
9209}
9210
9211__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9212vpadal_u8 (uint16x4_t a, uint8x8_t b)
9213{
9214 uint16x4_t result;
9215 __asm__ ("uadalp %0.4h,%2.8b"
9216 : "=w"(result)
9217 : "0"(a), "w"(b)
9218 : /* No clobbers */);
9219 return result;
9220}
9221
9222__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9223vpadal_u16 (uint32x2_t a, uint16x4_t b)
9224{
9225 uint32x2_t result;
9226 __asm__ ("uadalp %0.2s,%2.4h"
9227 : "=w"(result)
9228 : "0"(a), "w"(b)
9229 : /* No clobbers */);
9230 return result;
9231}
9232
9233__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9234vpadal_u32 (uint64x1_t a, uint32x2_t b)
9235{
9236 uint64x1_t result;
9237 __asm__ ("uadalp %0.1d,%2.2s"
9238 : "=w"(result)
9239 : "0"(a), "w"(b)
9240 : /* No clobbers */);
9241 return result;
9242}
9243
9244__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9245vpadalq_s8 (int16x8_t a, int8x16_t b)
9246{
9247 int16x8_t result;
9248 __asm__ ("sadalp %0.8h,%2.16b"
9249 : "=w"(result)
9250 : "0"(a), "w"(b)
9251 : /* No clobbers */);
9252 return result;
9253}
9254
9255__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9256vpadalq_s16 (int32x4_t a, int16x8_t b)
9257{
9258 int32x4_t result;
9259 __asm__ ("sadalp %0.4s,%2.8h"
9260 : "=w"(result)
9261 : "0"(a), "w"(b)
9262 : /* No clobbers */);
9263 return result;
9264}
9265
9266__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9267vpadalq_s32 (int64x2_t a, int32x4_t b)
9268{
9269 int64x2_t result;
9270 __asm__ ("sadalp %0.2d,%2.4s"
9271 : "=w"(result)
9272 : "0"(a), "w"(b)
9273 : /* No clobbers */);
9274 return result;
9275}
9276
9277__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9278vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9279{
9280 uint16x8_t result;
9281 __asm__ ("uadalp %0.8h,%2.16b"
9282 : "=w"(result)
9283 : "0"(a), "w"(b)
9284 : /* No clobbers */);
9285 return result;
9286}
9287
9288__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9289vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9290{
9291 uint32x4_t result;
9292 __asm__ ("uadalp %0.4s,%2.8h"
9293 : "=w"(result)
9294 : "0"(a), "w"(b)
9295 : /* No clobbers */);
9296 return result;
9297}
9298
9299__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9300vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9301{
9302 uint64x2_t result;
9303 __asm__ ("uadalp %0.2d,%2.4s"
9304 : "=w"(result)
9305 : "0"(a), "w"(b)
9306 : /* No clobbers */);
9307 return result;
9308}
9309
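/* vpadd_*: pairwise add of 64-bit vectors; the integer forms use the
   __builtin_aarch64_addp* builtins, the float form uses FADDP, and
   vpaddd_f64 reduces a pair of doubles to a scalar.  */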
9310__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9311vpadd_f32 (float32x2_t a, float32x2_t b)
9312{
9313 float32x2_t result;
9314 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9315 : "=w"(result)
9316 : "w"(a), "w"(b)
9317 : /* No clobbers */);
9318 return result;
9319}
9320
9321__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9322vpadd_s8 (int8x8_t __a, int8x8_t __b)
9323{
9324 return __builtin_aarch64_addpv8qi (__a, __b);
9325}
9326
9327__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9328vpadd_s16 (int16x4_t __a, int16x4_t __b)
9329{
9330 return __builtin_aarch64_addpv4hi (__a, __b);
9331}
9332
9333__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9334vpadd_s32 (int32x2_t __a, int32x2_t __b)
9335{
9336 return __builtin_aarch64_addpv2si (__a, __b);
9337}
9338
9339__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9340vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
9341{
9342 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
9343 (int8x8_t) __b);
9344}
9345
9346__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9347vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
9348{
9349 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
9350 (int16x4_t) __b);
9351}
9352
9353__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9354vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
9355{
9356 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
9357 (int32x2_t) __b);
9358}
9359
9360__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9361vpaddd_f64 (float64x2_t a)
9362{
9363 float64_t result;
9364 __asm__ ("faddp %d0,%1.2d"
9365 : "=w"(result)
9366 : "w"(a)
9367 : /* No clobbers */);
9368 return result;
9369}
9370
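/* vpaddl_* / vpaddlq_*: pairwise add long, widening adjacent element pairs
   (SADDLP/UADDLP).  */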
9371__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9372vpaddl_s8 (int8x8_t a)
9373{
9374 int16x4_t result;
9375 __asm__ ("saddlp %0.4h,%1.8b"
9376 : "=w"(result)
9377 : "w"(a)
9378 : /* No clobbers */);
9379 return result;
9380}
9381
9382__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9383vpaddl_s16 (int16x4_t a)
9384{
9385 int32x2_t result;
9386 __asm__ ("saddlp %0.2s,%1.4h"
9387 : "=w"(result)
9388 : "w"(a)
9389 : /* No clobbers */);
9390 return result;
9391}
9392
9393__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9394vpaddl_s32 (int32x2_t a)
9395{
9396 int64x1_t result;
9397 __asm__ ("saddlp %0.1d,%1.2s"
9398 : "=w"(result)
9399 : "w"(a)
9400 : /* No clobbers */);
9401 return result;
9402}
9403
9404__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9405vpaddl_u8 (uint8x8_t a)
9406{
9407 uint16x4_t result;
9408 __asm__ ("uaddlp %0.4h,%1.8b"
9409 : "=w"(result)
9410 : "w"(a)
9411 : /* No clobbers */);
9412 return result;
9413}
9414
9415__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9416vpaddl_u16 (uint16x4_t a)
9417{
9418 uint32x2_t result;
9419 __asm__ ("uaddlp %0.2s,%1.4h"
9420 : "=w"(result)
9421 : "w"(a)
9422 : /* No clobbers */);
9423 return result;
9424}
9425
9426__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9427vpaddl_u32 (uint32x2_t a)
9428{
9429 uint64x1_t result;
9430 __asm__ ("uaddlp %0.1d,%1.2s"
9431 : "=w"(result)
9432 : "w"(a)
9433 : /* No clobbers */);
9434 return result;
9435}
9436
9437__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9438vpaddlq_s8 (int8x16_t a)
9439{
9440 int16x8_t result;
9441 __asm__ ("saddlp %0.8h,%1.16b"
9442 : "=w"(result)
9443 : "w"(a)
9444 : /* No clobbers */);
9445 return result;
9446}
9447
9448__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9449vpaddlq_s16 (int16x8_t a)
9450{
9451 int32x4_t result;
9452 __asm__ ("saddlp %0.4s,%1.8h"
9453 : "=w"(result)
9454 : "w"(a)
9455 : /* No clobbers */);
9456 return result;
9457}
9458
9459__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9460vpaddlq_s32 (int32x4_t a)
9461{
9462 int64x2_t result;
9463 __asm__ ("saddlp %0.2d,%1.4s"
9464 : "=w"(result)
9465 : "w"(a)
9466 : /* No clobbers */);
9467 return result;
9468}
9469
9470__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9471vpaddlq_u8 (uint8x16_t a)
9472{
9473 uint16x8_t result;
9474 __asm__ ("uaddlp %0.8h,%1.16b"
9475 : "=w"(result)
9476 : "w"(a)
9477 : /* No clobbers */);
9478 return result;
9479}
9480
9481__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9482vpaddlq_u16 (uint16x8_t a)
9483{
9484 uint32x4_t result;
9485 __asm__ ("uaddlp %0.4s,%1.8h"
9486 : "=w"(result)
9487 : "w"(a)
9488 : /* No clobbers */);
9489 return result;
9490}
9491
9492__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9493vpaddlq_u32 (uint32x4_t a)
9494{
9495 uint64x2_t result;
9496 __asm__ ("uaddlp %0.2d,%1.4s"
9497 : "=w"(result)
9498 : "w"(a)
9499 : /* No clobbers */);
9500 return result;
9501}
9502
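/* vpaddq_*: 128-bit pairwise add (ADDP/FADDP), plus the vpadds_f32 scalar
   pair reduction.  */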
9503__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9504vpaddq_f32 (float32x4_t a, float32x4_t b)
9505{
9506 float32x4_t result;
9507 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9508 : "=w"(result)
9509 : "w"(a), "w"(b)
9510 : /* No clobbers */);
9511 return result;
9512}
9513
9514__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9515vpaddq_f64 (float64x2_t a, float64x2_t b)
9516{
9517 float64x2_t result;
9518 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9519 : "=w"(result)
9520 : "w"(a), "w"(b)
9521 : /* No clobbers */);
9522 return result;
9523}
9524
9525__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9526vpaddq_s8 (int8x16_t a, int8x16_t b)
9527{
9528 int8x16_t result;
9529 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9530 : "=w"(result)
9531 : "w"(a), "w"(b)
9532 : /* No clobbers */);
9533 return result;
9534}
9535
9536__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9537vpaddq_s16 (int16x8_t a, int16x8_t b)
9538{
9539 int16x8_t result;
9540 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9541 : "=w"(result)
9542 : "w"(a), "w"(b)
9543 : /* No clobbers */);
9544 return result;
9545}
9546
9547__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9548vpaddq_s32 (int32x4_t a, int32x4_t b)
9549{
9550 int32x4_t result;
9551 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9552 : "=w"(result)
9553 : "w"(a), "w"(b)
9554 : /* No clobbers */);
9555 return result;
9556}
9557
9558__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9559vpaddq_s64 (int64x2_t a, int64x2_t b)
9560{
9561 int64x2_t result;
9562 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9563 : "=w"(result)
9564 : "w"(a), "w"(b)
9565 : /* No clobbers */);
9566 return result;
9567}
9568
9569__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9570vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9571{
9572 uint8x16_t result;
9573 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9574 : "=w"(result)
9575 : "w"(a), "w"(b)
9576 : /* No clobbers */);
9577 return result;
9578}
9579
9580__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9581vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9582{
9583 uint16x8_t result;
9584 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9585 : "=w"(result)
9586 : "w"(a), "w"(b)
9587 : /* No clobbers */);
9588 return result;
9589}
9590
9591__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9592vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9593{
9594 uint32x4_t result;
9595 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9596 : "=w"(result)
9597 : "w"(a), "w"(b)
9598 : /* No clobbers */);
9599 return result;
9600}
9601
9602__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9603vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9604{
9605 uint64x2_t result;
9606 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9607 : "=w"(result)
9608 : "w"(a), "w"(b)
9609 : /* No clobbers */);
9610 return result;
9611}
9612
9613__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9614vpadds_f32 (float32x2_t a)
9615{
9616 float32_t result;
9617 __asm__ ("faddp %s0,%1.2s"
9618 : "=w"(result)
9619 : "w"(a)
9620 : /* No clobbers */);
9621 return result;
9622}
9623
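/* vpmax family: pairwise maximum (SMAXP/UMAXP/FMAXP), the FMAXNMP "maxNum"
   variants that prefer a number over a NaN, and the scalar pair
   reductions.  */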
9624__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9625vpmax_f32 (float32x2_t a, float32x2_t b)
9626{
9627 float32x2_t result;
9628 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9629 : "=w"(result)
9630 : "w"(a), "w"(b)
9631 : /* No clobbers */);
9632 return result;
9633}
9634
9635__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9636vpmax_s8 (int8x8_t a, int8x8_t b)
9637{
9638 int8x8_t result;
9639 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9640 : "=w"(result)
9641 : "w"(a), "w"(b)
9642 : /* No clobbers */);
9643 return result;
9644}
9645
9646__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9647vpmax_s16 (int16x4_t a, int16x4_t b)
9648{
9649 int16x4_t result;
9650 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9651 : "=w"(result)
9652 : "w"(a), "w"(b)
9653 : /* No clobbers */);
9654 return result;
9655}
9656
9657__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9658vpmax_s32 (int32x2_t a, int32x2_t b)
9659{
9660 int32x2_t result;
9661 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9662 : "=w"(result)
9663 : "w"(a), "w"(b)
9664 : /* No clobbers */);
9665 return result;
9666}
9667
9668__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9669vpmax_u8 (uint8x8_t a, uint8x8_t b)
9670{
9671 uint8x8_t result;
9672 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9673 : "=w"(result)
9674 : "w"(a), "w"(b)
9675 : /* No clobbers */);
9676 return result;
9677}
9678
9679__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9680vpmax_u16 (uint16x4_t a, uint16x4_t b)
9681{
9682 uint16x4_t result;
9683 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9684 : "=w"(result)
9685 : "w"(a), "w"(b)
9686 : /* No clobbers */);
9687 return result;
9688}
9689
9690__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9691vpmax_u32 (uint32x2_t a, uint32x2_t b)
9692{
9693 uint32x2_t result;
9694 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9695 : "=w"(result)
9696 : "w"(a), "w"(b)
9697 : /* No clobbers */);
9698 return result;
9699}
9700
9701__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9702vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9703{
9704 float32x2_t result;
9705 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9706 : "=w"(result)
9707 : "w"(a), "w"(b)
9708 : /* No clobbers */);
9709 return result;
9710}
9711
9712__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9713vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9714{
9715 float32x4_t result;
9716 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9717 : "=w"(result)
9718 : "w"(a), "w"(b)
9719 : /* No clobbers */);
9720 return result;
9721}
9722
9723__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9724vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9725{
9726 float64x2_t result;
9727 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9728 : "=w"(result)
9729 : "w"(a), "w"(b)
9730 : /* No clobbers */);
9731 return result;
9732}
9733
9734__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9735vpmaxnmqd_f64 (float64x2_t a)
9736{
9737 float64_t result;
9738 __asm__ ("fmaxnmp %d0,%1.2d"
9739 : "=w"(result)
9740 : "w"(a)
9741 : /* No clobbers */);
9742 return result;
9743}
9744
9745__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9746vpmaxnms_f32 (float32x2_t a)
9747{
9748 float32_t result;
9749 __asm__ ("fmaxnmp %s0,%1.2s"
9750 : "=w"(result)
9751 : "w"(a)
9752 : /* No clobbers */);
9753 return result;
9754}
9755
9756__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9757vpmaxq_f32 (float32x4_t a, float32x4_t b)
9758{
9759 float32x4_t result;
9760 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9761 : "=w"(result)
9762 : "w"(a), "w"(b)
9763 : /* No clobbers */);
9764 return result;
9765}
9766
9767__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9768vpmaxq_f64 (float64x2_t a, float64x2_t b)
9769{
9770 float64x2_t result;
9771 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9772 : "=w"(result)
9773 : "w"(a), "w"(b)
9774 : /* No clobbers */);
9775 return result;
9776}
9777
9778__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9779vpmaxq_s8 (int8x16_t a, int8x16_t b)
9780{
9781 int8x16_t result;
9782 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9783 : "=w"(result)
9784 : "w"(a), "w"(b)
9785 : /* No clobbers */);
9786 return result;
9787}
9788
9789__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9790vpmaxq_s16 (int16x8_t a, int16x8_t b)
9791{
9792 int16x8_t result;
9793 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9794 : "=w"(result)
9795 : "w"(a), "w"(b)
9796 : /* No clobbers */);
9797 return result;
9798}
9799
9800__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9801vpmaxq_s32 (int32x4_t a, int32x4_t b)
9802{
9803 int32x4_t result;
9804 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9805 : "=w"(result)
9806 : "w"(a), "w"(b)
9807 : /* No clobbers */);
9808 return result;
9809}
9810
9811__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9812vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9813{
9814 uint8x16_t result;
9815 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9816 : "=w"(result)
9817 : "w"(a), "w"(b)
9818 : /* No clobbers */);
9819 return result;
9820}
9821
9822__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9823vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9824{
9825 uint16x8_t result;
9826 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9827 : "=w"(result)
9828 : "w"(a), "w"(b)
9829 : /* No clobbers */);
9830 return result;
9831}
9832
9833__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9834vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9835{
9836 uint32x4_t result;
9837 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9838 : "=w"(result)
9839 : "w"(a), "w"(b)
9840 : /* No clobbers */);
9841 return result;
9842}
9843
9844__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9845vpmaxqd_f64 (float64x2_t a)
9846{
9847 float64_t result;
9848 __asm__ ("fmaxp %d0,%1.2d"
9849 : "=w"(result)
9850 : "w"(a)
9851 : /* No clobbers */);
9852 return result;
9853}
9854
9855__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9856vpmaxs_f32 (float32x2_t a)
9857{
9858 float32_t result;
9859 __asm__ ("fmaxp %s0,%1.2s"
9860 : "=w"(result)
9861 : "w"(a)
9862 : /* No clobbers */);
9863 return result;
9864}
9865
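/* vpmin family: pairwise minimum, mirroring the vpmax forms above
   (SMINP/UMINP/FMINP, FMINNMP, and the scalar pair reductions).  */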
9866__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9867vpmin_f32 (float32x2_t a, float32x2_t b)
9868{
9869 float32x2_t result;
9870 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9871 : "=w"(result)
9872 : "w"(a), "w"(b)
9873 : /* No clobbers */);
9874 return result;
9875}
9876
9877__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9878vpmin_s8 (int8x8_t a, int8x8_t b)
9879{
9880 int8x8_t result;
9881 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9882 : "=w"(result)
9883 : "w"(a), "w"(b)
9884 : /* No clobbers */);
9885 return result;
9886}
9887
9888__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9889vpmin_s16 (int16x4_t a, int16x4_t b)
9890{
9891 int16x4_t result;
9892 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9893 : "=w"(result)
9894 : "w"(a), "w"(b)
9895 : /* No clobbers */);
9896 return result;
9897}
9898
9899__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9900vpmin_s32 (int32x2_t a, int32x2_t b)
9901{
9902 int32x2_t result;
9903 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9904 : "=w"(result)
9905 : "w"(a), "w"(b)
9906 : /* No clobbers */);
9907 return result;
9908}
9909
9910__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9911vpmin_u8 (uint8x8_t a, uint8x8_t b)
9912{
9913 uint8x8_t result;
9914 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9915 : "=w"(result)
9916 : "w"(a), "w"(b)
9917 : /* No clobbers */);
9918 return result;
9919}
9920
9921__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9922vpmin_u16 (uint16x4_t a, uint16x4_t b)
9923{
9924 uint16x4_t result;
9925 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9926 : "=w"(result)
9927 : "w"(a), "w"(b)
9928 : /* No clobbers */);
9929 return result;
9930}
9931
9932__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9933vpmin_u32 (uint32x2_t a, uint32x2_t b)
9934{
9935 uint32x2_t result;
9936 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9937 : "=w"(result)
9938 : "w"(a), "w"(b)
9939 : /* No clobbers */);
9940 return result;
9941}
9942
9943__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9944vpminnm_f32 (float32x2_t a, float32x2_t b)
9945{
9946 float32x2_t result;
9947 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9948 : "=w"(result)
9949 : "w"(a), "w"(b)
9950 : /* No clobbers */);
9951 return result;
9952}
9953
9954__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9955vpminnmq_f32 (float32x4_t a, float32x4_t b)
9956{
9957 float32x4_t result;
9958 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9959 : "=w"(result)
9960 : "w"(a), "w"(b)
9961 : /* No clobbers */);
9962 return result;
9963}
9964
9965__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9966vpminnmq_f64 (float64x2_t a, float64x2_t b)
9967{
9968 float64x2_t result;
9969 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9970 : "=w"(result)
9971 : "w"(a), "w"(b)
9972 : /* No clobbers */);
9973 return result;
9974}
9975
9976__extension__ static __inline float64_t __attribute__ ((__always_inline__))
9977vpminnmqd_f64 (float64x2_t a)
9978{
9979 float64_t result;
9980 __asm__ ("fminnmp %d0,%1.2d"
9981 : "=w"(result)
9982 : "w"(a)
9983 : /* No clobbers */);
9984 return result;
9985}
9986
9987__extension__ static __inline float32_t __attribute__ ((__always_inline__))
9988vpminnms_f32 (float32x2_t a)
9989{
9990 float32_t result;
9991 __asm__ ("fminnmp %s0,%1.2s"
9992 : "=w"(result)
9993 : "w"(a)
9994 : /* No clobbers */);
9995 return result;
9996}
9997
9998__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9999vpminq_f32 (float32x4_t a, float32x4_t b)
10000{
10001 float32x4_t result;
10002 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
10003 : "=w"(result)
10004 : "w"(a), "w"(b)
10005 : /* No clobbers */);
10006 return result;
10007}
10008
10009__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10010vpminq_f64 (float64x2_t a, float64x2_t b)
10011{
10012 float64x2_t result;
10013 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
10014 : "=w"(result)
10015 : "w"(a), "w"(b)
10016 : /* No clobbers */);
10017 return result;
10018}
10019
10020__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10021vpminq_s8 (int8x16_t a, int8x16_t b)
10022{
10023 int8x16_t result;
10024 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
10025 : "=w"(result)
10026 : "w"(a), "w"(b)
10027 : /* No clobbers */);
10028 return result;
10029}
10030
10031__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10032vpminq_s16 (int16x8_t a, int16x8_t b)
10033{
10034 int16x8_t result;
10035 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
10036 : "=w"(result)
10037 : "w"(a), "w"(b)
10038 : /* No clobbers */);
10039 return result;
10040}
10041
10042__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10043vpminq_s32 (int32x4_t a, int32x4_t b)
10044{
10045 int32x4_t result;
10046 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
10047 : "=w"(result)
10048 : "w"(a), "w"(b)
10049 : /* No clobbers */);
10050 return result;
10051}
10052
10053__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10054vpminq_u8 (uint8x16_t a, uint8x16_t b)
10055{
10056 uint8x16_t result;
10057 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
10058 : "=w"(result)
10059 : "w"(a), "w"(b)
10060 : /* No clobbers */);
10061 return result;
10062}
10063
10064__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10065vpminq_u16 (uint16x8_t a, uint16x8_t b)
10066{
10067 uint16x8_t result;
10068 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
10069 : "=w"(result)
10070 : "w"(a), "w"(b)
10071 : /* No clobbers */);
10072 return result;
10073}
10074
10075__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10076vpminq_u32 (uint32x4_t a, uint32x4_t b)
10077{
10078 uint32x4_t result;
10079 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
10080 : "=w"(result)
10081 : "w"(a), "w"(b)
10082 : /* No clobbers */);
10083 return result;
10084}
10085
10086__extension__ static __inline float64_t __attribute__ ((__always_inline__))
10087vpminqd_f64 (float64x2_t a)
10088{
10089 float64_t result;
10090 __asm__ ("fminp %d0,%1.2d"
10091 : "=w"(result)
10092 : "w"(a)
10093 : /* No clobbers */);
10094 return result;
10095}
10096
10097__extension__ static __inline float32_t __attribute__ ((__always_inline__))
10098vpmins_f32 (float32x2_t a)
10099{
10100 float32_t result;
10101 __asm__ ("fminp %s0,%1.2s"
10102 : "=w"(result)
10103 : "w"(a)
10104 : /* No clobbers */);
10105 return result;
10106}
10107
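/* vqdmulh_n_* and vqrdmulh_n_* below: saturating doubling multiply of
   each element by the scalar operand, returning the most significant
   half of each product (with rounding for the vqrdmulh_n_* forms).
   The scalar is used via the by-element form of SQDMULH/SQRDMULH; for
   the 16-bit variants it must sit in V0-V15, hence the "x" register
   constraint.  */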
10108__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10109vqdmulh_n_s16 (int16x4_t a, int16_t b)
10110{
10111 int16x4_t result;
10112 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
10113 : "=w"(result)
10114         : "w"(a), "x"(b)
10115         : /* No clobbers */);
10116 return result;
10117}
10118
10119__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10120vqdmulh_n_s32 (int32x2_t a, int32_t b)
10121{
10122 int32x2_t result;
10123 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
10124 : "=w"(result)
10125 : "w"(a), "w"(b)
10126 : /* No clobbers */);
10127 return result;
10128}
10129
10130__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10131vqdmulhq_n_s16 (int16x8_t a, int16_t b)
10132{
10133 int16x8_t result;
10134 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
10135 : "=w"(result)
10136         : "w"(a), "x"(b)
10137         : /* No clobbers */);
10138 return result;
10139}
10140
10141__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10142vqdmulhq_n_s32 (int32x4_t a, int32_t b)
10143{
10144 int32x4_t result;
10145 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
10146 : "=w"(result)
10147 : "w"(a), "w"(b)
10148 : /* No clobbers */);
10149 return result;
10150}
10151
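/* vqmovn_high_* and vqmovun_high_*: saturating narrow of the quad
   operand (SQXTN2/UQXTN2, or SQXTUN2 for the signed-to-unsigned "un"
   forms), written into the high half of the result; the low half is
   the already-narrow vector passed as the first argument.  */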
10152__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10153vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10154{
10155 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10156 __asm__ ("sqxtn2 %0.16b, %1.8h"
10157 : "+w"(result)
10158 : "w"(b)
10159 : /* No clobbers */);
10160 return result;
10161}
10162
10163__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10164vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10165{
10166 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10167 __asm__ ("sqxtn2 %0.8h, %1.4s"
10168 : "+w"(result)
10169 : "w"(b)
10170 : /* No clobbers */);
10171 return result;
10172}
10173
10174__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10175vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10176{
10177 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10178 __asm__ ("sqxtn2 %0.4s, %1.2d"
10179 : "+w"(result)
10180 : "w"(b)
10181 : /* No clobbers */);
10182 return result;
10183}
10184
10185__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10186vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10187{
10188 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10189 __asm__ ("uqxtn2 %0.16b, %1.8h"
10190 : "+w"(result)
10191 : "w"(b)
10192 : /* No clobbers */);
10193 return result;
10194}
10195
10196__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10197vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10198{
10199 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10200 __asm__ ("uqxtn2 %0.8h, %1.4s"
10201 : "+w"(result)
10202 : "w"(b)
10203 : /* No clobbers */);
10204 return result;
10205}
10206
10207__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10208vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10209{
10210 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10211 __asm__ ("uqxtn2 %0.4s, %1.2d"
10212 : "+w"(result)
10213 : "w"(b)
10214 : /* No clobbers */);
10215 return result;
10216}
10217
10218__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10219vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10220{
10221 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10222 __asm__ ("sqxtun2 %0.16b, %1.8h"
10223 : "+w"(result)
10224 : "w"(b)
10225 : /* No clobbers */);
10226 return result;
10227}
10228
10229__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10230vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10231{
10232 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10233 __asm__ ("sqxtun2 %0.8h, %1.4s"
10234 : "+w"(result)
10235 : "w"(b)
10236 : /* No clobbers */);
10237 return result;
10238}
10239
10240__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10241vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10242{
10243 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10244 __asm__ ("sqxtun2 %0.4s, %1.2d"
10245 : "+w"(result)
10246 : "w"(b)
10247 : /* No clobbers */);
10248 return result;
10249}
10250
10251__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10252vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10253{
10254 int16x4_t result;
10255 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10256 : "=w"(result)
10257 : "w"(a), "x"(b)
10258 : /* No clobbers */);
10259 return result;
10260}
10261
10262__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10263vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10264{
10265 int32x2_t result;
10266 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10267 : "=w"(result)
10268 : "w"(a), "w"(b)
10269 : /* No clobbers */);
10270 return result;
10271}
10272
10273__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10274vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10275{
10276 int16x8_t result;
10277 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10278 : "=w"(result)
10279 : "w"(a), "x"(b)
10280 : /* No clobbers */);
10281 return result;
10282}
10283
10284__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10285vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10286{
10287 int32x4_t result;
10288 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10289 : "=w"(result)
10290 : "w"(a), "w"(b)
10291 : /* No clobbers */);
10292 return result;
10293}
10294
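/* The vq{r,}shr{n,un}_high_n_* macros below wrap the SQSHRN2 family
   (SQRSHRN2, UQSHRN2, UQRSHRN2, SQSHRUN2, SQRSHRUN2): shift each
   element of the quad operand right by an immediate, narrow with
   saturation (rounding for the r forms, signed-to-unsigned for the un
   forms) and place the result in the high half, keeping the first
   argument as the low half.  They are macros rather than functions
   because the shift count must be a compile-time constant.  */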
10295#define vqrshrn_high_n_s16(a, b, c) \
10296 __extension__ \
10297 ({ \
10298 int16x8_t b_ = (b); \
10299 int8x8_t a_ = (a); \
10300 int8x16_t result = vcombine_s8 \
10301 (a_, vcreate_s8 \
10302 (__AARCH64_UINT64_C (0x0))); \
10303 __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
10304 : "+w"(result) \
10305 : "w"(b_), "i"(c) \
10306 : /* No clobbers */); \
10307 result; \
10308 })
10309
10310#define vqrshrn_high_n_s32(a, b, c) \
10311 __extension__ \
10312 ({ \
10313 int32x4_t b_ = (b); \
10314 int16x4_t a_ = (a); \
10315 int16x8_t result = vcombine_s16 \
10316 (a_, vcreate_s16 \
10317 (__AARCH64_UINT64_C (0x0))); \
10318 __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
10319 : "+w"(result) \
10320 : "w"(b_), "i"(c) \
10321 : /* No clobbers */); \
10322 result; \
10323 })
10324
10325#define vqrshrn_high_n_s64(a, b, c) \
10326 __extension__ \
10327 ({ \
10328 int64x2_t b_ = (b); \
10329 int32x2_t a_ = (a); \
10330 int32x4_t result = vcombine_s32 \
10331 (a_, vcreate_s32 \
10332 (__AARCH64_UINT64_C (0x0))); \
10333 __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
10334 : "+w"(result) \
10335 : "w"(b_), "i"(c) \
10336 : /* No clobbers */); \
10337 result; \
10338 })
10339
10340#define vqrshrn_high_n_u16(a, b, c) \
10341 __extension__ \
10342 ({ \
10343 uint16x8_t b_ = (b); \
10344 uint8x8_t a_ = (a); \
10345 uint8x16_t result = vcombine_u8 \
10346 (a_, vcreate_u8 \
10347 (__AARCH64_UINT64_C (0x0))); \
10348 __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
10349 : "+w"(result) \
10350 : "w"(b_), "i"(c) \
10351 : /* No clobbers */); \
10352 result; \
10353 })
10354
10355#define vqrshrn_high_n_u32(a, b, c) \
10356 __extension__ \
10357 ({ \
10358 uint32x4_t b_ = (b); \
10359 uint16x4_t a_ = (a); \
10360 uint16x8_t result = vcombine_u16 \
10361 (a_, vcreate_u16 \
10362 (__AARCH64_UINT64_C (0x0))); \
10363 __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
10364 : "+w"(result) \
10365 : "w"(b_), "i"(c) \
10366 : /* No clobbers */); \
10367 result; \
10368 })
10369
10370#define vqrshrn_high_n_u64(a, b, c) \
10371 __extension__ \
10372 ({ \
10373 uint64x2_t b_ = (b); \
10374 uint32x2_t a_ = (a); \
10375 uint32x4_t result = vcombine_u32 \
10376 (a_, vcreate_u32 \
10377 (__AARCH64_UINT64_C (0x0))); \
10378 __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
10379 : "+w"(result) \
10380 : "w"(b_), "i"(c) \
10381 : /* No clobbers */); \
10382 result; \
10383 })
10384
10385#define vqrshrun_high_n_s16(a, b, c) \
10386 __extension__ \
10387 ({ \
10388 int16x8_t b_ = (b); \
10389 uint8x8_t a_ = (a); \
10390 uint8x16_t result = vcombine_u8 \
10391 (a_, vcreate_u8 \
10392 (__AARCH64_UINT64_C (0x0))); \
10393 __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
10394 : "+w"(result) \
10395 : "w"(b_), "i"(c) \
10396 : /* No clobbers */); \
10397 result; \
10398 })
10399
10400#define vqrshrun_high_n_s32(a, b, c) \
10401 __extension__ \
10402 ({ \
10403 int32x4_t b_ = (b); \
10404 uint16x4_t a_ = (a); \
10405 uint16x8_t result = vcombine_u16 \
10406 (a_, vcreate_u16 \
10407 (__AARCH64_UINT64_C (0x0))); \
10408 __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
10409 : "+w"(result) \
10410 : "w"(b_), "i"(c) \
10411 : /* No clobbers */); \
10412 result; \
10413 })
10414
10415#define vqrshrun_high_n_s64(a, b, c) \
10416 __extension__ \
10417 ({ \
10418 int64x2_t b_ = (b); \
10419 uint32x2_t a_ = (a); \
10420 uint32x4_t result = vcombine_u32 \
10421 (a_, vcreate_u32 \
10422 (__AARCH64_UINT64_C (0x0))); \
10423 __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
10424 : "+w"(result) \
10425 : "w"(b_), "i"(c) \
10426 : /* No clobbers */); \
10427 result; \
10428 })
10429
10430#define vqshrn_high_n_s16(a, b, c) \
10431 __extension__ \
10432 ({ \
10433 int16x8_t b_ = (b); \
10434 int8x8_t a_ = (a); \
10435 int8x16_t result = vcombine_s8 \
10436 (a_, vcreate_s8 \
10437 (__AARCH64_UINT64_C (0x0))); \
10438 __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
10439 : "+w"(result) \
10440 : "w"(b_), "i"(c) \
10441 : /* No clobbers */); \
10442 result; \
10443 })
10444
10445#define vqshrn_high_n_s32(a, b, c) \
10446 __extension__ \
10447 ({ \
10448 int32x4_t b_ = (b); \
10449 int16x4_t a_ = (a); \
10450 int16x8_t result = vcombine_s16 \
10451 (a_, vcreate_s16 \
10452 (__AARCH64_UINT64_C (0x0))); \
10453 __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
10454 : "+w"(result) \
10455 : "w"(b_), "i"(c) \
10456 : /* No clobbers */); \
10457 result; \
10458 })
10459
10460#define vqshrn_high_n_s64(a, b, c) \
10461 __extension__ \
10462 ({ \
10463 int64x2_t b_ = (b); \
10464 int32x2_t a_ = (a); \
10465 int32x4_t result = vcombine_s32 \
10466 (a_, vcreate_s32 \
10467 (__AARCH64_UINT64_C (0x0))); \
10468 __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
10469 : "+w"(result) \
10470 : "w"(b_), "i"(c) \
10471 : /* No clobbers */); \
10472 result; \
10473 })
10474
10475#define vqshrn_high_n_u16(a, b, c) \
10476 __extension__ \
10477 ({ \
10478 uint16x8_t b_ = (b); \
10479 uint8x8_t a_ = (a); \
10480 uint8x16_t result = vcombine_u8 \
10481 (a_, vcreate_u8 \
10482 (__AARCH64_UINT64_C (0x0))); \
10483 __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
10484 : "+w"(result) \
10485 : "w"(b_), "i"(c) \
10486 : /* No clobbers */); \
10487 result; \
10488 })
10489
10490#define vqshrn_high_n_u32(a, b, c) \
10491 __extension__ \
10492 ({ \
10493 uint32x4_t b_ = (b); \
10494 uint16x4_t a_ = (a); \
10495 uint16x8_t result = vcombine_u16 \
10496 (a_, vcreate_u16 \
10497 (__AARCH64_UINT64_C (0x0))); \
10498 __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
10499 : "+w"(result) \
10500 : "w"(b_), "i"(c) \
10501 : /* No clobbers */); \
10502 result; \
10503 })
10504
10505#define vqshrn_high_n_u64(a, b, c) \
10506 __extension__ \
10507 ({ \
10508 uint64x2_t b_ = (b); \
10509 uint32x2_t a_ = (a); \
10510 uint32x4_t result = vcombine_u32 \
10511 (a_, vcreate_u32 \
10512 (__AARCH64_UINT64_C (0x0))); \
10513 __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
10514 : "+w"(result) \
10515 : "w"(b_), "i"(c) \
10516 : /* No clobbers */); \
10517 result; \
10518 })
10519
10520#define vqshrun_high_n_s16(a, b, c) \
10521 __extension__ \
10522 ({ \
10523 int16x8_t b_ = (b); \
10524 uint8x8_t a_ = (a); \
10525 uint8x16_t result = vcombine_u8 \
10526 (a_, vcreate_u8 \
10527 (__AARCH64_UINT64_C (0x0))); \
10528 __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
10529 : "+w"(result) \
10530 : "w"(b_), "i"(c) \
10531 : /* No clobbers */); \
10532 result; \
10533 })
10534
10535#define vqshrun_high_n_s32(a, b, c) \
10536 __extension__ \
10537 ({ \
10538 int32x4_t b_ = (b); \
10539 uint16x4_t a_ = (a); \
10540 uint16x8_t result = vcombine_u16 \
10541 (a_, vcreate_u16 \
10542 (__AARCH64_UINT64_C (0x0))); \
10543 __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
10544 : "+w"(result) \
10545 : "w"(b_), "i"(c) \
10546 : /* No clobbers */); \
10547 result; \
10548 })
10549
10550#define vqshrun_high_n_s64(a, b, c) \
10551 __extension__ \
10552 ({ \
10553 int64x2_t b_ = (b); \
10554 uint32x2_t a_ = (a); \
10555 uint32x4_t result = vcombine_u32 \
10556 (a_, vcreate_u32 \
10557 (__AARCH64_UINT64_C (0x0))); \
10558 __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
10559 : "+w"(result) \
10560 : "w"(b_), "i"(c) \
10561 : /* No clobbers */); \
10562 result; \
10563 })
10564
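/* vrbit_* and vrbitq_*: reverse the order of the bits within each byte
   (RBIT on the vector register).  */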
10565__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10566vrbit_s8 (int8x8_t a)
10567{
10568 int8x8_t result;
10569 __asm__ ("rbit %0.8b,%1.8b"
10570 : "=w"(result)
10571 : "w"(a)
10572 : /* No clobbers */);
10573 return result;
10574}
10575
10576__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10577vrbit_u8 (uint8x8_t a)
10578{
10579 uint8x8_t result;
10580 __asm__ ("rbit %0.8b,%1.8b"
10581 : "=w"(result)
10582 : "w"(a)
10583 : /* No clobbers */);
10584 return result;
10585}
10586
10587__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10588vrbitq_s8 (int8x16_t a)
10589{
10590 int8x16_t result;
10591 __asm__ ("rbit %0.16b,%1.16b"
10592 : "=w"(result)
10593 : "w"(a)
10594 : /* No clobbers */);
10595 return result;
10596}
10597
10598__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10599vrbitq_u8 (uint8x16_t a)
10600{
10601 uint8x16_t result;
10602 __asm__ ("rbit %0.16b,%1.16b"
10603 : "=w"(result)
10604 : "w"(a)
10605 : /* No clobbers */);
10606 return result;
10607}
10608
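/* vrecpe_u32 and vrecpeq_u32: URECPE unsigned reciprocal estimate of
   each 32-bit element (the floating-point vrecpe_* variants are
   defined elsewhere in this header).  */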
10609__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10610vrecpe_u32 (uint32x2_t a)
10611{
10612 uint32x2_t result;
10613 __asm__ ("urecpe %0.2s,%1.2s"
10614 : "=w"(result)
10615 : "w"(a)
10616 : /* No clobbers */);
10617 return result;
10618}
10619
10620__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10621vrecpeq_u32 (uint32x4_t a)
10622{
10623 uint32x4_t result;
10624 __asm__ ("urecpe %0.4s,%1.4s"
10625 : "=w"(result)
10626 : "w"(a)
10627 : /* No clobbers */);
10628 return result;
10629}
10630
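/* vrev16_*, vrev32_* and vrev64_*: reverse the order of the elements
   within each 16-, 32- or 64-bit chunk of the vector respectively
   (REV16/REV32/REV64).  */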
10631__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10632vrev16_p8 (poly8x8_t a)
10633{
10634 poly8x8_t result;
10635 __asm__ ("rev16 %0.8b,%1.8b"
10636 : "=w"(result)
10637 : "w"(a)
10638 : /* No clobbers */);
10639 return result;
10640}
10641
10642__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10643vrev16_s8 (int8x8_t a)
10644{
10645 int8x8_t result;
10646 __asm__ ("rev16 %0.8b,%1.8b"
10647 : "=w"(result)
10648 : "w"(a)
10649 : /* No clobbers */);
10650 return result;
10651}
10652
10653__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10654vrev16_u8 (uint8x8_t a)
10655{
10656 uint8x8_t result;
10657 __asm__ ("rev16 %0.8b,%1.8b"
10658 : "=w"(result)
10659 : "w"(a)
10660 : /* No clobbers */);
10661 return result;
10662}
10663
10664__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10665vrev16q_p8 (poly8x16_t a)
10666{
10667 poly8x16_t result;
10668 __asm__ ("rev16 %0.16b,%1.16b"
10669 : "=w"(result)
10670 : "w"(a)
10671 : /* No clobbers */);
10672 return result;
10673}
10674
10675__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10676vrev16q_s8 (int8x16_t a)
10677{
10678 int8x16_t result;
10679 __asm__ ("rev16 %0.16b,%1.16b"
10680 : "=w"(result)
10681 : "w"(a)
10682 : /* No clobbers */);
10683 return result;
10684}
10685
10686__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10687vrev16q_u8 (uint8x16_t a)
10688{
10689 uint8x16_t result;
10690 __asm__ ("rev16 %0.16b,%1.16b"
10691 : "=w"(result)
10692 : "w"(a)
10693 : /* No clobbers */);
10694 return result;
10695}
10696
10697__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10698vrev32_p8 (poly8x8_t a)
10699{
10700 poly8x8_t result;
10701 __asm__ ("rev32 %0.8b,%1.8b"
10702 : "=w"(result)
10703 : "w"(a)
10704 : /* No clobbers */);
10705 return result;
10706}
10707
10708__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10709vrev32_p16 (poly16x4_t a)
10710{
10711 poly16x4_t result;
10712 __asm__ ("rev32 %0.4h,%1.4h"
10713 : "=w"(result)
10714 : "w"(a)
10715 : /* No clobbers */);
10716 return result;
10717}
10718
10719__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10720vrev32_s8 (int8x8_t a)
10721{
10722 int8x8_t result;
10723 __asm__ ("rev32 %0.8b,%1.8b"
10724 : "=w"(result)
10725 : "w"(a)
10726 : /* No clobbers */);
10727 return result;
10728}
10729
10730__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10731vrev32_s16 (int16x4_t a)
10732{
10733 int16x4_t result;
10734 __asm__ ("rev32 %0.4h,%1.4h"
10735 : "=w"(result)
10736 : "w"(a)
10737 : /* No clobbers */);
10738 return result;
10739}
10740
10741__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10742vrev32_u8 (uint8x8_t a)
10743{
10744 uint8x8_t result;
10745 __asm__ ("rev32 %0.8b,%1.8b"
10746 : "=w"(result)
10747 : "w"(a)
10748 : /* No clobbers */);
10749 return result;
10750}
10751
10752__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10753vrev32_u16 (uint16x4_t a)
10754{
10755 uint16x4_t result;
10756 __asm__ ("rev32 %0.4h,%1.4h"
10757 : "=w"(result)
10758 : "w"(a)
10759 : /* No clobbers */);
10760 return result;
10761}
10762
10763__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10764vrev32q_p8 (poly8x16_t a)
10765{
10766 poly8x16_t result;
10767 __asm__ ("rev32 %0.16b,%1.16b"
10768 : "=w"(result)
10769 : "w"(a)
10770 : /* No clobbers */);
10771 return result;
10772}
10773
10774__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10775vrev32q_p16 (poly16x8_t a)
10776{
10777 poly16x8_t result;
10778 __asm__ ("rev32 %0.8h,%1.8h"
10779 : "=w"(result)
10780 : "w"(a)
10781 : /* No clobbers */);
10782 return result;
10783}
10784
10785__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10786vrev32q_s8 (int8x16_t a)
10787{
10788 int8x16_t result;
10789 __asm__ ("rev32 %0.16b,%1.16b"
10790 : "=w"(result)
10791 : "w"(a)
10792 : /* No clobbers */);
10793 return result;
10794}
10795
10796__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10797vrev32q_s16 (int16x8_t a)
10798{
10799 int16x8_t result;
10800 __asm__ ("rev32 %0.8h,%1.8h"
10801 : "=w"(result)
10802 : "w"(a)
10803 : /* No clobbers */);
10804 return result;
10805}
10806
10807__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10808vrev32q_u8 (uint8x16_t a)
10809{
10810 uint8x16_t result;
10811 __asm__ ("rev32 %0.16b,%1.16b"
10812 : "=w"(result)
10813 : "w"(a)
10814 : /* No clobbers */);
10815 return result;
10816}
10817
10818__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10819vrev32q_u16 (uint16x8_t a)
10820{
10821 uint16x8_t result;
10822 __asm__ ("rev32 %0.8h,%1.8h"
10823 : "=w"(result)
10824 : "w"(a)
10825 : /* No clobbers */);
10826 return result;
10827}
10828
10829__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10830vrev64_f32 (float32x2_t a)
10831{
10832 float32x2_t result;
10833 __asm__ ("rev64 %0.2s,%1.2s"
10834 : "=w"(result)
10835 : "w"(a)
10836 : /* No clobbers */);
10837 return result;
10838}
10839
10840__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10841vrev64_p8 (poly8x8_t a)
10842{
10843 poly8x8_t result;
10844 __asm__ ("rev64 %0.8b,%1.8b"
10845 : "=w"(result)
10846 : "w"(a)
10847 : /* No clobbers */);
10848 return result;
10849}
10850
10851__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10852vrev64_p16 (poly16x4_t a)
10853{
10854 poly16x4_t result;
10855 __asm__ ("rev64 %0.4h,%1.4h"
10856 : "=w"(result)
10857 : "w"(a)
10858 : /* No clobbers */);
10859 return result;
10860}
10861
10862__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10863vrev64_s8 (int8x8_t a)
10864{
10865 int8x8_t result;
10866 __asm__ ("rev64 %0.8b,%1.8b"
10867 : "=w"(result)
10868 : "w"(a)
10869 : /* No clobbers */);
10870 return result;
10871}
10872
10873__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10874vrev64_s16 (int16x4_t a)
10875{
10876 int16x4_t result;
10877 __asm__ ("rev64 %0.4h,%1.4h"
10878 : "=w"(result)
10879 : "w"(a)
10880 : /* No clobbers */);
10881 return result;
10882}
10883
10884__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10885vrev64_s32 (int32x2_t a)
10886{
10887 int32x2_t result;
10888 __asm__ ("rev64 %0.2s,%1.2s"
10889 : "=w"(result)
10890 : "w"(a)
10891 : /* No clobbers */);
10892 return result;
10893}
10894
10895__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10896vrev64_u8 (uint8x8_t a)
10897{
10898 uint8x8_t result;
10899 __asm__ ("rev64 %0.8b,%1.8b"
10900 : "=w"(result)
10901 : "w"(a)
10902 : /* No clobbers */);
10903 return result;
10904}
10905
10906__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10907vrev64_u16 (uint16x4_t a)
10908{
10909 uint16x4_t result;
10910 __asm__ ("rev64 %0.4h,%1.4h"
10911 : "=w"(result)
10912 : "w"(a)
10913 : /* No clobbers */);
10914 return result;
10915}
10916
10917__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10918vrev64_u32 (uint32x2_t a)
10919{
10920 uint32x2_t result;
10921 __asm__ ("rev64 %0.2s,%1.2s"
10922 : "=w"(result)
10923 : "w"(a)
10924 : /* No clobbers */);
10925 return result;
10926}
10927
10928__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10929vrev64q_f32 (float32x4_t a)
10930{
10931 float32x4_t result;
10932 __asm__ ("rev64 %0.4s,%1.4s"
10933 : "=w"(result)
10934 : "w"(a)
10935 : /* No clobbers */);
10936 return result;
10937}
10938
10939__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10940vrev64q_p8 (poly8x16_t a)
10941{
10942 poly8x16_t result;
10943 __asm__ ("rev64 %0.16b,%1.16b"
10944 : "=w"(result)
10945 : "w"(a)
10946 : /* No clobbers */);
10947 return result;
10948}
10949
10950__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10951vrev64q_p16 (poly16x8_t a)
10952{
10953 poly16x8_t result;
10954 __asm__ ("rev64 %0.8h,%1.8h"
10955 : "=w"(result)
10956 : "w"(a)
10957 : /* No clobbers */);
10958 return result;
10959}
10960
10961__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10962vrev64q_s8 (int8x16_t a)
10963{
10964 int8x16_t result;
10965 __asm__ ("rev64 %0.16b,%1.16b"
10966 : "=w"(result)
10967 : "w"(a)
10968 : /* No clobbers */);
10969 return result;
10970}
10971
10972__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10973vrev64q_s16 (int16x8_t a)
10974{
10975 int16x8_t result;
10976 __asm__ ("rev64 %0.8h,%1.8h"
10977 : "=w"(result)
10978 : "w"(a)
10979 : /* No clobbers */);
10980 return result;
10981}
10982
10983__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10984vrev64q_s32 (int32x4_t a)
10985{
10986 int32x4_t result;
10987 __asm__ ("rev64 %0.4s,%1.4s"
10988 : "=w"(result)
10989 : "w"(a)
10990 : /* No clobbers */);
10991 return result;
10992}
10993
10994__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10995vrev64q_u8 (uint8x16_t a)
10996{
10997 uint8x16_t result;
10998 __asm__ ("rev64 %0.16b,%1.16b"
10999 : "=w"(result)
11000 : "w"(a)
11001 : /* No clobbers */);
11002 return result;
11003}
11004
11005__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11006vrev64q_u16 (uint16x8_t a)
11007{
11008 uint16x8_t result;
11009 __asm__ ("rev64 %0.8h,%1.8h"
11010 : "=w"(result)
11011 : "w"(a)
11012 : /* No clobbers */);
11013 return result;
11014}
11015
11016__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11017vrev64q_u32 (uint32x4_t a)
11018{
11019 uint32x4_t result;
11020 __asm__ ("rev64 %0.4s,%1.4s"
11021 : "=w"(result)
11022 : "w"(a)
11023 : /* No clobbers */);
11024 return result;
11025}
11026
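/* vrshrn_high_n_* and vrshrn_n_* below: rounding shift right by an
   immediate and narrow, without saturation (RSHRN/RSHRN2); the _high_
   forms write the upper half of a quad result whose lower half is the
   first argument.  */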
11027#define vrshrn_high_n_s16(a, b, c) \
11028 __extension__ \
11029 ({ \
11030 int16x8_t b_ = (b); \
11031 int8x8_t a_ = (a); \
11032 int8x16_t result = vcombine_s8 \
11033 (a_, vcreate_s8 \
11034 (__AARCH64_UINT64_C (0x0))); \
11035 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
11036 : "+w"(result) \
11037 : "w"(b_), "i"(c) \
11038 : /* No clobbers */); \
11039 result; \
11040 })
11041
11042#define vrshrn_high_n_s32(a, b, c) \
11043 __extension__ \
11044 ({ \
11045 int32x4_t b_ = (b); \
11046 int16x4_t a_ = (a); \
11047 int16x8_t result = vcombine_s16 \
11048 (a_, vcreate_s16 \
11049 (__AARCH64_UINT64_C (0x0))); \
11050 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
11051 : "+w"(result) \
11052 : "w"(b_), "i"(c) \
11053 : /* No clobbers */); \
11054 result; \
11055 })
11056
11057#define vrshrn_high_n_s64(a, b, c) \
11058 __extension__ \
11059 ({ \
11060 int64x2_t b_ = (b); \
11061 int32x2_t a_ = (a); \
11062 int32x4_t result = vcombine_s32 \
11063 (a_, vcreate_s32 \
11064 (__AARCH64_UINT64_C (0x0))); \
11065 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
11066 : "+w"(result) \
11067 : "w"(b_), "i"(c) \
11068 : /* No clobbers */); \
11069 result; \
11070 })
11071
11072#define vrshrn_high_n_u16(a, b, c) \
11073 __extension__ \
11074 ({ \
11075 uint16x8_t b_ = (b); \
11076 uint8x8_t a_ = (a); \
11077 uint8x16_t result = vcombine_u8 \
11078 (a_, vcreate_u8 \
11079 (__AARCH64_UINT64_C (0x0))); \
11080 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
11081 : "+w"(result) \
11082 : "w"(b_), "i"(c) \
11083 : /* No clobbers */); \
11084 result; \
11085 })
11086
11087#define vrshrn_high_n_u32(a, b, c) \
11088 __extension__ \
11089 ({ \
11090 uint32x4_t b_ = (b); \
11091 uint16x4_t a_ = (a); \
11092 uint16x8_t result = vcombine_u16 \
11093 (a_, vcreate_u16 \
11094 (__AARCH64_UINT64_C (0x0))); \
11095 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
11096 : "+w"(result) \
11097 : "w"(b_), "i"(c) \
11098 : /* No clobbers */); \
11099 result; \
11100 })
11101
11102#define vrshrn_high_n_u64(a, b, c) \
11103 __extension__ \
11104 ({ \
11105 uint64x2_t b_ = (b); \
11106 uint32x2_t a_ = (a); \
11107 uint32x4_t result = vcombine_u32 \
11108 (a_, vcreate_u32 \
11109 (__AARCH64_UINT64_C (0x0))); \
11110 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
11111 : "+w"(result) \
11112 : "w"(b_), "i"(c) \
11113 : /* No clobbers */); \
11114 result; \
11115 })
11116
11117#define vrshrn_n_s16(a, b) \
11118 __extension__ \
11119 ({ \
11120 int16x8_t a_ = (a); \
11121 int8x8_t result; \
11122 __asm__ ("rshrn %0.8b,%1.8h,%2" \
11123 : "=w"(result) \
11124 : "w"(a_), "i"(b) \
11125 : /* No clobbers */); \
11126 result; \
11127 })
11128
11129#define vrshrn_n_s32(a, b) \
11130 __extension__ \
11131 ({ \
11132 int32x4_t a_ = (a); \
11133 int16x4_t result; \
11134 __asm__ ("rshrn %0.4h,%1.4s,%2" \
11135 : "=w"(result) \
11136 : "w"(a_), "i"(b) \
11137 : /* No clobbers */); \
11138 result; \
11139 })
11140
11141#define vrshrn_n_s64(a, b) \
11142 __extension__ \
11143 ({ \
11144 int64x2_t a_ = (a); \
11145 int32x2_t result; \
11146 __asm__ ("rshrn %0.2s,%1.2d,%2" \
11147 : "=w"(result) \
11148 : "w"(a_), "i"(b) \
11149 : /* No clobbers */); \
11150 result; \
11151 })
11152
11153#define vrshrn_n_u16(a, b) \
11154 __extension__ \
11155 ({ \
11156 uint16x8_t a_ = (a); \
11157 uint8x8_t result; \
11158 __asm__ ("rshrn %0.8b,%1.8h,%2" \
11159 : "=w"(result) \
11160 : "w"(a_), "i"(b) \
11161 : /* No clobbers */); \
11162 result; \
11163 })
11164
11165#define vrshrn_n_u32(a, b) \
11166 __extension__ \
11167 ({ \
11168 uint32x4_t a_ = (a); \
11169 uint16x4_t result; \
11170 __asm__ ("rshrn %0.4h,%1.4s,%2" \
11171 : "=w"(result) \
11172 : "w"(a_), "i"(b) \
11173 : /* No clobbers */); \
11174 result; \
11175 })
11176
11177#define vrshrn_n_u64(a, b) \
11178 __extension__ \
11179 ({ \
11180 uint64x2_t a_ = (a); \
11181 uint32x2_t result; \
11182 __asm__ ("rshrn %0.2s,%1.2d,%2" \
11183 : "=w"(result) \
11184 : "w"(a_), "i"(b) \
11185 : /* No clobbers */); \
11186 result; \
11187 })
11188
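/* vrsqrte_* and vrsqrts_*: reciprocal square-root estimate
   (FRSQRTE/URSQRTE) and Newton-Raphson step (FRSQRTS, which computes
   (3 - a*b)/2).  A refined 1/sqrt(d) can be built as, for example:
     float32x2_t x = vrsqrte_f32 (d);
     x = vmul_f32 (x, vrsqrts_f32 (vmul_f32 (d, x), x));
   (illustrative only; vmul_f32 is defined elsewhere in this header).  */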
11189__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11190vrsqrte_f32 (float32x2_t a)
11191{
11192 float32x2_t result;
11193 __asm__ ("frsqrte %0.2s,%1.2s"
11194 : "=w"(result)
11195 : "w"(a)
11196 : /* No clobbers */);
11197 return result;
11198}
11199
11200__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11201vrsqrte_f64 (float64x1_t a)
11202{
11203 float64x1_t result;
11204 __asm__ ("frsqrte %d0,%d1"
11205 : "=w"(result)
11206 : "w"(a)
11207 : /* No clobbers */);
11208 return result;
11209}
11210
11211__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11212vrsqrte_u32 (uint32x2_t a)
11213{
11214 uint32x2_t result;
11215 __asm__ ("ursqrte %0.2s,%1.2s"
11216 : "=w"(result)
11217 : "w"(a)
11218 : /* No clobbers */);
11219 return result;
11220}
11221
11222__extension__ static __inline float64_t __attribute__ ((__always_inline__))
11223vrsqrted_f64 (float64_t a)
11224{
11225 float64_t result;
11226 __asm__ ("frsqrte %d0,%d1"
11227 : "=w"(result)
11228 : "w"(a)
11229 : /* No clobbers */);
11230 return result;
11231}
11232
11233__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11234vrsqrteq_f32 (float32x4_t a)
11235{
11236 float32x4_t result;
11237 __asm__ ("frsqrte %0.4s,%1.4s"
11238 : "=w"(result)
11239 : "w"(a)
11240 : /* No clobbers */);
11241 return result;
11242}
11243
11244__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11245vrsqrteq_f64 (float64x2_t a)
11246{
11247 float64x2_t result;
11248 __asm__ ("frsqrte %0.2d,%1.2d"
11249 : "=w"(result)
11250 : "w"(a)
11251 : /* No clobbers */);
11252 return result;
11253}
11254
11255__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11256vrsqrteq_u32 (uint32x4_t a)
11257{
11258 uint32x4_t result;
11259 __asm__ ("ursqrte %0.4s,%1.4s"
11260 : "=w"(result)
11261 : "w"(a)
11262 : /* No clobbers */);
11263 return result;
11264}
11265
11266__extension__ static __inline float32_t __attribute__ ((__always_inline__))
11267vrsqrtes_f32 (float32_t a)
11268{
11269 float32_t result;
11270 __asm__ ("frsqrte %s0,%s1"
11271 : "=w"(result)
11272 : "w"(a)
11273 : /* No clobbers */);
11274 return result;
11275}
11276
11277__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11278vrsqrts_f32 (float32x2_t a, float32x2_t b)
11279{
11280 float32x2_t result;
11281 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
11282 : "=w"(result)
11283 : "w"(a), "w"(b)
11284 : /* No clobbers */);
11285 return result;
11286}
11287
11288__extension__ static __inline float64_t __attribute__ ((__always_inline__))
11289vrsqrtsd_f64 (float64_t a, float64_t b)
11290{
11291 float64_t result;
11292 __asm__ ("frsqrts %d0,%d1,%d2"
11293 : "=w"(result)
11294 : "w"(a), "w"(b)
11295 : /* No clobbers */);
11296 return result;
11297}
11298
11299__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11300vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
11301{
11302 float32x4_t result;
11303 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
11304 : "=w"(result)
11305 : "w"(a), "w"(b)
11306 : /* No clobbers */);
11307 return result;
11308}
11309
11310__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11311vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
11312{
11313 float64x2_t result;
11314 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11315 : "=w"(result)
11316 : "w"(a), "w"(b)
11317 : /* No clobbers */);
11318 return result;
11319}
11320
11321__extension__ static __inline float32_t __attribute__ ((__always_inline__))
11322vrsqrtss_f32 (float32_t a, float32_t b)
11323{
11324 float32_t result;
11325 __asm__ ("frsqrts %s0,%s1,%s2"
11326 : "=w"(result)
11327 : "w"(a), "w"(b)
11328 : /* No clobbers */);
11329 return result;
11330}
11331
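/* Note: vrsrtsq_f64 below appears to be a misspelling of vrsqrtsq_f64
   (defined above); it is presumably retained only for source
   compatibility.  */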
11332__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11333vrsrtsq_f64 (float64x2_t a, float64x2_t b)
11334{
11335 float64x2_t result;
11336 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
11337 : "=w"(result)
11338 : "w"(a), "w"(b)
11339 : /* No clobbers */);
11340 return result;
11341}
11342
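/* vrsubhn_high_* and vrsubhn_*: rounding subtract, keeping the most
   significant half of each difference, narrowed (RSUBHN/RSUBHN2); the
   _high_ forms place the narrowed result in the upper half of a quad
   vector whose lower half is the first argument.  */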
11343__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11344vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11345{
11346 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11347 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11348 : "+w"(result)
11349 : "w"(b), "w"(c)
11350 : /* No clobbers */);
11351 return result;
11352}
11353
11354__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11355vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11356{
11357 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11358 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11359 : "+w"(result)
11360 : "w"(b), "w"(c)
11361 : /* No clobbers */);
11362 return result;
11363}
11364
11365__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11366vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11367{
11368 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11369 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11370 : "+w"(result)
11371 : "w"(b), "w"(c)
11372 : /* No clobbers */);
11373 return result;
11374}
11375
11376__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11377vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11378{
11379 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11380 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
11381 : "+w"(result)
11382 : "w"(b), "w"(c)
11383 : /* No clobbers */);
11384 return result;
11385}
11386
11387__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11388vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11389{
11390 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11391 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
11392 : "+w"(result)
11393 : "w"(b), "w"(c)
11394 : /* No clobbers */);
11395 return result;
11396}
11397
11398__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11399vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11400{
11401 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11402 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
11403 : "+w"(result)
11404 : "w"(b), "w"(c)
11405 : /* No clobbers */);
11406 return result;
11407}
11408
11409__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11410vrsubhn_s16 (int16x8_t a, int16x8_t b)
11411{
11412 int8x8_t result;
11413 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11414 : "=w"(result)
11415 : "w"(a), "w"(b)
11416 : /* No clobbers */);
11417 return result;
11418}
11419
11420__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11421vrsubhn_s32 (int32x4_t a, int32x4_t b)
11422{
11423 int16x4_t result;
11424 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11425 : "=w"(result)
11426 : "w"(a), "w"(b)
11427 : /* No clobbers */);
11428 return result;
11429}
11430
11431__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11432vrsubhn_s64 (int64x2_t a, int64x2_t b)
11433{
11434 int32x2_t result;
11435 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11436 : "=w"(result)
11437 : "w"(a), "w"(b)
11438 : /* No clobbers */);
11439 return result;
11440}
11441
11442__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11443vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
11444{
11445 uint8x8_t result;
11446 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
11447 : "=w"(result)
11448 : "w"(a), "w"(b)
11449 : /* No clobbers */);
11450 return result;
11451}
11452
11453__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11454vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
11455{
11456 uint16x4_t result;
11457 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
11458 : "=w"(result)
11459 : "w"(a), "w"(b)
11460 : /* No clobbers */);
11461 return result;
11462}
11463
11464__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11465vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
11466{
11467 uint32x2_t result;
11468 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
11469 : "=w"(result)
11470 : "w"(a), "w"(b)
11471 : /* No clobbers */);
11472 return result;
11473}
11474
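/* vset_lane_* and vsetq_lane_*: return a copy of the vector operand
   with one lane replaced by the scalar operand (INS from a general
   register); the lane index must be a compile-time constant.
   Illustrative use (example only):
     int32x2_t v = vdup_n_s32 (0);
     v = vset_lane_s32 (7, v, 1);
   giving v == { 0, 7 } (vdup_n_s32 is defined elsewhere in this
   header).  */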
11475#define vset_lane_f32(a, b, c) \
11476 __extension__ \
11477 ({ \
11478 float32x2_t b_ = (b); \
11479 float32_t a_ = (a); \
11480 float32x2_t result; \
11481 __asm__ ("ins %0.s[%3], %w1" \
11482 : "=w"(result) \
11483 : "r"(a_), "0"(b_), "i"(c) \
11484 : /* No clobbers */); \
11485 result; \
11486 })
11487
11488#define vset_lane_f64(a, b, c) \
11489 __extension__ \
11490 ({ \
11491 float64x1_t b_ = (b); \
11492 float64_t a_ = (a); \
11493 float64x1_t result; \
11494 __asm__ ("ins %0.d[%3], %x1" \
11495 : "=w"(result) \
11496 : "r"(a_), "0"(b_), "i"(c) \
11497 : /* No clobbers */); \
11498 result; \
11499 })
11500
11501#define vset_lane_p8(a, b, c) \
11502 __extension__ \
11503 ({ \
11504 poly8x8_t b_ = (b); \
11505 poly8_t a_ = (a); \
11506 poly8x8_t result; \
11507 __asm__ ("ins %0.b[%3], %w1" \
11508 : "=w"(result) \
11509 : "r"(a_), "0"(b_), "i"(c) \
11510 : /* No clobbers */); \
11511 result; \
11512 })
11513
11514#define vset_lane_p16(a, b, c) \
11515 __extension__ \
11516 ({ \
11517 poly16x4_t b_ = (b); \
11518 poly16_t a_ = (a); \
11519 poly16x4_t result; \
11520 __asm__ ("ins %0.h[%3], %w1" \
11521 : "=w"(result) \
11522 : "r"(a_), "0"(b_), "i"(c) \
11523 : /* No clobbers */); \
11524 result; \
11525 })
11526
11527#define vset_lane_s8(a, b, c) \
11528 __extension__ \
11529 ({ \
11530 int8x8_t b_ = (b); \
11531 int8_t a_ = (a); \
11532 int8x8_t result; \
11533 __asm__ ("ins %0.b[%3], %w1" \
11534 : "=w"(result) \
11535 : "r"(a_), "0"(b_), "i"(c) \
11536 : /* No clobbers */); \
11537 result; \
11538 })
11539
11540#define vset_lane_s16(a, b, c) \
11541 __extension__ \
11542 ({ \
11543 int16x4_t b_ = (b); \
11544 int16_t a_ = (a); \
11545 int16x4_t result; \
11546 __asm__ ("ins %0.h[%3], %w1" \
11547 : "=w"(result) \
11548 : "r"(a_), "0"(b_), "i"(c) \
11549 : /* No clobbers */); \
11550 result; \
11551 })
11552
11553#define vset_lane_s32(a, b, c) \
11554 __extension__ \
11555 ({ \
11556 int32x2_t b_ = (b); \
11557 int32_t a_ = (a); \
11558 int32x2_t result; \
11559 __asm__ ("ins %0.s[%3], %w1" \
11560 : "=w"(result) \
11561 : "r"(a_), "0"(b_), "i"(c) \
11562 : /* No clobbers */); \
11563 result; \
11564 })
11565
11566#define vset_lane_s64(a, b, c) \
11567 __extension__ \
11568 ({ \
11569 int64x1_t b_ = (b); \
11570 int64_t a_ = (a); \
11571 int64x1_t result; \
11572 __asm__ ("ins %0.d[%3], %x1" \
11573 : "=w"(result) \
11574 : "r"(a_), "0"(b_), "i"(c) \
11575 : /* No clobbers */); \
11576 result; \
11577 })
11578
11579#define vset_lane_u8(a, b, c) \
11580 __extension__ \
11581 ({ \
11582 uint8x8_t b_ = (b); \
11583 uint8_t a_ = (a); \
11584 uint8x8_t result; \
11585 __asm__ ("ins %0.b[%3], %w1" \
11586 : "=w"(result) \
11587 : "r"(a_), "0"(b_), "i"(c) \
11588 : /* No clobbers */); \
11589 result; \
11590 })
11591
11592#define vset_lane_u16(a, b, c) \
11593 __extension__ \
11594 ({ \
11595 uint16x4_t b_ = (b); \
11596 uint16_t a_ = (a); \
11597 uint16x4_t result; \
11598 __asm__ ("ins %0.h[%3], %w1" \
11599 : "=w"(result) \
11600 : "r"(a_), "0"(b_), "i"(c) \
11601 : /* No clobbers */); \
11602 result; \
11603 })
11604
11605#define vset_lane_u32(a, b, c) \
11606 __extension__ \
11607 ({ \
11608 uint32x2_t b_ = (b); \
11609 uint32_t a_ = (a); \
11610 uint32x2_t result; \
11611 __asm__ ("ins %0.s[%3], %w1" \
11612 : "=w"(result) \
11613 : "r"(a_), "0"(b_), "i"(c) \
11614 : /* No clobbers */); \
11615 result; \
11616 })
11617
11618#define vset_lane_u64(a, b, c) \
11619 __extension__ \
11620 ({ \
11621 uint64x1_t b_ = (b); \
11622 uint64_t a_ = (a); \
11623 uint64x1_t result; \
11624 __asm__ ("ins %0.d[%3], %x1" \
11625 : "=w"(result) \
11626 : "r"(a_), "0"(b_), "i"(c) \
11627 : /* No clobbers */); \
11628 result; \
11629 })
11630
11631#define vsetq_lane_f32(a, b, c) \
11632 __extension__ \
11633 ({ \
11634 float32x4_t b_ = (b); \
11635 float32_t a_ = (a); \
11636 float32x4_t result; \
11637 __asm__ ("ins %0.s[%3], %w1" \
11638 : "=w"(result) \
11639 : "r"(a_), "0"(b_), "i"(c) \
11640 : /* No clobbers */); \
11641 result; \
11642 })
11643
11644#define vsetq_lane_f64(a, b, c) \
11645 __extension__ \
11646 ({ \
11647 float64x2_t b_ = (b); \
11648 float64_t a_ = (a); \
11649 float64x2_t result; \
11650 __asm__ ("ins %0.d[%3], %x1" \
11651 : "=w"(result) \
11652 : "r"(a_), "0"(b_), "i"(c) \
11653 : /* No clobbers */); \
11654 result; \
11655 })
11656
11657#define vsetq_lane_p8(a, b, c) \
11658 __extension__ \
11659 ({ \
11660 poly8x16_t b_ = (b); \
11661 poly8_t a_ = (a); \
11662 poly8x16_t result; \
11663 __asm__ ("ins %0.b[%3], %w1" \
11664 : "=w"(result) \
11665 : "r"(a_), "0"(b_), "i"(c) \
11666 : /* No clobbers */); \
11667 result; \
11668 })
11669
11670#define vsetq_lane_p16(a, b, c) \
11671 __extension__ \
11672 ({ \
11673 poly16x8_t b_ = (b); \
11674 poly16_t a_ = (a); \
11675 poly16x8_t result; \
11676 __asm__ ("ins %0.h[%3], %w1" \
11677 : "=w"(result) \
11678 : "r"(a_), "0"(b_), "i"(c) \
11679 : /* No clobbers */); \
11680 result; \
11681 })
11682
11683#define vsetq_lane_s8(a, b, c) \
11684 __extension__ \
11685 ({ \
11686 int8x16_t b_ = (b); \
11687 int8_t a_ = (a); \
11688 int8x16_t result; \
11689 __asm__ ("ins %0.b[%3], %w1" \
11690 : "=w"(result) \
11691 : "r"(a_), "0"(b_), "i"(c) \
11692 : /* No clobbers */); \
11693 result; \
11694 })
11695
11696#define vsetq_lane_s16(a, b, c) \
11697 __extension__ \
11698 ({ \
11699 int16x8_t b_ = (b); \
11700 int16_t a_ = (a); \
11701 int16x8_t result; \
11702 __asm__ ("ins %0.h[%3], %w1" \
11703 : "=w"(result) \
11704 : "r"(a_), "0"(b_), "i"(c) \
11705 : /* No clobbers */); \
11706 result; \
11707 })
11708
11709#define vsetq_lane_s32(a, b, c) \
11710 __extension__ \
11711 ({ \
11712 int32x4_t b_ = (b); \
11713 int32_t a_ = (a); \
11714 int32x4_t result; \
11715 __asm__ ("ins %0.s[%3], %w1" \
11716 : "=w"(result) \
11717 : "r"(a_), "0"(b_), "i"(c) \
11718 : /* No clobbers */); \
11719 result; \
11720 })
11721
11722#define vsetq_lane_s64(a, b, c) \
11723 __extension__ \
11724 ({ \
11725 int64x2_t b_ = (b); \
11726 int64_t a_ = (a); \
11727 int64x2_t result; \
11728 __asm__ ("ins %0.d[%3], %x1" \
11729 : "=w"(result) \
11730 : "r"(a_), "0"(b_), "i"(c) \
11731 : /* No clobbers */); \
11732 result; \
11733 })
11734
11735#define vsetq_lane_u8(a, b, c) \
11736 __extension__ \
11737 ({ \
11738 uint8x16_t b_ = (b); \
11739 uint8_t a_ = (a); \
11740 uint8x16_t result; \
11741 __asm__ ("ins %0.b[%3], %w1" \
11742 : "=w"(result) \
11743 : "r"(a_), "0"(b_), "i"(c) \
11744 : /* No clobbers */); \
11745 result; \
11746 })
11747
11748#define vsetq_lane_u16(a, b, c) \
11749 __extension__ \
11750 ({ \
11751 uint16x8_t b_ = (b); \
11752 uint16_t a_ = (a); \
11753 uint16x8_t result; \
11754 __asm__ ("ins %0.h[%3], %w1" \
11755 : "=w"(result) \
11756 : "r"(a_), "0"(b_), "i"(c) \
11757 : /* No clobbers */); \
11758 result; \
11759 })
11760
11761#define vsetq_lane_u32(a, b, c) \
11762 __extension__ \
11763 ({ \
11764 uint32x4_t b_ = (b); \
11765 uint32_t a_ = (a); \
11766 uint32x4_t result; \
11767 __asm__ ("ins %0.s[%3], %w1" \
11768 : "=w"(result) \
11769 : "r"(a_), "0"(b_), "i"(c) \
11770 : /* No clobbers */); \
11771 result; \
11772 })
11773
11774#define vsetq_lane_u64(a, b, c) \
11775 __extension__ \
11776 ({ \
11777 uint64x2_t b_ = (b); \
11778 uint64_t a_ = (a); \
11779 uint64x2_t result; \
11780 __asm__ ("ins %0.d[%3], %x1" \
11781 : "=w"(result) \
11782 : "r"(a_), "0"(b_), "i"(c) \
11783 : /* No clobbers */); \
11784 result; \
11785 })
11786
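/* vshrn_high_n_* and vshrn_n_*: truncating (non-rounding,
   non-saturating) shift right by an immediate and narrow (SHRN/SHRN2);
   the _high_ forms write the upper half of a quad result.  */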
11787#define vshrn_high_n_s16(a, b, c) \
11788 __extension__ \
11789 ({ \
11790 int16x8_t b_ = (b); \
11791 int8x8_t a_ = (a); \
11792 int8x16_t result = vcombine_s8 \
11793 (a_, vcreate_s8 \
11794 (__AARCH64_UINT64_C (0x0))); \
11795 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11796 : "+w"(result) \
11797 : "w"(b_), "i"(c) \
11798 : /* No clobbers */); \
11799 result; \
11800 })
11801
11802#define vshrn_high_n_s32(a, b, c) \
11803 __extension__ \
11804 ({ \
11805 int32x4_t b_ = (b); \
11806 int16x4_t a_ = (a); \
11807 int16x8_t result = vcombine_s16 \
11808 (a_, vcreate_s16 \
11809 (__AARCH64_UINT64_C (0x0))); \
11810 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11811 : "+w"(result) \
11812 : "w"(b_), "i"(c) \
11813 : /* No clobbers */); \
11814 result; \
11815 })
11816
11817#define vshrn_high_n_s64(a, b, c) \
11818 __extension__ \
11819 ({ \
11820 int64x2_t b_ = (b); \
11821 int32x2_t a_ = (a); \
11822 int32x4_t result = vcombine_s32 \
11823 (a_, vcreate_s32 \
11824 (__AARCH64_UINT64_C (0x0))); \
11825 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11826 : "+w"(result) \
11827 : "w"(b_), "i"(c) \
11828 : /* No clobbers */); \
11829 result; \
11830 })
11831
11832#define vshrn_high_n_u16(a, b, c) \
11833 __extension__ \
11834 ({ \
11835 uint16x8_t b_ = (b); \
11836 uint8x8_t a_ = (a); \
11837 uint8x16_t result = vcombine_u8 \
11838 (a_, vcreate_u8 \
11839 (__AARCH64_UINT64_C (0x0))); \
11840 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
11841 : "+w"(result) \
11842 : "w"(b_), "i"(c) \
11843 : /* No clobbers */); \
11844 result; \
11845 })
11846
11847#define vshrn_high_n_u32(a, b, c) \
11848 __extension__ \
11849 ({ \
11850 uint32x4_t b_ = (b); \
11851 uint16x4_t a_ = (a); \
11852 uint16x8_t result = vcombine_u16 \
11853 (a_, vcreate_u16 \
11854 (__AARCH64_UINT64_C (0x0))); \
11855 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
11856 : "+w"(result) \
11857 : "w"(b_), "i"(c) \
11858 : /* No clobbers */); \
11859 result; \
11860 })
11861
11862#define vshrn_high_n_u64(a, b, c) \
11863 __extension__ \
11864 ({ \
11865 uint64x2_t b_ = (b); \
11866 uint32x2_t a_ = (a); \
11867 uint32x4_t result = vcombine_u32 \
11868 (a_, vcreate_u32 \
11869 (__AARCH64_UINT64_C (0x0))); \
11870 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
11871 : "+w"(result) \
11872 : "w"(b_), "i"(c) \
11873 : /* No clobbers */); \
11874 result; \
11875 })
11876
11877#define vshrn_n_s16(a, b) \
11878 __extension__ \
11879 ({ \
11880 int16x8_t a_ = (a); \
11881 int8x8_t result; \
11882 __asm__ ("shrn %0.8b,%1.8h,%2" \
11883 : "=w"(result) \
11884 : "w"(a_), "i"(b) \
11885 : /* No clobbers */); \
11886 result; \
11887 })
11888
11889#define vshrn_n_s32(a, b) \
11890 __extension__ \
11891 ({ \
11892 int32x4_t a_ = (a); \
11893 int16x4_t result; \
11894 __asm__ ("shrn %0.4h,%1.4s,%2" \
11895 : "=w"(result) \
11896 : "w"(a_), "i"(b) \
11897 : /* No clobbers */); \
11898 result; \
11899 })
11900
11901#define vshrn_n_s64(a, b) \
11902 __extension__ \
11903 ({ \
11904 int64x2_t a_ = (a); \
11905 int32x2_t result; \
11906 __asm__ ("shrn %0.2s,%1.2d,%2" \
11907 : "=w"(result) \
11908 : "w"(a_), "i"(b) \
11909 : /* No clobbers */); \
11910 result; \
11911 })
11912
11913#define vshrn_n_u16(a, b) \
11914 __extension__ \
11915 ({ \
11916 uint16x8_t a_ = (a); \
11917 uint8x8_t result; \
11918 __asm__ ("shrn %0.8b,%1.8h,%2" \
11919 : "=w"(result) \
11920 : "w"(a_), "i"(b) \
11921 : /* No clobbers */); \
11922 result; \
11923 })
11924
11925#define vshrn_n_u32(a, b) \
11926 __extension__ \
11927 ({ \
11928 uint32x4_t a_ = (a); \
11929 uint16x4_t result; \
11930 __asm__ ("shrn %0.4h,%1.4s,%2" \
11931 : "=w"(result) \
11932 : "w"(a_), "i"(b) \
11933 : /* No clobbers */); \
11934 result; \
11935 })
11936
11937#define vshrn_n_u64(a, b) \
11938 __extension__ \
11939 ({ \
11940 uint64x2_t a_ = (a); \
11941 uint32x2_t result; \
11942 __asm__ ("shrn %0.2s,%1.2d,%2" \
11943 : "=w"(result) \
11944 : "w"(a_), "i"(b) \
11945 : /* No clobbers */); \
11946 result; \
11947 })
11948
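/* vsli_n_* and vsri_n_* (polynomial forms): shift each element of the
   second operand left (SLI) or right (SRI) by the immediate and insert
   it into the corresponding element of the first operand, leaving the
   destination bits outside the shifted field unchanged.  */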
11949#define vsli_n_p8(a, b, c) \
11950 __extension__ \
11951 ({ \
11952 poly8x8_t b_ = (b); \
11953 poly8x8_t a_ = (a); \
11954 poly8x8_t result; \
11955 __asm__ ("sli %0.8b,%2.8b,%3" \
11956 : "=w"(result) \
11957 : "0"(a_), "w"(b_), "i"(c) \
11958 : /* No clobbers */); \
11959 result; \
11960 })
11961
11962#define vsli_n_p16(a, b, c) \
11963 __extension__ \
11964 ({ \
11965 poly16x4_t b_ = (b); \
11966 poly16x4_t a_ = (a); \
11967 poly16x4_t result; \
11968 __asm__ ("sli %0.4h,%2.4h,%3" \
11969 : "=w"(result) \
11970 : "0"(a_), "w"(b_), "i"(c) \
11971 : /* No clobbers */); \
11972 result; \
11973 })
11974
11975#define vsliq_n_p8(a, b, c) \
11976 __extension__ \
11977 ({ \
11978 poly8x16_t b_ = (b); \
11979 poly8x16_t a_ = (a); \
11980 poly8x16_t result; \
11981 __asm__ ("sli %0.16b,%2.16b,%3" \
11982 : "=w"(result) \
11983 : "0"(a_), "w"(b_), "i"(c) \
11984 : /* No clobbers */); \
11985 result; \
11986 })
11987
11988#define vsliq_n_p16(a, b, c) \
11989 __extension__ \
11990 ({ \
11991 poly16x8_t b_ = (b); \
11992 poly16x8_t a_ = (a); \
11993 poly16x8_t result; \
11994 __asm__ ("sli %0.8h,%2.8h,%3" \
11995 : "=w"(result) \
11996 : "0"(a_), "w"(b_), "i"(c) \
11997 : /* No clobbers */); \
11998 result; \
11999 })
12000
12001#define vsri_n_p8(a, b, c) \
12002 __extension__ \
12003 ({ \
12004 poly8x8_t b_ = (b); \
12005 poly8x8_t a_ = (a); \
12006 poly8x8_t result; \
12007 __asm__ ("sri %0.8b,%2.8b,%3" \
12008 : "=w"(result) \
12009 : "0"(a_), "w"(b_), "i"(c) \
12010 : /* No clobbers */); \
12011 result; \
12012 })
12013
12014#define vsri_n_p16(a, b, c) \
12015 __extension__ \
12016 ({ \
12017 poly16x4_t b_ = (b); \
12018 poly16x4_t a_ = (a); \
12019 poly16x4_t result; \
12020 __asm__ ("sri %0.4h,%2.4h,%3" \
12021 : "=w"(result) \
12022 : "0"(a_), "w"(b_), "i"(c) \
12023 : /* No clobbers */); \
12024 result; \
12025 })
12026
12027#define vsriq_n_p8(a, b, c) \
12028 __extension__ \
12029 ({ \
12030 poly8x16_t b_ = (b); \
12031 poly8x16_t a_ = (a); \
12032 poly8x16_t result; \
12033 __asm__ ("sri %0.16b,%2.16b,%3" \
12034 : "=w"(result) \
12035 : "0"(a_), "w"(b_), "i"(c) \
12036 : /* No clobbers */); \
12037 result; \
12038 })
12039
12040#define vsriq_n_p16(a, b, c) \
12041 __extension__ \
12042 ({ \
12043 poly16x8_t b_ = (b); \
12044 poly16x8_t a_ = (a); \
12045 poly16x8_t result; \
12046 __asm__ ("sri %0.8h,%2.8h,%3" \
12047 : "=w"(result) \
12048 : "0"(a_), "w"(b_), "i"(c) \
12049 : /* No clobbers */); \
12050 result; \
12051 })
12052
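/* vst1_lane_* and vst1q_lane_*: store a single lane, selected by a
   compile-time constant index, of the vector operand to the given
   address (single-structure form of ST1).  */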
12053#define vst1_lane_f32(a, b, c) \
12054 __extension__ \
12055 ({ \
12056 float32x2_t b_ = (b); \
12057 float32_t * a_ = (a); \
12058 __asm__ ("st1 {%1.s}[%2],[%0]" \
12059 : \
12060 : "r"(a_), "w"(b_), "i"(c) \
12061 : "memory"); \
12062 })
12063
12064#define vst1_lane_f64(a, b, c) \
12065 __extension__ \
12066 ({ \
12067 float64x1_t b_ = (b); \
12068 float64_t * a_ = (a); \
12069 __asm__ ("st1 {%1.d}[%2],[%0]" \
12070 : \
12071 : "r"(a_), "w"(b_), "i"(c) \
12072 : "memory"); \
12073 })
12074
12075#define vst1_lane_p8(a, b, c) \
12076 __extension__ \
12077 ({ \
12078 poly8x8_t b_ = (b); \
12079 poly8_t * a_ = (a); \
12080 __asm__ ("st1 {%1.b}[%2],[%0]" \
12081 : \
12082 : "r"(a_), "w"(b_), "i"(c) \
12083 : "memory"); \
12084 })
12085
12086#define vst1_lane_p16(a, b, c) \
12087 __extension__ \
12088 ({ \
12089 poly16x4_t b_ = (b); \
12090 poly16_t * a_ = (a); \
12091 __asm__ ("st1 {%1.h}[%2],[%0]" \
12092 : \
12093 : "r"(a_), "w"(b_), "i"(c) \
12094 : "memory"); \
12095 })
12096
12097#define vst1_lane_s8(a, b, c) \
12098 __extension__ \
12099 ({ \
12100 int8x8_t b_ = (b); \
12101 int8_t * a_ = (a); \
12102 __asm__ ("st1 {%1.b}[%2],[%0]" \
12103 : \
12104 : "r"(a_), "w"(b_), "i"(c) \
12105 : "memory"); \
12106 })
12107
12108#define vst1_lane_s16(a, b, c) \
12109 __extension__ \
12110 ({ \
12111 int16x4_t b_ = (b); \
12112 int16_t * a_ = (a); \
12113 __asm__ ("st1 {%1.h}[%2],[%0]" \
12114 : \
12115 : "r"(a_), "w"(b_), "i"(c) \
12116 : "memory"); \
12117 })
12118
12119#define vst1_lane_s32(a, b, c) \
12120 __extension__ \
12121 ({ \
12122 int32x2_t b_ = (b); \
12123 int32_t * a_ = (a); \
12124 __asm__ ("st1 {%1.s}[%2],[%0]" \
12125 : \
12126 : "r"(a_), "w"(b_), "i"(c) \
12127 : "memory"); \
12128 })
12129
12130#define vst1_lane_s64(a, b, c) \
12131 __extension__ \
12132 ({ \
12133 int64x1_t b_ = (b); \
12134 int64_t * a_ = (a); \
12135 __asm__ ("st1 {%1.d}[%2],[%0]" \
12136 : \
12137 : "r"(a_), "w"(b_), "i"(c) \
12138 : "memory"); \
12139 })
12140
12141#define vst1_lane_u8(a, b, c) \
12142 __extension__ \
12143 ({ \
12144 uint8x8_t b_ = (b); \
12145 uint8_t * a_ = (a); \
12146 __asm__ ("st1 {%1.b}[%2],[%0]" \
12147 : \
12148 : "r"(a_), "w"(b_), "i"(c) \
12149 : "memory"); \
12150 })
12151
12152#define vst1_lane_u16(a, b, c) \
12153 __extension__ \
12154 ({ \
12155 uint16x4_t b_ = (b); \
12156 uint16_t * a_ = (a); \
12157 __asm__ ("st1 {%1.h}[%2],[%0]" \
12158 : \
12159 : "r"(a_), "w"(b_), "i"(c) \
12160 : "memory"); \
12161 })
12162
12163#define vst1_lane_u32(a, b, c) \
12164 __extension__ \
12165 ({ \
12166 uint32x2_t b_ = (b); \
12167 uint32_t * a_ = (a); \
12168 __asm__ ("st1 {%1.s}[%2],[%0]" \
12169 : \
12170 : "r"(a_), "w"(b_), "i"(c) \
12171 : "memory"); \
12172 })
12173
12174#define vst1_lane_u64(a, b, c) \
12175 __extension__ \
12176 ({ \
12177 uint64x1_t b_ = (b); \
12178 uint64_t * a_ = (a); \
12179 __asm__ ("st1 {%1.d}[%2],[%0]" \
12180 : \
12181 : "r"(a_), "w"(b_), "i"(c) \
12182 : "memory"); \
12183 })
12184
12185
12186#define vst1q_lane_f32(a, b, c) \
12187 __extension__ \
12188 ({ \
12189 float32x4_t b_ = (b); \
12190 float32_t * a_ = (a); \
12191 __asm__ ("st1 {%1.s}[%2],[%0]" \
12192 : \
12193 : "r"(a_), "w"(b_), "i"(c) \
12194 : "memory"); \
12195 })
12196
12197#define vst1q_lane_f64(a, b, c) \
12198 __extension__ \
12199 ({ \
12200 float64x2_t b_ = (b); \
12201 float64_t * a_ = (a); \
12202 __asm__ ("st1 {%1.d}[%2],[%0]" \
12203 : \
12204 : "r"(a_), "w"(b_), "i"(c) \
12205 : "memory"); \
12206 })
12207
12208#define vst1q_lane_p8(a, b, c) \
12209 __extension__ \
12210 ({ \
12211 poly8x16_t b_ = (b); \
12212 poly8_t * a_ = (a); \
12213 __asm__ ("st1 {%1.b}[%2],[%0]" \
12214 : \
12215 : "r"(a_), "w"(b_), "i"(c) \
12216 : "memory"); \
12217 })
12218
12219#define vst1q_lane_p16(a, b, c) \
12220 __extension__ \
12221 ({ \
12222 poly16x8_t b_ = (b); \
12223 poly16_t * a_ = (a); \
12224 __asm__ ("st1 {%1.h}[%2],[%0]" \
12225 : \
12226 : "r"(a_), "w"(b_), "i"(c) \
12227 : "memory"); \
12228 })
12229
12230#define vst1q_lane_s8(a, b, c) \
12231 __extension__ \
12232 ({ \
12233 int8x16_t b_ = (b); \
12234 int8_t * a_ = (a); \
12235 __asm__ ("st1 {%1.b}[%2],[%0]" \
12236 : \
12237 : "r"(a_), "w"(b_), "i"(c) \
12238 : "memory"); \
12239 })
12240
12241#define vst1q_lane_s16(a, b, c) \
12242 __extension__ \
12243 ({ \
12244 int16x8_t b_ = (b); \
12245 int16_t * a_ = (a); \
12246 __asm__ ("st1 {%1.h}[%2],[%0]" \
12247 : \
12248 : "r"(a_), "w"(b_), "i"(c) \
12249 : "memory"); \
12250 })
12251
12252#define vst1q_lane_s32(a, b, c) \
12253 __extension__ \
12254 ({ \
12255 int32x4_t b_ = (b); \
12256 int32_t * a_ = (a); \
12257 __asm__ ("st1 {%1.s}[%2],[%0]" \
12258 : \
12259 : "r"(a_), "w"(b_), "i"(c) \
12260 : "memory"); \
12261 })
12262
12263#define vst1q_lane_s64(a, b, c) \
12264 __extension__ \
12265 ({ \
12266 int64x2_t b_ = (b); \
12267 int64_t * a_ = (a); \
12268 __asm__ ("st1 {%1.d}[%2],[%0]" \
12269 : \
12270 : "r"(a_), "w"(b_), "i"(c) \
12271 : "memory"); \
12272 })
12273
12274#define vst1q_lane_u8(a, b, c) \
12275 __extension__ \
12276 ({ \
12277 uint8x16_t b_ = (b); \
12278 uint8_t * a_ = (a); \
12279 __asm__ ("st1 {%1.b}[%2],[%0]" \
12280 : \
12281 : "r"(a_), "w"(b_), "i"(c) \
12282 : "memory"); \
12283 })
12284
12285#define vst1q_lane_u16(a, b, c) \
12286 __extension__ \
12287 ({ \
12288 uint16x8_t b_ = (b); \
12289 uint16_t * a_ = (a); \
12290 __asm__ ("st1 {%1.h}[%2],[%0]" \
12291 : \
12292 : "r"(a_), "w"(b_), "i"(c) \
12293 : "memory"); \
12294 })
12295
12296#define vst1q_lane_u32(a, b, c) \
12297 __extension__ \
12298 ({ \
12299 uint32x4_t b_ = (b); \
12300 uint32_t * a_ = (a); \
12301 __asm__ ("st1 {%1.s}[%2],[%0]" \
12302 : \
12303 : "r"(a_), "w"(b_), "i"(c) \
12304 : "memory"); \
12305 })
12306
12307#define vst1q_lane_u64(a, b, c) \
12308 __extension__ \
12309 ({ \
12310 uint64x2_t b_ = (b); \
12311 uint64_t * a_ = (a); \
12312 __asm__ ("st1 {%1.d}[%2],[%0]" \
12313 : \
12314 : "r"(a_), "w"(b_), "i"(c) \
12315 : "memory"); \
12316 })
12317
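/* Usage sketch for the vst1[q]_lane_* macros above (illustrative only, not
   part of this header; the function name below is hypothetical).  The lane
   index must be a compile-time constant, and only that one element is
   written to memory:

     #include <arm_neon.h>

     static void
     example_store_lane2 (int32_t *dst, int32x4_t v)
     {
       vst1q_lane_s32 (dst, v, 2);
     }
*/
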
12318__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12319vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
12320{
12321 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
12322 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12323 : "+w"(result)
12324 : "w"(b), "w"(c)
12325 : /* No clobbers */);
12326 return result;
12327}
12328
12329__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12330vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
12331{
12332 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
12333 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12334 : "+w"(result)
12335 : "w"(b), "w"(c)
12336 : /* No clobbers */);
12337 return result;
12338}
12339
12340__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12341vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
12342{
12343 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
12344 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12345 : "+w"(result)
12346 : "w"(b), "w"(c)
12347 : /* No clobbers */);
12348 return result;
12349}
12350
12351__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12352vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
12353{
12354 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
12355 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
12356 : "+w"(result)
12357 : "w"(b), "w"(c)
12358 : /* No clobbers */);
12359 return result;
12360}
12361
12362__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12363vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
12364{
12365 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
12366 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
12367 : "+w"(result)
12368 : "w"(b), "w"(c)
12369 : /* No clobbers */);
12370 return result;
12371}
12372
12373__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12374vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
12375{
12376 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
12377 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
12378 : "+w"(result)
12379 : "w"(b), "w"(c)
12380 : /* No clobbers */);
12381 return result;
12382}
12383
12384__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12385vsubhn_s16 (int16x8_t a, int16x8_t b)
12386{
12387 int8x8_t result;
12388 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12389 : "=w"(result)
12390 : "w"(a), "w"(b)
12391 : /* No clobbers */);
12392 return result;
12393}
12394
12395__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12396vsubhn_s32 (int32x4_t a, int32x4_t b)
12397{
12398 int16x4_t result;
12399 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12400 : "=w"(result)
12401 : "w"(a), "w"(b)
12402 : /* No clobbers */);
12403 return result;
12404}
12405
12406__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12407vsubhn_s64 (int64x2_t a, int64x2_t b)
12408{
12409 int32x2_t result;
12410 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12411 : "=w"(result)
12412 : "w"(a), "w"(b)
12413 : /* No clobbers */);
12414 return result;
12415}
12416
12417__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12418vsubhn_u16 (uint16x8_t a, uint16x8_t b)
12419{
12420 uint8x8_t result;
12421 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
12422 : "=w"(result)
12423 : "w"(a), "w"(b)
12424 : /* No clobbers */);
12425 return result;
12426}
12427
12428__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12429vsubhn_u32 (uint32x4_t a, uint32x4_t b)
12430{
12431 uint16x4_t result;
12432 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
12433 : "=w"(result)
12434 : "w"(a), "w"(b)
12435 : /* No clobbers */);
12436 return result;
12437}
12438
12439__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12440vsubhn_u64 (uint64x2_t a, uint64x2_t b)
12441{
12442 uint32x2_t result;
12443 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
12444 : "=w"(result)
12445 : "w"(a), "w"(b)
12446 : /* No clobbers */);
12447 return result;
12448}
12449
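/* Usage sketch for the vsubhn / vsubhn_high forms above (illustrative only;
   the function name is hypothetical).  Each vsubhn keeps the most
   significant 16 bits of every 32-bit difference; the _high form packs a
   second narrowed result into the upper half of a 128-bit vector:

     #include <arm_neon.h>

     static int16x8_t
     example_subhn_pair (int32x4_t a0, int32x4_t b0,
                         int32x4_t a1, int32x4_t b1)
     {
       int16x4_t lo = vsubhn_s32 (a0, b0);
       return vsubhn_high_s32 (lo, a1, b1);
     }
*/
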
12450__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12451vtrn1_f32 (float32x2_t a, float32x2_t b)
12452{
12453 float32x2_t result;
12454 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12455 : "=w"(result)
12456 : "w"(a), "w"(b)
12457 : /* No clobbers */);
12458 return result;
12459}
12460
12461__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12462vtrn1_p8 (poly8x8_t a, poly8x8_t b)
12463{
12464 poly8x8_t result;
12465 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12466 : "=w"(result)
12467 : "w"(a), "w"(b)
12468 : /* No clobbers */);
12469 return result;
12470}
12471
12472__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12473vtrn1_p16 (poly16x4_t a, poly16x4_t b)
12474{
12475 poly16x4_t result;
12476 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12477 : "=w"(result)
12478 : "w"(a), "w"(b)
12479 : /* No clobbers */);
12480 return result;
12481}
12482
12483__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12484vtrn1_s8 (int8x8_t a, int8x8_t b)
12485{
12486 int8x8_t result;
12487 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12488 : "=w"(result)
12489 : "w"(a), "w"(b)
12490 : /* No clobbers */);
12491 return result;
12492}
12493
12494__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12495vtrn1_s16 (int16x4_t a, int16x4_t b)
12496{
12497 int16x4_t result;
12498 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12499 : "=w"(result)
12500 : "w"(a), "w"(b)
12501 : /* No clobbers */);
12502 return result;
12503}
12504
12505__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12506vtrn1_s32 (int32x2_t a, int32x2_t b)
12507{
12508 int32x2_t result;
12509 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12510 : "=w"(result)
12511 : "w"(a), "w"(b)
12512 : /* No clobbers */);
12513 return result;
12514}
12515
12516__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12517vtrn1_u8 (uint8x8_t a, uint8x8_t b)
12518{
12519 uint8x8_t result;
12520 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
12521 : "=w"(result)
12522 : "w"(a), "w"(b)
12523 : /* No clobbers */);
12524 return result;
12525}
12526
12527__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12528vtrn1_u16 (uint16x4_t a, uint16x4_t b)
12529{
12530 uint16x4_t result;
12531 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
12532 : "=w"(result)
12533 : "w"(a), "w"(b)
12534 : /* No clobbers */);
12535 return result;
12536}
12537
12538__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12539vtrn1_u32 (uint32x2_t a, uint32x2_t b)
12540{
12541 uint32x2_t result;
12542 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
12543 : "=w"(result)
12544 : "w"(a), "w"(b)
12545 : /* No clobbers */);
12546 return result;
12547}
12548
12549__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12550vtrn1q_f32 (float32x4_t a, float32x4_t b)
12551{
12552 float32x4_t result;
12553 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12554 : "=w"(result)
12555 : "w"(a), "w"(b)
12556 : /* No clobbers */);
12557 return result;
12558}
12559
12560__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12561vtrn1q_f64 (float64x2_t a, float64x2_t b)
12562{
12563 float64x2_t result;
12564 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12565 : "=w"(result)
12566 : "w"(a), "w"(b)
12567 : /* No clobbers */);
12568 return result;
12569}
12570
12571__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12572vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
12573{
12574 poly8x16_t result;
12575 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12576 : "=w"(result)
12577 : "w"(a), "w"(b)
12578 : /* No clobbers */);
12579 return result;
12580}
12581
12582__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12583vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
12584{
12585 poly16x8_t result;
12586 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12587 : "=w"(result)
12588 : "w"(a), "w"(b)
12589 : /* No clobbers */);
12590 return result;
12591}
12592
12593__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12594vtrn1q_s8 (int8x16_t a, int8x16_t b)
12595{
12596 int8x16_t result;
12597 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12598 : "=w"(result)
12599 : "w"(a), "w"(b)
12600 : /* No clobbers */);
12601 return result;
12602}
12603
12604__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12605vtrn1q_s16 (int16x8_t a, int16x8_t b)
12606{
12607 int16x8_t result;
12608 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12609 : "=w"(result)
12610 : "w"(a), "w"(b)
12611 : /* No clobbers */);
12612 return result;
12613}
12614
12615__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12616vtrn1q_s32 (int32x4_t a, int32x4_t b)
12617{
12618 int32x4_t result;
12619 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12620 : "=w"(result)
12621 : "w"(a), "w"(b)
12622 : /* No clobbers */);
12623 return result;
12624}
12625
12626__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12627vtrn1q_s64 (int64x2_t a, int64x2_t b)
12628{
12629 int64x2_t result;
12630 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12631 : "=w"(result)
12632 : "w"(a), "w"(b)
12633 : /* No clobbers */);
12634 return result;
12635}
12636
12637__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12638vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
12639{
12640 uint8x16_t result;
12641 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
12642 : "=w"(result)
12643 : "w"(a), "w"(b)
12644 : /* No clobbers */);
12645 return result;
12646}
12647
12648__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12649vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
12650{
12651 uint16x8_t result;
12652 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
12653 : "=w"(result)
12654 : "w"(a), "w"(b)
12655 : /* No clobbers */);
12656 return result;
12657}
12658
12659__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12660vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
12661{
12662 uint32x4_t result;
12663 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
12664 : "=w"(result)
12665 : "w"(a), "w"(b)
12666 : /* No clobbers */);
12667 return result;
12668}
12669
12670__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12671vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
12672{
12673 uint64x2_t result;
12674 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
12675 : "=w"(result)
12676 : "w"(a), "w"(b)
12677 : /* No clobbers */);
12678 return result;
12679}
12680
12681__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12682vtrn2_f32 (float32x2_t a, float32x2_t b)
12683{
12684 float32x2_t result;
12685 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12686 : "=w"(result)
12687 : "w"(a), "w"(b)
12688 : /* No clobbers */);
12689 return result;
12690}
12691
12692__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12693vtrn2_p8 (poly8x8_t a, poly8x8_t b)
12694{
12695 poly8x8_t result;
12696 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12697 : "=w"(result)
12698 : "w"(a), "w"(b)
12699 : /* No clobbers */);
12700 return result;
12701}
12702
12703__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12704vtrn2_p16 (poly16x4_t a, poly16x4_t b)
12705{
12706 poly16x4_t result;
12707 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12708 : "=w"(result)
12709 : "w"(a), "w"(b)
12710 : /* No clobbers */);
12711 return result;
12712}
12713
12714__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12715vtrn2_s8 (int8x8_t a, int8x8_t b)
12716{
12717 int8x8_t result;
12718 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12719 : "=w"(result)
12720 : "w"(a), "w"(b)
12721 : /* No clobbers */);
12722 return result;
12723}
12724
12725__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12726vtrn2_s16 (int16x4_t a, int16x4_t b)
12727{
12728 int16x4_t result;
12729 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12730 : "=w"(result)
12731 : "w"(a), "w"(b)
12732 : /* No clobbers */);
12733 return result;
12734}
12735
12736__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12737vtrn2_s32 (int32x2_t a, int32x2_t b)
12738{
12739 int32x2_t result;
12740 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12741 : "=w"(result)
12742 : "w"(a), "w"(b)
12743 : /* No clobbers */);
12744 return result;
12745}
12746
12747__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12748vtrn2_u8 (uint8x8_t a, uint8x8_t b)
12749{
12750 uint8x8_t result;
12751 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
12752 : "=w"(result)
12753 : "w"(a), "w"(b)
12754 : /* No clobbers */);
12755 return result;
12756}
12757
12758__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12759vtrn2_u16 (uint16x4_t a, uint16x4_t b)
12760{
12761 uint16x4_t result;
12762 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
12763 : "=w"(result)
12764 : "w"(a), "w"(b)
12765 : /* No clobbers */);
12766 return result;
12767}
12768
12769__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12770vtrn2_u32 (uint32x2_t a, uint32x2_t b)
12771{
12772 uint32x2_t result;
12773 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
12774 : "=w"(result)
12775 : "w"(a), "w"(b)
12776 : /* No clobbers */);
12777 return result;
12778}
12779
12780__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12781vtrn2q_f32 (float32x4_t a, float32x4_t b)
12782{
12783 float32x4_t result;
12784 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12785 : "=w"(result)
12786 : "w"(a), "w"(b)
12787 : /* No clobbers */);
12788 return result;
12789}
12790
12791__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12792vtrn2q_f64 (float64x2_t a, float64x2_t b)
12793{
12794 float64x2_t result;
12795 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12796 : "=w"(result)
12797 : "w"(a), "w"(b)
12798 : /* No clobbers */);
12799 return result;
12800}
12801
12802__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12803vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
12804{
12805 poly8x16_t result;
12806 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12807 : "=w"(result)
12808 : "w"(a), "w"(b)
12809 : /* No clobbers */);
12810 return result;
12811}
12812
12813__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12814vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
12815{
12816 poly16x8_t result;
12817 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12818 : "=w"(result)
12819 : "w"(a), "w"(b)
12820 : /* No clobbers */);
12821 return result;
12822}
12823
12824__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12825vtrn2q_s8 (int8x16_t a, int8x16_t b)
12826{
12827 int8x16_t result;
12828 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12829 : "=w"(result)
12830 : "w"(a), "w"(b)
12831 : /* No clobbers */);
12832 return result;
12833}
12834
12835__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12836vtrn2q_s16 (int16x8_t a, int16x8_t b)
12837{
12838 int16x8_t result;
12839 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12840 : "=w"(result)
12841 : "w"(a), "w"(b)
12842 : /* No clobbers */);
12843 return result;
12844}
12845
12846__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12847vtrn2q_s32 (int32x4_t a, int32x4_t b)
12848{
12849 int32x4_t result;
12850 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12851 : "=w"(result)
12852 : "w"(a), "w"(b)
12853 : /* No clobbers */);
12854 return result;
12855}
12856
12857__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12858vtrn2q_s64 (int64x2_t a, int64x2_t b)
12859{
12860 int64x2_t result;
12861 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12862 : "=w"(result)
12863 : "w"(a), "w"(b)
12864 : /* No clobbers */);
12865 return result;
12866}
12867
12868__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12869vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
12870{
12871 uint8x16_t result;
12872 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
12873 : "=w"(result)
12874 : "w"(a), "w"(b)
12875 : /* No clobbers */);
12876 return result;
12877}
12878
12879__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12880vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
12881{
12882 uint16x8_t result;
12883 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
12884 : "=w"(result)
12885 : "w"(a), "w"(b)
12886 : /* No clobbers */);
12887 return result;
12888}
12889
12890__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12891vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
12892{
12893 uint32x4_t result;
12894 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
12895 : "=w"(result)
12896 : "w"(a), "w"(b)
12897 : /* No clobbers */);
12898 return result;
12899}
12900
12901__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12902vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
12903{
12904 uint64x2_t result;
12905 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
12906 : "=w"(result)
12907 : "w"(a), "w"(b)
12908 : /* No clobbers */);
12909 return result;
12910}
12911
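/* Usage sketch for the vtrn1/vtrn2 forms above (illustrative only; the
   function name is hypothetical).  For 2-element vectors vtrn1 returns
   { a[0], b[0] } and vtrn2 returns { a[1], b[1] }, so together they
   transpose a 2x2 matrix held as two rows:

     #include <arm_neon.h>

     static void
     example_transpose_2x2 (int32x2_t row0, int32x2_t row1,
                            int32x2_t *col0, int32x2_t *col1)
     {
       *col0 = vtrn1_s32 (row0, row1);
       *col1 = vtrn2_s32 (row0, row1);
     }
*/
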
12912__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12913vtst_p8 (poly8x8_t a, poly8x8_t b)
12914{
12915 uint8x8_t result;
12916 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
12917 : "=w"(result)
12918 : "w"(a), "w"(b)
12919 : /* No clobbers */);
12920 return result;
12921}
12922
12923__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12924vtst_p16 (poly16x4_t a, poly16x4_t b)
12925{
12926 uint16x4_t result;
12927 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
12928 : "=w"(result)
12929 : "w"(a), "w"(b)
12930 : /* No clobbers */);
12931 return result;
12932}
12933
12934__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12935vtstq_p8 (poly8x16_t a, poly8x16_t b)
12936{
12937 uint8x16_t result;
12938 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
12939 : "=w"(result)
12940 : "w"(a), "w"(b)
12941 : /* No clobbers */);
12942 return result;
12943}
12944
12945__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12946vtstq_p16 (poly16x8_t a, poly16x8_t b)
12947{
12948 uint16x8_t result;
12949 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
12950 : "=w"(result)
12951 : "w"(a), "w"(b)
12952 : /* No clobbers */);
12953 return result;
12954}
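
/* Usage sketch for vtst_p8 above (illustrative only; the function name is
   hypothetical).  Each result lane is all ones where (a & b) is non-zero
   and zero otherwise:

     #include <arm_neon.h>

     static uint8x8_t
     example_bit_test (poly8x8_t bytes, poly8x8_t mask)
     {
       return vtst_p8 (bytes, mask);
     }
*/
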
12955__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12956vuzp1_f32 (float32x2_t a, float32x2_t b)
12957{
12958 float32x2_t result;
12959 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
12960 : "=w"(result)
12961 : "w"(a), "w"(b)
12962 : /* No clobbers */);
12963 return result;
12964}
12965
12966__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12967vuzp1_p8 (poly8x8_t a, poly8x8_t b)
12968{
12969 poly8x8_t result;
12970 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12971 : "=w"(result)
12972 : "w"(a), "w"(b)
12973 : /* No clobbers */);
12974 return result;
12975}
12976
12977__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12978vuzp1_p16 (poly16x4_t a, poly16x4_t b)
12979{
12980 poly16x4_t result;
12981 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
12982 : "=w"(result)
12983 : "w"(a), "w"(b)
12984 : /* No clobbers */);
12985 return result;
12986}
12987
12988__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12989vuzp1_s8 (int8x8_t a, int8x8_t b)
12990{
12991 int8x8_t result;
12992 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
12993 : "=w"(result)
12994 : "w"(a), "w"(b)
12995 : /* No clobbers */);
12996 return result;
12997}
12998
12999__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13000vuzp1_s16 (int16x4_t a, int16x4_t b)
13001{
13002 int16x4_t result;
13003 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13004 : "=w"(result)
13005 : "w"(a), "w"(b)
13006 : /* No clobbers */);
13007 return result;
13008}
13009
13010__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13011vuzp1_s32 (int32x2_t a, int32x2_t b)
13012{
13013 int32x2_t result;
13014 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13015 : "=w"(result)
13016 : "w"(a), "w"(b)
13017 : /* No clobbers */);
13018 return result;
13019}
13020
13021__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13022vuzp1_u8 (uint8x8_t a, uint8x8_t b)
13023{
13024 uint8x8_t result;
13025 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
13026 : "=w"(result)
13027 : "w"(a), "w"(b)
13028 : /* No clobbers */);
13029 return result;
13030}
13031
13032__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13033vuzp1_u16 (uint16x4_t a, uint16x4_t b)
13034{
13035 uint16x4_t result;
13036 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
13037 : "=w"(result)
13038 : "w"(a), "w"(b)
13039 : /* No clobbers */);
13040 return result;
13041}
13042
13043__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13044vuzp1_u32 (uint32x2_t a, uint32x2_t b)
13045{
13046 uint32x2_t result;
13047 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
13048 : "=w"(result)
13049 : "w"(a), "w"(b)
13050 : /* No clobbers */);
13051 return result;
13052}
13053
13054__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13055vuzp1q_f32 (float32x4_t a, float32x4_t b)
13056{
13057 float32x4_t result;
13058 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13059 : "=w"(result)
13060 : "w"(a), "w"(b)
13061 : /* No clobbers */);
13062 return result;
13063}
13064
13065__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13066vuzp1q_f64 (float64x2_t a, float64x2_t b)
13067{
13068 float64x2_t result;
13069 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13070 : "=w"(result)
13071 : "w"(a), "w"(b)
13072 : /* No clobbers */);
13073 return result;
13074}
13075
13076__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13077vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
13078{
13079 poly8x16_t result;
13080 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13081 : "=w"(result)
13082 : "w"(a), "w"(b)
13083 : /* No clobbers */);
13084 return result;
13085}
13086
13087__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13088vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
13089{
13090 poly16x8_t result;
13091 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13092 : "=w"(result)
13093 : "w"(a), "w"(b)
13094 : /* No clobbers */);
13095 return result;
13096}
13097
13098__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13099vuzp1q_s8 (int8x16_t a, int8x16_t b)
13100{
13101 int8x16_t result;
13102 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13103 : "=w"(result)
13104 : "w"(a), "w"(b)
13105 : /* No clobbers */);
13106 return result;
13107}
13108
13109__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13110vuzp1q_s16 (int16x8_t a, int16x8_t b)
13111{
13112 int16x8_t result;
13113 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13114 : "=w"(result)
13115 : "w"(a), "w"(b)
13116 : /* No clobbers */);
13117 return result;
13118}
13119
13120__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13121vuzp1q_s32 (int32x4_t a, int32x4_t b)
13122{
13123 int32x4_t result;
13124 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13125 : "=w"(result)
13126 : "w"(a), "w"(b)
13127 : /* No clobbers */);
13128 return result;
13129}
13130
13131__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13132vuzp1q_s64 (int64x2_t a, int64x2_t b)
13133{
13134 int64x2_t result;
13135 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13136 : "=w"(result)
13137 : "w"(a), "w"(b)
13138 : /* No clobbers */);
13139 return result;
13140}
13141
13142__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13143vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
13144{
13145 uint8x16_t result;
13146 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
13147 : "=w"(result)
13148 : "w"(a), "w"(b)
13149 : /* No clobbers */);
13150 return result;
13151}
13152
13153__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13154vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
13155{
13156 uint16x8_t result;
13157 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
13158 : "=w"(result)
13159 : "w"(a), "w"(b)
13160 : /* No clobbers */);
13161 return result;
13162}
13163
13164__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13165vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
13166{
13167 uint32x4_t result;
13168 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
13169 : "=w"(result)
13170 : "w"(a), "w"(b)
13171 : /* No clobbers */);
13172 return result;
13173}
13174
13175__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13176vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
13177{
13178 uint64x2_t result;
13179 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
13180 : "=w"(result)
13181 : "w"(a), "w"(b)
13182 : /* No clobbers */);
13183 return result;
13184}
13185
13186__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13187vuzp2_f32 (float32x2_t a, float32x2_t b)
13188{
13189 float32x2_t result;
13190 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13191 : "=w"(result)
13192 : "w"(a), "w"(b)
13193 : /* No clobbers */);
13194 return result;
13195}
13196
13197__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13198vuzp2_p8 (poly8x8_t a, poly8x8_t b)
13199{
13200 poly8x8_t result;
13201 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13202 : "=w"(result)
13203 : "w"(a), "w"(b)
13204 : /* No clobbers */);
13205 return result;
13206}
13207
13208__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13209vuzp2_p16 (poly16x4_t a, poly16x4_t b)
13210{
13211 poly16x4_t result;
13212 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13213 : "=w"(result)
13214 : "w"(a), "w"(b)
13215 : /* No clobbers */);
13216 return result;
13217}
13218
13219__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13220vuzp2_s8 (int8x8_t a, int8x8_t b)
13221{
13222 int8x8_t result;
13223 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13224 : "=w"(result)
13225 : "w"(a), "w"(b)
13226 : /* No clobbers */);
13227 return result;
13228}
13229
13230__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13231vuzp2_s16 (int16x4_t a, int16x4_t b)
13232{
13233 int16x4_t result;
13234 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13235 : "=w"(result)
13236 : "w"(a), "w"(b)
13237 : /* No clobbers */);
13238 return result;
13239}
13240
13241__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13242vuzp2_s32 (int32x2_t a, int32x2_t b)
13243{
13244 int32x2_t result;
13245 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13246 : "=w"(result)
13247 : "w"(a), "w"(b)
13248 : /* No clobbers */);
13249 return result;
13250}
13251
13252__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13253vuzp2_u8 (uint8x8_t a, uint8x8_t b)
13254{
13255 uint8x8_t result;
13256 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
13257 : "=w"(result)
13258 : "w"(a), "w"(b)
13259 : /* No clobbers */);
13260 return result;
13261}
13262
13263__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13264vuzp2_u16 (uint16x4_t a, uint16x4_t b)
13265{
13266 uint16x4_t result;
13267 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
13268 : "=w"(result)
13269 : "w"(a), "w"(b)
13270 : /* No clobbers */);
13271 return result;
13272}
13273
13274__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13275vuzp2_u32 (uint32x2_t a, uint32x2_t b)
13276{
13277 uint32x2_t result;
13278 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
13279 : "=w"(result)
13280 : "w"(a), "w"(b)
13281 : /* No clobbers */);
13282 return result;
13283}
13284
13285__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13286vuzp2q_f32 (float32x4_t a, float32x4_t b)
13287{
13288 float32x4_t result;
13289 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13290 : "=w"(result)
13291 : "w"(a), "w"(b)
13292 : /* No clobbers */);
13293 return result;
13294}
13295
13296__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13297vuzp2q_f64 (float64x2_t a, float64x2_t b)
13298{
13299 float64x2_t result;
13300 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13301 : "=w"(result)
13302 : "w"(a), "w"(b)
13303 : /* No clobbers */);
13304 return result;
13305}
13306
13307__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13308vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
13309{
13310 poly8x16_t result;
13311 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13312 : "=w"(result)
13313 : "w"(a), "w"(b)
13314 : /* No clobbers */);
13315 return result;
13316}
13317
13318__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13319vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
13320{
13321 poly16x8_t result;
13322 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13323 : "=w"(result)
13324 : "w"(a), "w"(b)
13325 : /* No clobbers */);
13326 return result;
13327}
13328
13329__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13330vuzp2q_s8 (int8x16_t a, int8x16_t b)
13331{
13332 int8x16_t result;
13333 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13334 : "=w"(result)
13335 : "w"(a), "w"(b)
13336 : /* No clobbers */);
13337 return result;
13338}
13339
13340__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13341vuzp2q_s16 (int16x8_t a, int16x8_t b)
13342{
13343 int16x8_t result;
13344 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13345 : "=w"(result)
13346 : "w"(a), "w"(b)
13347 : /* No clobbers */);
13348 return result;
13349}
13350
13351__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13352vuzp2q_s32 (int32x4_t a, int32x4_t b)
13353{
13354 int32x4_t result;
13355 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13356 : "=w"(result)
13357 : "w"(a), "w"(b)
13358 : /* No clobbers */);
13359 return result;
13360}
13361
13362__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13363vuzp2q_s64 (int64x2_t a, int64x2_t b)
13364{
13365 int64x2_t result;
13366 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13367 : "=w"(result)
13368 : "w"(a), "w"(b)
13369 : /* No clobbers */);
13370 return result;
13371}
13372
13373__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13374vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
13375{
13376 uint8x16_t result;
13377 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
13378 : "=w"(result)
13379 : "w"(a), "w"(b)
13380 : /* No clobbers */);
13381 return result;
13382}
13383
13384__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13385vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
13386{
13387 uint16x8_t result;
13388 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
13389 : "=w"(result)
13390 : "w"(a), "w"(b)
13391 : /* No clobbers */);
13392 return result;
13393}
13394
13395__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13396vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
13397{
13398 uint32x4_t result;
13399 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
13400 : "=w"(result)
13401 : "w"(a), "w"(b)
13402 : /* No clobbers */);
13403 return result;
13404}
13405
13406__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13407vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
13408{
13409 uint64x2_t result;
13410 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
13411 : "=w"(result)
13412 : "w"(a), "w"(b)
13413 : /* No clobbers */);
13414 return result;
13415}
13416
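/* Usage sketch for the vuzp1/vuzp2 forms above (illustrative only; the
   function name is hypothetical).  For 4-element vectors vuzp1 returns
   { a[0], a[2], b[0], b[2] } and vuzp2 returns { a[1], a[3], b[1], b[3] },
   which de-interleaves data stored as alternating pairs:

     #include <arm_neon.h>

     static void
     example_deinterleave (int16x4_t ab0, int16x4_t ab1,
                           int16x4_t *even, int16x4_t *odd)
     {
       *even = vuzp1_s16 (ab0, ab1);
       *odd  = vuzp2_s16 (ab0, ab1);
     }
*/
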
13417__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13418vzip1_f32 (float32x2_t a, float32x2_t b)
13419{
13420 float32x2_t result;
13421 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13422 : "=w"(result)
13423 : "w"(a), "w"(b)
13424 : /* No clobbers */);
13425 return result;
13426}
13427
13428__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13429vzip1_p8 (poly8x8_t a, poly8x8_t b)
13430{
13431 poly8x8_t result;
13432 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13433 : "=w"(result)
13434 : "w"(a), "w"(b)
13435 : /* No clobbers */);
13436 return result;
13437}
13438
13439__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13440vzip1_p16 (poly16x4_t a, poly16x4_t b)
13441{
13442 poly16x4_t result;
13443 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13444 : "=w"(result)
13445 : "w"(a), "w"(b)
13446 : /* No clobbers */);
13447 return result;
13448}
13449
13450__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13451vzip1_s8 (int8x8_t a, int8x8_t b)
13452{
13453 int8x8_t result;
13454 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13455 : "=w"(result)
13456 : "w"(a), "w"(b)
13457 : /* No clobbers */);
13458 return result;
13459}
13460
13461__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13462vzip1_s16 (int16x4_t a, int16x4_t b)
13463{
13464 int16x4_t result;
13465 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13466 : "=w"(result)
13467 : "w"(a), "w"(b)
13468 : /* No clobbers */);
13469 return result;
13470}
13471
13472__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13473vzip1_s32 (int32x2_t a, int32x2_t b)
13474{
13475 int32x2_t result;
13476 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13477 : "=w"(result)
13478 : "w"(a), "w"(b)
13479 : /* No clobbers */);
13480 return result;
13481}
13482
13483__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13484vzip1_u8 (uint8x8_t a, uint8x8_t b)
13485{
13486 uint8x8_t result;
13487 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
13488 : "=w"(result)
13489 : "w"(a), "w"(b)
13490 : /* No clobbers */);
13491 return result;
13492}
13493
13494__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13495vzip1_u16 (uint16x4_t a, uint16x4_t b)
13496{
13497 uint16x4_t result;
13498 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
13499 : "=w"(result)
13500 : "w"(a), "w"(b)
13501 : /* No clobbers */);
13502 return result;
13503}
13504
13505__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13506vzip1_u32 (uint32x2_t a, uint32x2_t b)
13507{
13508 uint32x2_t result;
13509 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
13510 : "=w"(result)
13511 : "w"(a), "w"(b)
13512 : /* No clobbers */);
13513 return result;
13514}
13515
13516__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13517vzip1q_f32 (float32x4_t a, float32x4_t b)
13518{
13519 float32x4_t result;
13520 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13521 : "=w"(result)
13522 : "w"(a), "w"(b)
13523 : /* No clobbers */);
13524 return result;
13525}
13526
13527__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13528vzip1q_f64 (float64x2_t a, float64x2_t b)
13529{
13530 float64x2_t result;
13531 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13532 : "=w"(result)
13533 : "w"(a), "w"(b)
13534 : /* No clobbers */);
13535 return result;
13536}
13537
13538__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13539vzip1q_p8 (poly8x16_t a, poly8x16_t b)
13540{
13541 poly8x16_t result;
13542 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13543 : "=w"(result)
13544 : "w"(a), "w"(b)
13545 : /* No clobbers */);
13546 return result;
13547}
13548
13549__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13550vzip1q_p16 (poly16x8_t a, poly16x8_t b)
13551{
13552 poly16x8_t result;
13553 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13554 : "=w"(result)
13555 : "w"(a), "w"(b)
13556 : /* No clobbers */);
13557 return result;
13558}
13559
13560__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13561vzip1q_s8 (int8x16_t a, int8x16_t b)
13562{
13563 int8x16_t result;
13564 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13565 : "=w"(result)
13566 : "w"(a), "w"(b)
13567 : /* No clobbers */);
13568 return result;
13569}
13570
13571__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13572vzip1q_s16 (int16x8_t a, int16x8_t b)
13573{
13574 int16x8_t result;
13575 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13576 : "=w"(result)
13577 : "w"(a), "w"(b)
13578 : /* No clobbers */);
13579 return result;
13580}
13581
13582__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13583vzip1q_s32 (int32x4_t a, int32x4_t b)
13584{
13585 int32x4_t result;
13586 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13587 : "=w"(result)
13588 : "w"(a), "w"(b)
13589 : /* No clobbers */);
13590 return result;
13591}
13592
13593__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13594vzip1q_s64 (int64x2_t a, int64x2_t b)
13595{
13596 int64x2_t result;
13597 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13598 : "=w"(result)
13599 : "w"(a), "w"(b)
13600 : /* No clobbers */);
13601 return result;
13602}
13603
13604__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13605vzip1q_u8 (uint8x16_t a, uint8x16_t b)
13606{
13607 uint8x16_t result;
13608 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
13609 : "=w"(result)
13610 : "w"(a), "w"(b)
13611 : /* No clobbers */);
13612 return result;
13613}
13614
13615__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13616vzip1q_u16 (uint16x8_t a, uint16x8_t b)
13617{
13618 uint16x8_t result;
13619 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
13620 : "=w"(result)
13621 : "w"(a), "w"(b)
13622 : /* No clobbers */);
13623 return result;
13624}
13625
13626__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13627vzip1q_u32 (uint32x4_t a, uint32x4_t b)
13628{
13629 uint32x4_t result;
13630 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
13631 : "=w"(result)
13632 : "w"(a), "w"(b)
13633 : /* No clobbers */);
13634 return result;
13635}
13636
13637__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13638vzip1q_u64 (uint64x2_t a, uint64x2_t b)
13639{
13640 uint64x2_t result;
13641 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
13642 : "=w"(result)
13643 : "w"(a), "w"(b)
13644 : /* No clobbers */);
13645 return result;
13646}
13647
13648__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13649vzip2_f32 (float32x2_t a, float32x2_t b)
13650{
13651 float32x2_t result;
13652 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13653 : "=w"(result)
13654 : "w"(a), "w"(b)
13655 : /* No clobbers */);
13656 return result;
13657}
13658
13659__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13660vzip2_p8 (poly8x8_t a, poly8x8_t b)
13661{
13662 poly8x8_t result;
13663 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13664 : "=w"(result)
13665 : "w"(a), "w"(b)
13666 : /* No clobbers */);
13667 return result;
13668}
13669
13670__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13671vzip2_p16 (poly16x4_t a, poly16x4_t b)
13672{
13673 poly16x4_t result;
13674 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13675 : "=w"(result)
13676 : "w"(a), "w"(b)
13677 : /* No clobbers */);
13678 return result;
13679}
13680
13681__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13682vzip2_s8 (int8x8_t a, int8x8_t b)
13683{
13684 int8x8_t result;
13685 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13686 : "=w"(result)
13687 : "w"(a), "w"(b)
13688 : /* No clobbers */);
13689 return result;
13690}
13691
13692__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13693vzip2_s16 (int16x4_t a, int16x4_t b)
13694{
13695 int16x4_t result;
13696 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13697 : "=w"(result)
13698 : "w"(a), "w"(b)
13699 : /* No clobbers */);
13700 return result;
13701}
13702
13703__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13704vzip2_s32 (int32x2_t a, int32x2_t b)
13705{
13706 int32x2_t result;
13707 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13708 : "=w"(result)
13709 : "w"(a), "w"(b)
13710 : /* No clobbers */);
13711 return result;
13712}
13713
13714__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13715vzip2_u8 (uint8x8_t a, uint8x8_t b)
13716{
13717 uint8x8_t result;
13718 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
13719 : "=w"(result)
13720 : "w"(a), "w"(b)
13721 : /* No clobbers */);
13722 return result;
13723}
13724
13725__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13726vzip2_u16 (uint16x4_t a, uint16x4_t b)
13727{
13728 uint16x4_t result;
13729 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
13730 : "=w"(result)
13731 : "w"(a), "w"(b)
13732 : /* No clobbers */);
13733 return result;
13734}
13735
13736__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13737vzip2_u32 (uint32x2_t a, uint32x2_t b)
13738{
13739 uint32x2_t result;
13740 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
13741 : "=w"(result)
13742 : "w"(a), "w"(b)
13743 : /* No clobbers */);
13744 return result;
13745}
13746
13747__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13748vzip2q_f32 (float32x4_t a, float32x4_t b)
13749{
13750 float32x4_t result;
13751 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13752 : "=w"(result)
13753 : "w"(a), "w"(b)
13754 : /* No clobbers */);
13755 return result;
13756}
13757
13758__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13759vzip2q_f64 (float64x2_t a, float64x2_t b)
13760{
13761 float64x2_t result;
13762 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13763 : "=w"(result)
13764 : "w"(a), "w"(b)
13765 : /* No clobbers */);
13766 return result;
13767}
13768
13769__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13770vzip2q_p8 (poly8x16_t a, poly8x16_t b)
13771{
13772 poly8x16_t result;
13773 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13774 : "=w"(result)
13775 : "w"(a), "w"(b)
13776 : /* No clobbers */);
13777 return result;
13778}
13779
13780__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13781vzip2q_p16 (poly16x8_t a, poly16x8_t b)
13782{
13783 poly16x8_t result;
13784 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13785 : "=w"(result)
13786 : "w"(a), "w"(b)
13787 : /* No clobbers */);
13788 return result;
13789}
13790
13791__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13792vzip2q_s8 (int8x16_t a, int8x16_t b)
13793{
13794 int8x16_t result;
13795 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13796 : "=w"(result)
13797 : "w"(a), "w"(b)
13798 : /* No clobbers */);
13799 return result;
13800}
13801
13802__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13803vzip2q_s16 (int16x8_t a, int16x8_t b)
13804{
13805 int16x8_t result;
13806 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13807 : "=w"(result)
13808 : "w"(a), "w"(b)
13809 : /* No clobbers */);
13810 return result;
13811}
13812
13813__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13814vzip2q_s32 (int32x4_t a, int32x4_t b)
13815{
13816 int32x4_t result;
13817 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13818 : "=w"(result)
13819 : "w"(a), "w"(b)
13820 : /* No clobbers */);
13821 return result;
13822}
13823
13824__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13825vzip2q_s64 (int64x2_t a, int64x2_t b)
13826{
13827 int64x2_t result;
13828 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13829 : "=w"(result)
13830 : "w"(a), "w"(b)
13831 : /* No clobbers */);
13832 return result;
13833}
13834
13835__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13836vzip2q_u8 (uint8x16_t a, uint8x16_t b)
13837{
13838 uint8x16_t result;
13839 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
13840 : "=w"(result)
13841 : "w"(a), "w"(b)
13842 : /* No clobbers */);
13843 return result;
13844}
13845
13846__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13847vzip2q_u16 (uint16x8_t a, uint16x8_t b)
13848{
13849 uint16x8_t result;
13850 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
13851 : "=w"(result)
13852 : "w"(a), "w"(b)
13853 : /* No clobbers */);
13854 return result;
13855}
13856
13857__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13858vzip2q_u32 (uint32x4_t a, uint32x4_t b)
13859{
13860 uint32x4_t result;
13861 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
13862 : "=w"(result)
13863 : "w"(a), "w"(b)
13864 : /* No clobbers */);
13865 return result;
13866}
13867
13868__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13869vzip2q_u64 (uint64x2_t a, uint64x2_t b)
13870{
13871 uint64x2_t result;
13872 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
13873 : "=w"(result)
13874 : "w"(a), "w"(b)
13875 : /* No clobbers */);
13876 return result;
13877}
13878
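/* Usage sketch for the vzip1/vzip2 forms above (illustrative only; the
   function name is hypothetical).  vzip1q interleaves the low halves of its
   operands ({ a[0], b[0], a[1], b[1], ... }) and vzip2q interleaves the
   high halves:

     #include <arm_neon.h>

     static void
     example_interleave (int16x8_t re, int16x8_t im,
                         int16x8_t *lo, int16x8_t *hi)
     {
       *lo = vzip1q_s16 (re, im);
       *hi = vzip2q_s16 (re, im);
     }
*/
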
13879/* End of temporary inline asm implementations. */
13880
13881/* Start of temporary inline asm for vldn, vstn and friends. */
13882
13883/* Create struct element types for duplicating loads.
13884
13885 Create 2 element structures of:
13886
13887 +------+----+----+----+----+
13888 | | 8 | 16 | 32 | 64 |
13889 +------+----+----+----+----+
13890 |int | Y | Y | N | N |
13891 +------+----+----+----+----+
13892 |uint | Y | Y | N | N |
13893 +------+----+----+----+----+
13894 |float | - | - | N | N |
13895 +------+----+----+----+----+
13896 |poly | Y | Y | - | - |
13897 +------+----+----+----+----+
13898
13899 Create 3 element structures of:
13900
13901 +------+----+----+----+----+
13902 | | 8 | 16 | 32 | 64 |
13903 +------+----+----+----+----+
13904 |int | Y | Y | Y | Y |
13905 +------+----+----+----+----+
13906 |uint | Y | Y | Y | Y |
13907 +------+----+----+----+----+
13908 |float | - | - | Y | Y |
13909 +------+----+----+----+----+
13910 |poly | Y | Y | - | - |
13911 +------+----+----+----+----+
13912
13913 Create 4 element structures of:
13914
13915 +------+----+----+----+----+
13916 | | 8 | 16 | 32 | 64 |
13917 +------+----+----+----+----+
13918 |int | Y | N | N | Y |
13919 +------+----+----+----+----+
13920 |uint | Y | N | N | Y |
13921 +------+----+----+----+----+
13922 |float | - | - | N | Y |
13923 +------+----+----+----+----+
13924 |poly | Y | N | - | - |
13925 +------+----+----+----+----+
13926
13927 These types are required for casting the memory reference in the
   structure loads below; an expansion example of __STRUCTN follows the
   type definitions. */
13928#define __STRUCTN(t, sz, nelem) \
13929 typedef struct t ## sz ## x ## nelem ## _t { \
13930 t ## sz ## _t val[nelem]; \
13931 } t ## sz ## x ## nelem ## _t;
13932
13933/* 2-element structs. */
13934__STRUCTN (int, 8, 2)
13935__STRUCTN (int, 16, 2)
13936__STRUCTN (uint, 8, 2)
13937__STRUCTN (uint, 16, 2)
13938__STRUCTN (poly, 8, 2)
13939__STRUCTN (poly, 16, 2)
13940/* 3-element structs. */
13941__STRUCTN (int, 8, 3)
13942__STRUCTN (int, 16, 3)
13943__STRUCTN (int, 32, 3)
13944__STRUCTN (int, 64, 3)
13945__STRUCTN (uint, 8, 3)
13946__STRUCTN (uint, 16, 3)
13947__STRUCTN (uint, 32, 3)
13948__STRUCTN (uint, 64, 3)
13949__STRUCTN (float, 32, 3)
13950__STRUCTN (float, 64, 3)
13951__STRUCTN (poly, 8, 3)
13952__STRUCTN (poly, 16, 3)
13953/* 4-element structs. */
13954__STRUCTN (int, 8, 4)
13955__STRUCTN (int, 64, 4)
13956__STRUCTN (uint, 8, 4)
13957__STRUCTN (uint, 64, 4)
13958__STRUCTN (poly, 8, 4)
13959__STRUCTN (float, 64, 4)
13960#undef __STRUCTN
13961
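/* For illustration, the invocation __STRUCTN (int, 16, 2) above expands to
   the following type definition (shown here only as a sketch; the macro has
   already been #undef'd at this point):

     typedef struct int16x2_t {
       int16_t val[2];
     } int16x2_t;
*/
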
13962#define __LD2R_FUNC(rettype, structtype, ptrtype, \
13963 regsuffix, funcsuffix, Q) \
13964 __extension__ static __inline rettype \
13965 __attribute__ ((__always_inline__)) \
13966 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
13967 { \
13968 rettype result; \
13969 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
13970 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
13971 : "=Q"(result) \
13972 : "Q"(*(const structtype *)ptr) \
13973 : "memory", "v16", "v17"); \
13974 return result; \
13975 }
13976
13977__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
13978__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
13979__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
13980__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
13981__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
13982__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
13983__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
13984__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
13985__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
13986__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
13987__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
13988__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
13989__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
13990__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
13991__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
13992__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
13993__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
13994__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
13995__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
13996__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
13997__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
13998__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
13999__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
14000__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
14001
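/* Usage sketch for the vld2[q]_dup_* functions generated above (illustrative
   only; the function name is hypothetical).  Two adjacent elements are
   loaded and each is broadcast to every lane of one member of the pair:

     #include <arm_neon.h>

     static int16x4x2_t
     example_load2_dup (const int16_t *pair)
     {
       return vld2_dup_s16 (pair);
     }
*/
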
14002#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
14003 lnsuffix, funcsuffix, Q) \
14004 __extension__ static __inline rettype \
14005 __attribute__ ((__always_inline__)) \
14006 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14007 rettype b, const int c) \
14008 { \
14009 rettype result; \
14010 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14011 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
14012 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
14013 : "=Q"(result) \
14014 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14015 : "memory", "v16", "v17"); \
14016 return result; \
14017 }
14018
14019__LD2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
14020__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14021__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14022__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14023__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14024__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14025__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14026__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14027__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14028__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14029__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14030__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14031__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14032__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14033__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14034__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14035__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14036__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14037__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14038__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14039__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14040__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14041__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14042__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14043
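/* Usage sketch for the vld2[q]_lane_* functions generated above
   (illustrative only; the function name is hypothetical).  Two adjacent
   elements are loaded into the given lane of each member, and the lane
   index must be a compile-time constant:

     #include <arm_neon.h>

     static uint32x2x2_t
     example_load2_lane (const uint32_t *src, uint32x2x2_t acc)
     {
       return vld2_lane_u32 (src, acc, 1);
     }
*/
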
14044#define __LD3R_FUNC(rettype, structtype, ptrtype, \
14045 regsuffix, funcsuffix, Q) \
14046 __extension__ static __inline rettype \
14047 __attribute__ ((__always_inline__)) \
14048 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14049 { \
14050 rettype result; \
14051 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14052 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14053 : "=Q"(result) \
14054 : "Q"(*(const structtype *)ptr) \
14055 : "memory", "v16", "v17", "v18"); \
14056 return result; \
14057 }
14058
14059__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
14060__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
14061__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
14062__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
14063__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
14064__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
14065__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
14066__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
14067__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
14068__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
14069__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
14070__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
14071__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
14072__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
14073__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
14074__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
14075__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
14076__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
14077__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
14078__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
14079__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
14080__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
14081__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
14082__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
14083
14084#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
14085 lnsuffix, funcsuffix, Q) \
14086 __extension__ static __inline rettype \
14087 __attribute__ ((__always_inline__)) \
14088 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14089 rettype b, const int c) \
14090 { \
14091 rettype result; \
14092 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14093 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
14094 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
14095 : "=Q"(result) \
14096 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14097 : "memory", "v16", "v17", "v18"); \
14098 return result; \
14099 }
14100
14101__LD3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
14102__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14103__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14104__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14105__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14106__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14107__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14108__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14109__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14110__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14111__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14112__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14113__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14114__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14115__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14116__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14117__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14118__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14119__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14120__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14121__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14122__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14123__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14124__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
14125
14126#define __LD4R_FUNC(rettype, structtype, ptrtype, \
14127 regsuffix, funcsuffix, Q) \
14128 __extension__ static __inline rettype \
14129 __attribute__ ((__always_inline__)) \
14130 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
14131 { \
14132 rettype result; \
14133 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14134 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14135 : "=Q"(result) \
14136 : "Q"(*(const structtype *)ptr) \
14137 : "memory", "v16", "v17", "v18", "v19"); \
14138 return result; \
14139 }
14140
14141__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
14142__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
14143__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
14144__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
14145__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
14146__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
14147__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
14148__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
14149__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
14150__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
14151__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
14152__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
14153__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
14154__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
14155__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
14156__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
14157__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
14158__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
14159__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
14160__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
14161__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
14162__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
14163__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
14164__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
14165
14166#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
14167 lnsuffix, funcsuffix, Q) \
14168 __extension__ static __inline rettype \
14169 __attribute__ ((__always_inline__)) \
14170 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
14171 rettype b, const int c) \
14172 { \
14173 rettype result; \
14174 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14175 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
14176 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
14177 : "=Q"(result) \
14178 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
14179 : "memory", "v16", "v17", "v18", "v19"); \
14180 return result; \
14181 }
14182
14183__LD4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
14184__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14185__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14186__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14187__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14188__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14189__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14190__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14191__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14192__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14193__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14194__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14195__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14196__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14197__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14198__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14199__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14200__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14201__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14202__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14203__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14204__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14205__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14206__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14207
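/* Usage sketch for the vld4[q]_lane_* forms above: one interleaved 4-element
   group (for example a single packed RGBA pixel) is pulled into lane `c` of
   four vectors that already hold de-interleaved planes.  `pixels` is a
   hypothetical pointer to packed RGBA bytes.

     uint8x8x4_t planes = vld4_dup_u8 (pixels);      // seed all lanes
     planes = vld4_lane_u8 (pixels + 4, planes, 1);  // overwrite lane 1 only
*/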
14208#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
14209 lnsuffix, funcsuffix, Q) \
14210 typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
14211 __extension__ static __inline void \
14212 __attribute__ ((__always_inline__)) \
14213 vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14214 intype b, const int c) \
14215 { \
14216 __ST2_LANE_STRUCTURE_##intype *__p = \
14217 (__ST2_LANE_STRUCTURE_##intype *)ptr; \
14218 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
14219 "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
14220 : "=Q"(*__p) \
14221 : "Q"(b), "i"(c) \
14222 : "v16", "v17"); \
14223 }
14224
14225__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
14226__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
14227__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
14228__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
14229__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
14230__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
14231__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
14232__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
14233__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
14234__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
14235__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
14236__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
14237__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
14238__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
14239__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
14240__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
14241__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
14242__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
14243__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
14244__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
14245__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
14246__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
14247__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
14248__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
14249
14250#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
14251 lnsuffix, funcsuffix, Q) \
14252 typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
14253 __extension__ static __inline void \
14254 __attribute__ ((__always_inline__)) \
14255 vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14256 intype b, const int c) \
14257 { \
14258 __ST3_LANE_STRUCTURE_##intype *__p = \
14259 (__ST3_LANE_STRUCTURE_##intype *)ptr; \
14260 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
14261 "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
14262 : "=Q"(*__p) \
14263 : "Q"(b), "i"(c) \
14264 : "v16", "v17", "v18"); \
14265 }
14266
14267__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
14268__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
14269__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
14270__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
14271__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
14272__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
14273__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
14274__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
14275__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
14276__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
14277__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
14278__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
14279__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
14280__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
14281__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
14282__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
14283__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
14284__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
14285__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
14286__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
14287__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
14288__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
14289__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
14290__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
14291
14292#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
14293 lnsuffix, funcsuffix, Q) \
14294 typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
14295 __extension__ static __inline void \
14296 __attribute__ ((__always_inline__)) \
14297 vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
14298 intype b, const int c) \
14299 { \
14300 __ST4_LANE_STRUCTURE_##intype *__p = \
14301 (__ST4_LANE_STRUCTURE_##intype *)ptr; \
14302 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
14303 "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
14304 : "=Q"(*__p) \
14305 : "Q"(b), "i"(c) \
14306 : "v16", "v17", "v18", "v19"); \
14307 }
14308
14309__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
14310__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
14311__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
14312__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
14313__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
14314__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
14315__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
14316__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
14317__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
14318__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
14319__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
14320__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
14321__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
14322__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
14323__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
14324__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
14325__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
14326__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
14327__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
14328__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
14329__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
14330__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
14331__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
14332__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
14333
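/* Usage sketch for the vst2/vst3/vst4 lane stores defined above: they are the
   mirror image of the lane loads, writing lane `c` of each vector back to
   memory as one contiguous interleaved group.  `src`, `dst` and `planes` are
   hypothetical.

     uint8x8x4_t planes = vld4_dup_u8 (src);   // de-interleaved R, G, B, A
     vst4_lane_u8 (dst, planes, 3);            // emit pixel 3 as 4 adjacent bytes
*/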
14334__extension__ static __inline int64_t __attribute__ ((__always_inline__))
14335vaddlv_s32 (int32x2_t a)
14336{
14337 int64_t result;
14338 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14339 return result;
14340}
14341
14342__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14343vaddlv_u32 (uint32x2_t a)
14344{
14345 uint64_t result;
14346 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
14347 return result;
14348}
14349
14350__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14351vpaddd_s64 (int64x2_t __a)
14352{
14353 return __builtin_aarch64_addpdi (__a);
14354}
14355
14356__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14357vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14358{
14359 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
14360}
14361
14362__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14363vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14364{
14365 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
14366}
14367
14368__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14369vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14370{
14371 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
14372}
14373
14374__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14375vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14376{
14377 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
14378}
14379
14380__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14381vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
14382{
14383 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
14384}
14385
14386__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14387vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
14388{
14389 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
14390}
14391
14392__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14393vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
14394{
14395 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
14396}
14397
14398__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14399vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
14400{
14401 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
14402}
14403
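/* Note on the *_laneq forms above: each element of the first operand is
   multiplied by one lane of a 128-bit second operand, the product is doubled,
   and the saturated high half is kept (the vqrdmulh variants also round).
   A Q15 fixed-point scaling sketch, with hypothetical inputs:

     int16x8_t q15 = vdupq_n_s16 (0x4000);      // 0.5 in Q15
     int16x8_t gains = vdupq_n_s16 (0x2000);    // 0.25 in Q15
     int16x8_t scaled = vqdmulhq_laneq_s16 (q15, gains, 5);  // 0x1000 = 0.125
*/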
14404/* Table intrinsics. */
14405
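/* Usage sketch for the quad-register table lookups below: the vqtbl*
   intrinsics wrap the TBL instruction, which indexes a byte table and yields
   0 for out-of-range indices, while the vqtbx* intrinsics wrap TBX, which
   leaves the destination byte unchanged instead.  `lut`, `idx` and `fallback`
   are hypothetical values; vld1q_u8 is the ordinary 16-byte load defined
   elsewhere in this header.

     uint8x16_t table = vld1q_u8 (lut);
     uint8x16_t a = vqtbl1q_u8 (table, idx);            // 0 where idx > 15
     uint8x16_t b = vqtbx1q_u8 (fallback, table, idx);  // keeps fallback there
*/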
14406__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14407vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
14408{
14409 poly8x8_t result;
14410 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14411 : "=w"(result)
14412 : "w"(a), "w"(b)
14413 : /* No clobbers */);
14414 return result;
14415}
14416
14417__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14418vqtbl1_s8 (int8x16_t a, uint8x8_t b)
14419{
14420 int8x8_t result;
14421 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14422 : "=w"(result)
14423 : "w"(a), "w"(b)
14424 : /* No clobbers */);
14425 return result;
14426}
14427
14428__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14429vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
14430{
14431 uint8x8_t result;
14432 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14433 : "=w"(result)
14434 : "w"(a), "w"(b)
14435 : /* No clobbers */);
14436 return result;
14437}
14438
14439__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14440vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
14441{
14442 poly8x16_t result;
14443 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14444 : "=w"(result)
14445 : "w"(a), "w"(b)
14446 : /* No clobbers */);
14447 return result;
14448}
14449
14450__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14451vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
14452{
14453 int8x16_t result;
14454 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14455 : "=w"(result)
14456 : "w"(a), "w"(b)
14457 : /* No clobbers */);
14458 return result;
14459}
14460
14461__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14462vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
14463{
14464 uint8x16_t result;
14465 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
14466 : "=w"(result)
14467 : "w"(a), "w"(b)
14468 : /* No clobbers */);
14469 return result;
14470}
14471
14472__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14473vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
14474{
14475 int8x8_t result;
14476 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14477 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14478 :"=w"(result)
14479 :"Q"(tab),"w"(idx)
14480 :"memory", "v16", "v17");
14481 return result;
14482}
14483
14484__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14485vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
14486{
14487 uint8x8_t result;
14488 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14489 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14490 :"=w"(result)
14491 :"Q"(tab),"w"(idx)
14492 :"memory", "v16", "v17");
14493 return result;
14494}
14495
14496__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14497vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
14498{
14499 poly8x8_t result;
14500 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14501 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14502 :"=w"(result)
14503 :"Q"(tab),"w"(idx)
14504 :"memory", "v16", "v17");
14505 return result;
14506}
14507
14508__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14509vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
14510{
14511 int8x16_t result;
14512 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14513 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14514 :"=w"(result)
14515 :"Q"(tab),"w"(idx)
14516 :"memory", "v16", "v17");
14517 return result;
14518}
14519
14520__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14521vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
14522{
14523 uint8x16_t result;
14524 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14525 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14526 :"=w"(result)
14527 :"Q"(tab),"w"(idx)
14528 :"memory", "v16", "v17");
14529 return result;
14530}
14531
14532__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14533vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
14534{
14535 poly8x16_t result;
14536 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14537 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14538 :"=w"(result)
14539 :"Q"(tab),"w"(idx)
14540 :"memory", "v16", "v17");
14541 return result;
14542}
14543
14544__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14545vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
14546{
14547 int8x8_t result;
14548 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14549 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14550 :"=w"(result)
14551 :"Q"(tab),"w"(idx)
14552 :"memory", "v16", "v17", "v18");
14553 return result;
14554}
14555
14556__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14557vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
14558{
14559 uint8x8_t result;
14560 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14561 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14562 :"=w"(result)
14563 :"Q"(tab),"w"(idx)
14564 :"memory", "v16", "v17", "v18");
14565 return result;
14566}
14567
14568__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14569vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
14570{
14571 poly8x8_t result;
14572 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14573 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14574 :"=w"(result)
14575 :"Q"(tab),"w"(idx)
14576 :"memory", "v16", "v17", "v18");
14577 return result;
14578}
14579
14580__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14581vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
14582{
14583 int8x16_t result;
14584 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14585 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14586 :"=w"(result)
14587 :"Q"(tab),"w"(idx)
14588 :"memory", "v16", "v17", "v18");
14589 return result;
14590}
14591
14592__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14593vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
14594{
14595 uint8x16_t result;
14596 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14597 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14598 :"=w"(result)
14599 :"Q"(tab),"w"(idx)
14600 :"memory", "v16", "v17", "v18");
14601 return result;
14602}
14603
14604__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14605vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
14606{
14607 poly8x16_t result;
14608 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14609 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14610 :"=w"(result)
14611 :"Q"(tab),"w"(idx)
14612 :"memory", "v16", "v17", "v18");
14613 return result;
14614}
14615
14616__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14617vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
14618{
14619 int8x8_t result;
14620 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14621 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14622 :"=w"(result)
14623 :"Q"(tab),"w"(idx)
14624 :"memory", "v16", "v17", "v18", "v19");
14625 return result;
14626}
14627
14628__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14629vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
14630{
14631 uint8x8_t result;
14632 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14633 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14634 :"=w"(result)
14635 :"Q"(tab),"w"(idx)
14636 :"memory", "v16", "v17", "v18", "v19");
14637 return result;
14638}
14639
14640__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14641vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
14642{
14643 poly8x8_t result;
14644 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14645 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14646 :"=w"(result)
14647 :"Q"(tab),"w"(idx)
14648 :"memory", "v16", "v17", "v18", "v19");
14649 return result;
14650}
14651
14652
14653__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14654vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
14655{
14656 int8x16_t result;
14657 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14658 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14659 :"=w"(result)
14660 :"Q"(tab),"w"(idx)
14661 :"memory", "v16", "v17", "v18", "v19");
14662 return result;
14663}
14664
14665__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14666vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
14667{
14668 uint8x16_t result;
14669 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14670 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14671 :"=w"(result)
14672 :"Q"(tab),"w"(idx)
14673 :"memory", "v16", "v17", "v18", "v19");
14674 return result;
14675}
14676
14677__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14678vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
14679{
14680 poly8x16_t result;
14681 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14682 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14683 :"=w"(result)
14684 :"Q"(tab),"w"(idx)
14685 :"memory", "v16", "v17", "v18", "v19");
14686 return result;
14687}
14688
14689
14690__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14691vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
14692{
14693 int8x8_t result = r;
14694 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14695 : "+w"(result)
14696 : "w"(tab), "w"(idx)
14697 : /* No clobbers */);
14698 return result;
14699}
14700
14701__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14702vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
14703{
14704 uint8x8_t result = r;
14705 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14706 : "+w"(result)
14707 : "w"(tab), "w"(idx)
14708 : /* No clobbers */);
14709 return result;
14710}
14711
14712__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14713vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
14714{
14715 poly8x8_t result = r;
14716 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
14717 : "+w"(result)
14718 : "w"(tab), "w"(idx)
14719 : /* No clobbers */);
14720 return result;
14721}
14722
14723__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14724vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
14725{
14726 int8x16_t result = r;
14727 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14728 : "+w"(result)
14729 : "w"(tab), "w"(idx)
14730 : /* No clobbers */);
14731 return result;
14732}
14733
14734__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14735vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
14736{
14737 uint8x16_t result = r;
14738 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14739 : "+w"(result)
14740 : "w"(tab), "w"(idx)
14741 : /* No clobbers */);
14742 return result;
14743}
14744
14745__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14746vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
14747{
14748 poly8x16_t result = r;
14749 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
14750 : "+w"(result)
14751 : "w"(tab), "w"(idx)
14752 : /* No clobbers */);
14753 return result;
14754}
14755
14756__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14757vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
14758{
14759 int8x8_t result = r;
14760 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14761 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14762 :"+w"(result)
14763 :"Q"(tab),"w"(idx)
14764 :"memory", "v16", "v17");
14765 return result;
14766}
14767
14768__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14769vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
14770{
14771 uint8x8_t result = r;
14772 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14773 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14774 :"+w"(result)
14775 :"Q"(tab),"w"(idx)
14776 :"memory", "v16", "v17");
14777 return result;
14778}
14779
14780__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14781vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
14782{
14783 poly8x8_t result = r;
14784 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14785 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
14786 :"+w"(result)
14787 :"Q"(tab),"w"(idx)
14788 :"memory", "v16", "v17");
14789 return result;
14790}
14791
14792
14793__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14794vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
14795{
14796 int8x16_t result = r;
14797 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14798 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14799 :"+w"(result)
14800 :"Q"(tab),"w"(idx)
14801 :"memory", "v16", "v17");
14802 return result;
14803}
14804
14805__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14806vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
14807{
14808 uint8x16_t result = r;
14809 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14810 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14811 :"+w"(result)
14812 :"Q"(tab),"w"(idx)
14813 :"memory", "v16", "v17");
14814 return result;
14815}
14816
14817__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14818vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
14819{
14820 poly8x16_t result = r;
14821 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
14822 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
14823 :"+w"(result)
14824 :"Q"(tab),"w"(idx)
14825 :"memory", "v16", "v17");
14826 return result;
14827}
14828
14829
14830__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14831vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
14832{
14833 int8x8_t result = r;
14834 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14835 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14836 :"+w"(result)
14837 :"Q"(tab),"w"(idx)
14838 :"memory", "v16", "v17", "v18");
14839 return result;
14840}
14841
14842__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14843vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
14844{
14845 uint8x8_t result = r;
14846 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14847 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14848 :"+w"(result)
14849 :"Q"(tab),"w"(idx)
14850 :"memory", "v16", "v17", "v18");
14851 return result;
14852}
14853
14854__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14855vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
14856{
14857 poly8x8_t result = r;
14858 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14859 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
14860 :"+w"(result)
14861 :"Q"(tab),"w"(idx)
14862 :"memory", "v16", "v17", "v18");
14863 return result;
14864}
14865
14866
14867__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14868vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
14869{
14870 int8x16_t result = r;
14871 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14872 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14873 :"+w"(result)
14874 :"Q"(tab),"w"(idx)
14875 :"memory", "v16", "v17", "v18");
14876 return result;
14877}
14878
14879__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14880vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
14881{
14882 uint8x16_t result = r;
14883 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14884 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14885 :"+w"(result)
14886 :"Q"(tab),"w"(idx)
14887 :"memory", "v16", "v17", "v18");
14888 return result;
14889}
14890
14891__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14892vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
14893{
14894 poly8x16_t result = r;
14895 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
14896 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
14897 :"+w"(result)
14898 :"Q"(tab),"w"(idx)
14899 :"memory", "v16", "v17", "v18");
14900 return result;
14901}
14902
14903
14904__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14905vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
14906{
14907 int8x8_t result = r;
14908 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14909 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14910 :"+w"(result)
14911 :"Q"(tab),"w"(idx)
14912 :"memory", "v16", "v17", "v18", "v19");
14913 return result;
14914}
14915
14916__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14917vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
14918{
14919 uint8x8_t result = r;
14920 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14921 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14922 :"+w"(result)
14923 :"Q"(tab),"w"(idx)
14924 :"memory", "v16", "v17", "v18", "v19");
14925 return result;
14926}
14927
14928__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14929vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
14930{
14931 poly8x8_t result = r;
14932 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14933 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
14934 :"+w"(result)
14935 :"Q"(tab),"w"(idx)
14936 :"memory", "v16", "v17", "v18", "v19");
14937 return result;
14938}
14939
14940
14941__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14942vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
14943{
14944 int8x16_t result = r;
14945 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14946 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14947 :"+w"(result)
14948 :"Q"(tab),"w"(idx)
14949 :"memory", "v16", "v17", "v18", "v19");
14950 return result;
14951}
14952
14953__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14954vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
14955{
14956 uint8x16_t result = r;
14957 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14958 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14959 :"+w"(result)
14960 :"Q"(tab),"w"(idx)
14961 :"memory", "v16", "v17", "v18", "v19");
14962 return result;
14963}
14964
14965__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14966vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
14967{
14968 poly8x16_t result = r;
14969 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
14970 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
14971 :"+w"(result)
14972 :"Q"(tab),"w"(idx)
14973 :"memory", "v16", "v17", "v18", "v19");
14974 return result;
14975}
14976
14977/* V7 legacy table intrinsics. */
14978
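/* Note on the legacy forms below: as on ARMv7 they take 64-bit table vectors,
   which are first widened with vcombine so that the AArch64 TBL/TBX
   instructions can be used.  Indices select from the concatenated bytes, so
   vtbl2 covers indices 0-15.  `lo`, `hi` and `idx` are hypothetical values.

     uint8x8x2_t tab = { { lo, hi } };      // lo -> bytes 0-7, hi -> bytes 8-15
     uint8x8_t out = vtbl2_u8 (tab, idx);   // any idx >= 16 yields 0
*/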
14979__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14980vtbl1_s8 (int8x8_t tab, int8x8_t idx)
14981{
14982 int8x8_t result;
14983 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
14984 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14985 : "=w"(result)
14986 : "w"(temp), "w"(idx)
14987 : /* No clobbers */);
14988 return result;
14989}
14990
14991__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14992vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
14993{
14994 uint8x8_t result;
14995 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
14996 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
14997 : "=w"(result)
14998 : "w"(temp), "w"(idx)
14999 : /* No clobbers */);
15000 return result;
15001}
15002
15003__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15004vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
15005{
15006 poly8x8_t result;
15007 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15008 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15009 : "=w"(result)
15010 : "w"(temp), "w"(idx)
15011 : /* No clobbers */);
15012 return result;
15013}
15014
15015__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15016vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
15017{
15018 int8x8_t result;
15019 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15020 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15021 : "=w"(result)
15022 : "w"(temp), "w"(idx)
15023 : /* No clobbers */);
15024 return result;
15025}
15026
15027__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15028vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
15029{
15030 uint8x8_t result;
15031 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15032 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15033 : "=w"(result)
15034 : "w"(temp), "w"(idx)
15035 : /* No clobbers */);
15036 return result;
15037}
15038
15039__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15040vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
15041{
15042 poly8x8_t result;
15043 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15044 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
15045 : "=w"(result)
15046 : "w"(temp), "w"(idx)
15047 : /* No clobbers */);
15048 return result;
15049}
15050
15051__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15052vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
15053{
15054 int8x8_t result;
15055 int8x16x2_t temp;
15056 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15057 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
15058  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15059 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15060 : "=w"(result)
15061 : "Q"(temp), "w"(idx)
15062 : "v16", "v17", "memory");
15063 return result;
15064}
15065
15066__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15067vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
15068{
15069 uint8x8_t result;
15070 uint8x16x2_t temp;
15071 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15072 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
15073  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15074 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15075 : "=w"(result)
15076 : "Q"(temp), "w"(idx)
15077 : "v16", "v17", "memory");
15078 return result;
15079}
15080
15081__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15082vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
15083{
15084 poly8x8_t result;
15085 poly8x16x2_t temp;
15086 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15087 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
15088  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15089 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15090 : "=w"(result)
15091 : "Q"(temp), "w"(idx)
15092 : "v16", "v17", "memory");
15093 return result;
15094}
15095
15096__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15097vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
15098{
15099 int8x8_t result;
15100 int8x16x2_t temp;
15101 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15102 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15103  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15104 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15105 : "=w"(result)
15106 : "Q"(temp), "w"(idx)
15107 : "v16", "v17", "memory");
15108 return result;
15109}
15110
15111__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15112vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
15113{
15114 uint8x8_t result;
15115 uint8x16x2_t temp;
15116 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15117 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15118  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15119 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15120 : "=w"(result)
15121 : "Q"(temp), "w"(idx)
15122 : "v16", "v17", "memory");
15123 return result;
15124}
15125
15126__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15127vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
15128{
15129 poly8x8_t result;
15130 poly8x16x2_t temp;
15131 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15132 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15133  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15134 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15135 : "=w"(result)
15136 : "Q"(temp), "w"(idx)
15137 : "v16", "v17", "memory");
15138 return result;
15139}
15140
15141__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15142vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
15143{
15144 int8x8_t result = r;
15145 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
15146 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15147 : "+w"(result)
15148 : "w"(temp), "w"(idx)
15149 : /* No clobbers */);
15150 return result;
15151}
15152
15153__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15154vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
15155{
15156 uint8x8_t result = r;
15157 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
15158 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15159 : "+w"(result)
15160 : "w"(temp), "w"(idx)
15161 : /* No clobbers */);
15162 return result;
15163}
15164
15165__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15166vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
15167{
15168 poly8x8_t result = r;
15169 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
15170 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
15171 : "+w"(result)
15172 : "w"(temp), "w"(idx)
15173 : /* No clobbers */);
15174 return result;
15175}
15176
15177__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15178vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
15179{
15180 int8x8_t result = r;
15181 int8x16x2_t temp;
15182 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
15183 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
15184  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15185 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15186 : "+w"(result)
15187 : "Q"(temp), "w"(idx)
15188 : "v16", "v17", "memory");
15189 return result;
15190}
15191
15192__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15193vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
15194{
15195 uint8x8_t result = r;
15196 uint8x16x2_t temp;
15197 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
15198 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
15199  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15200 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15201 : "+w"(result)
15202 : "Q"(temp), "w"(idx)
15203 : "v16", "v17", "memory");
15204 return result;
15205}
15206
15207__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15208vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
15209{
15210 poly8x8_t result = r;
15211 poly8x16x2_t temp;
15212 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
15213 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
15214  __asm__ ("ld1 {v16.16b - v17.16b}, %1\n\t"
15215 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
15216 : "+w"(result)
15217 : "Q"(temp), "w"(idx)
15218 : "v16", "v17", "memory");
15219 return result;
15220}
15221
15222/* End of temporary inline asm. */
15223
15224/* Start of optimal implementations in approved order. */
15225
15226/* vabs */
15227
15228__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15229vabs_f32 (float32x2_t __a)
15230{
15231 return __builtin_aarch64_absv2sf (__a);
15232}
15233
15234__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15235vabs_f64 (float64x1_t __a)
15236{
15237 return __builtin_fabs (__a);
15238}
15239
15240__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15241vabs_s8 (int8x8_t __a)
15242{
15243 return __builtin_aarch64_absv8qi (__a);
15244}
15245
15246__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15247vabs_s16 (int16x4_t __a)
15248{
15249 return __builtin_aarch64_absv4hi (__a);
15250}
15251
15252__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15253vabs_s32 (int32x2_t __a)
15254{
15255 return __builtin_aarch64_absv2si (__a);
15256}
15257
15258__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15259vabs_s64 (int64x1_t __a)
15260{
15261 return __builtin_llabs (__a);
15262}
15263
15264__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15265vabsq_f32 (float32x4_t __a)
15266{
15267 return __builtin_aarch64_absv4sf (__a);
15268}
15269
15270__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15271vabsq_f64 (float64x2_t __a)
15272{
15273 return __builtin_aarch64_absv2df (__a);
15274}
15275
15276__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15277vabsq_s8 (int8x16_t __a)
15278{
15279 return __builtin_aarch64_absv16qi (__a);
15280}
15281
15282__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15283vabsq_s16 (int16x8_t __a)
15284{
15285 return __builtin_aarch64_absv8hi (__a);
15286}
15287
15288__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15289vabsq_s32 (int32x4_t __a)
15290{
15291 return __builtin_aarch64_absv4si (__a);
15292}
15293
15294__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15295vabsq_s64 (int64x2_t __a)
15296{
15297 return __builtin_aarch64_absv2di (__a);
15298}
15299
15300/* vadd */
15301
15302__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15303vaddd_s64 (int64x1_t __a, int64x1_t __b)
15304{
15305 return __a + __b;
15306}
15307
15308__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15309vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
15310{
15311 return __a + __b;
15312}
15313
15314/* vaddv */
15315
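/* Sketch of the across-vector reductions defined below: vaddv/vaddvq sum all
   lanes into a single element of the same width (the vaddlv_* forms earlier
   widen the accumulator instead).  `bytes` and `vec4` are hypothetical.

     uint8_t total = vaddv_u8 (bytes);      // sum of 8 lanes, wraps modulo 256
     float32_t fsum = vaddvq_f32 (vec4);    // horizontal sum of 4 floats
*/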
15316__extension__ static __inline int8_t __attribute__ ((__always_inline__))
15317vaddv_s8 (int8x8_t __a)
15318{
15319 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
15320}
15321
15322__extension__ static __inline int16_t __attribute__ ((__always_inline__))
15323vaddv_s16 (int16x4_t __a)
15324{
15325 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
15326}
15327
15328__extension__ static __inline int32_t __attribute__ ((__always_inline__))
15329vaddv_s32 (int32x2_t __a)
15330{
15331 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
15332}
15333
15334__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15335vaddv_u8 (uint8x8_t __a)
15336{
15337 return vget_lane_u8 ((uint8x8_t)
15338 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
15339 0);
15340}
15341
15342__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15343vaddv_u16 (uint16x4_t __a)
15344{
15345 return vget_lane_u16 ((uint16x4_t)
15346 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
15347 0);
15348}
15349
15350__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15351vaddv_u32 (uint32x2_t __a)
15352{
15353 return vget_lane_u32 ((uint32x2_t)
15354 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
15355 0);
15356}
15357
15358__extension__ static __inline int8_t __attribute__ ((__always_inline__))
15359vaddvq_s8 (int8x16_t __a)
15360{
15361 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
15362 0);
15363}
15364
15365__extension__ static __inline int16_t __attribute__ ((__always_inline__))
15366vaddvq_s16 (int16x8_t __a)
15367{
15368 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
15369}
15370
15371__extension__ static __inline int32_t __attribute__ ((__always_inline__))
15372vaddvq_s32 (int32x4_t __a)
15373{
15374 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
15375}
15376
15377__extension__ static __inline int64_t __attribute__ ((__always_inline__))
15378vaddvq_s64 (int64x2_t __a)
15379{
15380 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
15381}
15382
15383__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
15384vaddvq_u8 (uint8x16_t __a)
15385{
15386 return vgetq_lane_u8 ((uint8x16_t)
15387 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
15388 0);
15389}
15390
15391__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
15392vaddvq_u16 (uint16x8_t __a)
15393{
15394 return vgetq_lane_u16 ((uint16x8_t)
15395 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
15396 0);
15397}
15398
15399__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15400vaddvq_u32 (uint32x4_t __a)
15401{
15402 return vgetq_lane_u32 ((uint32x4_t)
15403 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
15404 0);
15405}
15406
15407__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15408vaddvq_u64 (uint64x2_t __a)
15409{
15410 return vgetq_lane_u64 ((uint64x2_t)
15411 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
15412 0);
15413}
15414
15415__extension__ static __inline float32_t __attribute__ ((__always_inline__))
15416vaddv_f32 (float32x2_t __a)
15417{
15418 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
15419 return vget_lane_f32 (__t, 0);
15420}
15421
15422__extension__ static __inline float32_t __attribute__ ((__always_inline__))
15423vaddvq_f32 (float32x4_t __a)
15424{
15425 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
15426 return vgetq_lane_f32 (__t, 0);
15427}
15428
15429__extension__ static __inline float64_t __attribute__ ((__always_inline__))
15430vaddvq_f64 (float64x2_t __a)
15431{
15432 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
15433 return vgetq_lane_f64 (__t, 0);
15434}
15435
15436/* vbsl */
15437
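/* Sketch of the bitwise select defined below: vbsl computes
   (mask & b) | (~mask & c) bit by bit, so an all-ones/all-zeros comparison
   mask selects whole lanes.  A branchless pick-the-larger-magnitude pattern,
   with hypothetical inputs `a` and `b`:

     uint32x4_t m = vcagtq_f32 (a, b);      // lanes where |a| > |b|
     float32x4_t r = vbslq_f32 (m, a, b);   // a where |a| > |b|, else b
*/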
15438__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15439vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
15440{
15441 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
15442}
15443
15444__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15445vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
15446{
15447 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
15448}
15449
15450__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15451vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
15452{
15453 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
15454}
15455
15456__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15457vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
15458{
15459 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
15460}
15461
15462__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15463vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
15464{
15465 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
15466}
15467
15468__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15469vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
15470{
15471 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
15472}
15473
15474__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15475vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
15476{
15477 return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c);
15478}
15479
15480__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15481vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
15482{
15483 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
15484}
15485
15486__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15487vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
15488{
15489 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
15490}
15491
15492__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15493vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
15494{
15495 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
15496}
15497
15498__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15499vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
15500{
15501 return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c);
15502}
15503
15504__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15505vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
15506{
15507 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
15508}
15509
15510__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15511vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
15512{
15513 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
15514}
15515
15516__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15517vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
15518{
15519 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
15520}
15521
15522__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15523vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
15524{
15525 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
15526}
15527
15528__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15529vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
15530{
15531 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
15532}
15533
15534__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15535vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
15536{
15537 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
15538}
15539
15540__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15541vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
15542{
15543 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
15544}
15545
15546__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15547vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
15548{
15549 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
15550}
15551
15552__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15553vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
15554{
15555 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
15556}
15557
15558__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15559vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
15560{
15561 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
15562}
15563
15564__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15565vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
15566{
15567 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
15568}
15569
15570__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15571vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
15572{
15573 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
15574}
15575
15576#ifdef __ARM_FEATURE_CRYPTO
15577
15578/* vaes */
15579
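/* Sketch of one AES encryption round using the intrinsics below: AESE folds
   in the round key and applies SubBytes/ShiftRows, AESMC applies MixColumns;
   the final round skips MixColumns and ends with a plain XOR of the last key.
   `state` and `roundkey` are hypothetical uint8x16_t values.

     state = vaesmcq_u8 (vaeseq_u8 (state, roundkey));   // rounds 1 .. N-1
*/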
15580static __inline uint8x16_t
15581vaeseq_u8 (uint8x16_t data, uint8x16_t key)
15582{
15583 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
15584}
15585
15586static __inline uint8x16_t
15587vaesdq_u8 (uint8x16_t data, uint8x16_t key)
15588{
15589 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
15590}
15591
15592static __inline uint8x16_t
15593vaesmcq_u8 (uint8x16_t data)
15594{
15595 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
15596}
15597
15598static __inline uint8x16_t
15599vaesimcq_u8 (uint8x16_t data)
15600{
15601 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
15602}
15603
15604#endif
15605
15606/* vcage */
15607
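/* Sketch of the absolute-compare family: vcage tests |a| >= |b| lane by lane
   and returns all-ones where the relation holds, giving a mask that composes
   with vbsl.  `a` and `b` are hypothetical float32x4_t values.

     uint32x4_t ge = vcageq_f32 (a, b);     // 0xffffffff where |a| >= |b|
*/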
15608__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15609vcages_f32 (float32_t __a, float32_t __b)
15610{
15611 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
15612}
15613
15614__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15615vcage_f32 (float32x2_t __a, float32x2_t __b)
15616{
15617 return vabs_f32 (__a) >= vabs_f32 (__b);
15618}
15619
15620__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15621vcageq_f32 (float32x4_t __a, float32x4_t __b)
15622{
15623 return vabsq_f32 (__a) >= vabsq_f32 (__b);
15624}
15625
15626__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15627vcaged_f64 (float64_t __a, float64_t __b)
15628{
15629 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
15630}
15631
15632__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15633vcageq_f64 (float64x2_t __a, float64x2_t __b)
15634{
15635 return vabsq_f64 (__a) >= vabsq_f64 (__b);
15636}
15637
15638/* vcagt */
15639
15640__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15641vcagts_f32 (float32_t __a, float32_t __b)
15642{
15643 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
15644}
15645
15646__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15647vcagt_f32 (float32x2_t __a, float32x2_t __b)
15648{
15649 return vabs_f32 (__a) > vabs_f32 (__b);
15650}
15651
15652__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15653vcagtq_f32 (float32x4_t __a, float32x4_t __b)
15654{
15655 return vabsq_f32 (__a) > vabsq_f32 (__b);
15656}
15657
15658__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15659vcagtd_f64 (float64_t __a, float64_t __b)
15660{
15661 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
15662}
15663
15664__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15665vcagtq_f64 (float64x2_t __a, float64x2_t __b)
15666{
15667 return vabsq_f64 (__a) > vabsq_f64 (__b);
15668}
15669
15670/* vcale */
15671
15672__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15673vcale_f32 (float32x2_t __a, float32x2_t __b)
15674{
15675 return vabs_f32 (__a) <= vabs_f32 (__b);
15676}
15677
15678__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15679vcaleq_f32 (float32x4_t __a, float32x4_t __b)
15680{
15681 return vabsq_f32 (__a) <= vabsq_f32 (__b);
15682}
15683
15684__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15685vcaleq_f64 (float64x2_t __a, float64x2_t __b)
15686{
15687 return vabsq_f64 (__a) <= vabsq_f64 (__b);
15688}
15689
15690/* vcalt */
15691
15692__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15693vcalt_f32 (float32x2_t __a, float32x2_t __b)
15694{
15695 return vabs_f32 (__a) < vabs_f32 (__b);
15696}
15697
15698__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15699vcaltq_f32 (float32x4_t __a, float32x4_t __b)
15700{
15701 return vabsq_f32 (__a) < vabsq_f32 (__b);
15702}
15703
15704__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15705vcaltq_f64 (float64x2_t __a, float64x2_t __b)
15706{
15707 return vabsq_f64 (__a) < vabsq_f64 (__b);
15708}
15709
15710/* vceq - vector. */
15711
15712__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15713vceq_f32 (float32x2_t __a, float32x2_t __b)
15714{
15715 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15716}
15717
15718__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15719vceq_f64 (float64x1_t __a, float64x1_t __b)
15720{
15721 return __a == __b ? -1ll : 0ll;
15722}
15723
15724__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15725vceq_p8 (poly8x8_t __a, poly8x8_t __b)
15726{
15727 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15728 (int8x8_t) __b);
15729}
15730
15731__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15732vceq_s8 (int8x8_t __a, int8x8_t __b)
15733{
15734 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15735}
15736
15737__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15738vceq_s16 (int16x4_t __a, int16x4_t __b)
15739{
15740 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15741}
15742
15743__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15744vceq_s32 (int32x2_t __a, int32x2_t __b)
15745{
15746 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15747}
15748
15749__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15750vceq_s64 (int64x1_t __a, int64x1_t __b)
15751{
15752 return __a == __b ? -1ll : 0ll;
15753}
15754
15755__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15756vceq_u8 (uint8x8_t __a, uint8x8_t __b)
15757{
15758 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15759 (int8x8_t) __b);
15760}
15761
15762__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15763vceq_u16 (uint16x4_t __a, uint16x4_t __b)
15764{
15765 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15766 (int16x4_t) __b);
15767}
15768
15769__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15770vceq_u32 (uint32x2_t __a, uint32x2_t __b)
15771{
15772 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15773 (int32x2_t) __b);
15774}
15775
15776__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15777vceq_u64 (uint64x1_t __a, uint64x1_t __b)
15778{
15779 return __a == __b ? -1ll : 0ll;
15780}
15781
15782__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15783vceqq_f32 (float32x4_t __a, float32x4_t __b)
15784{
15785 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15786}
15787
15788__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15789vceqq_f64 (float64x2_t __a, float64x2_t __b)
15790{
15791 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15792}
15793
15794__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15795vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
15796{
15797 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15798 (int8x16_t) __b);
15799}
15800
15801__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15802vceqq_s8 (int8x16_t __a, int8x16_t __b)
15803{
15804 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15805}
15806
15807__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15808vceqq_s16 (int16x8_t __a, int16x8_t __b)
15809{
15810 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15811}
15812
15813__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15814vceqq_s32 (int32x4_t __a, int32x4_t __b)
15815{
15816 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
15817}
15818
15819__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15820vceqq_s64 (int64x2_t __a, int64x2_t __b)
15821{
15822 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
15823}
15824
15825__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15826vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
15827{
15828 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15829 (int8x16_t) __b);
15830}
15831
15832__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15833vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
15834{
15835 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
15836 (int16x8_t) __b);
15837}
15838
15839__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15840vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
15841{
15842 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
15843 (int32x4_t) __b);
15844}
15845
15846__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15847vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
15848{
15849 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
15850 (int64x2_t) __b);
15851}
15852
15853/* vceq - scalar. */
15854
15855__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15856vceqs_f32 (float32_t __a, float32_t __b)
15857{
15858 return __a == __b ? -1 : 0;
15859}
15860
15861__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15862vceqd_s64 (int64x1_t __a, int64x1_t __b)
15863{
15864 return __a == __b ? -1ll : 0ll;
15865}
15866
15867__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15868vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
15869{
15870 return __a == __b ? -1ll : 0ll;
15871}
15872
15873__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15874vceqd_f64 (float64_t __a, float64_t __b)
15875{
15876 return __a == __b ? -1ll : 0ll;
15877}
15878
15879/* vceqz - vector. */
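/* Per-lane compare against zero: each result lane is all ones where
   the corresponding input lane equals zero.  */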
15880
15881__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15882vceqz_f32 (float32x2_t __a)
15883{
15884 float32x2_t __b = {0.0f, 0.0f};
15885 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
15886}
15887
15888__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15889vceqz_f64 (float64x1_t __a)
15890{
15891 return __a == 0.0 ? -1ll : 0ll;
15892}
15893
15894__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15895vceqz_p8 (poly8x8_t __a)
15896{
15897 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15898 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15899 (int8x8_t) __b);
15900}
15901
15902__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15903vceqz_s8 (int8x8_t __a)
15904{
15905 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15906 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
15907}
15908
15909__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15910vceqz_s16 (int16x4_t __a)
15911{
15912 int16x4_t __b = {0, 0, 0, 0};
15913 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
15914}
15915
15916__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15917vceqz_s32 (int32x2_t __a)
15918{
15919 int32x2_t __b = {0, 0};
15920 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
15921}
15922
15923__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15924vceqz_s64 (int64x1_t __a)
15925{
15926 return __a == 0ll ? -1ll : 0ll;
15927}
15928
15929__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15930vceqz_u8 (uint8x8_t __a)
15931{
15932 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15933 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
15934 (int8x8_t) __b);
15935}
15936
15937__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15938vceqz_u16 (uint16x4_t __a)
15939{
15940 uint16x4_t __b = {0, 0, 0, 0};
15941 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
15942 (int16x4_t) __b);
15943}
15944
15945__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15946vceqz_u32 (uint32x2_t __a)
15947{
15948 uint32x2_t __b = {0, 0};
15949 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
15950 (int32x2_t) __b);
15951}
15952
15953__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15954vceqz_u64 (uint64x1_t __a)
15955{
15956 return __a == 0ll ? -1ll : 0ll;
15957}
15958
15959__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15960vceqzq_f32 (float32x4_t __a)
15961{
15962 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15963 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
15964}
15965
15966__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15967vceqzq_f64 (float64x2_t __a)
15968{
15969 float64x2_t __b = {0.0, 0.0};
15970 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
15971}
15972
15973__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15974vceqzq_p8 (poly8x16_t __a)
15975{
15976 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15977 0, 0, 0, 0, 0, 0, 0, 0};
15978 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
15979 (int8x16_t) __b);
15980}
15981
15982__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15983vceqzq_s8 (int8x16_t __a)
15984{
15985 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15986 0, 0, 0, 0, 0, 0, 0, 0};
15987 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
15988}
15989
15990__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15991vceqzq_s16 (int16x8_t __a)
15992{
15993 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15994 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
15995}
15996
15997__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15998vceqzq_s32 (int32x4_t __a)
15999{
16000 int32x4_t __b = {0, 0, 0, 0};
16001 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
16002}
16003
16004__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16005vceqzq_s64 (int64x2_t __a)
16006{
16007 int64x2_t __b = {0, 0};
16008 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
16009}
16010
16011__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16012vceqzq_u8 (uint8x16_t __a)
16013{
16014 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16015 0, 0, 0, 0, 0, 0, 0, 0};
16016 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
16017 (int8x16_t) __b);
16018}
16019
16020__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16021vceqzq_u16 (uint16x8_t __a)
16022{
16023 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16024 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
16025 (int16x8_t) __b);
16026}
16027
16028__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16029vceqzq_u32 (uint32x4_t __a)
16030{
16031 uint32x4_t __b = {0, 0, 0, 0};
16032 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
16033 (int32x4_t) __b);
16034}
16035
16036__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16037vceqzq_u64 (uint64x2_t __a)
16038{
16039 uint64x2_t __b = {0, 0};
16040 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
16041 (int64x2_t) __b);
16042}
16043
16044/* vceqz - scalar. */
16045
16046__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16047vceqzs_f32 (float32_t __a)
16048{
16049 return __a == 0.0f ? -1 : 0;
16050}
16051
16052__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16053vceqzd_s64 (int64x1_t __a)
16054{
16055 return __a == 0 ? -1ll : 0ll;
16056}
16057
16058__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16059vceqzd_u64 (uint64x1_t __a)
16060{
16061 return __a == 0 ? -1ll : 0ll;
16062}
16063
16064__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16065vceqzd_f64 (float64_t __a)
16066{
16067 return __a == 0.0 ? -1ll : 0ll;
16068}
16069
16070/* vcge - vector. */
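/* Per-lane greater-than-or-equal compare.  The unsigned element types
   map to the unsigned builtins (cmgeu); the other vector types use the
   plain cmge builtins.  */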
16071
16072__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16073vcge_f32 (float32x2_t __a, float32x2_t __b)
16074{
16075 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16076}
16077
16078__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16079vcge_f64 (float64x1_t __a, float64x1_t __b)
16080{
16081 return __a >= __b ? -1ll : 0ll;
16082}
16083
16084__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16085vcge_p8 (poly8x8_t __a, poly8x8_t __b)
16086{
16087 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16088 (int8x8_t) __b);
16089}
16090
16091__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16092vcge_s8 (int8x8_t __a, int8x8_t __b)
16093{
16094 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16095}
16096
16097__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16098vcge_s16 (int16x4_t __a, int16x4_t __b)
16099{
16100 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16101}
16102
16103__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16104vcge_s32 (int32x2_t __a, int32x2_t __b)
16105{
16106 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16107}
16108
16109__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16110vcge_s64 (int64x1_t __a, int64x1_t __b)
16111{
16112 return __a >= __b ? -1ll : 0ll;
16113}
16114
16115__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16116vcge_u8 (uint8x8_t __a, uint8x8_t __b)
16117{
16118 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16119 (int8x8_t) __b);
16120}
16121
16122__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16123vcge_u16 (uint16x4_t __a, uint16x4_t __b)
16124{
16125 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16126 (int16x4_t) __b);
16127}
16128
16129__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16130vcge_u32 (uint32x2_t __a, uint32x2_t __b)
16131{
16132 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16133 (int32x2_t) __b);
16134}
16135
16136__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16137vcge_u64 (uint64x1_t __a, uint64x1_t __b)
16138{
16139 return __a >= __b ? -1ll : 0ll;
16140}
16141
16142__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16143vcgeq_f32 (float32x4_t __a, float32x4_t __b)
16144{
16145 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16146}
16147
16148__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16149vcgeq_f64 (float64x2_t __a, float64x2_t __b)
16150{
16151 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16152}
16153
16154__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16155vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
16156{
16157 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16158 (int8x16_t) __b);
16159}
16160
16161__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16162vcgeq_s8 (int8x16_t __a, int8x16_t __b)
16163{
16164 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16165}
16166
16167__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16168vcgeq_s16 (int16x8_t __a, int16x8_t __b)
16169{
16170 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16171}
16172
16173__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16174vcgeq_s32 (int32x4_t __a, int32x4_t __b)
16175{
16176 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16177}
16178
16179__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16180vcgeq_s64 (int64x2_t __a, int64x2_t __b)
16181{
16182 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16183}
16184
16185__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16186vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
16187{
16188 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16189 (int8x16_t) __b);
16190}
16191
16192__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16193vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
16194{
16195 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16196 (int16x8_t) __b);
16197}
16198
16199__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16200vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
16201{
16202 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16203 (int32x4_t) __b);
16204}
16205
16206__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16207vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
16208{
16209 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16210 (int64x2_t) __b);
16211}
16212
16213/* vcge - scalar. */
16214
16215__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16216vcges_f32 (float32_t __a, float32_t __b)
16217{
16218 return __a >= __b ? -1 : 0;
16219}
16220
16221__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16222vcged_s64 (int64x1_t __a, int64x1_t __b)
16223{
16224 return __a >= __b ? -1ll : 0ll;
16225}
16226
16227__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16228vcged_u64 (uint64x1_t __a, uint64x1_t __b)
16229{
16230 return __a >= __b ? -1ll : 0ll;
16231}
16232
16233__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16234vcged_f64 (float64_t __a, float64_t __b)
16235{
16236 return __a >= __b ? -1ll : 0ll;
16237}
16238
16239/* vcgez - vector. */
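/* Per-lane greater-than-or-equal-to-zero compare: each result lane is
   all ones where the corresponding input lane is >= 0.  */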
16240
16241__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16242vcgez_f32 (float32x2_t __a)
16243{
16244 float32x2_t __b = {0.0f, 0.0f};
16245 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
16246}
16247
16248__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16249vcgez_f64 (float64x1_t __a)
16250{
16251 return __a >= 0.0 ? -1ll : 0ll;
16252}
16253
16254__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16255vcgez_p8 (poly8x8_t __a)
16256{
16257 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16258 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
16259 (int8x8_t) __b);
16260}
16261
16262__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16263vcgez_s8 (int8x8_t __a)
16264{
16265 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16266 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
16267}
16268
16269__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16270vcgez_s16 (int16x4_t __a)
16271{
16272 int16x4_t __b = {0, 0, 0, 0};
16273 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
16274}
16275
16276__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16277vcgez_s32 (int32x2_t __a)
16278{
16279 int32x2_t __b = {0, 0};
16280 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
16281}
16282
16283__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16284vcgez_s64 (int64x1_t __a)
16285{
16286 return __a >= 0ll ? -1ll : 0ll;
16287}
16288
16289__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16290vcgez_u8 (uint8x8_t __a)
16291{
16292 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16293 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
16294 (int8x8_t) __b);
16295}
16296
16297__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16298vcgez_u16 (uint16x4_t __a)
16299{
16300 uint16x4_t __b = {0, 0, 0, 0};
16301 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
16302 (int16x4_t) __b);
16303}
16304
16305__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16306vcgez_u32 (uint32x2_t __a)
16307{
16308 uint32x2_t __b = {0, 0};
16309 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
16310 (int32x2_t) __b);
16311}
16312
16313__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16314vcgez_u64 (uint64x1_t __a)
16315{
16316 return __a >= 0ll ? -1ll : 0ll;
16317}
16318
16319__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16320vcgezq_f32 (float32x4_t __a)
16321{
16322 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16323 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
16324}
16325
16326__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16327vcgezq_f64 (float64x2_t __a)
16328{
16329 float64x2_t __b = {0.0, 0.0};
16330 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
16331}
16332
16333__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16334vcgezq_p8 (poly8x16_t __a)
16335{
16336 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16337 0, 0, 0, 0, 0, 0, 0, 0};
16338 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
16339 (int8x16_t) __b);
16340}
16341
16342__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16343vcgezq_s8 (int8x16_t __a)
16344{
16345 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16346 0, 0, 0, 0, 0, 0, 0, 0};
16347 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
16348}
16349
16350__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16351vcgezq_s16 (int16x8_t __a)
16352{
16353 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16354 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
16355}
16356
16357__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16358vcgezq_s32 (int32x4_t __a)
16359{
16360 int32x4_t __b = {0, 0, 0, 0};
16361 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
16362}
16363
16364__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16365vcgezq_s64 (int64x2_t __a)
16366{
16367 int64x2_t __b = {0, 0};
16368 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
16369}
16370
16371__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16372vcgezq_u8 (uint8x16_t __a)
16373{
16374 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16375 0, 0, 0, 0, 0, 0, 0, 0};
16376 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
16377 (int8x16_t) __b);
16378}
16379
16380__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16381vcgezq_u16 (uint16x8_t __a)
16382{
16383 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16384 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
16385 (int16x8_t) __b);
16386}
16387
16388__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16389vcgezq_u32 (uint32x4_t __a)
16390{
16391 uint32x4_t __b = {0, 0, 0, 0};
16392 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
16393 (int32x4_t) __b);
16394}
16395
16396__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16397vcgezq_u64 (uint64x2_t __a)
16398{
16399 uint64x2_t __b = {0, 0};
16400 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
16401 (int64x2_t) __b);
16402}
16403
16404/* vcgez - scalar. */
16405
16406__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16407vcgezs_f32 (float32_t __a)
16408{
16409 return __a >= 0.0f ? -1 : 0;
16410}
16411
16412__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16413vcgezd_s64 (int64x1_t __a)
16414{
16415 return __a >= 0 ? -1ll : 0ll;
16416}
16417
16418__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16419vcgezd_u64 (int64x1_t __a)
16420{
16421 return __a >= 0 ? -1ll : 0ll;
16422}
16423
16424__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16425vcgezd_f64 (float64_t __a)
16426{
16427 return __a >= 0.0 ? -1ll : 0ll;
16428}
16429
16430/* vcgt - vector. */
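/* Per-lane greater-than compare: all-ones lanes mark elements of __a
   that are strictly greater than the corresponding elements of __b.  */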
16431
16432__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16433vcgt_f32 (float32x2_t __a, float32x2_t __b)
16434{
16435 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16436}
16437
16438__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16439vcgt_f64 (float64x1_t __a, float64x1_t __b)
16440{
16441 return __a > __b ? -1ll : 0ll;
16442}
16443
16444__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16445vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
16446{
16447 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16448 (int8x8_t) __b);
16449}
16450
16451__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16452vcgt_s8 (int8x8_t __a, int8x8_t __b)
16453{
16454 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16455}
16456
16457__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16458vcgt_s16 (int16x4_t __a, int16x4_t __b)
16459{
16460 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16461}
16462
16463__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16464vcgt_s32 (int32x2_t __a, int32x2_t __b)
16465{
16466 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16467}
16468
16469__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16470vcgt_s64 (int64x1_t __a, int64x1_t __b)
16471{
16472 return __a > __b ? -1ll : 0ll;
16473}
16474
16475__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16476vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
16477{
16478 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16479 (int8x8_t) __b);
16480}
16481
16482__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16483vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
16484{
16485 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16486 (int16x4_t) __b);
16487}
16488
16489__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16490vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
16491{
16492 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16493 (int32x2_t) __b);
16494}
16495
16496__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16497vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
16498{
16499 return __a > __b ? -1ll : 0ll;
16500}
16501
16502__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16503vcgtq_f32 (float32x4_t __a, float32x4_t __b)
16504{
16505 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16506}
16507
16508__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16509vcgtq_f64 (float64x2_t __a, float64x2_t __b)
16510{
16511 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16512}
16513
16514__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16515vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
16516{
16517 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16518 (int8x16_t) __b);
16519}
16520
16521__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16522vcgtq_s8 (int8x16_t __a, int8x16_t __b)
16523{
16524 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16525}
16526
16527__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16528vcgtq_s16 (int16x8_t __a, int16x8_t __b)
16529{
16530 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16531}
16532
16533__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16534vcgtq_s32 (int32x4_t __a, int32x4_t __b)
16535{
16536 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16537}
16538
16539__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16540vcgtq_s64 (int64x2_t __a, int64x2_t __b)
16541{
16542 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16543}
16544
16545__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16546vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
16547{
16548 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16549 (int8x16_t) __b);
16550}
16551
16552__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16553vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
16554{
16555 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16556 (int16x8_t) __b);
16557}
16558
16559__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16560vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
16561{
16562 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16563 (int32x4_t) __b);
16564}
16565
16566__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16567vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
16568{
16569 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16570 (int64x2_t) __b);
16571}
16572
16573/* vcgt - scalar. */
16574
16575__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16576vcgts_f32 (float32_t __a, float32_t __b)
16577{
16578 return __a > __b ? -1 : 0;
16579}
16580
16581__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16582vcgtd_s64 (int64x1_t __a, int64x1_t __b)
16583{
16584 return __a > __b ? -1ll : 0ll;
16585}
16586
16587__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16588vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
16589{
16590 return __a > __b ? -1ll : 0ll;
16591}
16592
16593__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16594vcgtd_f64 (float64_t __a, float64_t __b)
16595{
16596 return __a > __b ? -1ll : 0ll;
16597}
16598
16599/* vcgtz - vector. */
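/* Per-lane strictly-greater-than-zero compare against zero.  */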
16600
16601__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16602vcgtz_f32 (float32x2_t __a)
16603{
16604 float32x2_t __b = {0.0f, 0.0f};
16605 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
16606}
16607
16608__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16609vcgtz_f64 (float64x1_t __a)
16610{
16611 return __a > 0.0 ? -1ll : 0ll;
16612}
16613
16614__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16615vcgtz_p8 (poly8x8_t __a)
16616{
16617 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16618 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
16619 (int8x8_t) __b);
16620}
16621
16622__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16623vcgtz_s8 (int8x8_t __a)
16624{
16625 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16626 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
16627}
16628
16629__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16630vcgtz_s16 (int16x4_t __a)
16631{
16632 int16x4_t __b = {0, 0, 0, 0};
16633 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
16634}
16635
16636__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16637vcgtz_s32 (int32x2_t __a)
16638{
16639 int32x2_t __b = {0, 0};
16640 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
16641}
16642
16643__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16644vcgtz_s64 (int64x1_t __a)
16645{
16646 return __a > 0ll ? -1ll : 0ll;
16647}
16648
16649__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16650vcgtz_u8 (uint8x8_t __a)
16651{
16652 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16653 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
16654 (int8x8_t) __b);
16655}
16656
16657__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16658vcgtz_u16 (uint16x4_t __a)
16659{
16660 uint16x4_t __b = {0, 0, 0, 0};
16661 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
16662 (int16x4_t) __b);
16663}
16664
16665__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16666vcgtz_u32 (uint32x2_t __a)
16667{
16668 uint32x2_t __b = {0, 0};
16669 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
16670 (int32x2_t) __b);
16671}
16672
16673__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16674vcgtz_u64 (uint64x1_t __a)
16675{
16676 return __a > 0ll ? -1ll : 0ll;
16677}
16678
16679__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16680vcgtzq_f32 (float32x4_t __a)
16681{
16682 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
16683 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
16684}
16685
16686__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16687vcgtzq_f64 (float64x2_t __a)
16688{
16689 float64x2_t __b = {0.0, 0.0};
16690 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
16691}
16692
16693__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16694vcgtzq_p8 (poly8x16_t __a)
16695{
16696 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16697 0, 0, 0, 0, 0, 0, 0, 0};
16698 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
16699 (int8x16_t) __b);
16700}
16701
16702__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16703vcgtzq_s8 (int8x16_t __a)
16704{
16705 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16706 0, 0, 0, 0, 0, 0, 0, 0};
16707 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
16708}
16709
16710__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16711vcgtzq_s16 (int16x8_t __a)
16712{
16713 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16714 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
16715}
16716
16717__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16718vcgtzq_s32 (int32x4_t __a)
16719{
16720 int32x4_t __b = {0, 0, 0, 0};
16721 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
16722}
16723
16724__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16725vcgtzq_s64 (int64x2_t __a)
16726{
16727 int64x2_t __b = {0, 0};
16728 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
16729}
16730
16731__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16732vcgtzq_u8 (uint8x16_t __a)
16733{
16734 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
16735 0, 0, 0, 0, 0, 0, 0, 0};
16736 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
16737 (int8x16_t) __b);
16738}
16739
16740__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16741vcgtzq_u16 (uint16x8_t __a)
16742{
16743 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16744 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
16745 (int16x8_t) __b);
16746}
16747
16748__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16749vcgtzq_u32 (uint32x4_t __a)
16750{
16751 uint32x4_t __b = {0, 0, 0, 0};
16752 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
16753 (int32x4_t) __b);
16754}
16755
16756__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16757vcgtzq_u64 (uint64x2_t __a)
16758{
16759 uint64x2_t __b = {0, 0};
16760 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
16761 (int64x2_t) __b);
16762}
16763
16764/* vcgtz - scalar. */
16765
16766__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16767vcgtzs_f32 (float32_t __a)
16768{
16769 return __a > 0.0f ? -1 : 0;
16770}
16771
16772__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16773vcgtzd_s64 (int64x1_t __a)
16774{
16775 return __a > 0 ? -1ll : 0ll;
16776}
16777
16778__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16779vcgtzd_u64 (int64x1_t __a)
16780{
16781 return __a > 0 ? -1ll : 0ll;
16782}
16783
16784__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16785vcgtzd_f64 (float64_t __a)
16786{
16787 return __a > 0.0 ? -1ll : 0ll;
16788}
16789
16790/* vcle - vector. */
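/* Per-lane less-than-or-equal compare.  For the multi-lane types this
   is implemented by swapping the operands of the greater-than-or-equal
   builtins, since a <= b exactly when b >= a.  */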
16791
16792__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16793vcle_f32 (float32x2_t __a, float32x2_t __b)
16794{
16795 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
16796}
16797
16798__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16799vcle_f64 (float64x1_t __a, float64x1_t __b)
16800{
16801 return __a <= __b ? -1ll : 0ll;
16802}
16803
16804__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16805vcle_p8 (poly8x8_t __a, poly8x8_t __b)
16806{
16807 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
16808 (int8x8_t) __a);
16809}
16810
16811__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16812vcle_s8 (int8x8_t __a, int8x8_t __b)
16813{
16814 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
16815}
16816
16817__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16818vcle_s16 (int16x4_t __a, int16x4_t __b)
16819{
16820 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
16821}
16822
16823__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16824vcle_s32 (int32x2_t __a, int32x2_t __b)
16825{
16826 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
16827}
16828
16829__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16830vcle_s64 (int64x1_t __a, int64x1_t __b)
16831{
16832 return __a <= __b ? -1ll : 0ll;
16833}
16834
16835__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16836vcle_u8 (uint8x8_t __a, uint8x8_t __b)
16837{
16838 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
16839 (int8x8_t) __a);
16840}
16841
16842__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16843vcle_u16 (uint16x4_t __a, uint16x4_t __b)
16844{
16845 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
16846 (int16x4_t) __a);
16847}
16848
16849__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16850vcle_u32 (uint32x2_t __a, uint32x2_t __b)
16851{
16852 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
16853 (int32x2_t) __a);
16854}
16855
16856__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16857vcle_u64 (uint64x1_t __a, uint64x1_t __b)
16858{
16859 return __a <= __b ? -1ll : 0ll;
16860}
16861
16862__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16863vcleq_f32 (float32x4_t __a, float32x4_t __b)
16864{
16865 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
16866}
16867
16868__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16869vcleq_f64 (float64x2_t __a, float64x2_t __b)
16870{
16871 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
16872}
16873
16874__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16875vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
16876{
16877 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
16878 (int8x16_t) __a);
16879}
16880
16881__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16882vcleq_s8 (int8x16_t __a, int8x16_t __b)
16883{
16884 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
16885}
16886
16887__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16888vcleq_s16 (int16x8_t __a, int16x8_t __b)
16889{
16890 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
16891}
16892
16893__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16894vcleq_s32 (int32x4_t __a, int32x4_t __b)
16895{
16896 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
16897}
16898
16899__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16900vcleq_s64 (int64x2_t __a, int64x2_t __b)
16901{
16902 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
16903}
16904
16905__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16906vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
16907{
16908 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
16909 (int8x16_t) __a);
16910}
16911
16912__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16913vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
16914{
16915 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
16916 (int16x8_t) __a);
16917}
16918
16919__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16920vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
16921{
16922 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
16923 (int32x4_t) __a);
16924}
16925
16926__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16927vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
16928{
16929 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
16930 (int64x2_t) __a);
16931}
16932
16933/* vcle - scalar. */
16934
16935__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16936vcles_f32 (float32_t __a, float32_t __b)
16937{
16938 return __a <= __b ? -1 : 0;
16939}
16940
16941__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16942vcled_s64 (int64x1_t __a, int64x1_t __b)
16943{
16944 return __a <= __b ? -1ll : 0ll;
16945}
16946
16947__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16948vcled_u64 (uint64x1_t __a, uint64x1_t __b)
16949{
16950 return __a <= __b ? -1ll : 0ll;
16951}
16952
16953__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16954vcled_f64 (float64_t __a, float64_t __b)
16955{
16956 return __a <= __b ? -1ll : 0ll;
16957}
16958
16959/* vclez - vector. */
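/* Per-lane less-than-or-equal-to-zero compare: each result lane is all
   ones where the corresponding input lane is <= 0.  */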
16960
16961__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16962vclez_f32 (float32x2_t __a)
16963{
16964 float32x2_t __b = {0.0f, 0.0f};
16965 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
16966}
16967
16968__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16969vclez_f64 (float64x1_t __a)
16970{
16971 return __a <= 0.0 ? -1ll : 0ll;
16972}
16973
16974__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16975vclez_p8 (poly8x8_t __a)
16976{
16977 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16978 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
16979 (int8x8_t) __b);
16980}
16981
16982__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16983vclez_s8 (int8x8_t __a)
16984{
16985 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
16986 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
16987}
16988
16989__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16990vclez_s16 (int16x4_t __a)
16991{
16992 int16x4_t __b = {0, 0, 0, 0};
16993 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
16994}
16995
16996__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16997vclez_s32 (int32x2_t __a)
16998{
16999 int32x2_t __b = {0, 0};
17000 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
17001}
17002
17003__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17004vclez_s64 (int64x1_t __a)
17005{
17006 return __a <= 0ll ? -1ll : 0ll;
17007}
17008
17009__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17010vclez_u64 (uint64x1_t __a)
17011{
17012 return __a <= 0ll ? -1ll : 0ll;
17013}
17014
17015__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17016vclezq_f32 (float32x4_t __a)
17017{
17018 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17019 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
17020}
17021
17022__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17023vclezq_f64 (float64x2_t __a)
17024{
17025 float64x2_t __b = {0.0, 0.0};
17026 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
17027}
17028
17029__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17030vclezq_p8 (poly8x16_t __a)
17031{
17032 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17033 0, 0, 0, 0, 0, 0, 0, 0};
17034 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
17035 (int8x16_t) __b);
17036}
17037
17038__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17039vclezq_s8 (int8x16_t __a)
17040{
17041 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17042 0, 0, 0, 0, 0, 0, 0, 0};
17043 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
17044}
17045
17046__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17047vclezq_s16 (int16x8_t __a)
17048{
17049 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17050 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
17051}
17052
17053__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17054vclezq_s32 (int32x4_t __a)
17055{
17056 int32x4_t __b = {0, 0, 0, 0};
17057 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
17058}
17059
17060__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17061vclezq_s64 (int64x2_t __a)
17062{
17063 int64x2_t __b = {0, 0};
17064 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
17065}
17066
17067/* vclez - scalar. */
17068
17069__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17070vclezs_f32 (float32_t __a)
17071{
17072 return __a <= 0.0f ? -1 : 0;
17073}
17074
17075__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17076vclezd_s64 (int64x1_t __a)
17077{
17078 return __a <= 0 ? -1ll : 0ll;
17079}
17080
17081__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17082vclezd_u64 (int64x1_t __a)
17083{
17084 return __a <= 0 ? -1ll : 0ll;
17085}
17086
17087__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17088vclezd_f64 (float64_t __a)
17089{
17090 return __a <= 0.0 ? -1ll : 0ll;
17091}
17092
17093/* vclt - vector. */
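/* Per-lane less-than compare.  For the multi-lane types this is
   implemented by swapping the operands of the greater-than builtins.  */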
17094
17095__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17096vclt_f32 (float32x2_t __a, float32x2_t __b)
17097{
17098 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
17099}
17100
17101__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17102vclt_f64 (float64x1_t __a, float64x1_t __b)
17103{
17104 return __a < __b ? -1ll : 0ll;
17105}
17106
17107__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17108vclt_p8 (poly8x8_t __a, poly8x8_t __b)
17109{
17110 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
17111 (int8x8_t) __a);
17112}
17113
17114__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17115vclt_s8 (int8x8_t __a, int8x8_t __b)
17116{
17117 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
17118}
17119
17120__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17121vclt_s16 (int16x4_t __a, int16x4_t __b)
17122{
17123 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
17124}
17125
17126__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17127vclt_s32 (int32x2_t __a, int32x2_t __b)
17128{
17129 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
17130}
17131
17132__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17133vclt_s64 (int64x1_t __a, int64x1_t __b)
17134{
17135 return __a < __b ? -1ll : 0ll;
17136}
17137
17138__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17139vclt_u8 (uint8x8_t __a, uint8x8_t __b)
17140{
17141 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
17142 (int8x8_t) __a);
17143}
17144
17145__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17146vclt_u16 (uint16x4_t __a, uint16x4_t __b)
17147{
17148 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
17149 (int16x4_t) __a);
17150}
17151
17152__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17153vclt_u32 (uint32x2_t __a, uint32x2_t __b)
17154{
17155 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
17156 (int32x2_t) __a);
17157}
17158
17159__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17160vclt_u64 (uint64x1_t __a, uint64x1_t __b)
17161{
17162 return __a < __b ? -1ll : 0ll;
17163}
17164
17165__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17166vcltq_f32 (float32x4_t __a, float32x4_t __b)
17167{
17168 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
17169}
17170
17171__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17172vcltq_f64 (float64x2_t __a, float64x2_t __b)
17173{
17174 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
17175}
17176
17177__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17178vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
17179{
17180 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
17181 (int8x16_t) __a);
17182}
17183
17184__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17185vcltq_s8 (int8x16_t __a, int8x16_t __b)
17186{
17187 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
17188}
17189
17190__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17191vcltq_s16 (int16x8_t __a, int16x8_t __b)
17192{
17193 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
17194}
17195
17196__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17197vcltq_s32 (int32x4_t __a, int32x4_t __b)
17198{
17199 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
17200}
17201
17202__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17203vcltq_s64 (int64x2_t __a, int64x2_t __b)
17204{
17205 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
17206}
17207
17208__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17209vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
17210{
17211 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
17212 (int8x16_t) __a);
17213}
17214
17215__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17216vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
17217{
17218 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
17219 (int16x8_t) __a);
17220}
17221
17222__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17223vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
17224{
17225 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
17226 (int32x4_t) __a);
17227}
17228
17229__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17230vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
17231{
17232 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
17233 (int64x2_t) __a);
17234}
17235
17236/* vclt - scalar. */
17237
17238__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17239vclts_f32 (float32_t __a, float32_t __b)
17240{
17241 return __a < __b ? -1 : 0;
17242}
17243
17244__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17245vcltd_s64 (int64x1_t __a, int64x1_t __b)
17246{
17247 return __a < __b ? -1ll : 0ll;
17248}
17249
17250__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17251vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
17252{
17253 return __a < __b ? -1ll : 0ll;
17254}
17255
17256__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17257vcltd_f64 (float64_t __a, float64_t __b)
17258{
17259 return __a < __b ? -1ll : 0ll;
17260}
17261
17262/* vcltz - vector. */
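/* Per-lane less-than-zero compare: each result lane is all ones where
   the corresponding input lane is < 0.  */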
17263
17264__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17265vcltz_f32 (float32x2_t __a)
17266{
17267 float32x2_t __b = {0.0f, 0.0f};
17268 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
17269}
17270
17271__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17272vcltz_f64 (float64x1_t __a)
17273{
17274 return __a < 0.0 ? -1ll : 0ll;
17275}
17276
17277__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17278vcltz_p8 (poly8x8_t __a)
17279{
17280 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17281 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
17282 (int8x8_t) __b);
17283}
17284
17285__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17286vcltz_s8 (int8x8_t __a)
17287{
17288 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17289 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
17290}
17291
17292__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17293vcltz_s16 (int16x4_t __a)
17294{
17295 int16x4_t __b = {0, 0, 0, 0};
17296 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
17297}
17298
17299__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17300vcltz_s32 (int32x2_t __a)
17301{
17302 int32x2_t __b = {0, 0};
17303 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
17304}
17305
17306__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17307vcltz_s64 (int64x1_t __a)
17308{
17309 return __a < 0ll ? -1ll : 0ll;
17310}
17311
17312__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17313vcltzq_f32 (float32x4_t __a)
17314{
17315 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17316 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
17317}
17318
17319__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17320vcltzq_f64 (float64x2_t __a)
17321{
17322 float64x2_t __b = {0.0, 0.0};
17323 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
17324}
17325
17326__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17327vcltzq_p8 (poly8x16_t __a)
17328{
17329 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17330 0, 0, 0, 0, 0, 0, 0, 0};
17331 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
17332 (int8x16_t) __b);
17333}
17334
17335__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17336vcltzq_s8 (int8x16_t __a)
17337{
17338 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17339 0, 0, 0, 0, 0, 0, 0, 0};
17340 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
17341}
17342
17343__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17344vcltzq_s16 (int16x8_t __a)
17345{
17346 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17347 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
17348}
17349
17350__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17351vcltzq_s32 (int32x4_t __a)
17352{
17353 int32x4_t __b = {0, 0, 0, 0};
17354 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
17355}
17356
17357__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17358vcltzq_s64 (int64x2_t __a)
17359{
17360 int64x2_t __b = {0, 0};
17361 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
17362}
17363
17364/* vcltz - scalar. */
17365
17366__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17367vcltzs_f32 (float32_t __a)
17368{
17369 return __a < 0.0f ? -1 : 0;
17370}
17371
17372__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17373vcltzd_s64 (int64x1_t __a)
17374{
17375 return __a < 0 ? -1ll : 0ll;
17376}
17377
17378__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17379vcltzd_u64 (int64x1_t __a)
17380{
17381 return __a < 0 ? -1ll : 0ll;
17382}
17383
17384__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17385vcltzd_f64 (float64_t __a)
17386{
17387 return __a < 0.0 ? -1ll : 0ll;
17388}
17389
17390/* vclz. */
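/* Count leading zeros: each result lane holds the number of
   consecutive zero bits starting at the most significant bit of the
   corresponding input lane (e.g. a lane value of 1 gives 7 for the
   8-bit forms).  */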
17391
17392__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17393vclz_s8 (int8x8_t __a)
17394{
17395 return __builtin_aarch64_clzv8qi (__a);
17396}
17397
17398__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17399vclz_s16 (int16x4_t __a)
17400{
17401 return __builtin_aarch64_clzv4hi (__a);
17402}
17403
17404__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17405vclz_s32 (int32x2_t __a)
17406{
17407 return __builtin_aarch64_clzv2si (__a);
17408}
17409
17410__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17411vclz_u8 (uint8x8_t __a)
17412{
17413 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
17414}
17415
17416__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17417vclz_u16 (uint16x4_t __a)
17418{
17419 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
17420}
17421
17422__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17423vclz_u32 (uint32x2_t __a)
17424{
17425 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
17426}
17427
17428__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17429vclzq_s8 (int8x16_t __a)
17430{
17431 return __builtin_aarch64_clzv16qi (__a);
17432}
17433
17434__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17435vclzq_s16 (int16x8_t __a)
17436{
17437 return __builtin_aarch64_clzv8hi (__a);
17438}
17439
17440__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17441vclzq_s32 (int32x4_t __a)
17442{
17443 return __builtin_aarch64_clzv4si (__a);
17444}
17445
17446__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17447vclzq_u8 (uint8x16_t __a)
17448{
17449 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
17450}
17451
17452__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17453vclzq_u16 (uint16x8_t __a)
17454{
17455 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
17456}
17457
17458__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17459vclzq_u32 (uint32x4_t __a)
17460{
17461 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
17462}
17463
17464/* vcvt (double -> float). */
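/* Narrow each double-precision lane to single precision.  The _high
   variant narrows __b into the upper half of a 128-bit result whose
   lower half is taken from __a.  */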
17465
17466__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17467vcvt_f32_f64 (float64x2_t __a)
17468{
17469 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
17470}
17471
17472__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17473vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
17474{
17475 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
17476}
17477
17478/* vcvt (float -> double). */
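/* Widen single-precision lanes to double precision.  The plain form
   converts both lanes of a 64-bit vector; the _high form converts the
   upper two lanes of a 128-bit vector.  */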
17479
17480__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17481vcvt_f64_f32 (float32x2_t __a)
17482{
17484 return __builtin_aarch64_float_extend_lo_v2df (__a);
17485}
17486
17487__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17488vcvt_high_f64_f32 (float32x4_t __a)
17489{
17490 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
17491}
17492
17493/* vcvt (<u>int -> float) */
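/* Convert signed or unsigned integer lanes to floating point.  The
   scalar forms are plain C conversions; the vector forms use the
   float / floatuns builtins.  */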
17494
17495__extension__ static __inline float64_t __attribute__ ((__always_inline__))
17496vcvtd_f64_s64 (int64_t __a)
17497{
17498 return (float64_t) __a;
17499}
17500
17501__extension__ static __inline float64_t __attribute__ ((__always_inline__))
17502vcvtd_f64_u64 (uint64_t __a)
17503{
17504 return (float64_t) __a;
17505}
17506
17507__extension__ static __inline float32_t __attribute__ ((__always_inline__))
17508vcvts_f32_s32 (int32_t __a)
17509{
17510 return (float32_t) __a;
17511}
17512
17513__extension__ static __inline float32_t __attribute__ ((__always_inline__))
17514vcvts_f32_u32 (uint32_t __a)
17515{
17516 return (float32_t) __a;
17517}
17518
17519__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17520vcvt_f32_s32 (int32x2_t __a)
17521{
17522 return __builtin_aarch64_floatv2siv2sf (__a);
17523}
17524
17525__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17526vcvt_f32_u32 (uint32x2_t __a)
17527{
17528 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
17529}
17530
17531__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17532vcvtq_f32_s32 (int32x4_t __a)
17533{
17534 return __builtin_aarch64_floatv4siv4sf (__a);
17535}
17536
17537__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17538vcvtq_f32_u32 (uint32x4_t __a)
17539{
17540 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
17541}
17542
17543__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17544vcvtq_f64_s64 (int64x2_t __a)
17545{
17546 return __builtin_aarch64_floatv2div2df (__a);
17547}
17548
17549__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17550vcvtq_f64_u64 (uint64x2_t __a)
17551{
17552 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
17553}
17554
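/* Usage sketch (illustrative comment only): these forms convert signed or
   unsigned integer lanes to floating point; the scalar _s/_d forms are
   plain conversions.  With hypothetical locals:

     int32x2_t   __i = { -3, 7 };
     float32x2_t __f = vcvt_f32_s32 (__i);   -- { -3.0f, 7.0f }
     float64_t   __d = vcvtd_f64_u64 (5);    -- 5.0  */
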
17555/* vcvt (float -> <u>int) */
17556
17557__extension__ static __inline int64_t __attribute__ ((__always_inline__))
17558vcvtd_s64_f64 (float64_t __a)
17559{
17560 return (int64_t) __a;
17561}
17562
17563__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17564vcvtd_u64_f64 (float64_t __a)
17565{
17566 return (uint64_t) __a;
17567}
17568
17569__extension__ static __inline int32_t __attribute__ ((__always_inline__))
17570vcvts_s32_f32 (float32_t __a)
17571{
17572 return (int32_t) __a;
17573}
17574
17575__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17576vcvts_u32_f32 (float32_t __a)
17577{
17578 return (uint32_t) __a;
17579}
17580
17581__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17582vcvt_s32_f32 (float32x2_t __a)
17583{
17584 return __builtin_aarch64_lbtruncv2sfv2si (__a);
17585}
17586
17587__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17588vcvt_u32_f32 (float32x2_t __a)
17589{
17590 /* TODO: This cast should go away when builtins have
17591 their correct types. */
17592 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
17593}
17594
17595__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17596vcvtq_s32_f32 (float32x4_t __a)
17597{
17598 return __builtin_aarch64_lbtruncv4sfv4si (__a);
17599}
17600
17601__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17602vcvtq_u32_f32 (float32x4_t __a)
17603{
17604 /* TODO: This cast should go away when builtins have
17605 their correct types. */
17606 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
17607}
17608
17609__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17610vcvtq_s64_f64 (float64x2_t __a)
17611{
17612 return __builtin_aarch64_lbtruncv2dfv2di (__a);
17613}
17614
17615__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17616vcvtq_u64_f64 (float64x2_t __a)
17617{
17618 /* TODO: This cast should go away when builtins have
17619 their correct types. */
17620 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
17621}
17622
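/* Usage sketch (illustrative comment only): the plain vcvt float-to-integer
   forms round toward zero (truncation), matching a C cast.  With a
   hypothetical local:

     float32x2_t __f = { -1.9f, 2.7f };
     int32x2_t   __i = vcvt_s32_f32 (__f);   -- { -1, 2 }  */
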
17623/* vcvta */
17624
17625__extension__ static __inline int64_t __attribute__ ((__always_inline__))
17626vcvtad_s64_f64 (float64_t __a)
17627{
17628 return __builtin_aarch64_lrounddfdi (__a);
17629}
17630
17631__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17632vcvtad_u64_f64 (float64_t __a)
17633{
17634 return __builtin_aarch64_lroundudfdi (__a);
17635}
17636
17637__extension__ static __inline int32_t __attribute__ ((__always_inline__))
17638vcvtas_s32_f32 (float32_t __a)
17639{
17640 return __builtin_aarch64_lroundsfsi (__a);
17641}
17642
17643__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17644vcvtas_u32_f32 (float32_t __a)
17645{
17646 return __builtin_aarch64_lroundusfsi (__a);
17647}
17648
17649__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17650vcvta_s32_f32 (float32x2_t __a)
17651{
17652 return __builtin_aarch64_lroundv2sfv2si (__a);
17653}
17654
17655__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17656vcvta_u32_f32 (float32x2_t __a)
17657{
17658 /* TODO: This cast should go away when builtins have
17659 their correct types. */
17660 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
17661}
17662
17663__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17664vcvtaq_s32_f32 (float32x4_t __a)
17665{
17666 return __builtin_aarch64_lroundv4sfv4si (__a);
17667}
17668
17669__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17670vcvtaq_u32_f32 (float32x4_t __a)
17671{
17672 /* TODO: This cast should go away when builtins have
17673 their correct types. */
17674 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
17675}
17676
17677__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17678vcvtaq_s64_f64 (float64x2_t __a)
17679{
17680 return __builtin_aarch64_lroundv2dfv2di (__a);
17681}
17682
17683__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17684vcvtaq_u64_f64 (float64x2_t __a)
17685{
17686 /* TODO: This cast should go away when builtins have
17687 their correct types. */
17688 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
17689}
17690
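/* Usage sketch (illustrative comment only): vcvta converts with rounding
   to nearest, ties away from zero.  For example, with scalars:

     vcvtas_s32_f32 (2.5f)   -- 3
     vcvtas_s32_f32 (-2.5f)  -- -3  */
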
17691/* vcvtm */
17692
17693__extension__ static __inline int64_t __attribute__ ((__always_inline__))
17694vcvtmd_s64_f64 (float64_t __a)
17695{
17696 return __builtin_llfloor (__a);
17697}
17698
17699__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17700vcvtmd_u64_f64 (float64_t __a)
17701{
17702 return __builtin_aarch64_lfloorudfdi (__a);
17703}
17704
17705__extension__ static __inline int32_t __attribute__ ((__always_inline__))
17706vcvtms_s32_f32 (float32_t __a)
17707{
17708 return __builtin_ifloorf (__a);
17709}
17710
17711__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17712vcvtms_u32_f32 (float32_t __a)
17713{
17714 return __builtin_aarch64_lfloorusfsi (__a);
17715}
17716
17717__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17718vcvtm_s32_f32 (float32x2_t __a)
17719{
17720 return __builtin_aarch64_lfloorv2sfv2si (__a);
17721}
17722
17723__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17724vcvtm_u32_f32 (float32x2_t __a)
17725{
17726 /* TODO: This cast should go away when builtins have
17727 their correct types. */
17728 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
17729}
17730
17731__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17732vcvtmq_s32_f32 (float32x4_t __a)
17733{
17734 return __builtin_aarch64_lfloorv4sfv4si (__a);
17735}
17736
17737__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17738vcvtmq_u32_f32 (float32x4_t __a)
17739{
17740 /* TODO: This cast should go away when builtins have
17741 their correct types. */
17742 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
17743}
17744
17745__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17746vcvtmq_s64_f64 (float64x2_t __a)
17747{
17748 return __builtin_aarch64_lfloorv2dfv2di (__a);
17749}
17750
17751__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17752vcvtmq_u64_f64 (float64x2_t __a)
17753{
17754 /* TODO: This cast should go away when builtins have
17755 their correct types. */
17756 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
17757}
17758
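/* Usage sketch (illustrative comment only): vcvtm converts with rounding
   toward minus infinity (floor), as the floor builtins above suggest:

     vcvtms_s32_f32 (-1.5f)  -- -2
     vcvtms_u32_f32 (3.9f)   -- 3  */
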
17759/* vcvtn */
17760
17761__extension__ static __inline int64_t __attribute__ ((__always_inline__))
17762vcvtnd_s64_f64 (float64_t __a)
17763{
17764 return __builtin_aarch64_lfrintndfdi (__a);
17765}
17766
17767__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17768vcvtnd_u64_f64 (float64_t __a)
17769{
17770 return __builtin_aarch64_lfrintnudfdi (__a);
17771}
17772
17773__extension__ static __inline int32_t __attribute__ ((__always_inline__))
17774vcvtns_s32_f32 (float32_t __a)
17775{
17776 return __builtin_aarch64_lfrintnsfsi (__a);
17777}
17778
17779__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17780vcvtns_u32_f32 (float32_t __a)
17781{
17782 return __builtin_aarch64_lfrintnusfsi (__a);
17783}
17784
17785__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17786vcvtn_s32_f32 (float32x2_t __a)
17787{
17788 return __builtin_aarch64_lfrintnv2sfv2si (__a);
17789}
17790
17791__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17792vcvtn_u32_f32 (float32x2_t __a)
17793{
17794 /* TODO: This cast should go away when builtins have
17795 their correct types. */
17796 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
17797}
17798
17799__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17800vcvtnq_s32_f32 (float32x4_t __a)
17801{
17802 return __builtin_aarch64_lfrintnv4sfv4si (__a);
17803}
17804
17805__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17806vcvtnq_u32_f32 (float32x4_t __a)
17807{
17808 /* TODO: This cast should go away when builtins have
17809 their correct types. */
17810 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
17811}
17812
17813__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17814vcvtnq_s64_f64 (float64x2_t __a)
17815{
17816 return __builtin_aarch64_lfrintnv2dfv2di (__a);
17817}
17818
17819__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17820vcvtnq_u64_f64 (float64x2_t __a)
17821{
17822 /* TODO: This cast should go away when builtins have
17823 their correct types. */
17824 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
17825}
17826
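/* Usage sketch (illustrative comment only): vcvtn converts with rounding
   to nearest, ties to even:

     vcvtns_s32_f32 (2.5f)  -- 2
     vcvtns_s32_f32 (3.5f)  -- 4  */
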
17827/* vcvtp */
17828
17829__extension__ static __inline int64_t __attribute__ ((__always_inline__))
17830vcvtpd_s64_f64 (float64_t __a)
17831{
17832 return __builtin_llceil (__a);
17833}
17834
17835__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17836vcvtpd_u64_f64 (float64_t __a)
17837{
17838 return __builtin_aarch64_lceiludfdi (__a);
17839}
17840
17841__extension__ static __inline int32_t __attribute__ ((__always_inline__))
17842vcvtps_s32_f32 (float32_t __a)
17843{
17844 return __builtin_iceilf (__a);
17845}
17846
17847__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17848vcvtps_u32_f32 (float32_t __a)
17849{
17850 return __builtin_aarch64_lceilusfsi (__a);
17851}
17852
17853__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17854vcvtp_s32_f32 (float32x2_t __a)
17855{
17856 return __builtin_aarch64_lceilv2sfv2si (__a);
17857}
17858
17859__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17860vcvtp_u32_f32 (float32x2_t __a)
17861{
17862 /* TODO: This cast should go away when builtins have
17863 their correct types. */
17864 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
17865}
17866
17867__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17868vcvtpq_s32_f32 (float32x4_t __a)
17869{
17870 return __builtin_aarch64_lceilv4sfv4si (__a);
17871}
17872
17873__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17874vcvtpq_u32_f32 (float32x4_t __a)
17875{
17876 /* TODO: This cast should go away when builtins have
17877 their correct types. */
17878 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
17879}
17880
17881__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17882vcvtpq_s64_f64 (float64x2_t __a)
17883{
17884 return __builtin_aarch64_lceilv2dfv2di (__a);
17885}
17886
17887__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17888vcvtpq_u64_f64 (float64x2_t __a)
17889{
17890 /* TODO: This cast should go away when builtins have
17891 their correct types. */
17892 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
17893}
17894
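/* Usage sketch (illustrative comment only): vcvtp converts with rounding
   toward plus infinity (ceiling), completing the set of rounding
   conversions (vcvt truncates, vcvta rounds to nearest away from zero,
   vcvtm floors, vcvtn rounds to nearest even):

     vcvtps_s32_f32 (1.25f)   -- 2
     vcvtps_s32_f32 (-1.25f)  -- -1  */
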
17895/* vdup_n */
17896
17897__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17898vdup_n_f32 (float32_t __a)
17899{
17900 return (float32x2_t) {__a, __a};
17901}
17902
17903__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17904vdup_n_f64 (float64_t __a)
17905{
17906 return __a;
17907}
17908
17909__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17910vdup_n_p8 (poly8_t __a)
17911{
17912 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17913}
17914
17915__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17916vdup_n_p16 (poly16_t __a)
17917{
17918 return (poly16x4_t) {__a, __a, __a, __a};
17919}
17920
17921__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17922vdup_n_s8 (int8_t __a)
17923{
17924 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17925}
17926
17927__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17928vdup_n_s16 (int16_t __a)
17929{
17930 return (int16x4_t) {__a, __a, __a, __a};
17931}
17932
17933__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17934vdup_n_s32 (int32_t __a)
17935{
17936 return (int32x2_t) {__a, __a};
17937}
17938
17939__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17940vdup_n_s64 (int64_t __a)
17941{
17942 return __a;
17943}
17944
17945__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17946vdup_n_u8 (uint8_t __a)
17947{
17948 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17949}
17950
17951__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17952vdup_n_u16 (uint16_t __a)
17953{
17954 return (uint16x4_t) {__a, __a, __a, __a};
17955}
17956
17957__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17958vdup_n_u32 (uint32_t __a)
17959{
17960 return (uint32x2_t) {__a, __a};
17961}
17962
17963__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17964vdup_n_u64 (uint64_t __a)
17965{
17966 return __a;
17967}
17968
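/* Usage sketch (illustrative comment only): vdup_n replicates one scalar
   into every lane of a 64-bit vector.  With a hypothetical local:

     uint16x4_t __v = vdup_n_u16 (0x00ff);  -- { 0xff, 0xff, 0xff, 0xff }  */
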
17969/* vdupq_n */
17970
17971__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17972vdupq_n_f32 (float32_t __a)
17973{
17974 return (float32x4_t) {__a, __a, __a, __a};
17975}
17976
17977__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17978vdupq_n_f64 (float64_t __a)
17979{
17980 return (float64x2_t) {__a, __a};
17981}
17982
17983__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17984vdupq_n_p8 (uint32_t __a)
17985{
17986 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
17987 __a, __a, __a, __a, __a, __a, __a, __a};
17988}
17989
17990__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17991vdupq_n_p16 (uint32_t __a)
17992{
17993 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
17994}
17995
17996__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17997vdupq_n_s8 (int32_t __a)
17998{
17999 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18000 __a, __a, __a, __a, __a, __a, __a, __a};
18001}
18002
18003__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18004vdupq_n_s16 (int32_t __a)
18005{
18006 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18007}
18008
18009__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18010vdupq_n_s32 (int32_t __a)
18011{
18012 return (int32x4_t) {__a, __a, __a, __a};
18013}
18014
18015__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18016vdupq_n_s64 (int64_t __a)
18017{
18018 return (int64x2_t) {__a, __a};
18019}
18020
18021__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18022vdupq_n_u8 (uint32_t __a)
18023{
18024 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
18025 __a, __a, __a, __a, __a, __a, __a, __a};
18026}
18027
18028__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18029vdupq_n_u16 (uint32_t __a)
18030{
18031 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
18032}
18033
18034__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18035vdupq_n_u32 (uint32_t __a)
18036{
18037 return (uint32x4_t) {__a, __a, __a, __a};
18038}
18039
18040__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18041vdupq_n_u64 (uint64_t __a)
18042{
18043 return (uint64x2_t) {__a, __a};
18044}
18045
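/* Usage sketch (illustrative comment only): vdupq_n is the 128-bit form,
   filling every lane of the wider register.  With a hypothetical local:

     uint8x16_t __v = vdupq_n_u8 (0x80);  -- sixteen lanes of 0x80  */
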
18046/* vdup_lane */
18047
18048__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18049vdup_lane_f32 (float32x2_t __a, const int __b)
18050{
18051 return __aarch64_vdup_lane_f32 (__a, __b);
18052}
18053
18054__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18055vdup_lane_f64 (float64x1_t __a, const int __b)
18056{
18057 return __aarch64_vdup_lane_f64 (__a, __b);
18058}
18059
18060__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18061vdup_lane_p8 (poly8x8_t __a, const int __b)
18062{
18063 return __aarch64_vdup_lane_p8 (__a, __b);
18064}
18065
18066__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18067vdup_lane_p16 (poly16x4_t __a, const int __b)
18068{
18069 return __aarch64_vdup_lane_p16 (__a, __b);
18070}
18071
18072__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18073vdup_lane_s8 (int8x8_t __a, const int __b)
18074{
18075 return __aarch64_vdup_lane_s8 (__a, __b);
18076}
18077
18078__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18079vdup_lane_s16 (int16x4_t __a, const int __b)
18080{
18081 return __aarch64_vdup_lane_s16 (__a, __b);
18082}
18083
18084__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18085vdup_lane_s32 (int32x2_t __a, const int __b)
18086{
18087 return __aarch64_vdup_lane_s32 (__a, __b);
18088}
18089
18090__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18091vdup_lane_s64 (int64x1_t __a, const int __b)
18092{
18093 return __aarch64_vdup_lane_s64 (__a, __b);
18094}
18095
18096__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18097vdup_lane_u8 (uint8x8_t __a, const int __b)
18098{
18099 return __aarch64_vdup_lane_u8 (__a, __b);
18100}
18101
18102__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18103vdup_lane_u16 (uint16x4_t __a, const int __b)
18104{
18105 return __aarch64_vdup_lane_u16 (__a, __b);
18106}
18107
18108__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18109vdup_lane_u32 (uint32x2_t __a, const int __b)
18110{
18111 return __aarch64_vdup_lane_u32 (__a, __b);
18112}
18113
18114__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18115vdup_lane_u64 (uint64x1_t __a, const int __b)
18116{
18117 return __aarch64_vdup_lane_u64 (__a, __b);
18118}
18119
18120/* vdup_laneq */
18121
18122__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18123vdup_laneq_f32 (float32x4_t __a, const int __b)
18124{
18125 return __aarch64_vdup_laneq_f32 (__a, __b);
18126}
18127
18128__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18129vdup_laneq_f64 (float64x2_t __a, const int __b)
18130{
18131 return __aarch64_vdup_laneq_f64 (__a, __b);
18132}
18133
18134__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18135vdup_laneq_p8 (poly8x16_t __a, const int __b)
18136{
18137 return __aarch64_vdup_laneq_p8 (__a, __b);
18138}
18139
18140__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18141vdup_laneq_p16 (poly16x8_t __a, const int __b)
18142{
18143 return __aarch64_vdup_laneq_p16 (__a, __b);
18144}
18145
18146__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18147vdup_laneq_s8 (int8x16_t __a, const int __b)
18148{
18149 return __aarch64_vdup_laneq_s8 (__a, __b);
18150}
18151
18152__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18153vdup_laneq_s16 (int16x8_t __a, const int __b)
18154{
18155 return __aarch64_vdup_laneq_s16 (__a, __b);
18156}
18157
18158__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18159vdup_laneq_s32 (int32x4_t __a, const int __b)
18160{
18161 return __aarch64_vdup_laneq_s32 (__a, __b);
18162}
18163
18164__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18165vdup_laneq_s64 (int64x2_t __a, const int __b)
18166{
18167 return __aarch64_vdup_laneq_s64 (__a, __b);
18168}
18169
18170__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18171vdup_laneq_u8 (uint8x16_t __a, const int __b)
18172{
18173 return __aarch64_vdup_laneq_u8 (__a, __b);
18174}
18175
18176__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18177vdup_laneq_u16 (uint16x8_t __a, const int __b)
18178{
18179 return __aarch64_vdup_laneq_u16 (__a, __b);
18180}
18181
18182__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18183vdup_laneq_u32 (uint32x4_t __a, const int __b)
18184{
18185 return __aarch64_vdup_laneq_u32 (__a, __b);
18186}
18187
18188__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18189vdup_laneq_u64 (uint64x2_t __a, const int __b)
18190{
18191 return __aarch64_vdup_laneq_u64 (__a, __b);
18192}
18193
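/* Usage sketch (illustrative comment only): vdup_lane broadcasts one lane
   of a 64-bit vector into every lane of the result, while vdup_laneq takes
   its lane from a 128-bit source; the lane index is expected to be a
   constant in range.  With hypothetical locals:

     int16x4_t __v = { 10, 20, 30, 40 };
     int16x4_t __b = vdup_lane_s16 (__v, 2);  -- { 30, 30, 30, 30 }  */
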
18194/* vdupq_lane */
18195__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18196vdupq_lane_f32 (float32x2_t __a, const int __b)
18197{
18198 return __aarch64_vdupq_lane_f32 (__a, __b);
18199}
18200
18201__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18202vdupq_lane_f64 (float64x1_t __a, const int __b)
18203{
18204 return __aarch64_vdupq_lane_f64 (__a, __b);
18205}
18206
18207__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18208vdupq_lane_p8 (poly8x8_t __a, const int __b)
18209{
18210 return __aarch64_vdupq_lane_p8 (__a, __b);
18211}
18212
18213__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18214vdupq_lane_p16 (poly16x4_t __a, const int __b)
18215{
18216 return __aarch64_vdupq_lane_p16 (__a, __b);
18217}
18218
18219__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18220vdupq_lane_s8 (int8x8_t __a, const int __b)
18221{
18222 return __aarch64_vdupq_lane_s8 (__a, __b);
18223}
18224
18225__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18226vdupq_lane_s16 (int16x4_t __a, const int __b)
18227{
18228 return __aarch64_vdupq_lane_s16 (__a, __b);
18229}
18230
18231__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18232vdupq_lane_s32 (int32x2_t __a, const int __b)
18233{
18234 return __aarch64_vdupq_lane_s32 (__a, __b);
18235}
18236
18237__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18238vdupq_lane_s64 (int64x1_t __a, const int __b)
18239{
18240 return __aarch64_vdupq_lane_s64 (__a, __b);
18241}
18242
18243__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18244vdupq_lane_u8 (uint8x8_t __a, const int __b)
18245{
18246 return __aarch64_vdupq_lane_u8 (__a, __b);
18247}
18248
18249__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18250vdupq_lane_u16 (uint16x4_t __a, const int __b)
18251{
18252 return __aarch64_vdupq_lane_u16 (__a, __b);
18253}
18254
18255__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18256vdupq_lane_u32 (uint32x2_t __a, const int __b)
18257{
18258 return __aarch64_vdupq_lane_u32 (__a, __b);
18259}
18260
18261__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18262vdupq_lane_u64 (uint64x1_t __a, const int __b)
18263{
18264 return __aarch64_vdupq_lane_u64 (__a, __b);
18265}
18266
18267/* vdupq_laneq */
18268__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18269vdupq_laneq_f32 (float32x4_t __a, const int __b)
18270{
18271 return __aarch64_vdupq_laneq_f32 (__a, __b);
18272}
18273
18274__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18275vdupq_laneq_f64 (float64x2_t __a, const int __b)
18276{
18277 return __aarch64_vdupq_laneq_f64 (__a, __b);
18278}
18279
18280__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18281vdupq_laneq_p8 (poly8x16_t __a, const int __b)
18282{
18283 return __aarch64_vdupq_laneq_p8 (__a, __b);
18284}
18285
18286__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18287vdupq_laneq_p16 (poly16x8_t __a, const int __b)
18288{
18289 return __aarch64_vdupq_laneq_p16 (__a, __b);
18290}
18291
18292__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18293vdupq_laneq_s8 (int8x16_t __a, const int __b)
18294{
18295 return __aarch64_vdupq_laneq_s8 (__a, __b);
18296}
18297
18298__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18299vdupq_laneq_s16 (int16x8_t __a, const int __b)
18300{
18301 return __aarch64_vdupq_laneq_s16 (__a, __b);
18302}
18303
18304__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18305vdupq_laneq_s32 (int32x4_t __a, const int __b)
18306{
18307 return __aarch64_vdupq_laneq_s32 (__a, __b);
18308}
18309
18310__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18311vdupq_laneq_s64 (int64x2_t __a, const int __b)
18312{
18313 return __aarch64_vdupq_laneq_s64 (__a, __b);
18314}
18315
18316__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18317vdupq_laneq_u8 (uint8x16_t __a, const int __b)
18318{
18319 return __aarch64_vdupq_laneq_u8 (__a, __b);
18320}
18321
18322__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18323vdupq_laneq_u16 (uint16x8_t __a, const int __b)
18324{
18325 return __aarch64_vdupq_laneq_u16 (__a, __b);
18326}
18327
18328__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18329vdupq_laneq_u32 (uint32x4_t __a, const int __b)
18330{
18331 return __aarch64_vdupq_laneq_u32 (__a, __b);
18332}
18333
18334__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18335vdupq_laneq_u64 (uint64x2_t __a, const int __b)
18336{
18337 return __aarch64_vdupq_laneq_u64 (__a, __b);
18338}
18339
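/* Usage sketch (illustrative comment only): vdupq_lane broadcasts one lane
   of a 64-bit vector into a 128-bit result; vdupq_laneq does the same from
   a 128-bit source.  With hypothetical locals:

     uint32x2_t __v = { 5, 9 };
     uint32x4_t __b = vdupq_lane_u32 (__v, 1);  -- { 9, 9, 9, 9 }  */
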
18340/* vdupb_lane */
18341__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18342vdupb_lane_p8 (poly8x8_t __a, const int __b)
18343{
18344 return __aarch64_vget_lane_p8 (__a, __b);
18345}
18346
18347__extension__ static __inline int8_t __attribute__ ((__always_inline__))
18348vdupb_lane_s8 (int8x8_t __a, const int __b)
18349{
18350 return __aarch64_vget_lane_s8 (__a, __b);
18351}
18352
18353__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18354vdupb_lane_u8 (uint8x8_t __a, const int __b)
18355{
18356 return __aarch64_vget_lane_u8 (__a, __b);
18357}
18358
18359/* vduph_lane */
18360__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18361vduph_lane_p16 (poly16x4_t __a, const int __b)
18362{
18363 return __aarch64_vget_lane_p16 (__a, __b);
18364}
18365
18366__extension__ static __inline int16_t __attribute__ ((__always_inline__))
18367vduph_lane_s16 (int16x4_t __a, const int __b)
18368{
18369 return __aarch64_vget_lane_s16 (__a, __b);
18370}
18371
18372__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18373vduph_lane_u16 (uint16x4_t __a, const int __b)
18374{
18375 return __aarch64_vget_lane_u16 (__a, __b);
18376}
18377
18378/* vdups_lane */
18379__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18380vdups_lane_f32 (float32x2_t __a, const int __b)
18381{
18382 return __aarch64_vget_lane_f32 (__a, __b);
18383}
18384
18385__extension__ static __inline int32_t __attribute__ ((__always_inline__))
18386vdups_lane_s32 (int32x2_t __a, const int __b)
18387{
18388 return __aarch64_vget_lane_s32 (__a, __b);
18389}
18390
18391__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18392vdups_lane_u32 (uint32x2_t __a, const int __b)
18393{
18394 return __aarch64_vget_lane_u32 (__a, __b);
18395}
18396
18397/* vdupd_lane */
18398__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18399vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b)
18400{
18401 return __a;
18402}
18403
18404__extension__ static __inline int64_t __attribute__ ((__always_inline__))
18405vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b)
18406{
18407 return __a;
18408}
18409
18410__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18411vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b)
18412{
18413 return __a;
18414}
18415
18416/* vdupb_laneq */
18417__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
18418vdupb_laneq_p8 (poly8x16_t __a, const int __b)
18419{
18420 return __aarch64_vgetq_lane_p8 (__a, __b);
18421}
18422
18423__extension__ static __inline int8_t __attribute__ ((__always_inline__))
18424vdupb_laneq_s8 (int8x16_t __a, const int __b)
18425{
18426 return __aarch64_vgetq_lane_s8 (__a, __b);
18427}
18428
18429__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18430vdupb_laneq_u8 (uint8x16_t __a, const int __b)
18431{
18432 return __aarch64_vgetq_lane_u8 (__a, __b);
18433}
18434
18435/* vduph_laneq */
18436__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
18437vduph_laneq_p16 (poly16x8_t __a, const int __b)
18438{
18439 return __aarch64_vgetq_lane_p16 (__a, __b);
18440}
18441
18442__extension__ static __inline int16_t __attribute__ ((__always_inline__))
18443vduph_laneq_s16 (int16x8_t __a, const int __b)
18444{
18445 return __aarch64_vgetq_lane_s16 (__a, __b);
18446}
18447
18448__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18449vduph_laneq_u16 (uint16x8_t __a, const int __b)
18450{
18451 return __aarch64_vgetq_lane_u16 (__a, __b);
18452}
18453
18454/* vdups_laneq */
18455__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18456vdups_laneq_f32 (float32x4_t __a, const int __b)
18457{
18458 return __aarch64_vgetq_lane_f32 (__a, __b);
18459}
18460
18461__extension__ static __inline int32_t __attribute__ ((__always_inline__))
18462vdups_laneq_s32 (int32x4_t __a, const int __b)
18463{
18464 return __aarch64_vgetq_lane_s32 (__a, __b);
18465}
18466
18467__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18468vdups_laneq_u32 (uint32x4_t __a, const int __b)
18469{
18470 return __aarch64_vgetq_lane_u32 (__a, __b);
18471}
18472
18473/* vdupd_laneq */
18474__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18475vdupd_laneq_f64 (float64x2_t __a, const int __b)
18476{
18477 return __aarch64_vgetq_lane_f64 (__a, __b);
18478}
18479
18480__extension__ static __inline int64_t __attribute__ ((__always_inline__))
18481vdupd_laneq_s64 (int64x2_t __a, const int __b)
18482{
18483 return __aarch64_vgetq_lane_s64 (__a, __b);
18484}
18485
18486__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18487vdupd_laneq_u64 (uint64x2_t __a, const int __b)
18488{
18489 return __aarch64_vgetq_lane_u64 (__a, __b);
18490}
18491
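/* Usage sketch (illustrative comment only): the vdupb/vduph/vdups/vdupd
   forms extract a single lane as a scalar, equivalent to the corresponding
   vget_lane/vgetq_lane used in their bodies.  With hypothetical locals:

     float32x4_t __v = { 1.0f, 2.0f, 3.0f, 4.0f };
     float32_t   __s = vdups_laneq_f32 (__v, 3);  -- 4.0f  */
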
18492/* vfma_lane */
18493
18494__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18495vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
18496 float32x2_t __c, const int __lane)
18497{
18498 return __builtin_aarch64_fmav2sf (__b,
18499 __aarch64_vdup_lane_f32 (__c, __lane),
18500 __a);
18501}
18502
18503__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18504vfma_lane_f64 (float64_t __a, float64_t __b,
18505 float64_t __c, const int __lane)
18506{
18507 return __builtin_fma (__b, __c, __a);
18508}
18509
18510__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18511vfmad_lane_f64 (float64_t __a, float64_t __b,
18512 float64_t __c, const int __lane)
18513{
18514 return __builtin_fma (__b, __c, __a);
18515}
18516
18517__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18518vfmas_lane_f32 (float32_t __a, float32_t __b,
18519 float32x2_t __c, const int __lane)
18520{
18521 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18522}
18523
18524/* vfma_laneq */
18525
18526__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18527vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
18528 float32x4_t __c, const int __lane)
18529{
18530 return __builtin_aarch64_fmav2sf (__b,
18531 __aarch64_vdup_laneq_f32 (__c, __lane),
18532 __a);
18533}
18534
18535__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18536vfma_laneq_f64 (float64_t __a, float64_t __b,
18537 float64x2_t __c, const int __lane)
18538{
18539 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18540}
18541
18542__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18543vfmad_laneq_f64 (float64_t __a, float64_t __b,
18544 float64x2_t __c, const int __lane)
18545{
18546 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18547}
18548
18549__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18550vfmas_laneq_f32 (float32_t __a, float32_t __b,
18551 float32x4_t __c, const int __lane)
18552{
18553 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18554}
18555
18556/* vfmaq_lane */
18557
18558__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18559vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18560 float32x2_t __c, const int __lane)
18561{
18562 return __builtin_aarch64_fmav4sf (__b,
18563 __aarch64_vdupq_lane_f32 (__c, __lane),
18564 __a);
18565}
18566
18567__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18568vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
18569 float64_t __c, const int __lane)
18570{
18571 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
18572}
18573
18574/* vfmaq_laneq */
18575
18576__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18577vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18578 float32x4_t __c, const int __lane)
18579{
18580 return __builtin_aarch64_fmav4sf (__b,
18581 __aarch64_vdupq_laneq_f32 (__c, __lane),
18582 __a);
18583}
18584
18585__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18586vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18587 float64x2_t __c, const int __lane)
18588{
18589 return __builtin_aarch64_fmav2df (__b,
18590 __aarch64_vdupq_laneq_f64 (__c, __lane),
18591 __a);
18592}
18593
18594/* vfms_lane */
18595
18596__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18597vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
18598 float32x2_t __c, const int __lane)
18599{
18600 return __builtin_aarch64_fmav2sf (-__b,
18601 __aarch64_vdup_lane_f32 (__c, __lane),
18602 __a);
18603}
18604
18605__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18606vfms_lane_f64 (float64_t __a, float64_t __b,
18607 float64_t __c, const int __lane)
18608{
18609 return __builtin_fma (-__b, __c, __a);
18610}
18611
18612__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18613vfmsd_lane_f64 (float64_t __a, float64_t __b,
18614 float64_t __c, const int __lane)
18615{
18616 return __builtin_fma (-__b, __c, __a);
18617}
18618
18619__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18620vfmss_lane_f32 (float32_t __a, float32_t __b,
18621 float32x2_t __c, const int __lane)
18622{
18623 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
18624}
18625
18626/* vfms_laneq */
18627
18628__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18629vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
18630 float32x4_t __c, const int __lane)
18631{
18632 return __builtin_aarch64_fmav2sf (-__b,
18633 __aarch64_vdup_laneq_f32 (__c, __lane),
18634 __a);
18635}
18636
18637__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18638vfms_laneq_f64 (float64_t __a, float64_t __b,
18639 float64x2_t __c, const int __lane)
18640{
18641 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18642}
18643
18644__extension__ static __inline float64_t __attribute__ ((__always_inline__))
18645vfmsd_laneq_f64 (float64_t __a, float64_t __b,
18646 float64x2_t __c, const int __lane)
18647{
18648 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
18649}
18650
18651__extension__ static __inline float32_t __attribute__ ((__always_inline__))
18652vfmss_laneq_f32 (float32_t __a, float32_t __b,
18653 float32x4_t __c, const int __lane)
18654{
18655 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
18656}
18657
18658/* vfmsq_lane */
18659
18660__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18661vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18662 float32x2_t __c, const int __lane)
18663{
18664 return __builtin_aarch64_fmav4sf (-__b,
18665 __aarch64_vdupq_lane_f32 (__c, __lane),
18666 __a);
18667}
18668
18669__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18670vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
18671 float64_t __c, const int __lane)
18672{
18673 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
18674}
18675
18676/* vfmsq_laneq */
18677
18678__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18679vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18680 float32x4_t __c, const int __lane)
18681{
18682 return __builtin_aarch64_fmav4sf (-__b,
18683 __aarch64_vdupq_laneq_f32 (__c, __lane),
18684 __a);
18685}
18686
18687__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18688vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
18689 float64x2_t __c, const int __lane)
18690{
18691 return __builtin_aarch64_fmav2df (-__b,
18692 __aarch64_vdupq_laneq_f64 (__c, __lane),
18693 __a);
18694}
18695
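/* Usage sketch (illustrative comment only): the lane forms of vfma/vfms
   compute, per lane i, __a[i] + __b[i] * __c[lane] (vfms negates __b),
   using a single fused multiply-add.  With hypothetical locals:

     float32x2_t __acc = { 1.0f, 1.0f };
     float32x2_t __x   = { 2.0f, 3.0f };
     float32x2_t __c   = { 10.0f, 100.0f };
     float32x2_t __r   = vfma_lane_f32 (__acc, __x, __c, 1);
     -- __r is { 201.0f, 301.0f }, i.e. __acc + __x * __c[1]  */
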
18696/* vld1 */
18697
18698__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18699vld1_f32 (const float32_t *a)
18700{
18701 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
18702}
18703
18704__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18705vld1_f64 (const float64_t *a)
18706{
18707 return *a;
18708}
18709
18710__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18711vld1_p8 (const poly8_t *a)
18712{
18713 return (poly8x8_t)
18714 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18715}
18716
18717__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18718vld1_p16 (const poly16_t *a)
18719{
18720 return (poly16x4_t)
18721 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18722}
18723
18724__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18725vld1_s8 (const int8_t *a)
18726{
18727 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18728}
18729
18730__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18731vld1_s16 (const int16_t *a)
18732{
18733 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18734}
18735
18736__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18737vld1_s32 (const int32_t *a)
18738{
18739 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18740}
18741
18742__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18743vld1_s64 (const int64_t *a)
18744{
18745 return *a;
18746}
18747
18748__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18749vld1_u8 (const uint8_t *a)
18750{
18751 return (uint8x8_t)
18752 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
18753}
18754
18755__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18756vld1_u16 (const uint16_t *a)
18757{
18758 return (uint16x4_t)
18759 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
18760}
18761
18762__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18763vld1_u32 (const uint32_t *a)
18764{
18765 return (uint32x2_t)
18766 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
18767}
18768
18769__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18770vld1_u64 (const uint64_t *a)
18771{
18772 return *a;
18773}
18774
18775/* vld1q */
18776
18777__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18778vld1q_f32 (const float32_t *a)
18779{
18780 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
18781}
18782
18783__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18784vld1q_f64 (const float64_t *a)
18785{
18786 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
18787}
18788
18789__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18790vld1q_p8 (const poly8_t *a)
18791{
18792 return (poly8x16_t)
18793 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18794}
18795
18796__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18797vld1q_p16 (const poly16_t *a)
18798{
18799 return (poly16x8_t)
18800 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18801}
18802
18803__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18804vld1q_s8 (const int8_t *a)
18805{
18806 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18807}
18808
18809__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18810vld1q_s16 (const int16_t *a)
18811{
18812 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18813}
18814
18815__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18816vld1q_s32 (const int32_t *a)
18817{
18818 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18819}
18820
18821__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18822vld1q_s64 (const int64_t *a)
18823{
18824 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18825}
18826
18827__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18828vld1q_u8 (const uint8_t *a)
18829{
18830 return (uint8x16_t)
18831 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
18832}
18833
18834__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18835vld1q_u16 (const uint16_t *a)
18836{
18837 return (uint16x8_t)
18838 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
18839}
18840
18841__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18842vld1q_u32 (const uint32_t *a)
18843{
18844 return (uint32x4_t)
18845 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
18846}
18847
18848__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18849vld1q_u64 (const uint64_t *a)
18850{
18851 return (uint64x2_t)
18852 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
18853}
18854
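/* Usage sketch (illustrative comment only): vld1/vld1q load consecutive
   elements from memory into one vector (64-bit for vld1, 128-bit for
   vld1q).  With a hypothetical buffer:

     const uint8_t __buf[16] = { 0 };
     uint8x8_t  __d = vld1_u8  (__buf);  -- first 8 bytes
     uint8x16_t __q = vld1q_u8 (__buf);  -- all 16 bytes  */
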
18855/* vldn */
18856
18857__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
18858vld2_s64 (const int64_t * __a)
18859{
18860 int64x1x2_t ret;
18861 __builtin_aarch64_simd_oi __o;
18862 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18863 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18864 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18865 return ret;
18866}
18867
18868__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
18869vld2_u64 (const uint64_t * __a)
18870{
18871 uint64x1x2_t ret;
18872 __builtin_aarch64_simd_oi __o;
18873 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
18874 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
18875 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
18876 return ret;
18877}
18878
18879__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
18880vld2_f64 (const float64_t * __a)
18881{
18882 float64x1x2_t ret;
18883 __builtin_aarch64_simd_oi __o;
18884 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
18885 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
18886 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
18887 return ret;
18888}
18889
18890__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
18891vld2_s8 (const int8_t * __a)
18892{
18893 int8x8x2_t ret;
18894 __builtin_aarch64_simd_oi __o;
18895 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18896 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18897 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18898 return ret;
18899}
18900
18901__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
18902vld2_p8 (const poly8_t * __a)
18903{
18904 poly8x8x2_t ret;
18905 __builtin_aarch64_simd_oi __o;
18906 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18907 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18908 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18909 return ret;
18910}
18911
18912__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
18913vld2_s16 (const int16_t * __a)
18914{
18915 int16x4x2_t ret;
18916 __builtin_aarch64_simd_oi __o;
18917 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18918 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18919 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18920 return ret;
18921}
18922
18923__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
18924vld2_p16 (const poly16_t * __a)
18925{
18926 poly16x4x2_t ret;
18927 __builtin_aarch64_simd_oi __o;
18928 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18929 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18930 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18931 return ret;
18932}
18933
18934__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
18935vld2_s32 (const int32_t * __a)
18936{
18937 int32x2x2_t ret;
18938 __builtin_aarch64_simd_oi __o;
18939 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18940 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18941 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18942 return ret;
18943}
18944
18945__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
18946vld2_u8 (const uint8_t * __a)
18947{
18948 uint8x8x2_t ret;
18949 __builtin_aarch64_simd_oi __o;
18950 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
18951 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
18952 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
18953 return ret;
18954}
18955
18956__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
18957vld2_u16 (const uint16_t * __a)
18958{
18959 uint16x4x2_t ret;
18960 __builtin_aarch64_simd_oi __o;
18961 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
18962 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
18963 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
18964 return ret;
18965}
18966
18967__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
18968vld2_u32 (const uint32_t * __a)
18969{
18970 uint32x2x2_t ret;
18971 __builtin_aarch64_simd_oi __o;
18972 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
18973 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
18974 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
18975 return ret;
18976}
18977
18978__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
18979vld2_f32 (const float32_t * __a)
18980{
18981 float32x2x2_t ret;
18982 __builtin_aarch64_simd_oi __o;
18983 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
18984 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
18985 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
18986 return ret;
18987}
18988
18989__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
18990vld2q_s8 (const int8_t * __a)
18991{
18992 int8x16x2_t ret;
18993 __builtin_aarch64_simd_oi __o;
18994 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
18995 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
18996 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
18997 return ret;
18998}
18999
19000__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
19001vld2q_p8 (const poly8_t * __a)
19002{
19003 poly8x16x2_t ret;
19004 __builtin_aarch64_simd_oi __o;
19005 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19006 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19007 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19008 return ret;
19009}
19010
19011__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
19012vld2q_s16 (const int16_t * __a)
19013{
19014 int16x8x2_t ret;
19015 __builtin_aarch64_simd_oi __o;
19016 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19017 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19018 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19019 return ret;
19020}
19021
19022__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
19023vld2q_p16 (const poly16_t * __a)
19024{
19025 poly16x8x2_t ret;
19026 __builtin_aarch64_simd_oi __o;
19027 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19028 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19029 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19030 return ret;
19031}
19032
19033__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
19034vld2q_s32 (const int32_t * __a)
19035{
19036 int32x4x2_t ret;
19037 __builtin_aarch64_simd_oi __o;
19038 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19039 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19040 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19041 return ret;
19042}
19043
19044__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
19045vld2q_s64 (const int64_t * __a)
19046{
19047 int64x2x2_t ret;
19048 __builtin_aarch64_simd_oi __o;
19049 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19050 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19051 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19052 return ret;
19053}
19054
19055__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
19056vld2q_u8 (const uint8_t * __a)
19057{
19058 uint8x16x2_t ret;
19059 __builtin_aarch64_simd_oi __o;
19060 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
19061 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
19062 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
19063 return ret;
19064}
19065
19066__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
19067vld2q_u16 (const uint16_t * __a)
19068{
19069 uint16x8x2_t ret;
19070 __builtin_aarch64_simd_oi __o;
19071 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
19072 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
19073 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
19074 return ret;
19075}
19076
19077__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
19078vld2q_u32 (const uint32_t * __a)
19079{
19080 uint32x4x2_t ret;
19081 __builtin_aarch64_simd_oi __o;
19082 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
19083 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
19084 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
19085 return ret;
19086}
19087
19088__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
19089vld2q_u64 (const uint64_t * __a)
19090{
19091 uint64x2x2_t ret;
19092 __builtin_aarch64_simd_oi __o;
19093 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
19094 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
19095 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
19096 return ret;
19097}
19098
19099__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
19100vld2q_f32 (const float32_t * __a)
19101{
19102 float32x4x2_t ret;
19103 __builtin_aarch64_simd_oi __o;
19104 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
19105 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
19106 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
19107 return ret;
19108}
19109
19110__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
19111vld2q_f64 (const float64_t * __a)
19112{
19113 float64x2x2_t ret;
19114 __builtin_aarch64_simd_oi __o;
19115 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
19116 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
19117 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
19118 return ret;
19119}
19120
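/* Usage sketch (illustrative comment only): vld2/vld2q perform a
   de-interleaving load; elements 0, 2, 4, ... go to val[0] and elements
   1, 3, 5, ... to val[1], e.g. splitting x/y pairs.  With hypothetical
   locals:

     const float32_t __xy[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
     float32x2x2_t   __p = vld2_f32 (__xy);
     -- __p.val[0] is { 1.0f, 3.0f }, __p.val[1] is { 2.0f, 4.0f }  */
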
19121__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
19122vld3_s64 (const int64_t * __a)
19123{
19124 int64x1x3_t ret;
19125 __builtin_aarch64_simd_ci __o;
19126 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19127 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19128 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19129 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19130 return ret;
19131}
19132
19133__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
19134vld3_u64 (const uint64_t * __a)
19135{
19136 uint64x1x3_t ret;
19137 __builtin_aarch64_simd_ci __o;
19138 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
19139 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
19140 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
19141 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
19142 return ret;
19143}
19144
19145__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
19146vld3_f64 (const float64_t * __a)
19147{
19148 float64x1x3_t ret;
19149 __builtin_aarch64_simd_ci __o;
19150 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
19151 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
19152 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
19153 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
19154 return ret;
19155}
19156
19157__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
19158vld3_s8 (const int8_t * __a)
19159{
19160 int8x8x3_t ret;
19161 __builtin_aarch64_simd_ci __o;
19162 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19163 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19164 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19165 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19166 return ret;
19167}
19168
19169__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
19170vld3_p8 (const poly8_t * __a)
19171{
19172 poly8x8x3_t ret;
19173 __builtin_aarch64_simd_ci __o;
19174 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19175 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19176 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19177 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19178 return ret;
19179}
19180
19181__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
19182vld3_s16 (const int16_t * __a)
19183{
19184 int16x4x3_t ret;
19185 __builtin_aarch64_simd_ci __o;
19186 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19187 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19188 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19189 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19190 return ret;
19191}
19192
19193__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
19194vld3_p16 (const poly16_t * __a)
19195{
19196 poly16x4x3_t ret;
19197 __builtin_aarch64_simd_ci __o;
19198 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19199 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19200 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19201 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19202 return ret;
19203}
19204
19205__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
19206vld3_s32 (const int32_t * __a)
19207{
19208 int32x2x3_t ret;
19209 __builtin_aarch64_simd_ci __o;
19210 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19211 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19212 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19213 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19214 return ret;
19215}
19216
19217__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
19218vld3_u8 (const uint8_t * __a)
19219{
19220 uint8x8x3_t ret;
19221 __builtin_aarch64_simd_ci __o;
19222 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
19223 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
19224 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
19225 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
19226 return ret;
19227}
19228
19229__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
19230vld3_u16 (const uint16_t * __a)
19231{
19232 uint16x4x3_t ret;
19233 __builtin_aarch64_simd_ci __o;
19234 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
19235 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
19236 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
19237 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
19238 return ret;
19239}
19240
19241__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
19242vld3_u32 (const uint32_t * __a)
19243{
19244 uint32x2x3_t ret;
19245 __builtin_aarch64_simd_ci __o;
19246 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
19247 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
19248 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
19249 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
19250 return ret;
19251}
19252
19253__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
19254vld3_f32 (const float32_t * __a)
19255{
19256 float32x2x3_t ret;
19257 __builtin_aarch64_simd_ci __o;
19258 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
19259 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
19260 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
19261 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
19262 return ret;
19263}
19264
19265__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
19266vld3q_s8 (const int8_t * __a)
19267{
19268 int8x16x3_t ret;
19269 __builtin_aarch64_simd_ci __o;
19270 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19271 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19272 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19273 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19274 return ret;
19275}
19276
19277__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
19278vld3q_p8 (const poly8_t * __a)
19279{
19280 poly8x16x3_t ret;
19281 __builtin_aarch64_simd_ci __o;
19282 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19283 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19284 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19285 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19286 return ret;
19287}
19288
19289__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
19290vld3q_s16 (const int16_t * __a)
19291{
19292 int16x8x3_t ret;
19293 __builtin_aarch64_simd_ci __o;
19294 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19295 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19296 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19297 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19298 return ret;
19299}
19300
19301__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
19302vld3q_p16 (const poly16_t * __a)
19303{
19304 poly16x8x3_t ret;
19305 __builtin_aarch64_simd_ci __o;
19306 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19307 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19308 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19309 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19310 return ret;
19311}
19312
19313__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
19314vld3q_s32 (const int32_t * __a)
19315{
19316 int32x4x3_t ret;
19317 __builtin_aarch64_simd_ci __o;
19318 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19319 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19320 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19321 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19322 return ret;
19323}
19324
19325__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
19326vld3q_s64 (const int64_t * __a)
19327{
19328 int64x2x3_t ret;
19329 __builtin_aarch64_simd_ci __o;
19330 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19331 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19332 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19333 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19334 return ret;
19335}
19336
19337__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
19338vld3q_u8 (const uint8_t * __a)
19339{
19340 uint8x16x3_t ret;
19341 __builtin_aarch64_simd_ci __o;
19342 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
19343 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
19344 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
19345 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
19346 return ret;
19347}
19348
19349__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
19350vld3q_u16 (const uint16_t * __a)
19351{
19352 uint16x8x3_t ret;
19353 __builtin_aarch64_simd_ci __o;
19354 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
19355 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
19356 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
19357 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
19358 return ret;
19359}
19360
19361__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
19362vld3q_u32 (const uint32_t * __a)
19363{
19364 uint32x4x3_t ret;
19365 __builtin_aarch64_simd_ci __o;
19366 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
19367 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
19368 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
19369 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
19370 return ret;
19371}
19372
19373__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
19374vld3q_u64 (const uint64_t * __a)
19375{
19376 uint64x2x3_t ret;
19377 __builtin_aarch64_simd_ci __o;
19378 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
19379 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
19380 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
19381 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
19382 return ret;
19383}
19384
19385__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
19386vld3q_f32 (const float32_t * __a)
19387{
19388 float32x4x3_t ret;
19389 __builtin_aarch64_simd_ci __o;
19390 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
19391 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
19392 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
19393 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
19394 return ret;
19395}
19396
19397__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
19398vld3q_f64 (const float64_t * __a)
19399{
19400 float64x2x3_t ret;
19401 __builtin_aarch64_simd_ci __o;
19402 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
19403 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
19404 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
19405 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
19406 return ret;
19407}
19408
19409__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
19410vld4_s64 (const int64_t * __a)
19411{
19412 int64x1x4_t ret;
19413 __builtin_aarch64_simd_xi __o;
19414 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19415 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19416 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19417 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19418 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19419 return ret;
19420}
19421
19422__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
19423vld4_u64 (const uint64_t * __a)
19424{
19425 uint64x1x4_t ret;
19426 __builtin_aarch64_simd_xi __o;
19427 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
19428 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
19429 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
19430 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
19431 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
19432 return ret;
19433}
19434
19435__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
19436vld4_f64 (const float64_t * __a)
19437{
19438 float64x1x4_t ret;
19439 __builtin_aarch64_simd_xi __o;
19440 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
19441 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
19442 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
19443 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
19444 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
19445 return ret;
19446}
19447
19448__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
19449vld4_s8 (const int8_t * __a)
19450{
19451 int8x8x4_t ret;
19452 __builtin_aarch64_simd_xi __o;
19453 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19454 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19455 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19456 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19457 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19458 return ret;
19459}
19460
19461__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
19462vld4_p8 (const poly8_t * __a)
19463{
19464 poly8x8x4_t ret;
19465 __builtin_aarch64_simd_xi __o;
19466 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19467 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19468 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19469 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19470 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19471 return ret;
19472}
19473
19474__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
19475vld4_s16 (const int16_t * __a)
19476{
19477 int16x4x4_t ret;
19478 __builtin_aarch64_simd_xi __o;
19479 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19480 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19481 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19482 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19483 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19484 return ret;
19485}
19486
19487__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
19488vld4_p16 (const poly16_t * __a)
19489{
19490 poly16x4x4_t ret;
19491 __builtin_aarch64_simd_xi __o;
19492 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19493 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19494 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19495 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19496 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19497 return ret;
19498}
19499
19500__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
19501vld4_s32 (const int32_t * __a)
19502{
19503 int32x2x4_t ret;
19504 __builtin_aarch64_simd_xi __o;
19505 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19506 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19507 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19508 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19509 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19510 return ret;
19511}
19512
19513__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
19514vld4_u8 (const uint8_t * __a)
19515{
19516 uint8x8x4_t ret;
19517 __builtin_aarch64_simd_xi __o;
19518 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
19519 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
19520 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
19521 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
19522 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
19523 return ret;
19524}
19525
19526__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
19527vld4_u16 (const uint16_t * __a)
19528{
19529 uint16x4x4_t ret;
19530 __builtin_aarch64_simd_xi __o;
19531 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
19532 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
19533 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
19534 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
19535 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
19536 return ret;
19537}
19538
19539__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
19540vld4_u32 (const uint32_t * __a)
19541{
19542 uint32x2x4_t ret;
19543 __builtin_aarch64_simd_xi __o;
19544 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
19545 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
19546 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
19547 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
19548 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
19549 return ret;
19550}
19551
19552__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
19553vld4_f32 (const float32_t * __a)
19554{
19555 float32x2x4_t ret;
19556 __builtin_aarch64_simd_xi __o;
19557 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
19558 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
19559 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
19560 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
19561 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
19562 return ret;
19563}
19564
19565__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
19566vld4q_s8 (const int8_t * __a)
19567{
19568 int8x16x4_t ret;
19569 __builtin_aarch64_simd_xi __o;
19570 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19571 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19572 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19573 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19574 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19575 return ret;
19576}
19577
19578__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
19579vld4q_p8 (const poly8_t * __a)
19580{
19581 poly8x16x4_t ret;
19582 __builtin_aarch64_simd_xi __o;
19583 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19584 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19585 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19586 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19587 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19588 return ret;
19589}
19590
19591__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
19592vld4q_s16 (const int16_t * __a)
19593{
19594 int16x8x4_t ret;
19595 __builtin_aarch64_simd_xi __o;
19596 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19597 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19598 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19599 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19600 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19601 return ret;
19602}
19603
19604__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
19605vld4q_p16 (const poly16_t * __a)
19606{
19607 poly16x8x4_t ret;
19608 __builtin_aarch64_simd_xi __o;
19609 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19610 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19611 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19612 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19613 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19614 return ret;
19615}
19616
19617__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
19618vld4q_s32 (const int32_t * __a)
19619{
19620 int32x4x4_t ret;
19621 __builtin_aarch64_simd_xi __o;
19622 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19623 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19624 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19625 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19626 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19627 return ret;
19628}
19629
19630__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
19631vld4q_s64 (const int64_t * __a)
19632{
19633 int64x2x4_t ret;
19634 __builtin_aarch64_simd_xi __o;
19635 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19636 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19637 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19638 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19639 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19640 return ret;
19641}
19642
19643__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
19644vld4q_u8 (const uint8_t * __a)
19645{
19646 uint8x16x4_t ret;
19647 __builtin_aarch64_simd_xi __o;
19648 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
19649 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
19650 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
19651 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
19652 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
19653 return ret;
19654}
19655
19656__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
19657vld4q_u16 (const uint16_t * __a)
19658{
19659 uint16x8x4_t ret;
19660 __builtin_aarch64_simd_xi __o;
19661 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
19662 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
19663 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
19664 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
19665 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
19666 return ret;
19667}
19668
19669__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
19670vld4q_u32 (const uint32_t * __a)
19671{
19672 uint32x4x4_t ret;
19673 __builtin_aarch64_simd_xi __o;
19674 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
19675 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
19676 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
19677 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
19678 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
19679 return ret;
19680}
19681
19682__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
19683vld4q_u64 (const uint64_t * __a)
19684{
19685 uint64x2x4_t ret;
19686 __builtin_aarch64_simd_xi __o;
19687 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
19688 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
19689 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
19690 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
19691 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
19692 return ret;
19693}
19694
19695__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
19696vld4q_f32 (const float32_t * __a)
19697{
19698 float32x4x4_t ret;
19699 __builtin_aarch64_simd_xi __o;
19700 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
19701 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
19702 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
19703 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
19704 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
19705 return ret;
19706}
19707
19708__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
19709vld4q_f64 (const float64_t * __a)
19710{
19711 float64x2x4_t ret;
19712 __builtin_aarch64_simd_xi __o;
19713 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
19714 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
19715 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
19716 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
19717 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
19718 return ret;
19719}
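/* The vld2/vld3/vld4 intrinsics above are de-interleaving structure loads
   (LD2/LD3/LD4): element i of val[k] is taken from memory element i*N + k,
   where N is the number of vectors loaded.  A minimal usage sketch --
   illustrative only, `pixels' being a hypothetical pointer to 48
   interleaved R,G,B bytes:

     uint8x16x3_t rgb = vld3q_u8 (pixels);
     uint8x16_t r = rgb.val[0];   // every R component
     uint8x16_t g = rgb.val[1];   // every G component
     uint8x16_t b = rgb.val[2];   // every B component
*/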
19720
19721/* vmax */
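/* vmax and vmaxq compute the element-wise maximum.  The floating-point
   forms use the smax_nan builtins below, i.e. the FMAX instruction, so a
   NaN in either input yields a NaN in that lane (contrast with vmaxnm
   further down).  Illustrative sketch with hypothetical variable names:

     float32x4_t a = vdupq_n_f32 (1.0f);
     float32x4_t b = vdupq_n_f32 (2.0f);
     float32x4_t m = vmaxq_f32 (a, b);   // every lane is 2.0f
*/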
19722
19723__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19724vmax_f32 (float32x2_t __a, float32x2_t __b)
19725{
19726 return __builtin_aarch64_smax_nanv2sf (__a, __b);
19727}
19728
19729__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19730vmax_s8 (int8x8_t __a, int8x8_t __b)
19731{
19732 return __builtin_aarch64_smaxv8qi (__a, __b);
19733}
19734
19735__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19736vmax_s16 (int16x4_t __a, int16x4_t __b)
19737{
19738 return __builtin_aarch64_smaxv4hi (__a, __b);
19739}
19740
19741__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19742vmax_s32 (int32x2_t __a, int32x2_t __b)
19743{
19744 return __builtin_aarch64_smaxv2si (__a, __b);
19745}
19746
19747__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19748vmax_u8 (uint8x8_t __a, uint8x8_t __b)
19749{
19750 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
19751 (int8x8_t) __b);
19752}
19753
19754__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19755vmax_u16 (uint16x4_t __a, uint16x4_t __b)
19756{
19757 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
19758 (int16x4_t) __b);
19759}
19760
19761__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19762vmax_u32 (uint32x2_t __a, uint32x2_t __b)
19763{
19764 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
19765 (int32x2_t) __b);
19766}
19767
19768__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19769vmaxq_f32 (float32x4_t __a, float32x4_t __b)
19770{
19771 return __builtin_aarch64_smax_nanv4sf (__a, __b);
19772}
19773
19774__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19775vmaxq_f64 (float64x2_t __a, float64x2_t __b)
19776{
19777 return __builtin_aarch64_smax_nanv2df (__a, __b);
19778}
19779
19780__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19781vmaxq_s8 (int8x16_t __a, int8x16_t __b)
19782{
19783 return __builtin_aarch64_smaxv16qi (__a, __b);
19784}
19785
19786__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19787vmaxq_s16 (int16x8_t __a, int16x8_t __b)
19788{
19789 return __builtin_aarch64_smaxv8hi (__a, __b);
19790}
19791
19792__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19793vmaxq_s32 (int32x4_t __a, int32x4_t __b)
19794{
19795 return __builtin_aarch64_smaxv4si (__a, __b);
19796}
19797
19798__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19799vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
19800{
19801 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
19802 (int8x16_t) __b);
19803}
19804
19805__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19806vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
19807{
19808 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
19809 (int16x8_t) __b);
19810}
19811
19812__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19813vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
19814{
19815 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
19816 (int32x4_t) __b);
19817}
19818
19819/* vmaxnm */
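/* vmaxnm maps to FMAXNM, the IEEE 754-2008 maxNum operation: when exactly
   one operand of a lane is a quiet NaN, the numeric operand is returned,
   whereas vmax would return NaN for that lane.  Illustrative sketch with
   hypothetical values:

     float32x2_t x = {__builtin_nanf (""), 1.0f};
     float32x2_t y = {5.0f, 3.0f};
     float32x2_t r = vmaxnm_f32 (x, y);   // {5.0f, 3.0f}
*/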
19820
19821__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19822vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
19823{
19824 return __builtin_aarch64_smaxv2sf (__a, __b);
19825}
19826
19827__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19828vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
19829{
19830 return __builtin_aarch64_smaxv4sf (__a, __b);
19831}
19832
19833__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19834vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
19835{
19836 return __builtin_aarch64_smaxv2df (__a, __b);
19837}
19838
19839/* vmaxv */
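/* vmaxv and vmaxvq are across-lanes reductions: they return the largest
   element of the whole vector as a scalar, implemented below as a
   reduction builtin followed by extraction of lane 0.  Illustrative
   sketch, `data' being a hypothetical pointer to 16 readable bytes:

     uint8x16_t v = vld1q_u8 (data);
     uint8_t hi = vmaxvq_u8 (v);   // largest of the 16 bytes
*/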
19840
19841__extension__ static __inline float32_t __attribute__ ((__always_inline__))
19842vmaxv_f32 (float32x2_t __a)
19843{
19844 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
19845 0);
19846}
19847
19848__extension__ static __inline int8_t __attribute__ ((__always_inline__))
19849vmaxv_s8 (int8x8_t __a)
19850{
19851 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
19852}
19853
19854__extension__ static __inline int16_t __attribute__ ((__always_inline__))
19855vmaxv_s16 (int16x4_t __a)
19856{
19857 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
19858}
19859
19860__extension__ static __inline int32_t __attribute__ ((__always_inline__))
19861vmaxv_s32 (int32x2_t __a)
19862{
19863 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
19864}
19865
19866__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19867vmaxv_u8 (uint8x8_t __a)
19868{
19869 return vget_lane_u8 ((uint8x8_t)
19870 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
19871 0);
19872}
19873
19874__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19875vmaxv_u16 (uint16x4_t __a)
19876{
19877 return vget_lane_u16 ((uint16x4_t)
19878 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
19879 0);
19880}
19881
19882__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19883vmaxv_u32 (uint32x2_t __a)
19884{
19885 return vget_lane_u32 ((uint32x2_t)
19886 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
19887 0);
19888}
19889
19890__extension__ static __inline float32_t __attribute__ ((__always_inline__))
19891vmaxvq_f32 (float32x4_t __a)
19892{
19893 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
19894 0);
19895}
19896
19897__extension__ static __inline float64_t __attribute__ ((__always_inline__))
19898vmaxvq_f64 (float64x2_t __a)
19899{
19900 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
19901 0);
19902}
19903
19904__extension__ static __inline int8_t __attribute__ ((__always_inline__))
19905vmaxvq_s8 (int8x16_t __a)
19906{
19907 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
19908}
19909
19910__extension__ static __inline int16_t __attribute__ ((__always_inline__))
19911vmaxvq_s16 (int16x8_t __a)
19912{
19913 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
19914}
19915
19916__extension__ static __inline int32_t __attribute__ ((__always_inline__))
19917vmaxvq_s32 (int32x4_t __a)
19918{
19919 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
19920}
19921
19922__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19923vmaxvq_u8 (uint8x16_t __a)
19924{
19925 return vgetq_lane_u8 ((uint8x16_t)
19926 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
19927 0);
19928}
19929
19930__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19931vmaxvq_u16 (uint16x8_t __a)
19932{
19933 return vgetq_lane_u16 ((uint16x8_t)
19934 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
19935 0);
19936}
19937
19938__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19939vmaxvq_u32 (uint32x4_t __a)
19940{
19941 return vgetq_lane_u32 ((uint32x4_t)
19942 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
19943 0);
19944}
19945
19946/* vmaxnmv */
19947
19948__extension__ static __inline float32_t __attribute__ ((__always_inline__))
19949vmaxnmv_f32 (float32x2_t __a)
19950{
19951 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
19952 0);
19953}
19954
19955__extension__ static __inline float32_t __attribute__ ((__always_inline__))
19956vmaxnmvq_f32 (float32x4_t __a)
19957{
19958 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
19959}
19960
19961__extension__ static __inline float64_t __attribute__ ((__always_inline__))
19962vmaxnmvq_f64 (float64x2_t __a)
19963{
19964 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
19965}
19966
19967/* vmin */
19968
19969__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19970vmin_f32 (float32x2_t __a, float32x2_t __b)
19971{
19972 return __builtin_aarch64_smin_nanv2sf (__a, __b);
19973}
19974
19975__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19976vmin_s8 (int8x8_t __a, int8x8_t __b)
19977{
19978 return __builtin_aarch64_sminv8qi (__a, __b);
19979}
19980
19981__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19982vmin_s16 (int16x4_t __a, int16x4_t __b)
19983{
19984 return __builtin_aarch64_sminv4hi (__a, __b);
19985}
19986
19987__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19988vmin_s32 (int32x2_t __a, int32x2_t __b)
19989{
19990 return __builtin_aarch64_sminv2si (__a, __b);
19991}
19992
19993__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19994vmin_u8 (uint8x8_t __a, uint8x8_t __b)
19995{
19996 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
19997 (int8x8_t) __b);
19998}
19999
20000__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20001vmin_u16 (uint16x4_t __a, uint16x4_t __b)
20002{
20003 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
20004 (int16x4_t) __b);
20005}
20006
20007__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20008vmin_u32 (uint32x2_t __a, uint32x2_t __b)
20009{
20010 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
20011 (int32x2_t) __b);
20012}
20013
20014__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20015vminq_f32 (float32x4_t __a, float32x4_t __b)
20016{
20017 return __builtin_aarch64_smin_nanv4sf (__a, __b);
20018}
20019
20020__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20021vminq_f64 (float64x2_t __a, float64x2_t __b)
20022{
20023 return __builtin_aarch64_smin_nanv2df (__a, __b);
20024}
20025
20026__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20027vminq_s8 (int8x16_t __a, int8x16_t __b)
20028{
20029 return __builtin_aarch64_sminv16qi (__a, __b);
20030}
20031
20032__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20033vminq_s16 (int16x8_t __a, int16x8_t __b)
20034{
20035 return __builtin_aarch64_sminv8hi (__a, __b);
20036}
20037
20038__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20039vminq_s32 (int32x4_t __a, int32x4_t __b)
20040{
20041 return __builtin_aarch64_sminv4si (__a, __b);
20042}
20043
20044__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20045vminq_u8 (uint8x16_t __a, uint8x16_t __b)
20046{
20047 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
20048 (int8x16_t) __b);
20049}
20050
20051__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20052vminq_u16 (uint16x8_t __a, uint16x8_t __b)
20053{
20054 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
20055 (int16x8_t) __b);
20056}
20057
20058__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20059vminq_u32 (uint32x4_t __a, uint32x4_t __b)
20060{
20061 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
20062 (int32x4_t) __b);
20063}
20064
20065/* vminnm */
20066
20067__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20068vminnm_f32 (float32x2_t __a, float32x2_t __b)
20069{
20070 return __builtin_aarch64_sminv2sf (__a, __b);
20071}
20072
20073__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20074vminnmq_f32 (float32x4_t __a, float32x4_t __b)
20075{
20076 return __builtin_aarch64_sminv4sf (__a, __b);
20077}
20078
20079__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20080vminnmq_f64 (float64x2_t __a, float64x2_t __b)
20081{
20082 return __builtin_aarch64_sminv2df (__a, __b);
20083}
20084
20085/* vminv */
20086
20087__extension__ static __inline float32_t __attribute__ ((__always_inline__))
20088vminv_f32 (float32x2_t __a)
20089{
20090 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
20091 0);
20092}
20093
20094__extension__ static __inline int8_t __attribute__ ((__always_inline__))
20095vminv_s8 (int8x8_t __a)
20096{
20097 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
20098 0);
20099}
20100
20101__extension__ static __inline int16_t __attribute__ ((__always_inline__))
20102vminv_s16 (int16x4_t __a)
20103{
20104 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
20105}
20106
20107__extension__ static __inline int32_t __attribute__ ((__always_inline__))
20108vminv_s32 (int32x2_t __a)
20109{
20110 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
20111}
20112
20113__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20114vminv_u8 (uint8x8_t __a)
20115{
20116 return vget_lane_u8 ((uint8x8_t)
20117 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
20118 0);
20119}
20120
20121__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20122vminv_u16 (uint16x4_t __a)
20123{
20124 return vget_lane_u16 ((uint16x4_t)
20125 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
20126 0);
20127}
20128
20129__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20130vminv_u32 (uint32x2_t __a)
20131{
20132 return vget_lane_u32 ((uint32x2_t)
20133 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
20134 0);
20135}
20136
20137__extension__ static __inline float32_t __attribute__ ((__always_inline__))
20138vminvq_f32 (float32x4_t __a)
20139{
20140 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
20141 0);
20142}
20143
20144__extension__ static __inline float64_t __attribute__ ((__always_inline__))
20145vminvq_f64 (float64x2_t __a)
20146{
20147 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
20148 0);
20149}
20150
20151__extension__ static __inline int8_t __attribute__ ((__always_inline__))
20152vminvq_s8 (int8x16_t __a)
20153{
20154 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
20155}
20156
20157__extension__ static __inline int16_t __attribute__ ((__always_inline__))
20158vminvq_s16 (int16x8_t __a)
20159{
20160 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
20161}
20162
20163__extension__ static __inline int32_t __attribute__ ((__always_inline__))
20164vminvq_s32 (int32x4_t __a)
20165{
20166 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
20167}
20168
20169__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20170vminvq_u8 (uint8x16_t __a)
20171{
20172 return vgetq_lane_u8 ((uint8x16_t)
20173 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
20174 0);
20175}
20176
20177__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20178vminvq_u16 (uint16x8_t __a)
20179{
20180 return vgetq_lane_u16 ((uint16x8_t)
20181 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
20182 0);
20183}
20184
20185__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20186vminvq_u32 (uint32x4_t __a)
20187{
20188 return vgetq_lane_u32 ((uint32x4_t)
20189 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
20190 0);
20191}
20192
20193/* vminnmv */
20194
20195__extension__ static __inline float32_t __attribute__ ((__always_inline__))
20196vminnmv_f32 (float32x2_t __a)
20197{
20198 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
20199}
20200
20201__extension__ static __inline float32_t __attribute__ ((__always_inline__))
20202vminnmvq_f32 (float32x4_t __a)
20203{
20204 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
20205}
20206
20207__extension__ static __inline float64_t __attribute__ ((__always_inline__))
20208vminnmvq_f64 (float64x2_t __a)
20209{
20210 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
20211}
20212
20213/* vmla */
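/* The floating-point vmla forms below are written as the plain expression
   a + b * c; whether that contracts to a fused FMLA depends on the
   compiler's -ffp-contract setting, so use the vfma/vfmaq intrinsics when
   fused behaviour is specifically required.  Illustrative sketch with
   hypothetical names:

     float32x4_t acc = vdupq_n_f32 (0.0f);
     acc = vmlaq_f32 (acc, x, y);   // acc += x * y, element-wise
*/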
20214
20215__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20216vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20217{
20218 return a + b * c;
20219}
20220
20221__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20222vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20223{
20224 return a + b * c;
20225}
20226
20227__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20228vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20229{
20230 return a + b * c;
20231}
20232
20233/* vmla_lane */
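/* The _lane variants multiply every element of __b by one selected
   element of __c (__lane must be a constant index in range for __c); the
   _laneq variants below differ only in taking that scalar from a 128-bit
   vector.  Typical use is a small dot-product or FIR step, sketched here
   with hypothetical names:

     // acc += samples * coeffs[1], coeffs[1] broadcast to both lanes
     acc = vmla_lane_f32 (acc, samples, coeffs, 1);
*/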
20234
20235__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20236vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
20237 float32x2_t __c, const int __lane)
20238{
20239 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20240}
20241
20242__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20243vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
20244 int16x4_t __c, const int __lane)
20245{
20246 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20247}
20248
20249__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20250vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
20251 int32x2_t __c, const int __lane)
20252{
20253 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20254}
20255
20256__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20257vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20258 uint16x4_t __c, const int __lane)
20259{
20260 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20261}
20262
20263__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20264vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20265 uint32x2_t __c, const int __lane)
20266{
20267 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20268}
20269
20270/* vmla_laneq */
20271
20272__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20273vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
20274 float32x4_t __c, const int __lane)
20275{
20276 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20277}
20278
20279__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20280vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
20281 int16x8_t __c, const int __lane)
20282{
20283 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20284}
20285
20286__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20287vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
20288 int32x4_t __c, const int __lane)
20289{
20290 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20291}
20292
20293__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20294vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20295 uint16x8_t __c, const int __lane)
20296{
20297 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20298}
20299
20300__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20301vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20302 uint32x4_t __c, const int __lane)
20303{
20304 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20305}
20306
20307/* vmlaq_lane */
20308
20309__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20310vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
20311 float32x2_t __c, const int __lane)
20312{
20313 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20314}
20315
20316__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20317vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
20318 int16x4_t __c, const int __lane)
20319{
20320 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20321}
20322
20323__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20324vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
20325 int32x2_t __c, const int __lane)
20326{
20327 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20328}
20329
20330__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20331vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20332 uint16x4_t __c, const int __lane)
20333{
20334 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20335}
20336
20337__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20338vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20339 uint32x2_t __c, const int __lane)
20340{
20341 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20342}
20343
20344/* vmlaq_laneq */
20345
20346__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20347vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20348 float32x4_t __c, const int __lane)
20349{
20350 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20351}
20352
20353__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20354vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20355 int16x8_t __c, const int __lane)
20356{
20357 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20358}
20359
20360__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20361vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20362 int32x4_t __c, const int __lane)
20363{
20364 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20365}
20366
20367__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20368vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20369 uint16x8_t __c, const int __lane)
20370{
20371 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20372}
20373
20374__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20375vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20376 uint32x4_t __c, const int __lane)
20377{
20378 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20379}
20380
20381/* vmls */
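/* vmls is the multiply-subtract counterpart of vmla (a - b * c), with the
   same contraction caveat for the floating-point forms.  One-line sketch
   with hypothetical names:

     err = vmlsq_f32 (err, pred, weight);   // err -= pred * weight
*/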
20382
20383__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20384vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
20385{
20386 return a - b * c;
20387}
20388
20389__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20390vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
20391{
20392 return a - b * c;
20393}
20394
20395__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20396vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
20397{
20398 return a - b * c;
20399}
20400
20401/* vmls_lane */
20402
20403__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20404vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
20405 float32x2_t __c, const int __lane)
20406{
20407 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20408}
20409
20410__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20411vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
20412 int16x4_t __c, const int __lane)
20413{
20414 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20415}
20416
20417__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20418vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
20419 int32x2_t __c, const int __lane)
20420{
20421 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20422}
20423
20424__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20425vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
20426 uint16x4_t __c, const int __lane)
20427{
20428 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20429}
20430
20431__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20432vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
20433 uint32x2_t __c, const int __lane)
20434{
20435 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20436}
20437
20438/* vmls_laneq */
20439
20440__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20441vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
20442 float32x4_t __c, const int __lane)
20443{
20444 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20445}
20446
20447__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20448vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
20449 int16x8_t __c, const int __lane)
20450{
20451 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20452}
20453
20454__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20455vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
20456 int32x4_t __c, const int __lane)
20457{
20458 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20459}
20460
20461__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20462vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
20463 uint16x8_t __c, const int __lane)
20464{
20465 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20466}
20467
20468__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20469vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
20470 uint32x4_t __c, const int __lane)
20471{
20472 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20473}
20474
20475/* vmlsq_lane */
20476
20477__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20478vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
20479 float32x2_t __c, const int __lane)
20480{
20481 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
20482}
20483
20484__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20485vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
20486 int16x4_t __c, const int __lane)
20487{
20488 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
20489}
20490
20491__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20492vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
20493 int32x2_t __c, const int __lane)
20494{
20495 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
20496}
20497
20498__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20499vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
20500 uint16x4_t __c, const int __lane)
20501{
20502 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
20503}
20504
20505__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20506vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
20507 uint32x2_t __c, const int __lane)
20508{
20509 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
20510}
20511
20512/* vmlsq_laneq */
20513
20514__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20515vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
20516 float32x4_t __c, const int __lane)
20517{
20518 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
20519}
20520
20521__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20522vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
20523 int16x8_t __c, const int __lane)
20524{
20525 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
20526}
20527
20528__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20529vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
20530 int32x4_t __c, const int __lane)
20531{
20532 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
20533}
20534__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20535vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
20536 uint16x8_t __c, const int __lane)
20537{
20538 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
20539}
20540
20541__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20542vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
20543 uint32x4_t __c, const int __lane)
20544{
20545 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
20546}
20547
20548/* vmov_n_ */
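/* The vmov_n_ and vmovq_n_ intrinsics broadcast one scalar into every
   lane; as defined below they are essentially aliases for the
   corresponding vdup_n_ / vdupq_n_ intrinsics (the single-lane 64-bit
   types are returned directly).  Sketch:

     int16x8_t all_sevens = vmovq_n_s16 (7);   // {7, 7, 7, 7, 7, 7, 7, 7}
*/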
20549
20550__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20551vmov_n_f32 (float32_t __a)
20552{
20553 return vdup_n_f32 (__a);
20554}
20555
20556__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20557vmov_n_f64 (float64_t __a)
20558{
20559 return __a;
20560}
20561
20562__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20563vmov_n_p8 (poly8_t __a)
20564{
20565 return vdup_n_p8 (__a);
20566}
20567
20568__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20569vmov_n_p16 (poly16_t __a)
20570{
20571 return vdup_n_p16 (__a);
20572}
20573
20574__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20575vmov_n_s8 (int8_t __a)
20576{
20577 return vdup_n_s8 (__a);
20578}
20579
20580__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20581vmov_n_s16 (int16_t __a)
20582{
20583 return vdup_n_s16 (__a);
20584}
20585
20586__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20587vmov_n_s32 (int32_t __a)
20588{
20589 return vdup_n_s32 (__a);
20590}
20591
20592__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20593vmov_n_s64 (int64_t __a)
20594{
20595 return __a;
20596}
20597
20598__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20599vmov_n_u8 (uint8_t __a)
20600{
20601 return vdup_n_u8 (__a);
20602}
20603
20604__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20605vmov_n_u16 (uint16_t __a)
20606{
20607 return vdup_n_u16 (__a);
20608}
20609
20610__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20611vmov_n_u32 (uint32_t __a)
20612{
20613 return vdup_n_u32 (__a);
20614}
20615
20616__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20617vmov_n_u64 (uint64_t __a)
20618{
20619 return __a;
20620}
20621
20622__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20623vmovq_n_f32 (float32_t __a)
20624{
20625 return vdupq_n_f32 (__a);
20626}
20627
20628__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20629vmovq_n_f64 (float64_t __a)
20630{
20631 return vdupq_n_f64 (__a);
20632}
20633
20634__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20635vmovq_n_p8 (poly8_t __a)
20636{
20637 return vdupq_n_p8 (__a);
20638}
20639
20640__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20641vmovq_n_p16 (poly16_t __a)
20642{
20643 return vdupq_n_p16 (__a);
20644}
20645
20646__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20647vmovq_n_s8 (int8_t __a)
20648{
20649 return vdupq_n_s8 (__a);
20650}
20651
20652__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20653vmovq_n_s16 (int16_t __a)
20654{
20655 return vdupq_n_s16 (__a);
20656}
20657
20658__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20659vmovq_n_s32 (int32_t __a)
20660{
20661 return vdupq_n_s32 (__a);
20662}
20663
20664__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20665vmovq_n_s64 (int64_t __a)
20666{
20667 return vdupq_n_s64 (__a);
20668}
20669
20670__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20671vmovq_n_u8 (uint8_t __a)
20672{
20673 return vdupq_n_u8 (__a);
20674}
20675
20676__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20677vmovq_n_u16 (uint16_t __a)
20678{
20679 return vdupq_n_u16 (__a);
20680}
20681
20682__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20683vmovq_n_u32 (uint32_t __a)
20684{
20685 return vdupq_n_u32 (__a);
20686}
20687
20688__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20689vmovq_n_u64 (uint64_t __a)
20690{
20691 return vdupq_n_u64 (__a);
20692}
20693
20694/* vmul_lane */
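/* vmul_lane and the variants that follow multiply a vector by one
   selected element of another vector: the q prefix widens the first
   operand to 128 bits and the laneq suffix takes the scalar from a
   128-bit vector, giving the four combinations vmul_lane, vmul_laneq,
   vmulq_lane and vmulq_laneq.  Sketch with hypothetical names:

     // scale all eight halfwords of v by factors[3]
     int16x8_t scaled = vmulq_lane_s16 (v, factors, 3);
*/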
20695
20696__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20697vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
20698{
20699 return __a * __aarch64_vget_lane_f32 (__b, __lane);
20700}
20701
20702__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20703vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
20704{
20705 return __a * __b;
20706}
20707
20708__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20709vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
20710{
20711 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20712}
20713
20714__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20715vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
20716{
20717 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20718}
20719
20720__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20721vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
20722{
20723 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20724}
20725
20726__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20727vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
20728{
20729 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20730}
20731
20732/* vmul_laneq */
20733
20734__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20735vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
20736{
20737 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20738}
20739
20740__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20741vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
20742{
20743 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20744}
20745
20746__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20747vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
20748{
20749 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20750}
20751
20752__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20753vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
20754{
20755 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20756}
20757
20758__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20759vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
20760{
20761 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20762}
20763
20764__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20765vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
20766{
20767 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20768}
20769
20770/* vmulq_lane */
20771
20772__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20773vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
20774{
20775 return __a * __aarch64_vget_lane_f32 (__b, __lane);
20776}
20777
20778__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20779vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
20780{
20781 return __a * __b;
20782}
20783
20784__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20785vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
20786{
20787 return __a * __aarch64_vget_lane_s16 (__b, __lane);
20788}
20789
20790__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20791vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
20792{
20793 return __a * __aarch64_vget_lane_s32 (__b, __lane);
20794}
20795
20796__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20797vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
20798{
20799 return __a * __aarch64_vget_lane_u16 (__b, __lane);
20800}
20801
20802__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20803vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
20804{
20805 return __a * __aarch64_vget_lane_u32 (__b, __lane);
20806}
20807
20808/* vmulq_laneq */
20809
20810__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20811vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
20812{
20813 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
20814}
20815
20816__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20817vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
20818{
20819 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
20820}
20821
20822__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20823vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
20824{
20825 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
20826}
20827
20828__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20829vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
20830{
20831 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
20832}
20833
20834__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20835vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
20836{
20837 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
20838}
20839
20840__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20841vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
20842{
20843 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
20844}
20845
20846/* vneg */
20847
20848__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20849vneg_f32 (float32x2_t __a)
20850{
20851 return -__a;
20852}
20853
20854__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
20855vneg_f64 (float64x1_t __a)
20856{
20857 return -__a;
20858}
20859
20860__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20861vneg_s8 (int8x8_t __a)
20862{
20863 return -__a;
20864}
20865
20866__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20867vneg_s16 (int16x4_t __a)
20868{
20869 return -__a;
20870}
20871
20872__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20873vneg_s32 (int32x2_t __a)
20874{
20875 return -__a;
20876}
20877
20878__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20879vneg_s64 (int64x1_t __a)
20880{
20881 return -__a;
20882}
20883
20884__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20885vnegq_f32 (float32x4_t __a)
20886{
20887 return -__a;
20888}
20889
20890__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20891vnegq_f64 (float64x2_t __a)
20892{
20893 return -__a;
20894}
20895
20896__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20897vnegq_s8 (int8x16_t __a)
20898{
20899 return -__a;
20900}
20901
20902__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20903vnegq_s16 (int16x8_t __a)
20904{
20905 return -__a;
20906}
20907
20908__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20909vnegq_s32 (int32x4_t __a)
20910{
20911 return -__a;
20912}
20913
20914__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20915vnegq_s64 (int64x2_t __a)
20916{
20917 return -__a;
20918}
20919
20920/* vqabs */
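/* Saturating absolute value: unlike a plain absolute value, the most
   negative input does not wrap but saturates to the type maximum, e.g.
   (illustrative) vqabsb_s8 (-128) returns 127.  */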
20921
20922__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20923vqabsq_s64 (int64x2_t __a)
20924{
20925 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
20926}
20927
20928__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20929vqabsb_s8 (int8x1_t __a)
20930{
20931 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
20932}
20933
20934__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20935vqabsh_s16 (int16x1_t __a)
20936{
20937 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
20938}
20939
20940__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20941vqabss_s32 (int32x1_t __a)
20942{
20943 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
20944}
20945
20946/* vqadd */
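/* Saturating addition: results that would overflow are clamped to the
   type's minimum/maximum instead of wrapping.  Illustrative example:

     int8x8_t r = vqadd_s8 (vdup_n_s8 (100), vdup_n_s8 (100));

   every lane of r is 127 rather than the wrapped value -56.  */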
20947
20948__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20949vqaddb_s8 (int8x1_t __a, int8x1_t __b)
20950{
20951 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
20952}
20953
20954__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20955vqaddh_s16 (int16x1_t __a, int16x1_t __b)
20956{
20957 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
20958}
20959
20960__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20961vqadds_s32 (int32x1_t __a, int32x1_t __b)
20962{
20963 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
20964}
20965
20966__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20967vqaddd_s64 (int64x1_t __a, int64x1_t __b)
20968{
20969 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
20970}
20971
20972__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20973vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
20974{
20975 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
20976}
20977
20978__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20979vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
20980{
20981 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
20982}
20983
20984__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20985vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
20986{
20987 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
20988}
20989
20990__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20991vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
20992{
20993 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
20994}
20995
20996/* vqdmlal */
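/* Signed saturating doubling multiply-accumulate long: each result element
   is __a[i] + saturate (2 * __b[i] * __c[i]), with the products widened
   (16->32 or 32->64 bits).  The _high forms read the upper half of the
   128-bit inputs, _lane/_laneq select one element of the third operand,
   and _n uses a scalar.  Illustrative example:

     int32x4_t r = vqdmlal_s16 (vdupq_n_s32 (0), vdup_n_s16 (3),
				vdup_n_s16 (4));

   every lane of r is 2 * 3 * 4 = 24.  */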
20997
20998__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20999vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21000{
21001 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21002}
21003
21004__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21005vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21006{
21007 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21008}
21009
21010__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21011vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
21012                       int const __d)
21013{
21014 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21015}
21016
21017__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21018vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21019 int const __d)
21020{
21021 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21022}
21023
21024__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21025vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21026{
21027 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21028}
21029
21030__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21031vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21032{
21033 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
21034}
21035
21036__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21037vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21038{
21039 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21040}
21041
21042__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21043vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21044{
21045 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21046}
21047
21048__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21049vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21050{
21051 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21052}
21053
21054__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21055vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21056{
21057 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21058}
21059
21060__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21061vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
21062                       int const __d)
21063{
21064 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21065}
21066
21067__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21068vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21069 int const __d)
21070{
21071 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21072}
21073
21074__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21075vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21076{
21077 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21078}
21079
21080__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21081vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21082{
21083 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
21084}
21085
21086__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21087vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21088{
21089 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21090}
21091
21092__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21093vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21094{
21095 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21096}
21097
21098__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21099vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21100{
21101 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21102}
21103
21104__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21105vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
21106{
21107 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21108}
21109
21110__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21111vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21112{
21113 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21114}
21115
21116__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21117vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
21118{
21119 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21120}
21121
21122/* vqdmlsl */
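/* Signed saturating doubling multiply-subtract long: the mirror image of
   vqdmlal, computing __a[i] - saturate (2 * __b[i] * __c[i]) with the same
   widening and the same _high/_lane/_laneq/_n variants.  Illustrative
   example:

     int32x4_t r = vqdmlsl_s16 (vdupq_n_s32 (100), vdup_n_s16 (3),
				vdup_n_s16 (4));

   every lane of r is 100 - 24 = 76.  */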
21123
21124__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21125vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21126{
21127 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21128}
21129
21130__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21131vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21132{
21133 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21134}
21135
21136__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21137vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
21138                       int const __d)
21139{
21140 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21141}
21142
21143__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21144vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21145 int const __d)
21146{
21147 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21148}
21149
21150__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21151vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21152{
21153 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21154}
21155
21156__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21157vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21158{
21159 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
21160}
21161
21162__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21163vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21164{
21165 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21166}
21167
21168__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21169vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21170{
21171 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21172}
21173
21174__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21175vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21176{
21177 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21178}
21179
21180__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21181vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21182{
21183 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21184}
21185
21186__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21187vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
21188                       int const __d)
21189{
21190 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21191}
21192
21193__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21194vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21195 int const __d)
21196{
21197 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21198}
21199
21200__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21201vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21202{
21203 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21204}
21205
21206__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21207vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21208{
21209 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
21210}
21211
21212__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21213vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21214{
21215 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21216}
21217
21218__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21219vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21220{
21221 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21222}
21223
21224__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21225vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21226{
21227 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21228}
21229
21230__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21231vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
21232{
21233 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21234}
21235
21236__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21237vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21238{
21239 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21240}
21241
21242__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21243vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
21244{
21245 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21246}
21247
21248/* vqdmulh */
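/* Signed saturating doubling multiply returning the high half: each result
   element is (2 * __a[i] * __b[i]) >> N with saturation, where N is the
   element width in bits; this is the usual Q15/Q31 fixed-point multiply.
   Illustrative example: vqdmulhh_s16 (16384, 16384) is 8192, i.e.
   0.5 * 0.5 = 0.25 in Q15.  */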
21249
21250__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21251vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21252{
21253 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21254}
21255
21256__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21257vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21258{
21259 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21260}
21261
21262__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21263vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21264{
21265 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21266}
21267
21268__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21269vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21270{
21271 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21272}
21273
21274__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21275vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21276{
21277 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21278}
21279
21280__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21281vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
21282{
21283 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21284}
21285
21286__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21287vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21288{
21289 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21290}
21291
21292__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21293vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
21294{
21295 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21296}
21297
21298/* vqdmull */
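/* Signed saturating doubling multiply long: each result element is
   saturate (2 * __a[i] * __b[i]) widened to twice the element width.  The
   only case that actually saturates is INT_MIN * INT_MIN for the element
   type, e.g. (illustrative) vqdmullh_s16 (-32768, -32768) returns
   INT32_MAX.  */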
21299
21300__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21301vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21302{
21303 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21304}
21305
21306__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21307vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21308{
21309 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21310}
21311
21312__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21313vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
21314{
21315 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b, __c);
21316}
21317
21318__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21319vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21320{
21321 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b, __c);
21322}
21323
21324__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21325vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21326{
21327 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21328}
21329
21330__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21331vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21332{
21333 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
21334}
21335
21336__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21337vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21338{
21339 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21340}
21341
21342__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21343vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21344{
21345 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21346}
21347
21348__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21349vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21350{
21351 return __builtin_aarch64_sqdmullv2si (__a, __b);
21352}
21353
21354__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21355vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21356{
21357 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21358}
21359
21360__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21361vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
21362{
21363 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21364}
21365
21366__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21367vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21368{
21369 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21370}
21371
21372__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21373vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21374{
21375 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21376}
21377
21378__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21379vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21380{
21381 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
21382}
21383
21384__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21385vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21386{
21387 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21388}
21389
21390__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21391vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21392{
21393 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21394}
21395
21396__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21397vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21398{
21399 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21400}
21401
21402__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21403vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
21404{
21405 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21406}
21407
21408__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21409vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21410{
21411 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21412}
21413
21414__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21415vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
21416{
21417 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21418}
21419
21420/* vqmovn */
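/* Saturating narrow: each element is clamped to the range of the type of
   half the width and then truncated.  Illustrative example:

     int8x8_t r = vqmovn_s16 (vdupq_n_s16 (1000));

   every lane of r is 127.  */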
21421
21422__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21423vqmovn_s16 (int16x8_t __a)
21424{
21425 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21426}
21427
21428__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21429vqmovn_s32 (int32x4_t __a)
21430{
21431 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21432}
21433
21434__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21435vqmovn_s64 (int64x2_t __a)
21436{
21437 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21438}
21439
21440__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21441vqmovn_u16 (uint16x8_t __a)
21442{
21443 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21444}
21445
21446__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21447vqmovn_u32 (uint32x4_t __a)
21448{
21449 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21450}
21451
21452__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21453vqmovn_u64 (uint64x2_t __a)
21454{
21455 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21456}
21457
21458__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21459vqmovnh_s16 (int16x1_t __a)
21460{
21461 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21462}
21463
21464__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21465vqmovns_s32 (int32x1_t __a)
21466{
21467 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21468}
21469
21470__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21471vqmovnd_s64 (int64x1_t __a)
21472{
21473 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21474}
21475
21476__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21477vqmovnh_u16 (uint16x1_t __a)
21478{
21479 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21480}
21481
21482__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21483vqmovns_u32 (uint32x1_t __a)
21484{
21485 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21486}
21487
21488__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21489vqmovnd_u64 (uint64x1_t __a)
21490{
21491 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21492}
21493
21494/* vqmovun */
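/* Saturating narrow, signed to unsigned: negative inputs clamp to 0 and
   inputs above the unsigned half-width maximum clamp to that maximum,
   e.g. (illustrative) vqmovun_s16 applied to lanes holding -5 and 300
   produces 0 and 255 respectively.  */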
21495
21496__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21497vqmovun_s16 (int16x8_t __a)
21498{
21499 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21500}
21501
21502__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21503vqmovun_s32 (int32x4_t __a)
21504{
21505 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21506}
21507
21508__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21509vqmovun_s64 (int64x2_t __a)
21510{
21511 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21512}
21513
21514__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21515vqmovunh_s16 (int16x1_t __a)
21516{
21517 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21518}
21519
21520__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21521vqmovuns_s32 (int32x1_t __a)
21522{
21523 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21524}
21525
21526__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21527vqmovund_s64 (int64x1_t __a)
21528{
21529 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21530}
21531
21532/* vqneg */
21533
21534__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21535vqnegq_s64 (int64x2_t __a)
21536{
21537 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21538}
21539
21540__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21541vqnegb_s8 (int8x1_t __a)
21542{
21543 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21544}
21545
21546__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21547vqnegh_s16 (int16x1_t __a)
21548{
21549 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
21550}
21551
21552__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21553vqnegs_s32 (int32x1_t __a)
21554{
21555 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
21556}
21557
21558/* vqrdmulh */
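/* Rounding variant of vqdmulh: a rounding constant of 1 << (N - 1) is
   added to the doubled product before the high half is taken, so
   (illustrative) vqrdmulhh_s16 (0x4000, 0x0001) is 1 whereas
   vqdmulhh_s16 (0x4000, 0x0001) is 0.  */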
21559
21560__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21561vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21562{
21563 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
21564}
21565
21566__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21567vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21568{
21569 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
21570}
21571
21572__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21573vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21574{
21575 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
21576}
21577
21578__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21579vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21580{
21581 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
21582}
21583
21584__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21585vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21586{
21587 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
21588}
21589
21590__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21591vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
21592{
21593 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
21594}
21595
21596__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21597vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21598{
21599 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
21600}
21601
21602__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21603vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
21604{
21605 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
21606}
21607
21608/* vqrshl */
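/* Saturating rounding shift left: each element of the first operand is
   shifted by the corresponding signed element of the second operand;
   negative shift counts shift right with rounding, and left shifts that
   overflow saturate.  Note that the unsigned forms still take signed
   shift vectors.  Illustrative example:

     int16x4_t r = vqrshl_s16 (vdup_n_s16 (7), vdup_n_s16 (-1));

   every lane of r is (7 + 1) >> 1 = 4.  */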
21609
21610__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21611vqrshl_s8 (int8x8_t __a, int8x8_t __b)
21612{
21613 return __builtin_aarch64_sqrshlv8qi (__a, __b);
21614}
21615
21616__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21617vqrshl_s16 (int16x4_t __a, int16x4_t __b)
21618{
21619 return __builtin_aarch64_sqrshlv4hi (__a, __b);
21620}
21621
21622__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21623vqrshl_s32 (int32x2_t __a, int32x2_t __b)
21624{
21625 return __builtin_aarch64_sqrshlv2si (__a, __b);
21626}
21627
21628__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21629vqrshl_s64 (int64x1_t __a, int64x1_t __b)
21630{
21631 return __builtin_aarch64_sqrshldi (__a, __b);
21632}
21633
21634__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21635vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
21636{
21637 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
21638}
21639
21640__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21641vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
21642{
21643 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
21644}
21645
21646__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21647vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
21648{
21649 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
21650}
21651
21652__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21653vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
21654{
21655 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
21656}
21657
21658__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21659vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
21660{
21661 return __builtin_aarch64_sqrshlv16qi (__a, __b);
21662}
21663
21664__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21665vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
21666{
21667 return __builtin_aarch64_sqrshlv8hi (__a, __b);
21668}
21669
21670__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21671vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
21672{
21673 return __builtin_aarch64_sqrshlv4si (__a, __b);
21674}
21675
21676__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21677vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
21678{
21679 return __builtin_aarch64_sqrshlv2di (__a, __b);
21680}
21681
21682__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21683vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21684{
21685 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
21686}
21687
21688__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21689vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21690{
21691 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
21692}
21693
21694__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21695vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21696{
21697 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
21698}
21699
21700__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21701vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21702{
21703 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
21704}
21705
21706__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21707vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
21708{
21709 return __builtin_aarch64_sqrshlqi (__a, __b);
21710}
21711
21712__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21713vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
21714{
21715 return __builtin_aarch64_sqrshlhi (__a, __b);
21716}
21717
21718__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21719vqrshls_s32 (int32x1_t __a, int32x1_t __b)
21720{
21721 return __builtin_aarch64_sqrshlsi (__a, __b);
21722}
21723
21724__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21725vqrshld_s64 (int64x1_t __a, int64x1_t __b)
21726{
21727 return __builtin_aarch64_sqrshldi (__a, __b);
21728}
21729
21730__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21731vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21732{
21733 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
21734}
21735
21736__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21737vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
21738{
21739 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
21740}
21741
21742__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21743vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
21744{
21745 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
21746}
21747
21748__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21749vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
21750{
21751 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
21752}
21753
21754/* vqrshrn */
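/* Saturating rounding shift right narrow (by immediate): each element is
   rounded-right-shifted by __b and then saturated to half the width;
   vqrshrun below is the signed-to-unsigned form.  Illustrative example:

     int8x8_t r = vqrshrn_n_s16 (vdupq_n_s16 (200), 4);

   every lane of r is (200 + 8) >> 4 = 13.  */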
21755
21756__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21757vqrshrn_n_s16 (int16x8_t __a, const int __b)
21758{
21759 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
21760}
21761
21762__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21763vqrshrn_n_s32 (int32x4_t __a, const int __b)
21764{
21765 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
21766}
21767
21768__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21769vqrshrn_n_s64 (int64x2_t __a, const int __b)
21770{
21771 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
21772}
21773
21774__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21775vqrshrn_n_u16 (uint16x8_t __a, const int __b)
21776{
21777 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
21778}
21779
21780__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21781vqrshrn_n_u32 (uint32x4_t __a, const int __b)
21782{
21783 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
21784}
21785
21786__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21787vqrshrn_n_u64 (uint64x2_t __a, const int __b)
21788{
21789 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
21790}
21791
21792__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21793vqrshrnh_n_s16 (int16x1_t __a, const int __b)
21794{
21795 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
21796}
21797
21798__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21799vqrshrns_n_s32 (int32x1_t __a, const int __b)
21800{
21801 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
21802}
21803
21804__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21805vqrshrnd_n_s64 (int64x1_t __a, const int __b)
21806{
21807 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
21808}
21809
21810__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21811vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
21812{
21813 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
21814}
21815
21816__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21817vqrshrns_n_u32 (uint32x1_t __a, const int __b)
21818{
21819 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
21820}
21821
21822__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21823vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
21824{
21825 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
21826}
21827
21828/* vqrshrun */
21829
21830__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21831vqrshrun_n_s16 (int16x8_t __a, const int __b)
21832{
21833 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
21834}
21835
21836__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21837vqrshrun_n_s32 (int32x4_t __a, const int __b)
21838{
21839 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
21840}
21841
21842__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21843vqrshrun_n_s64 (int64x2_t __a, const int __b)
21844{
21845 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
21846}
21847
21848__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21849vqrshrunh_n_s16 (int16x1_t __a, const int __b)
21850{
21851 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
21852}
21853
21854__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21855vqrshruns_n_s32 (int32x1_t __a, const int __b)
21856{
21857 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
21858}
21859
21860__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21861vqrshrund_n_s64 (int64x1_t __a, const int __b)
21862{
21863 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
21864}
21865
21866/* vqshl */
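/* Saturating shift left: the register forms shift each element by the
   corresponding signed element of the second operand (negative counts
   shift right, without rounding); the _n forms further below shift by an
   immediate.  Left shifts that leave the type's range saturate, e.g.
   (illustrative) every lane of vqshl_n_s8 (vdup_n_s8 (64), 2) is 127.  */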
21867
21868__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21869vqshl_s8 (int8x8_t __a, int8x8_t __b)
21870{
21871 return __builtin_aarch64_sqshlv8qi (__a, __b);
21872}
21873
21874__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21875vqshl_s16 (int16x4_t __a, int16x4_t __b)
21876{
21877 return __builtin_aarch64_sqshlv4hi (__a, __b);
21878}
21879
21880__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21881vqshl_s32 (int32x2_t __a, int32x2_t __b)
21882{
21883 return __builtin_aarch64_sqshlv2si (__a, __b);
21884}
21885
21886__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21887vqshl_s64 (int64x1_t __a, int64x1_t __b)
21888{
21889 return __builtin_aarch64_sqshldi (__a, __b);
21890}
21891
21892__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21893vqshl_u8 (uint8x8_t __a, int8x8_t __b)
21894{
21895 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
21896}
21897
21898__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21899vqshl_u16 (uint16x4_t __a, int16x4_t __b)
21900{
21901 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
21902}
21903
21904__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21905vqshl_u32 (uint32x2_t __a, int32x2_t __b)
21906{
21907 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
21908}
21909
21910__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21911vqshl_u64 (uint64x1_t __a, int64x1_t __b)
21912{
21913 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
21914}
21915
21916__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21917vqshlq_s8 (int8x16_t __a, int8x16_t __b)
21918{
21919 return __builtin_aarch64_sqshlv16qi (__a, __b);
21920}
21921
21922__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21923vqshlq_s16 (int16x8_t __a, int16x8_t __b)
21924{
21925 return __builtin_aarch64_sqshlv8hi (__a, __b);
21926}
21927
21928__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21929vqshlq_s32 (int32x4_t __a, int32x4_t __b)
21930{
21931 return __builtin_aarch64_sqshlv4si (__a, __b);
21932}
21933
21934__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21935vqshlq_s64 (int64x2_t __a, int64x2_t __b)
21936{
21937 return __builtin_aarch64_sqshlv2di (__a, __b);
21938}
21939
21940__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21941vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
21942{
21943 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
21944}
21945
21946__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21947vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
21948{
21949 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
21950}
21951
21952__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21953vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
21954{
21955 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
21956}
21957
21958__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21959vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
21960{
21961 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
21962}
21963
21964__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21965vqshlb_s8 (int8x1_t __a, int8x1_t __b)
21966{
21967 return __builtin_aarch64_sqshlqi (__a, __b);
21968}
21969
21970__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21971vqshlh_s16 (int16x1_t __a, int16x1_t __b)
21972{
21973 return __builtin_aarch64_sqshlhi (__a, __b);
21974}
21975
21976__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21977vqshls_s32 (int32x1_t __a, int32x1_t __b)
21978{
21979 return __builtin_aarch64_sqshlsi (__a, __b);
21980}
21981
21982__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21983vqshld_s64 (int64x1_t __a, int64x1_t __b)
21984{
21985 return __builtin_aarch64_sqshldi (__a, __b);
21986}
21987
21988__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21989vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
21990{
21991 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
21992}
21993
21994__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21995vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
21996{
21997 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
21998}
21999
22000__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22001vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22002{
22003 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22004}
22005
22006__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22007vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22008{
22009 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22010}
22011
22012__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22013vqshl_n_s8 (int8x8_t __a, const int __b)
22014{
22015 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22016}
22017
22018__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22019vqshl_n_s16 (int16x4_t __a, const int __b)
22020{
22021 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22022}
22023
22024__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22025vqshl_n_s32 (int32x2_t __a, const int __b)
22026{
22027 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22028}
22029
22030__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22031vqshl_n_s64 (int64x1_t __a, const int __b)
22032{
22033 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22034}
22035
22036__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22037vqshl_n_u8 (uint8x8_t __a, const int __b)
22038{
22039 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22040}
22041
22042__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22043vqshl_n_u16 (uint16x4_t __a, const int __b)
22044{
22045 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22046}
22047
22048__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22049vqshl_n_u32 (uint32x2_t __a, const int __b)
22050{
22051 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22052}
22053
22054__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22055vqshl_n_u64 (uint64x1_t __a, const int __b)
22056{
22057 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22058}
22059
22060__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22061vqshlq_n_s8 (int8x16_t __a, const int __b)
22062{
22063 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22064}
22065
22066__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22067vqshlq_n_s16 (int16x8_t __a, const int __b)
22068{
22069 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22070}
22071
22072__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22073vqshlq_n_s32 (int32x4_t __a, const int __b)
22074{
22075 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22076}
22077
22078__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22079vqshlq_n_s64 (int64x2_t __a, const int __b)
22080{
22081 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22082}
22083
22084__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22085vqshlq_n_u8 (uint8x16_t __a, const int __b)
22086{
22087 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22088}
22089
22090__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22091vqshlq_n_u16 (uint16x8_t __a, const int __b)
22092{
22093 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22094}
22095
22096__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22097vqshlq_n_u32 (uint32x4_t __a, const int __b)
22098{
22099 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22100}
22101
22102__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22103vqshlq_n_u64 (uint64x2_t __a, const int __b)
22104{
22105 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22106}
22107
22108__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22109vqshlb_n_s8 (int8x1_t __a, const int __b)
22110{
22111 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22112}
22113
22114__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22115vqshlh_n_s16 (int16x1_t __a, const int __b)
22116{
22117 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22118}
22119
22120__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22121vqshls_n_s32 (int32x1_t __a, const int __b)
22122{
22123 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22124}
22125
22126__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22127vqshld_n_s64 (int64x1_t __a, const int __b)
22128{
22129 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22130}
22131
22132__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22133vqshlb_n_u8 (uint8x1_t __a, const int __b)
22134{
22135 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22136}
22137
22138__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22139vqshlh_n_u16 (uint16x1_t __a, const int __b)
22140{
22141 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22142}
22143
22144__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22145vqshls_n_u32 (uint32x1_t __a, const int __b)
22146{
22147 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22148}
22149
22150__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22151vqshld_n_u64 (uint64x1_t __a, const int __b)
22152{
22153 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22154}
22155
22156/* vqshlu */
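/* Saturating shift left unsigned (by immediate): the input is signed but
   the result saturates to the unsigned range of the same width, so
   negative inputs become 0, e.g. (illustrative) vqshlu_n_s8 applied to
   lanes holding -1 and 50 with a shift of 2 produces 0 and 200.  */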
22157
22158__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22159vqshlu_n_s8 (int8x8_t __a, const int __b)
22160{
22161 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22162}
22163
22164__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22165vqshlu_n_s16 (int16x4_t __a, const int __b)
22166{
22167 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22168}
22169
22170__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22171vqshlu_n_s32 (int32x2_t __a, const int __b)
22172{
22173 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22174}
22175
22176__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22177vqshlu_n_s64 (int64x1_t __a, const int __b)
22178{
22179 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22180}
22181
22182__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22183vqshluq_n_s8 (int8x16_t __a, const int __b)
22184{
22185 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22186}
22187
22188__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22189vqshluq_n_s16 (int16x8_t __a, const int __b)
22190{
22191 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22192}
22193
22194__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22195vqshluq_n_s32 (int32x4_t __a, const int __b)
22196{
22197 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22198}
22199
22200__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22201vqshluq_n_s64 (int64x2_t __a, const int __b)
22202{
22203 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22204}
22205
22206__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22207vqshlub_n_s8 (int8x1_t __a, const int __b)
22208{
22209 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22210}
22211
22212__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22213vqshluh_n_s16 (int16x1_t __a, const int __b)
22214{
22215 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22216}
22217
22218__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22219vqshlus_n_s32 (int32x1_t __a, const int __b)
22220{
22221 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22222}
22223
22224__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22225vqshlud_n_s64 (int64x1_t __a, const int __b)
22226{
22227 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22228}
22229
22230/* vqshrn */
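/* Saturating shift right narrow (by immediate): like vqrshrn but without
   rounding; vqshrun below is the signed-to-unsigned form.  Illustrative
   example:

     int8x8_t r = vqshrn_n_s16 (vdupq_n_s16 (200), 4);

   every lane of r is 200 >> 4 = 12.  */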
22231
22232__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22233vqshrn_n_s16 (int16x8_t __a, const int __b)
22234{
22235 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22236}
22237
22238__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22239vqshrn_n_s32 (int32x4_t __a, const int __b)
22240{
22241 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22242}
22243
22244__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22245vqshrn_n_s64 (int64x2_t __a, const int __b)
22246{
22247 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22248}
22249
22250__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22251vqshrn_n_u16 (uint16x8_t __a, const int __b)
22252{
22253 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22254}
22255
22256__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22257vqshrn_n_u32 (uint32x4_t __a, const int __b)
22258{
22259 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22260}
22261
22262__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22263vqshrn_n_u64 (uint64x2_t __a, const int __b)
22264{
22265 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22266}
22267
22268__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22269vqshrnh_n_s16 (int16x1_t __a, const int __b)
22270{
22271 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22272}
22273
22274__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22275vqshrns_n_s32 (int32x1_t __a, const int __b)
22276{
22277 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22278}
22279
22280__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22281vqshrnd_n_s64 (int64x1_t __a, const int __b)
22282{
22283 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22284}
22285
22286__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22287vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22288{
22289 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22290}
22291
22292__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22293vqshrns_n_u32 (uint32x1_t __a, const int __b)
22294{
22295 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22296}
22297
22298__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22299vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22300{
22301 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22302}
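
/* Rough usage sketch for vqshrn (illustrative only; vdupq_n_u16 is
   defined earlier in this file): shift right by a constant, then narrow
   with saturation.  Halving 16-bit sums down to 8 bits, clamping
   anything above 255:

     uint16x8_t sum = vdupq_n_u16 (600);
     uint8x8_t  out = vqshrn_n_u16 (sum, 1);   600 >> 1 = 300, clamps to 255  */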
22303
22304/* vqshrun */
22305
22306__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22307vqshrun_n_s16 (int16x8_t __a, const int __b)
22308{
22309 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22310}
22311
22312__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22313vqshrun_n_s32 (int32x4_t __a, const int __b)
22314{
22315 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22316}
22317
22318__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22319vqshrun_n_s64 (int64x2_t __a, const int __b)
22320{
22321 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22322}
22323
22324__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22325vqshrunh_n_s16 (int16x1_t __a, const int __b)
22326{
22327 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22328}
22329
22330__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22331vqshruns_n_s32 (int32x1_t __a, const int __b)
22332{
22333 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22334}
22335
22336__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22337vqshrund_n_s64 (int64x1_t __a, const int __b)
22338{
22339 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22340}
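
/* Rough usage sketch for vqshrun (illustrative only; vdupq_n_s16 is
   defined earlier in this file): narrow a signed intermediate to an
   unsigned result, clamping negative lanes to zero.  Typical when
   converting signed filter output back to 8-bit pixels:

     int16x8_t t   = vdupq_n_s16 (-40);
     uint8x8_t pix = vqshrun_n_s16 (t, 4);   negative lanes become 0  */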
22341
22342/* vqsub */
22343
22344__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22345vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22346{
22347 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22348}
22349
22350__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22351vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22352{
22353 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22354}
22355
22356__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22357vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22358{
22359 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22360}
22361
22362__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22363vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22364{
22365 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22366}
22367
22368__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22369vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22370{
22371 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22372}
22373
22374__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22375vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22376{
22377 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22378}
22379
22380__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22381vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22382{
22383 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22384}
22385
22386__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22387vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22388{
22389 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22390}
22391
22392/* vrecpe */
22393
22394__extension__ static __inline float32_t __attribute__ ((__always_inline__))
22395vrecpes_f32 (float32_t __a)
22396{
22397 return __builtin_aarch64_frecpesf (__a);
22398}
22399
22400__extension__ static __inline float64_t __attribute__ ((__always_inline__))
22401vrecped_f64 (float64_t __a)
22402{
22403 return __builtin_aarch64_frecpedf (__a);
22404}
22405
22406__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22407vrecpe_f32 (float32x2_t __a)
22408{
22409 return __builtin_aarch64_frecpev2sf (__a);
22410}
22411
22412__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22413vrecpeq_f32 (float32x4_t __a)
22414{
22415 return __builtin_aarch64_frecpev4sf (__a);
22416}
22417
22418__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22419vrecpeq_f64 (float64x2_t __a)
22420{
22421 return __builtin_aarch64_frecpev2df (__a);
22422}
22423
22424/* vrecps */
22425
22426__extension__ static __inline float32_t __attribute__ ((__always_inline__))
22427vrecpss_f32 (float32_t __a, float32_t __b)
22428{
22429 return __builtin_aarch64_frecpssf (__a, __b);
22430}
22431
22432__extension__ static __inline float64_t __attribute__ ((__always_inline__))
22433vrecpsd_f64 (float64_t __a, float64_t __b)
22434{
22435 return __builtin_aarch64_frecpsdf (__a, __b);
22436}
22437
22438__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22439vrecps_f32 (float32x2_t __a, float32x2_t __b)
22440{
22441 return __builtin_aarch64_frecpsv2sf (__a, __b);
22442}
22443
22444__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22445vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22446{
22447 return __builtin_aarch64_frecpsv4sf (__a, __b);
22448}
22449
22450__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22451vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22452{
22453 return __builtin_aarch64_frecpsv2df (__a, __b);
22454}
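
/* Rough usage sketch for vrecpe/vrecps (illustrative only; d is a
   placeholder float32x4_t and vmulq_f32 is defined earlier in this
   file).  FRECPS computes (2 - a * b), so each Newton-Raphson step
   roughly doubles the number of accurate bits in the estimate of 1/d:

     float32x4_t x = vrecpeq_f32 (d);           coarse estimate of 1/d
     x = vmulq_f32 (x, vrecpsq_f32 (d, x));     first refinement step
     x = vmulq_f32 (x, vrecpsq_f32 (d, x));     second refinement step  */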
22455
22456/* vrecpx */
22457
22458__extension__ static __inline float32_t __attribute__ ((__always_inline__))
22459vrecpxs_f32 (float32_t __a)
22460{
22461 return __builtin_aarch64_frecpxsf (__a);
22462}
22463
22464__extension__ static __inline float64_t __attribute__ ((__always_inline__))
22465vrecpxd_f64 (float64_t __a)
22466{
22467 return __builtin_aarch64_frecpxdf (__a);
22468}
22469
22470/* vrnd */
22471
22472__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22473vrnd_f32 (float32x2_t __a)
22474{
22475 return __builtin_aarch64_btruncv2sf (__a);
22476}
22477
22478__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22479vrndq_f32 (float32x4_t __a)
22480{
22481 return __builtin_aarch64_btruncv4sf (__a);
22482}
22483
22484__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22485vrndq_f64 (float64x2_t __a)
22486{
22487 return __builtin_aarch64_btruncv2df (__a);
22488}
22489
22490/* vrnda */
22491
22492__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22493vrnda_f32 (float32x2_t __a)
22494{
22495 return __builtin_aarch64_roundv2sf (__a);
22496}
22497
22498__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22499vrndaq_f32 (float32x4_t __a)
22500{
22501 return __builtin_aarch64_roundv4sf (__a);
22502}
22503
22504__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22505vrndaq_f64 (float64x2_t __a)
22506{
22507 return __builtin_aarch64_roundv2df (__a);
22508}
22509
22510/* vrndi */
22511
22512__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22513vrndi_f32 (float32x2_t __a)
22514{
22515 return __builtin_aarch64_nearbyintv2sf (__a);
22516}
22517
22518__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22519vrndiq_f32 (float32x4_t __a)
22520{
22521 return __builtin_aarch64_nearbyintv4sf (__a);
22522}
22523
22524__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22525vrndiq_f64 (float64x2_t __a)
22526{
22527 return __builtin_aarch64_nearbyintv2df (__a);
22528}
22529
22530/* vrndm */
22531
22532__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22533vrndm_f32 (float32x2_t __a)
22534{
22535 return __builtin_aarch64_floorv2sf (__a);
22536}
22537
22538__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22539vrndmq_f32 (float32x4_t __a)
22540{
22541 return __builtin_aarch64_floorv4sf (__a);
22542}
22543
22544__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22545vrndmq_f64 (float64x2_t __a)
22546{
22547 return __builtin_aarch64_floorv2df (__a);
22548}
22549
22550/* vrndn */
22551
22552__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22553vrndn_f32 (float32x2_t __a)
22554{
22555 return __builtin_aarch64_frintnv2sf (__a);
22556}

22557__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22558vrndnq_f32 (float32x4_t __a)
22559{
22560 return __builtin_aarch64_frintnv4sf (__a);
22561}
22562
22563__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22564vrndnq_f64 (float64x2_t __a)
22565{
22566 return __builtin_aarch64_frintnv2df (__a);
22567}
22568
22569/* vrndp */
22570
22571__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22572vrndp_f32 (float32x2_t __a)
22573{
22574 return __builtin_aarch64_ceilv2sf (__a);
22575}
22576
22577__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22578vrndpq_f32 (float32x4_t __a)
22579{
22580 return __builtin_aarch64_ceilv4sf (__a);
22581}
22582
22583__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22584vrndpq_f64 (float64x2_t __a)
22585{
22586 return __builtin_aarch64_ceilv2df (__a);
22587}
22588
22589/* vrndx */
22590
22591__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22592vrndx_f32 (float32x2_t __a)
22593{
22594 return __builtin_aarch64_rintv2sf (__a);
22595}
22596
22597__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22598vrndxq_f32 (float32x4_t __a)
22599{
22600 return __builtin_aarch64_rintv4sf (__a);
22601}
22602
22603__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22604vrndxq_f64 (float64x2_t __a)
22605{
22606 return __builtin_aarch64_rintv2df (__a);
22607}
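
/* Summary of the vrnd* variants above, as implied by the builtins they
   map to:
     vrnd   - toward zero (truncate)
     vrnda  - to nearest, ties away from zero
     vrndi  - current FPCR rounding mode, no inexact exception
     vrndm  - toward minus infinity (floor)
     vrndn  - to nearest, ties to even
     vrndp  - toward plus infinity (ceil)
     vrndx  - current FPCR rounding mode, may raise inexact  */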
22608
22609/* vrshl */
22610
22611__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22612vrshl_s8 (int8x8_t __a, int8x8_t __b)
22613{
22614 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
22615}
22616
22617__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22618vrshl_s16 (int16x4_t __a, int16x4_t __b)
22619{
22620 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
22621}
22622
22623__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22624vrshl_s32 (int32x2_t __a, int32x2_t __b)
22625{
22626 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
22627}
22628
22629__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22630vrshl_s64 (int64x1_t __a, int64x1_t __b)
22631{
22632 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22633}
22634
22635__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22636vrshl_u8 (uint8x8_t __a, int8x8_t __b)
22637{
22638 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
22639}
22640
22641__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22642vrshl_u16 (uint16x4_t __a, int16x4_t __b)
22643{
22644 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
22645}
22646
22647__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22648vrshl_u32 (uint32x2_t __a, int32x2_t __b)
22649{
22650 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
22651}
22652
22653__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22654vrshl_u64 (uint64x1_t __a, int64x1_t __b)
22655{
22656 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
22657}
22658
22659__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22660vrshlq_s8 (int8x16_t __a, int8x16_t __b)
22661{
22662 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
22663}
22664
22665__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22666vrshlq_s16 (int16x8_t __a, int16x8_t __b)
22667{
22668 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
22669}
22670
22671__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22672vrshlq_s32 (int32x4_t __a, int32x4_t __b)
22673{
22674 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
22675}
22676
22677__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22678vrshlq_s64 (int64x2_t __a, int64x2_t __b)
22679{
22680 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
22681}
22682
22683__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22684vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22685{
22686 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
22687}
22688
22689__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22690vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22691{
22692 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
22693}
22694
22695__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22696vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22697{
22698 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
22699}
22700
22701__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22702vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22703{
22704 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
22705}
22706
22707__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22708vrshld_s64 (int64x1_t __a, int64x1_t __b)
22709{
22710 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
22711}
22712
22713__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22714vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22715{
22716 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
22717}
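
/* Rough usage sketch for vrshl (illustrative only; vdup_n_s16 is
   defined earlier in this file).  The shift count is signed and
   per-lane, so a negative count gives a rounding shift right:

     int16x4_t v = vdup_n_s16 (5);
     int16x4_t r = vrshl_s16 (v, vdup_n_s16 (-1));   (5 + 1) >> 1 = 3  */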
22718
22719/* vrshr */
22720
22721__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22722vrshr_n_s8 (int8x8_t __a, const int __b)
22723{
22724 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
22725}
22726
22727__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22728vrshr_n_s16 (int16x4_t __a, const int __b)
22729{
22730 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
22731}
22732
22733__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22734vrshr_n_s32 (int32x2_t __a, const int __b)
22735{
22736 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
22737}
22738
22739__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22740vrshr_n_s64 (int64x1_t __a, const int __b)
22741{
22742 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22743}
22744
22745__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22746vrshr_n_u8 (uint8x8_t __a, const int __b)
22747{
22748 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
22749}
22750
22751__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22752vrshr_n_u16 (uint16x4_t __a, const int __b)
22753{
22754 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
22755}
22756
22757__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22758vrshr_n_u32 (uint32x2_t __a, const int __b)
22759{
22760 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
22761}
22762
22763__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22764vrshr_n_u64 (uint64x1_t __a, const int __b)
22765{
22766 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
22767}
22768
22769__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22770vrshrq_n_s8 (int8x16_t __a, const int __b)
22771{
22772 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
22773}
22774
22775__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22776vrshrq_n_s16 (int16x8_t __a, const int __b)
22777{
22778 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
22779}
22780
22781__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22782vrshrq_n_s32 (int32x4_t __a, const int __b)
22783{
22784 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
22785}
22786
22787__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22788vrshrq_n_s64 (int64x2_t __a, const int __b)
22789{
22790 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
22791}
22792
22793__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22794vrshrq_n_u8 (uint8x16_t __a, const int __b)
22795{
22796 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
22797}
22798
22799__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22800vrshrq_n_u16 (uint16x8_t __a, const int __b)
22801{
22802 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
22803}
22804
22805__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22806vrshrq_n_u32 (uint32x4_t __a, const int __b)
22807{
22808 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
22809}
22810
22811__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22812vrshrq_n_u64 (uint64x2_t __a, const int __b)
22813{
22814 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
22815}
22816
22817__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22818vrshrd_n_s64 (int64x1_t __a, const int __b)
22819{
22820 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
22821}
22822
22823__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22824vrshrd_n_u64 (uint64x1_t __a, const int __b)
22825{
22826 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
22827}
22828
22829/* vrsra */
22830
22831__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22832vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22833{
22834 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
22835}
22836
22837__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22838vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22839{
22840 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
22841}
22842
22843__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22844vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22845{
22846 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
22847}
22848
22849__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22850vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22851{
22852 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22853}
22854
22855__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22856vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22857{
22858 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
22859 (int8x8_t) __b, __c);
22860}
22861
22862__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22863vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22864{
22865 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
22866 (int16x4_t) __b, __c);
22867}
22868
22869__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22870vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22871{
22872 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
22873 (int32x2_t) __b, __c);
22874}
22875
22876__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22877vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22878{
22879 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
22880 (int64x1_t) __b, __c);
22881}
22882
22883__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22884vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22885{
22886 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
22887}
22888
22889__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22890vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22891{
22892 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
22893}
22894
22895__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22896vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22897{
22898 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
22899}
22900
22901__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22902vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22903{
22904 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
22905}
22906
22907__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22908vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22909{
22910 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
22911 (int8x16_t) __b, __c);
22912}
22913
22914__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22915vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22916{
22917 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
22918 (int16x8_t) __b, __c);
22919}
22920
22921__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22922vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22923{
22924 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
22925 (int32x4_t) __b, __c);
22926}
22927
22928__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22929vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22930{
22931 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
22932 (int64x2_t) __b, __c);
22933}
22934
22935__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22936vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22937{
22938 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
22939}
22940
22941__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22942vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22943{
22944 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
22945}
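
/* Rough usage sketch for vrsra (illustrative only; vdupq_n_u16 is
   defined earlier in this file): shift right with rounding, then
   accumulate into the first operand, which saves a separate add when
   folding in a scaled correction term:

     uint16x8_t acc = vdupq_n_u16 (100);
     uint16x8_t err = vdupq_n_u16 (9);
     acc = vrsraq_n_u16 (acc, err, 2);   acc += (9 + 2) >> 2, giving 102  */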
22946
22947#ifdef __ARM_FEATURE_CRYPTO
22948
22949/* vsha1 */
22950
22951static __inline uint32x4_t
22952vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22953{
22954 return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
22955}

22956static __inline uint32x4_t
22957vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22958{
22959 return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
22960}

22961static __inline uint32x4_t
22962vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
22963{
22964 return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
22965}
22966
22967static __inline uint32_t
22968vsha1h_u32 (uint32_t hash_e)
22969{
22970 return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
22971}
22972
22973static __inline uint32x4_t
22974vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
22975{
22976 return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
22977}
22978
22979static __inline uint32x4_t
22980vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
22981{
22982 return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
22983}
22984
/* vsha256 */

22985static __inline uint32x4_t
22986vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
22987{
22988 return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
22989}
22990
22991static __inline uint32x4_t
22992vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
22993{
22994 return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
22995}
22996
22997static __inline uint32x4_t
22998vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
22999{
23000 return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
23001}
23002
23003static __inline uint32x4_t
23004vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
23005{
23006 return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
23007}
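
/* Rough usage sketch for the SHA-256 intrinsics above (illustrative
   only; abcd, efgh, w and k are placeholder uint32x4_t values and
   vaddq_u32 is defined earlier in this file).  One four-round step of
   the compression function, given four schedule words w and the
   matching round constants k:

     uint32x4_t wk  = vaddq_u32 (w, k);
     uint32x4_t tmp = abcd;
     abcd = vsha256hq_u32 (abcd, efgh, wk);
     efgh = vsha256h2q_u32 (efgh, tmp, wk);  */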
23008
/* vmull_p64 */

23009static __inline poly128_t
23010vmull_p64 (poly64_t a, poly64_t b)
23011{
23012 return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
23014}
23015
23016static __inline poly128_t
23017vmull_high_p64 (poly64x2_t a, poly64x2_t b)
23018{
23019 return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
23020}
23021
23022#endif
23023
23024/* vshl */
23025
23026__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23027vshl_n_s8 (int8x8_t __a, const int __b)
23028{
23029 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23030}
23031
23032__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23033vshl_n_s16 (int16x4_t __a, const int __b)
23034{
23035 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23036}
23037
23038__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23039vshl_n_s32 (int32x2_t __a, const int __b)
23040{
23041 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23042}
23043
23044__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23045vshl_n_s64 (int64x1_t __a, const int __b)
23046{
23047 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23048}
23049
23050__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23051vshl_n_u8 (uint8x8_t __a, const int __b)
23052{
23053 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23054}
23055
23056__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23057vshl_n_u16 (uint16x4_t __a, const int __b)
23058{
23059 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23060}
23061
23062__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23063vshl_n_u32 (uint32x2_t __a, const int __b)
23064{
23065 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23066}
23067
23068__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23069vshl_n_u64 (uint64x1_t __a, const int __b)
23070{
23071 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23072}
23073
23074__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23075vshlq_n_s8 (int8x16_t __a, const int __b)
23076{
23077 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23078}
23079
23080__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23081vshlq_n_s16 (int16x8_t __a, const int __b)
23082{
23083 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23084}
23085
23086__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23087vshlq_n_s32 (int32x4_t __a, const int __b)
23088{
23089 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23090}
23091
23092__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23093vshlq_n_s64 (int64x2_t __a, const int __b)
23094{
23095 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23096}
23097
23098__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23099vshlq_n_u8 (uint8x16_t __a, const int __b)
23100{
23101 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23102}
23103
23104__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23105vshlq_n_u16 (uint16x8_t __a, const int __b)
23106{
23107 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23108}
23109
23110__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23111vshlq_n_u32 (uint32x4_t __a, const int __b)
23112{
23113 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23114}
23115
23116__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23117vshlq_n_u64 (uint64x2_t __a, const int __b)
23118{
23119 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23120}
23121
23122__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23123vshld_n_s64 (int64x1_t __a, const int __b)
23124{
23125 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23126}
23127
23128__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23129vshld_n_u64 (uint64x1_t __a, const int __b)
23130{
23131 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23132}
23133
23134__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23135vshl_s8 (int8x8_t __a, int8x8_t __b)
23136{
23137 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23138}
23139
23140__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23141vshl_s16 (int16x4_t __a, int16x4_t __b)
23142{
23143 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23144}
23145
23146__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23147vshl_s32 (int32x2_t __a, int32x2_t __b)
23148{
23149 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23150}
23151
23152__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23153vshl_s64 (int64x1_t __a, int64x1_t __b)
23154{
23155 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23156}
23157
23158__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23159vshl_u8 (uint8x8_t __a, int8x8_t __b)
23160{
23161 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23162}
23163
23164__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23165vshl_u16 (uint16x4_t __a, int16x4_t __b)
23166{
23167 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23168}
23169
23170__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23171vshl_u32 (uint32x2_t __a, int32x2_t __b)
23172{
23173 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23174}
23175
23176__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23177vshl_u64 (uint64x1_t __a, int64x1_t __b)
23178{
23179 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23180}
23181
23182__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23183vshlq_s8 (int8x16_t __a, int8x16_t __b)
23184{
23185 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23186}
23187
23188__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23189vshlq_s16 (int16x8_t __a, int16x8_t __b)
23190{
23191 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23192}
23193
23194__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23195vshlq_s32 (int32x4_t __a, int32x4_t __b)
23196{
23197 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23198}
23199
23200__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23201vshlq_s64 (int64x2_t __a, int64x2_t __b)
23202{
23203 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23204}
23205
23206__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23207vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23208{
23209 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23210}
23211
23212__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23213vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23214{
23215 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23216}
23217
23218__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23219vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23220{
23221 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23222}
23223
23224__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23225vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23226{
23227 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23228}
23229
23230__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23231vshld_s64 (int64x1_t __a, int64x1_t __b)
23232{
23233 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23234}
23235
23236__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23237vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23238{
23239 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23240}
23241
23242__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23243vshll_high_n_s8 (int8x16_t __a, const int __b)
23244{
23245 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23246}
23247
23248__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23249vshll_high_n_s16 (int16x8_t __a, const int __b)
23250{
23251 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23252}
23253
23254__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23255vshll_high_n_s32 (int32x4_t __a, const int __b)
23256{
23257 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23258}
23259
23260__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23261vshll_high_n_u8 (uint8x16_t __a, const int __b)
23262{
23263 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23264}
23265
23266__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23267vshll_high_n_u16 (uint16x8_t __a, const int __b)
23268{
23269 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23270}
23271
23272__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23273vshll_high_n_u32 (uint32x4_t __a, const int __b)
23274{
23275 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23276}
23277
23278__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23279vshll_n_s8 (int8x8_t __a, const int __b)
23280{
23281 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23282}
23283
23284__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23285vshll_n_s16 (int16x4_t __a, const int __b)
23286{
23287 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23288}
23289
23290__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23291vshll_n_s32 (int32x2_t __a, const int __b)
23292{
23293 return __builtin_aarch64_sshll_nv2si (__a, __b);
23294}
23295
23296__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23297vshll_n_u8 (uint8x8_t __a, const int __b)
23298{
23299 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23300}
23301
23302__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23303vshll_n_u16 (uint16x4_t __a, const int __b)
23304{
23305 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23306}
23307
23308__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23309vshll_n_u32 (uint32x2_t __a, const int __b)
23310{
23311 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23312}
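
/* Rough usage sketch for vshll (illustrative only; vdup_n_u8 is defined
   earlier in this file): widen each lane to twice its width while
   shifting left, e.g. promoting 8-bit samples into 16-bit headroom:

     uint8x8_t  b8  = vdup_n_u8 (3);
     uint16x8_t b16 = vshll_n_u8 (b8, 4);   each lane becomes 3 << 4 = 48  */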
23313
23314/* vshr */
23315
23316__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23317vshr_n_s8 (int8x8_t __a, const int __b)
23318{
23319 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23320}
23321
23322__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23323vshr_n_s16 (int16x4_t __a, const int __b)
23324{
23325 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23326}
23327
23328__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23329vshr_n_s32 (int32x2_t __a, const int __b)
23330{
23331 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23332}
23333
23334__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23335vshr_n_s64 (int64x1_t __a, const int __b)
23336{
23337 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
23338}
23339
23340__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23341vshr_n_u8 (uint8x8_t __a, const int __b)
23342{
23343 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23344}
23345
23346__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23347vshr_n_u16 (uint16x4_t __a, const int __b)
23348{
23349 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23350}
23351
23352__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23353vshr_n_u32 (uint32x2_t __a, const int __b)
23354{
23355 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23356}
23357
23358__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23359vshr_n_u64 (uint64x1_t __a, const int __b)
23360{
23361 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
23362}
23363
23364__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23365vshrq_n_s8 (int8x16_t __a, const int __b)
23366{
23367 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23368}
23369
23370__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23371vshrq_n_s16 (int16x8_t __a, const int __b)
23372{
23373 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23374}
23375
23376__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23377vshrq_n_s32 (int32x4_t __a, const int __b)
23378{
23379 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23380}
23381
23382__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23383vshrq_n_s64 (int64x2_t __a, const int __b)
23384{
23385 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23386}
23387
23388__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23389vshrq_n_u8 (uint8x16_t __a, const int __b)
23390{
23391 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23392}
23393
23394__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23395vshrq_n_u16 (uint16x8_t __a, const int __b)
23396{
23397 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23398}
23399
23400__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23401vshrq_n_u32 (uint32x4_t __a, const int __b)
23402{
23403 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23404}
23405
23406__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23407vshrq_n_u64 (uint64x2_t __a, const int __b)
23408{
23409 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23410}
23411
23412__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23413vshrd_n_s64 (int64x1_t __a, const int __b)
23414{
23415 return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
23416}
23417
23418__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
23419vshrd_n_u64 (uint64_t __a, const int __b)
23420{
23421 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
23422}
23423
23424/* vsli */
23425
23426__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23427vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23428{
23429 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23430}
23431
23432__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23433vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23434{
23435 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23436}
23437
23438__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23439vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23440{
23441 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23442}
23443
23444__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23445vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23446{
23447 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23448}
23449
23450__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23451vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23452{
23453 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23454 (int8x8_t) __b, __c);
23455}
23456
23457__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23458vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23459{
23460 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23461 (int16x4_t) __b, __c);
23462}
23463
23464__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23465vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23466{
23467 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23468 (int32x2_t) __b, __c);
23469}
23470
23471__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23472vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23473{
23474 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23475 (int64x1_t) __b, __c);
23476}
23477
23478__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23479vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23480{
23481 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23482}
23483
23484__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23485vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23486{
23487 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23488}
23489
23490__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23491vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23492{
23493 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23494}
23495
23496__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23497vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23498{
23499 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23500}
23501
23502__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23503vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23504{
23505 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23506 (int8x16_t) __b, __c);
23507}
23508
23509__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23510vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23511{
23512 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23513 (int16x8_t) __b, __c);
23514}
23515
23516__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23517vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23518{
23519 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23520 (int32x4_t) __b, __c);
23521}
23522
23523__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23524vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23525{
23526 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23527 (int64x2_t) __b, __c);
23528}
23529
23530__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23531vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23532{
23533 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23534}
23535
23536__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23537vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23538{
23539 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23540}
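
/* Rough usage sketch for vsli (illustrative only; r5, g6 and b5 are
   placeholder uint16x4_t values holding 5-, 6- and 5-bit fields): shift
   the second operand left and insert it, keeping the low bits of the
   first operand.  For example, packing RGB565 lanes:

     uint16x4_t px = vsli_n_u16 (b5, g6, 5);   g6 << 5 over the low 5 bits of b5
     px = vsli_n_u16 (px, r5, 11);             r5 << 11 over the low 11 bits

   vsri, later in this file, is the mirror image: it shifts the second
   operand right and inserts it, keeping the high bits of the first.  */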
23541
23542/* vsqadd */
23543
23544__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23545vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23546{
23547 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23548 (int8x8_t) __b);
23549}
23550
23551__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23552vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23553{
23554 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23555 (int16x4_t) __b);
23556}
23557
23558__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23559vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23560{
23561 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23562 (int32x2_t) __b);
23563}
23564
23565__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23566vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23567{
23568 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23569}
23570
23571__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23572vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23573{
23574 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23575 (int8x16_t) __b);
23576}
23577
23578__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23579vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23580{
23581 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23582 (int16x8_t) __b);
23583}
23584
23585__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23586vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23587{
23588 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23589 (int32x4_t) __b);
23590}
23591
23592__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23593vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23594{
23595 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23596 (int64x2_t) __b);
23597}
23598
23599__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23600vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23601{
23602 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23603}
23604
23605__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23606vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23607{
23608 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23609}
23610
23611__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23612vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23613{
23614 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23615}
23616
23617__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23618vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23619{
23620 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23621}
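
/* Rough usage sketch for vsqadd (illustrative only; vdup_n_u8 and
   vdup_n_s8 are defined earlier in this file): add a signed adjustment
   to an unsigned value with unsigned saturation, e.g. applying a signed
   brightness delta to 8-bit pixels without wrap-around:

     uint8x8_t pix   = vdup_n_u8 (250);
     int8x8_t  delta = vdup_n_s8 (20);
     uint8x8_t out   = vsqadd_u8 (pix, delta);   250 + 20 saturates to 255  */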
23622
23623/* vsqrt */
23624__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23625vsqrt_f32 (float32x2_t a)
23626{
23627 return __builtin_aarch64_sqrtv2sf (a);
23628}
23629
23630__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23631vsqrtq_f32 (float32x4_t a)
23632{
23633 return __builtin_aarch64_sqrtv4sf (a);
23634}
23635
23636__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23637vsqrtq_f64 (float64x2_t a)
23638{
23639 return __builtin_aarch64_sqrtv2df (a);
23640}
23641
23642/* vsra */
23643
23644__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23645vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23646{
23647 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23648}
23649
23650__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23651vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23652{
23653 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23654}
23655
23656__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23657vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23658{
23659 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23660}
23661
23662__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23663vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23664{
23665 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23666}
23667
23668__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23669vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23670{
23671 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23672 (int8x8_t) __b, __c);
23673}
23674
23675__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23676vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23677{
23678 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
23679 (int16x4_t) __b, __c);
23680}
23681
23682__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23683vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23684{
23685 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
23686 (int32x2_t) __b, __c);
23687}
23688
23689__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23690vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23691{
23692 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
23693 (int64x1_t) __b, __c);
23694}
23695
23696__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23697vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23698{
23699 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
23700}
23701
23702__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23703vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23704{
23705 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
23706}
23707
23708__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23709vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23710{
23711 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
23712}
23713
23714__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23715vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23716{
23717 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
23718}
23719
23720__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23721vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23722{
23723 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
23724 (int8x16_t) __b, __c);
23725}
23726
23727__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23728vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23729{
23730 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
23731 (int16x8_t) __b, __c);
23732}
23733
23734__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23735vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23736{
23737 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
23738 (int32x4_t) __b, __c);
23739}
23740
23741__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23742vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23743{
23744 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
23745 (int64x2_t) __b, __c);
23746}
23747
23748__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23749vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23750{
23751 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23752}
23753
23754__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23755vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23756{
23757 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
23758}
23759
23760/* vsri */
23761
23762__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23763vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23764{
23765 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
23766}
23767
23768__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23769vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23770{
23771 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
23772}
23773
23774__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23775vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23776{
23777 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
23778}
23779
23780__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23781vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23782{
23783 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23784}
23785
23786__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23787vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23788{
23789 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
23790 (int8x8_t) __b, __c);
23791}
23792
23793__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23794vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23795{
23796 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
23797 (int16x4_t) __b, __c);
23798}
23799
23800__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23801vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23802{
23803 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
23804 (int32x2_t) __b, __c);
23805}
23806
23807__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23808vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23809{
23810 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
23811 (int64x1_t) __b, __c);
23812}
23813
23814__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23815vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23816{
23817 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
23818}
23819
23820__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23821vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23822{
23823 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
23824}
23825
23826__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23827vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23828{
23829 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
23830}
23831
23832__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23833vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23834{
23835 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
23836}
23837
23838__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23839vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23840{
23841 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
23842 (int8x16_t) __b, __c);
23843}
23844
23845__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23846vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23847{
23848 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
23849 (int16x8_t) __b, __c);
23850}
23851
23852__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23853vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23854{
23855 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
23856 (int32x4_t) __b, __c);
23857}
23858
23859__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23860vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23861{
23862 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
23863 (int64x2_t) __b, __c);
23864}
23865
23866__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23867vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23868{
23869 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
23870}
23871
23872__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23873vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23874{
23875 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
23876}
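
/* Illustrative note (not part of the original header): the vsri_n family
   performs a "shift right and insert".  Each lane of the second operand is
   shifted right by the immediate and written into the corresponding lane of
   the first operand, while the top immediate-many bits of the first operand
   are preserved.  A minimal sketch, assuming hypothetical caller values:

     uint8x8_t high = vdup_n_u8 (0xA0);         // keep the top 3 bits (0xA0)
     uint8x8_t src  = vdup_n_u8 (0xFF);         // bits to insert below them
     uint8x8_t r    = vsri_n_u8 (high, src, 3); // each lane: 0xA0 | (0xFF >> 3) == 0xBF
*/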
23877
23878/* vst1 */
23879
23880__extension__ static __inline void __attribute__ ((__always_inline__))
23881vst1_f32 (float32_t *a, float32x2_t b)
23882{
23883 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
23884}
23885
23886__extension__ static __inline void __attribute__ ((__always_inline__))
23887vst1_f64 (float64_t *a, float64x1_t b)
23888{
23889 *a = b;
23890}
23891
23892__extension__ static __inline void __attribute__ ((__always_inline__))
23893vst1_p8 (poly8_t *a, poly8x8_t b)
23894{
23895 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23896 (int8x8_t) b);
23897}
23898
23899__extension__ static __inline void __attribute__ ((__always_inline__))
23900vst1_p16 (poly16_t *a, poly16x4_t b)
23901{
23902 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23903 (int16x4_t) b);
23904}
23905
23906__extension__ static __inline void __attribute__ ((__always_inline__))
23907vst1_s8 (int8_t *a, int8x8_t b)
23908{
23909 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
23910}
23911
23912__extension__ static __inline void __attribute__ ((__always_inline__))
23913vst1_s16 (int16_t *a, int16x4_t b)
23914{
23915 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
23916}
23917
23918__extension__ static __inline void __attribute__ ((__always_inline__))
23919vst1_s32 (int32_t *a, int32x2_t b)
23920{
23921 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
23922}
23923
23924__extension__ static __inline void __attribute__ ((__always_inline__))
23925vst1_s64 (int64_t *a, int64x1_t b)
23926{
23927 *a = b;
23928}
23929
23930__extension__ static __inline void __attribute__ ((__always_inline__))
23931vst1_u8 (uint8_t *a, uint8x8_t b)
23932{
23933 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
23934 (int8x8_t) b);
23935}
23936
23937__extension__ static __inline void __attribute__ ((__always_inline__))
23938vst1_u16 (uint16_t *a, uint16x4_t b)
23939{
23940 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
23941 (int16x4_t) b);
23942}
23943
23944__extension__ static __inline void __attribute__ ((__always_inline__))
23945vst1_u32 (uint32_t *a, uint32x2_t b)
23946{
23947 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
23948 (int32x2_t) b);
23949}
23950
23951__extension__ static __inline void __attribute__ ((__always_inline__))
23952vst1_u64 (uint64_t *a, uint64x1_t b)
23953{
23954 *a = b;
23955}
23956
23957
/* vst1q */

23958vst1q_f32 (float32_t *a, float32x4_t b)
23959{
23960 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
23961}
23962
23963__extension__ static __inline void __attribute__ ((__always_inline__))
23964vst1q_f64 (float64_t *a, float64x2_t b)
23965{
23966 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
23967}
23968
23971__extension__ static __inline void __attribute__ ((__always_inline__))
23972vst1q_p8 (poly8_t *a, poly8x16_t b)
23973{
23974 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23975 (int8x16_t) b);
23976}
23977
23978__extension__ static __inline void __attribute__ ((__always_inline__))
23979vst1q_p16 (poly16_t *a, poly16x8_t b)
23980{
23981 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23982 (int16x8_t) b);
23983}
23984
23985__extension__ static __inline void __attribute__ ((__always_inline__))
23986vst1q_s8 (int8_t *a, int8x16_t b)
23987{
23988 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
23989}
23990
23991__extension__ static __inline void __attribute__ ((__always_inline__))
23992vst1q_s16 (int16_t *a, int16x8_t b)
23993{
23994 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
23995}
23996
23997__extension__ static __inline void __attribute__ ((__always_inline__))
23998vst1q_s32 (int32_t *a, int32x4_t b)
23999{
24000 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24001}
24002
24003__extension__ static __inline void __attribute__ ((__always_inline__))
24004vst1q_s64 (int64_t *a, int64x2_t b)
24005{
24006 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24007}
24008
24009__extension__ static __inline void __attribute__ ((__always_inline__))
24010vst1q_u8 (uint8_t *a, uint8x16_t b)
24011{
24012 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24013 (int8x16_t) b);
24014}
24015
24016__extension__ static __inline void __attribute__ ((__always_inline__))
24017vst1q_u16 (uint16_t *a, uint16x8_t b)
24018{
24019 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24020 (int16x8_t) b);
24021}
24022
24023__extension__ static __inline void __attribute__ ((__always_inline__))
24024vst1q_u32 (uint32_t *a, uint32x4_t b)
24025{
24026 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24027 (int32x4_t) b);
24028}
24029
24030__extension__ static __inline void __attribute__ ((__always_inline__))
24031vst1q_u64 (uint64_t *a, uint64x2_t b)
24032{
24033 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24034 (int64x2_t) b);
24035}
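
/* Illustrative example (assumption: a caller-supplied float buffer, not part
   of this header): vst1/vst1q store every lane of a vector to consecutive
   memory, the counterpart of vld1/vld1q.

     float buf[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
     float32x4_t v = vld1q_f32 (buf);      // load four lanes
     v = vaddq_f32 (v, v);                 // double each lane
     vst1q_f32 (buf, v);                   // store back: { 2, 4, 6, 8 }
*/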
24036
24037/* vstn */
24038
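/* Implementation note (added for clarity, not in the original source): the
   64-bit (D-register) vst2/vst3/vst4 variants below widen each input to a
   128-bit vector with vcombine_* and a zeroed upper half, pack the results
   into a Q-register tuple with the set_qreg* builtins, and then call an
   st2/st3/st4 builtin that writes only the interleaved low halves; the zero
   padding is never stored.  */
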
24039__extension__ static __inline void __attribute__ ((__always_inline__))
24040vst2_s64 (int64_t * __a, int64x1x2_t val)
24041{
24042 __builtin_aarch64_simd_oi __o;
24043 int64x2x2_t temp;
24044 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24045 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24046 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24047 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24048 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24049}
24050
24051__extension__ static __inline void __attribute__ ((__always_inline__))
24052vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24053{
24054 __builtin_aarch64_simd_oi __o;
24055 uint64x2x2_t temp;
24056 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24057 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24058 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24059 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24060 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24061}
24062
24063__extension__ static __inline void __attribute__ ((__always_inline__))
24064vst2_f64 (float64_t * __a, float64x1x2_t val)
24065{
24066 __builtin_aarch64_simd_oi __o;
24067 float64x2x2_t temp;
24068 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24069 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24070 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24071 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24072 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24073}
24074
24075__extension__ static __inline void __attribute__ ((__always_inline__))
24076vst2_s8 (int8_t * __a, int8x8x2_t val)
24077{
24078 __builtin_aarch64_simd_oi __o;
24079 int8x16x2_t temp;
24080 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24081 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24082 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24083 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24084 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24085}
24086
24087__extension__ static __inline void __attribute__ ((__always_inline__))
24088vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24089{
24090 __builtin_aarch64_simd_oi __o;
24091 poly8x16x2_t temp;
24092 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24093 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24094 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24095 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24096 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24097}
24098
24099__extension__ static __inline void __attribute__ ((__always_inline__))
24100vst2_s16 (int16_t * __a, int16x4x2_t val)
24101{
24102 __builtin_aarch64_simd_oi __o;
24103 int16x8x2_t temp;
24104 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24105 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24106 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24107 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24108 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24109}
24110
24111__extension__ static __inline void __attribute__ ((__always_inline__))
24112vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24113{
24114 __builtin_aarch64_simd_oi __o;
24115 poly16x8x2_t temp;
24116 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24117 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24118 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24119 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24120 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24121}
24122
24123__extension__ static __inline void __attribute__ ((__always_inline__))
24124vst2_s32 (int32_t * __a, int32x2x2_t val)
24125{
24126 __builtin_aarch64_simd_oi __o;
24127 int32x4x2_t temp;
24128 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24129 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24130 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24131 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24132 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24133}
24134
24135__extension__ static __inline void __attribute__ ((__always_inline__))
24136vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24137{
24138 __builtin_aarch64_simd_oi __o;
24139 uint8x16x2_t temp;
24140 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24141 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24142 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24143 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24144 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24145}
24146
24147__extension__ static __inline void __attribute__ ((__always_inline__))
24148vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24149{
24150 __builtin_aarch64_simd_oi __o;
24151 uint16x8x2_t temp;
24152 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24153 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24154 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24155 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24156 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24157}
24158
24159__extension__ static __inline void __attribute__ ((__always_inline__))
24160vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24161{
24162 __builtin_aarch64_simd_oi __o;
24163 uint32x4x2_t temp;
24164 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24165 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24166 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24167 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24168 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24169}
24170
24171__extension__ static __inline void __attribute__ ((__always_inline__))
24172vst2_f32 (float32_t * __a, float32x2x2_t val)
24173{
24174 __builtin_aarch64_simd_oi __o;
24175 float32x4x2_t temp;
24176 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24177 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24178 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24179 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24180 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24181}
24182
24183__extension__ static __inline void __attribute__ ((__always_inline__))
24184vst2q_s8 (int8_t * __a, int8x16x2_t val)
24185{
24186 __builtin_aarch64_simd_oi __o;
24187 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24188 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24189 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24190}
24191
24192__extension__ static __inline void __attribute__ ((__always_inline__))
24193vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24194{
24195 __builtin_aarch64_simd_oi __o;
24196 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24197 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24198 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24199}
24200
24201__extension__ static __inline void __attribute__ ((__always_inline__))
24202vst2q_s16 (int16_t * __a, int16x8x2_t val)
24203{
24204 __builtin_aarch64_simd_oi __o;
24205 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24206 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24207 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24208}
24209
24210__extension__ static __inline void __attribute__ ((__always_inline__))
24211vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24212{
24213 __builtin_aarch64_simd_oi __o;
24214 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24215 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24216 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24217}
24218
24219__extension__ static __inline void __attribute__ ((__always_inline__))
24220vst2q_s32 (int32_t * __a, int32x4x2_t val)
24221{
24222 __builtin_aarch64_simd_oi __o;
24223 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24224 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24225 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24226}
24227
24228__extension__ static __inline void __attribute__ ((__always_inline__))
24229vst2q_s64 (int64_t * __a, int64x2x2_t val)
24230{
24231 __builtin_aarch64_simd_oi __o;
24232 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24233 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24234 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24235}
24236
24237__extension__ static __inline void __attribute__ ((__always_inline__))
24238vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24239{
24240 __builtin_aarch64_simd_oi __o;
24241 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24242 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24243 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24244}
24245
24246__extension__ static __inline void __attribute__ ((__always_inline__))
24247vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24248{
24249 __builtin_aarch64_simd_oi __o;
24250 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24251 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24252 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24253}
24254
24255__extension__ static __inline void __attribute__ ((__always_inline__))
24256vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24257{
24258 __builtin_aarch64_simd_oi __o;
24259 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24260 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24261 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24262}
24263
24264__extension__ static __inline void __attribute__ ((__always_inline__))
24265vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24266{
24267 __builtin_aarch64_simd_oi __o;
24268 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24269 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24270 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24271}
24272
24273__extension__ static __inline void __attribute__ ((__always_inline__))
24274vst2q_f32 (float32_t * __a, float32x4x2_t val)
24275{
24276 __builtin_aarch64_simd_oi __o;
24277 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24278 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24279 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24280}
24281
24282__extension__ static __inline void __attribute__ ((__always_inline__))
24283vst2q_f64 (float64_t * __a, float64x2x2_t val)
24284{
24285 __builtin_aarch64_simd_oi __o;
24286 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24287 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24288 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24289}
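
/* Illustrative example (hypothetical buffers, not part of this header): vst2
   interleaves two vectors on store, e.g. writing separate real and imaginary
   planes back out as an interleaved complex array.

     float re[4] = { 1, 2, 3, 4 }, im[4] = { 5, 6, 7, 8 };
     float out[8];
     float32x4x2_t z = { { vld1q_f32 (re), vld1q_f32 (im) } };
     vst2q_f32 (out, z);   // out == { 1, 5, 2, 6, 3, 7, 4, 8 }
*/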
24290
24291__extension__ static __inline void __attribute__ ((__always_inline__))
24292vst3_s64 (int64_t * __a, int64x1x3_t val)
24293{
24294 __builtin_aarch64_simd_ci __o;
24295 int64x2x3_t temp;
24296 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24297 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24298 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24299 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24300 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24301 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24302 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24303}
24304
24305__extension__ static __inline void __attribute__ ((__always_inline__))
24306vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24307{
24308 __builtin_aarch64_simd_ci __o;
24309 uint64x2x3_t temp;
24310 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24311 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24312 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24313 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24314 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24315 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24316 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24317}
24318
24319__extension__ static __inline void __attribute__ ((__always_inline__))
24320vst3_f64 (float64_t * __a, float64x1x3_t val)
24321{
24322 __builtin_aarch64_simd_ci __o;
24323 float64x2x3_t temp;
24324 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24325 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24326 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24327 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24328 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24329 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24330 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24331}
24332
24333__extension__ static __inline void __attribute__ ((__always_inline__))
24334vst3_s8 (int8_t * __a, int8x8x3_t val)
24335{
24336 __builtin_aarch64_simd_ci __o;
24337 int8x16x3_t temp;
24338 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24339 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24340 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24341 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24342 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24343 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24344 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24345}
24346
24347__extension__ static __inline void __attribute__ ((__always_inline__))
24348vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24349{
24350 __builtin_aarch64_simd_ci __o;
24351 poly8x16x3_t temp;
24352 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24353 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24354 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24355 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24356 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24357 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24358 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24359}
24360
24361__extension__ static __inline void __attribute__ ((__always_inline__))
24362vst3_s16 (int16_t * __a, int16x4x3_t val)
24363{
24364 __builtin_aarch64_simd_ci __o;
24365 int16x8x3_t temp;
24366 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24367 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24368 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24369 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24370 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24371 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24372 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24373}
24374
24375__extension__ static __inline void __attribute__ ((__always_inline__))
24376vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24377{
24378 __builtin_aarch64_simd_ci __o;
24379 poly16x8x3_t temp;
24380 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24381 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24382 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24383 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24384 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24385 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24386 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24387}
24388
24389__extension__ static __inline void __attribute__ ((__always_inline__))
24390vst3_s32 (int32_t * __a, int32x2x3_t val)
24391{
24392 __builtin_aarch64_simd_ci __o;
24393 int32x4x3_t temp;
24394 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24395 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24396 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24397 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24398 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24399 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24400 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24401}
24402
24403__extension__ static __inline void __attribute__ ((__always_inline__))
24404vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24405{
24406 __builtin_aarch64_simd_ci __o;
24407 uint8x16x3_t temp;
24408 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24409 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24410 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24411 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24412 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24413 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24414 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24415}
24416
24417__extension__ static __inline void __attribute__ ((__always_inline__))
24418vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24419{
24420 __builtin_aarch64_simd_ci __o;
24421 uint16x8x3_t temp;
24422 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24423 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24424 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24425 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24426 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24427 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24428 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24429}
24430
24431__extension__ static __inline void __attribute__ ((__always_inline__))
24432vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24433{
24434 __builtin_aarch64_simd_ci __o;
24435 uint32x4x3_t temp;
24436 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24437 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24438 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24439 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24440 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24441 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24442 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24443}
24444
24445__extension__ static __inline void __attribute__ ((__always_inline__))
24446vst3_f32 (float32_t * __a, float32x2x3_t val)
24447{
24448 __builtin_aarch64_simd_ci __o;
24449 float32x4x3_t temp;
24450 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24451 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24452 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24453 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24454 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24455 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24456 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24457}
24458
24459__extension__ static __inline void __attribute__ ((__always_inline__))
24460vst3q_s8 (int8_t * __a, int8x16x3_t val)
24461{
24462 __builtin_aarch64_simd_ci __o;
24463 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24464 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24465 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24466 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24467}
24468
24469__extension__ static __inline void __attribute__ ((__always_inline__))
24470vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24471{
24472 __builtin_aarch64_simd_ci __o;
24473 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24474 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24475 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24476 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24477}
24478
24479__extension__ static __inline void __attribute__ ((__always_inline__))
24480vst3q_s16 (int16_t * __a, int16x8x3_t val)
24481{
24482 __builtin_aarch64_simd_ci __o;
24483 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24484 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24485 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24486 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24487}
24488
24489__extension__ static __inline void __attribute__ ((__always_inline__))
24490vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24491{
24492 __builtin_aarch64_simd_ci __o;
24493 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24494 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24495 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24496 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24497}
24498
24499__extension__ static __inline void __attribute__ ((__always_inline__))
24500vst3q_s32 (int32_t * __a, int32x4x3_t val)
24501{
24502 __builtin_aarch64_simd_ci __o;
24503 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24504 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24505 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24506 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24507}
24508
24509__extension__ static __inline void __attribute__ ((__always_inline__))
24510vst3q_s64 (int64_t * __a, int64x2x3_t val)
24511{
24512 __builtin_aarch64_simd_ci __o;
24513 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24514 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24515 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24516 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24517}
24518
24519__extension__ static __inline void __attribute__ ((__always_inline__))
24520vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24521{
24522 __builtin_aarch64_simd_ci __o;
24523 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24524 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24525 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24526 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24527}
24528
24529__extension__ static __inline void __attribute__ ((__always_inline__))
24530vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24531{
24532 __builtin_aarch64_simd_ci __o;
24533 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24534 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24535 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24536 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24537}
24538
24539__extension__ static __inline void __attribute__ ((__always_inline__))
24540vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24541{
24542 __builtin_aarch64_simd_ci __o;
24543 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24544 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24545 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24546 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24547}
24548
24549__extension__ static __inline void __attribute__ ((__always_inline__))
24550vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24551{
24552 __builtin_aarch64_simd_ci __o;
24553 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24554 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24555 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24556 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24557}
24558
24559__extension__ static __inline void __attribute__ ((__always_inline__))
24560vst3q_f32 (float32_t * __a, float32x4x3_t val)
24561{
24562 __builtin_aarch64_simd_ci __o;
24563 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24564 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24565 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24566 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24567}
24568
24569__extension__ static __inline void __attribute__ ((__always_inline__))
24570vst3q_f64 (float64_t * __a, float64x2x3_t val)
24571{
24572 __builtin_aarch64_simd_ci __o;
24573 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24574 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24575 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24576 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24577}
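
/* Illustrative example (hypothetical planar RGB buffers, not part of this
   header): vst3 interleaves three vectors on store, the usual way to write
   separate R, G and B planes back out as packed RGB bytes.

     uint8_t r[16], g[16], b[16];   // filled elsewhere
     uint8_t rgb[48];
     uint8x16x3_t px = { { vld1q_u8 (r), vld1q_u8 (g), vld1q_u8 (b) } };
     vst3q_u8 (rgb, px);            // rgb == R0 G0 B0 R1 G1 B1 ...
*/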
24578
24579__extension__ static __inline void __attribute__ ((__always_inline__))
24580vst4_s64 (int64_t * __a, int64x1x4_t val)
24581{
24582 __builtin_aarch64_simd_xi __o;
24583 int64x2x4_t temp;
24584 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
24585 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
24586 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
24587 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
24588 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24589 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24590 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24591 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24592 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24593}
24594
24595__extension__ static __inline void __attribute__ ((__always_inline__))
24596vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24597{
24598 __builtin_aarch64_simd_xi __o;
24599 uint64x2x4_t temp;
24600 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
24601 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
24602 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
24603 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
24604 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24605 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24606 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24607 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24608 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24609}
24610
24611__extension__ static __inline void __attribute__ ((__always_inline__))
24612vst4_f64 (float64_t * __a, float64x1x4_t val)
24613{
24614 __builtin_aarch64_simd_xi __o;
24615 float64x2x4_t temp;
24616 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
24617 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
24618 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
24619 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
24620 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24621 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24622 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24623 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24624 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24625}
24626
24627__extension__ static __inline void __attribute__ ((__always_inline__))
24628vst4_s8 (int8_t * __a, int8x8x4_t val)
24629{
24630 __builtin_aarch64_simd_xi __o;
24631 int8x16x4_t temp;
24632 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
24633 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
24634 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
24635 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
24636 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24637 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24638 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24639 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24640 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24641}
24642
24643__extension__ static __inline void __attribute__ ((__always_inline__))
24644vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24645{
24646 __builtin_aarch64_simd_xi __o;
24647 poly8x16x4_t temp;
24648 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
24649 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
24650 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
24651 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
24652 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24653 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24654 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24655 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24656 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24657}
24658
24659__extension__ static __inline void __attribute__ ((__always_inline__))
24660vst4_s16 (int16_t * __a, int16x4x4_t val)
24661{
24662 __builtin_aarch64_simd_xi __o;
24663 int16x8x4_t temp;
24664 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
24665 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
24666 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
24667 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
24668 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24669 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24670 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24671 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24672 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24673}
24674
24675__extension__ static __inline void __attribute__ ((__always_inline__))
24676vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24677{
24678 __builtin_aarch64_simd_xi __o;
24679 poly16x8x4_t temp;
24680 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
24681 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
24682 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
24683 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
24684 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24685 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24686 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24687 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24688 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24689}
24690
24691__extension__ static __inline void __attribute__ ((__always_inline__))
24692vst4_s32 (int32_t * __a, int32x2x4_t val)
24693{
24694 __builtin_aarch64_simd_xi __o;
24695 int32x4x4_t temp;
24696 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
24697 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
24698 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
24699 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
24700 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24701 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24702 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24703 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24704 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24705}
24706
24707__extension__ static __inline void __attribute__ ((__always_inline__))
24708vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24709{
24710 __builtin_aarch64_simd_xi __o;
24711 uint8x16x4_t temp;
24712 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
24713 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
24714 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
24715 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
24716 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24717 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24718 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24719 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24720 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24721}
24722
24723__extension__ static __inline void __attribute__ ((__always_inline__))
24724vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24725{
24726 __builtin_aarch64_simd_xi __o;
24727 uint16x8x4_t temp;
24728 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
24729 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
24730 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
24731 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
24732 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24733 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24734 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24735 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24736 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24737}
24738
24739__extension__ static __inline void __attribute__ ((__always_inline__))
24740vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24741{
24742 __builtin_aarch64_simd_xi __o;
24743 uint32x4x4_t temp;
24744 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
24745 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
24746 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
24747 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
24748 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24749 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24750 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24751 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24752 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24753}
24754
24755__extension__ static __inline void __attribute__ ((__always_inline__))
24756vst4_f32 (float32_t * __a, float32x2x4_t val)
24757{
24758 __builtin_aarch64_simd_xi __o;
24759 float32x4x4_t temp;
24760 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
24761 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
24762 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
24763 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
24764 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24765 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24766 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24767 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24768 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24769}
24770
24771__extension__ static __inline void __attribute__ ((__always_inline__))
24772vst4q_s8 (int8_t * __a, int8x16x4_t val)
24773{
24774 __builtin_aarch64_simd_xi __o;
24775 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24776 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24777 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24778 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24779 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24780}
24781
24782__extension__ static __inline void __attribute__ ((__always_inline__))
24783vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
24784{
24785 __builtin_aarch64_simd_xi __o;
24786 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24787 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24788 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24789 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24790 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24791}
24792
24793__extension__ static __inline void __attribute__ ((__always_inline__))
24794vst4q_s16 (int16_t * __a, int16x8x4_t val)
24795{
24796 __builtin_aarch64_simd_xi __o;
24797 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24798 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24799 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24800 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24801 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24802}
24803
24804__extension__ static __inline void __attribute__ ((__always_inline__))
24805vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
24806{
24807 __builtin_aarch64_simd_xi __o;
24808 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24809 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24810 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24811 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24812 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24813}
24814
24815__extension__ static __inline void __attribute__ ((__always_inline__))
24816vst4q_s32 (int32_t * __a, int32x4x4_t val)
24817{
24818 __builtin_aarch64_simd_xi __o;
24819 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24820 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24821 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24822 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24823 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24824}
24825
24826__extension__ static __inline void __attribute__ ((__always_inline__))
24827vst4q_s64 (int64_t * __a, int64x2x4_t val)
24828{
24829 __builtin_aarch64_simd_xi __o;
24830 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24831 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24832 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24833 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24834 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24835}
24836
24837__extension__ static __inline void __attribute__ ((__always_inline__))
24838vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
24839{
24840 __builtin_aarch64_simd_xi __o;
24841 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24842 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24843 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24844 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24845 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24846}
24847
24848__extension__ static __inline void __attribute__ ((__always_inline__))
24849vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
24850{
24851 __builtin_aarch64_simd_xi __o;
24852 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24853 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24854 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24855 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24856 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24857}
24858
24859__extension__ static __inline void __attribute__ ((__always_inline__))
24860vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
24861{
24862 __builtin_aarch64_simd_xi __o;
24863 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24864 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24865 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24866 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24867 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24868}
24869
24870__extension__ static __inline void __attribute__ ((__always_inline__))
24871vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
24872{
24873 __builtin_aarch64_simd_xi __o;
24874 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24875 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24876 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24877 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24878 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24879}
24880
24881__extension__ static __inline void __attribute__ ((__always_inline__))
24882vst4q_f32 (float32_t * __a, float32x4x4_t val)
24883{
24884 __builtin_aarch64_simd_xi __o;
24885 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
24886 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
24887 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
24888 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
24889 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24890}
24891
24892__extension__ static __inline void __attribute__ ((__always_inline__))
24893vst4q_f64 (float64_t * __a, float64x2x4_t val)
24894{
24895 __builtin_aarch64_simd_xi __o;
24896 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
24897 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
24898 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24899 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24900 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24901}
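
/* Illustrative example (hypothetical 4x4 matrix, not part of this header):
   vst4 writes lane 0 of all four vectors, then lane 1, and so on, so storing
   the four rows of a 4x4 float matrix with vst4q_f32 emits the transposed
   matrix in row-major order.

     float in[16], out[16];                 // in[] filled elsewhere, row-major
     float32x4x4_t m = { { vld1q_f32 (in +  0), vld1q_f32 (in +  4),
                           vld1q_f32 (in +  8), vld1q_f32 (in + 12) } };
     vst4q_f32 (out, m);                    // out[] now holds the transpose
*/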
24902
24903/* vsub */
24904
24905__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24906vsubd_s64 (int64x1_t __a, int64x1_t __b)
24907{
24908 return __a - __b;
24909}
24910
24911__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24912vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
24913{
24914 return __a - __b;
24915}
24916
24917/* vtbx1 */
24918
24919__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24920vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
24921{
24922 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24923 vmov_n_u8 (8));
24924 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
24925
24926 return vbsl_s8 (__mask, __tbl, __r);
24927}
24928
24929__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24930vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
24931{
24932 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24933 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
24934
24935 return vbsl_u8 (__mask, __tbl, __r);
24936}
24937
24938__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24939vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
24940{
24941 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24942 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
24943
24944 return vbsl_p8 (__mask, __tbl, __r);
24945}
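
/* Illustrative note (not part of the original header): vtbx1 is a table
   lookup with a fallback.  Indices below 8 select a byte from the table;
   indices of 8 or more leave the corresponding lane of the first argument
   unchanged, which is what the compare-against-8 mask and vbsl above
   implement (vtbx3 below uses the same scheme with a 24-entry table).

     uint8x8_t table = vcreate_u8 (0x0706050403020100ULL);
     uint8x8_t idx   = vcreate_u8 (0xFF00FF02FF04FF06ULL);  // 0xFF is out of range
     uint8x8_t def   = vdup_n_u8 (0xAA);
     uint8x8_t r     = vtbx1_u8 (def, table, idx);
     // r == { 0x06, 0xAA, 0x04, 0xAA, 0x02, 0xAA, 0x00, 0xAA }
*/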
24946
24947/* vtbx3 */
24948
24949__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24950vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
24951{
24952 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24953 vmov_n_u8 (24));
24954 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
24955
24956 return vbsl_s8 (__mask, __tbl, __r);
24957}
24958
24959__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24960vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
24961{
24962 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24963 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
24964
24965 return vbsl_u8 (__mask, __tbl, __r);
24966}
24967
24968__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24969vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
24970{
24971 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24972 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
24973
24974 return vbsl_p8 (__mask, __tbl, __r);
24975}
24976
24977/* vtrn */
24978
24979__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24980vtrn_f32 (float32x2_t a, float32x2_t b)
24981{
24982 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24983}
24984
24985__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24986vtrn_p8 (poly8x8_t a, poly8x8_t b)
24987{
24988 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
24989}
24990
24991__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24992vtrn_p16 (poly16x4_t a, poly16x4_t b)
24993{
24994 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
24995}
24996
24997__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24998vtrn_s8 (int8x8_t a, int8x8_t b)
24999{
25000 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25001}
25002
25003__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25004vtrn_s16 (int16x4_t a, int16x4_t b)
25005{
25006 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25007}
25008
25009__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25010vtrn_s32 (int32x2_t a, int32x2_t b)
25011{
25012 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25013}
25014
25015__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25016vtrn_u8 (uint8x8_t a, uint8x8_t b)
25017{
25018 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25019}
25020
25021__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25022vtrn_u16 (uint16x4_t a, uint16x4_t b)
25023{
25024 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25025}
25026
25027__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25028vtrn_u32 (uint32x2_t a, uint32x2_t b)
25029{
25030 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25031}
25032
25033__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25034vtrnq_f32 (float32x4_t a, float32x4_t b)
25035{
25036 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25037}
25038
25039__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25040vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25041{
25042 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25043}
25044
25045__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25046vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25047{
25048 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25049}
25050
25051__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25052vtrnq_s8 (int8x16_t a, int8x16_t b)
25053{
25054 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25055}
25056
25057__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25058vtrnq_s16 (int16x8_t a, int16x8_t b)
25059{
25060 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25061}
25062
25063__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25064vtrnq_s32 (int32x4_t a, int32x4_t b)
25065{
25066 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25067}
25068
25069__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25070vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25071{
25072 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25073}
25074
25075__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25076vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25077{
25078 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25079}
25080
25081__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25082vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25083{
25084 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25085}
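
/* Illustrative note (not part of the original header): each vtrn/vtrnq
   intrinsic above returns a two-vector struct, val[0] holding the even-lane
   transpose (vtrn1*) and val[1] the odd-lane transpose (vtrn2*), matching the
   two-register result of the ARMv7 VTRN instruction.

     int16x4_t a = { 0, 1, 2, 3 };
     int16x4_t b = { 4, 5, 6, 7 };
     int16x4x2_t t = vtrn_s16 (a, b);
     // t.val[0] == { 0, 4, 2, 6 }, t.val[1] == { 1, 5, 3, 7 }
*/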
25086
/* vtst */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_s64 (int64x1_t __a, int64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

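/* Illustrative usage sketch (hypothetical example code, not part of this
   header's API): vtst is a per-lane "test bits" (CMTST); each result lane
   is all ones when (a & b) is non-zero in that lane and zero otherwise.
   For instance, assuming an input vector bytes,

     uint8x8_t low_bit_set = vtst_u8 (bytes, vdup_n_u8 (0x01));

   yields 0xff in every lane whose low bit is set and 0x00 elsewhere.  The
   64-bit single-lane forms above are written directly as
   (__a & __b) ? -1ll : 0ll, since those single-lane types are plain 64-bit
   scalars in this header.  */
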
/* vuqadd */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
}

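/* Illustrative usage sketch (hypothetical example code, not part of this
   header's API): vuqadd adds an unsigned vector into a signed accumulator
   with signed saturation (the SUQADD operation behind the suqadd builtins
   used above).  For example,

     int16x8_t  acc   = vdupq_n_s16 (32000);
     uint16x8_t delta = vdupq_n_u16 (1000);
     int16x8_t  sum   = vuqaddq_s16 (acc, delta);

   saturates every lane of sum to 32767 rather than wrapping.  */
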
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
}

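/* Illustrative note (hypothetical example, not part of this header's API):
   the b/h/s/d suffixed forms above are the single-element variants of the
   same saturating accumulate, e.g.

     int8x1_t r = vuqaddb_s8 (100, 100);

   saturates r to 127 because the true sum exceeds the signed 8-bit range.  */
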
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)             \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)                  \
  {                                                                     \
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),      \
                      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};     \
  }

#define __INTERLEAVE_LIST(op)                                   \
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)        \
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)             \
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)          \
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)               \
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)            \
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)            \
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)             \
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)          \
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)          \
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)      \
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)         \
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)        \
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)           \
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)          \
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)          \
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)         \
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)        \
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)

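/* Sketch of what one __DEFINTERLEAVE entry expands to once
   __INTERLEAVE_LIST is invoked below (illustrative, not literal
   preprocessor output):

     __extension__ static __inline int8x8x2_t
     __attribute__ ((__always_inline__))
     vzip_s8 (int8x8_t a, int8x8_t b)
     {
       return (int8x8x2_t) {vzip1_s8 (a, b), vzip2_s8 (a, b)};
     }

   i.e. __INTERLEAVE_LIST (op) instantiates one such wrapper per element
   type, for both the 64-bit and the "q" 128-bit forms, pairing the op1/op2
   halves exactly as the hand-written vtrn wrappers above do.  */
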
/* vuzp */

__INTERLEAVE_LIST (uzp)

/* vzip */

__INTERLEAVE_LIST (zip)

#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE

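/* Illustrative usage sketch (hypothetical example code, not part of this
   header's API): the generated vuzp/vzip wrappers mirror vtrn.  Assuming
   8x8-bit inputs a and b,

     uint8x8x2_t z = vzip_u8 (a, b);
       z.val[0] = {a0, b0, a1, b1, a2, b2, a3, b3}   (vzip1_u8)
     uint8x8x2_t u = vuzp_u8 (a, b);
       u.val[0] = {a0, a2, a4, a6, b0, b2, b4, b6}   (vuzp1_u8)

   with val[1] holding the remaining (zip2/uzp2) half in each case.  */
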
/* End of optimal implementations in approved order. */

#undef __aarch64_vget_lane_any
#undef __aarch64_vget_lane_f32
#undef __aarch64_vget_lane_f64
#undef __aarch64_vget_lane_p8
#undef __aarch64_vget_lane_p16
#undef __aarch64_vget_lane_s8
#undef __aarch64_vget_lane_s16
#undef __aarch64_vget_lane_s32
#undef __aarch64_vget_lane_s64
#undef __aarch64_vget_lane_u8
#undef __aarch64_vget_lane_u16
#undef __aarch64_vget_lane_u32
#undef __aarch64_vget_lane_u64

#undef __aarch64_vgetq_lane_f32
#undef __aarch64_vgetq_lane_f64
#undef __aarch64_vgetq_lane_p8
#undef __aarch64_vgetq_lane_p16
#undef __aarch64_vgetq_lane_s8
#undef __aarch64_vgetq_lane_s16
#undef __aarch64_vgetq_lane_s32
#undef __aarch64_vgetq_lane_s64
#undef __aarch64_vgetq_lane_u8
#undef __aarch64_vgetq_lane_u16
#undef __aarch64_vgetq_lane_u32
#undef __aarch64_vgetq_lane_u64

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif