blob: a725518f1b8870eb4ed8fd3b6b60189d497ad2b9 [file] [log] [blame]
sewardj94f53cb2014-03-01 11:27:18 +00001
2#include <stdio.h>
3#include <assert.h>
4#include <malloc.h> // memalign
5#include <string.h> // memset
sewardjd89499b2014-03-02 12:48:34 +00006#include <math.h> // isnormal
sewardj94f53cb2014-03-01 11:27:18 +00007
/* Short names for the fixed-size integer types used throughout the
   tests.  (UChar was previously declared twice; a repeated typedef is
   a constraint violation before C11, so it is declared only once.) */
typedef unsigned char          UChar;
typedef unsigned short int     UShort;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;

/* Minimal boolean type and its two values. */
typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
18
19
/* Number of random input sets each generated test is run on. */
#define ITERS 10
sewardjd89499b2014-03-02 12:48:34 +000021
22
sewardj94f53cb2014-03-01 11:27:18 +000023union _V128 {
sewardjd89499b2014-03-02 12:48:34 +000024 UChar u8[16];
25 UShort u16[8];
26 UInt u32[4];
27 ULong u64[2];
28 float f32[4];
29 double f64[2];
sewardj94f53cb2014-03-01 11:27:18 +000030};
31typedef union _V128 V128;
32
sewardjd89499b2014-03-02 12:48:34 +000033static inline UChar randUChar ( void )
sewardj94f53cb2014-03-01 11:27:18 +000034{
35 static UInt seed = 80021;
36 seed = 1103515245 * seed + 12345;
37 return (seed >> 17) & 0xFF;
38}
39
40static ULong randULong ( void )
41{
42 Int i;
43 ULong r = 0;
44 for (i = 0; i < 8; i++) {
45 r = (r << 8) | (ULong)(0xFF & randUChar());
46 }
47 return r;
48}
49
sewardjd89499b2014-03-02 12:48:34 +000050/* Generates a random V128. Ensures that that it contains normalised
51 FP numbers when viewed as either F32x4 or F64x2, so that it is
52 reasonable to use in FP test cases. */
sewardj94f53cb2014-03-01 11:27:18 +000053static void randV128 ( V128* v )
54{
sewardjd89499b2014-03-02 12:48:34 +000055 static UInt nCalls = 0, nIters = 0;
sewardj94f53cb2014-03-01 11:27:18 +000056 Int i;
sewardjd89499b2014-03-02 12:48:34 +000057 nCalls++;
58 while (1) {
59 nIters++;
60 for (i = 0; i < 16; i++) {
61 v->u8[i] = randUChar();
62 }
63 if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
64 && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
65 break;
66 }
67 if (0 == (nCalls & 0xFF))
68 printf("randV128: %u calls, %u iters\n", nCalls, nIters);
sewardj94f53cb2014-03-01 11:27:18 +000069}
70
71static void showV128 ( V128* v )
72{
73 Int i;
74 for (i = 15; i >= 0; i--)
sewardjd89499b2014-03-02 12:48:34 +000075 printf("%02x", (Int)v->u8[i]);
sewardj94f53cb2014-03-01 11:27:18 +000076}
77
/* Allocates szB bytes with memalign(16, szB).  Asserts that the
   allocation succeeded and that the returned address is a multiple of
   16.  Marked unused so that no warning is given if nothing calls it. */
__attribute__((unused))
static void* memalign16(size_t szB)
{
   void* x;
   x = memalign(16, szB);
   assert(x);
   assert(0 == ((16-1) & (unsigned long)x));
   return x;
}
87
88
89void test_UMINV ( void )
90{
91 int i;
92 V128 block[2];
93
94 /* -- 4s -- */
95
96 for (i = 0; i < 10; i++) {
97 memset(&block, 0x55, sizeof(block));
98 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +000099 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000100 __asm__ __volatile__(
101 "ldr q7, [%0, #0] ; "
102 "uminv s8, v7.4s ; "
103 "str q8, [%0, #16] "
104 : : "r"(&block[0]) : "memory", "v7", "v8"
105 );
106 printf("UMINV v8, v7.4s ");
107 showV128(&block[0]); printf(" ");
108 showV128(&block[1]); printf("\n");
109 }
110
111 /* -- 8h -- */
112
113 for (i = 0; i < 10; i++) {
114 memset(&block, 0x55, sizeof(block));
115 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000116 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000117 __asm__ __volatile__(
118 "ldr q7, [%0, #0] ; "
119 "uminv h8, v7.8h ; "
120 "str q8, [%0, #16] "
121 : : "r"(&block[0]) : "memory", "v7", "v8"
122 );
123 printf("UMINV h8, v7.8h ");
124 showV128(&block[0]); printf(" ");
125 showV128(&block[1]); printf("\n");
126 }
127
128 /* -- 4h -- */
129
130 for (i = 0; i < 10; i++) {
131 memset(&block, 0x55, sizeof(block));
132 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000133 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000134 __asm__ __volatile__(
135 "ldr q7, [%0, #0] ; "
136 "uminv h8, v7.4h ; "
137 "str q8, [%0, #16] "
138 : : "r"(&block[0]) : "memory", "v7", "v8"
139 );
140 printf("UMINV h8, v7.4h ");
141 showV128(&block[0]); printf(" ");
142 showV128(&block[1]); printf("\n");
143 }
144
145 /* -- 16b -- */
146
147 for (i = 0; i < 10; i++) {
148 memset(&block, 0x55, sizeof(block));
149 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000150 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000151 __asm__ __volatile__(
152 "ldr q7, [%0, #0] ; "
153 "uminv b8, v7.16b ; "
154 "str q8, [%0, #16] "
155 : : "r"(&block[0]) : "memory", "v7", "v8"
156 );
157 printf("UMINV b8, v7.16b ");
158 showV128(&block[0]); printf(" ");
159 showV128(&block[1]); printf("\n");
160 }
161
162 /* -- 8b -- */
163
164 for (i = 0; i < 10; i++) {
165 memset(&block, 0x55, sizeof(block));
166 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000167 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000168 __asm__ __volatile__(
169 "ldr q7, [%0, #0] ; "
170 "uminv b8, v7.8b ; "
171 "str q8, [%0, #16] "
172 : : "r"(&block[0]) : "memory", "v7", "v8"
173 );
174 printf("UMINV b8, v7.8b ");
175 showV128(&block[0]); printf(" ");
176 showV128(&block[1]); printf("\n");
177 }
178
179}
180
181
182void test_UMAXV ( void )
183{
184 int i;
185 V128 block[2];
186
187 /* -- 4s -- */
188
189 for (i = 0; i < 10; i++) {
190 memset(&block, 0x55, sizeof(block));
191 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000192 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000193 __asm__ __volatile__(
194 "ldr q7, [%0, #0] ; "
195 "umaxv s8, v7.4s ; "
196 "str q8, [%0, #16] "
197 : : "r"(&block[0]) : "memory", "v7", "v8"
198 );
199 printf("UMAXV v8, v7.4s ");
200 showV128(&block[0]); printf(" ");
201 showV128(&block[1]); printf("\n");
202 }
203
204 /* -- 8h -- */
205
206 for (i = 0; i < 10; i++) {
207 memset(&block, 0x55, sizeof(block));
208 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000209 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000210 __asm__ __volatile__(
211 "ldr q7, [%0, #0] ; "
212 "umaxv h8, v7.8h ; "
213 "str q8, [%0, #16] "
214 : : "r"(&block[0]) : "memory", "v7", "v8"
215 );
216 printf("UMAXV h8, v7.8h ");
217 showV128(&block[0]); printf(" ");
218 showV128(&block[1]); printf("\n");
219 }
220
221 /* -- 4h -- */
222
223 for (i = 0; i < 10; i++) {
224 memset(&block, 0x55, sizeof(block));
225 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000226 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000227 __asm__ __volatile__(
228 "ldr q7, [%0, #0] ; "
229 "umaxv h8, v7.4h ; "
230 "str q8, [%0, #16] "
231 : : "r"(&block[0]) : "memory", "v7", "v8"
232 );
233 printf("UMAXV h8, v7.4h ");
234 showV128(&block[0]); printf(" ");
235 showV128(&block[1]); printf("\n");
236 }
237
238 /* -- 16b -- */
239
240 for (i = 0; i < 10; i++) {
241 memset(&block, 0x55, sizeof(block));
242 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000243 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000244 __asm__ __volatile__(
245 "ldr q7, [%0, #0] ; "
246 "umaxv b8, v7.16b ; "
247 "str q8, [%0, #16] "
248 : : "r"(&block[0]) : "memory", "v7", "v8"
249 );
250 printf("UMAXV b8, v7.16b ");
251 showV128(&block[0]); printf(" ");
252 showV128(&block[1]); printf("\n");
253 }
254
255 /* -- 8b -- */
256
257 for (i = 0; i < 10; i++) {
258 memset(&block, 0x55, sizeof(block));
259 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000260 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000261 __asm__ __volatile__(
262 "ldr q7, [%0, #0] ; "
263 "umaxv b8, v7.8b ; "
264 "str q8, [%0, #16] "
265 : : "r"(&block[0]) : "memory", "v7", "v8"
266 );
267 printf("UMAXV b8, v7.8b ");
268 showV128(&block[0]); printf(" ");
269 showV128(&block[1]); printf("\n");
270 }
271
272}
273
274
275void test_INS_general ( void )
276{
277 V128 block[3];
278
279 /* -- D[0..1] -- */
280
281 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000282 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000283 __asm__ __volatile__(
284 "ldr q7, [%0, #0] ; "
285 "ldr x19, [%0, #16] ; "
286 "ins v7.d[0], x19 ; "
287 "str q7, [%0, #32] "
288 : : "r"(&block[0]) : "memory", "x19", "v7"
289 );
sewardjd89499b2014-03-02 12:48:34 +0000290 printf("INS v7.u64[0],x19 ");
291 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000292 showV128(&block[2]); printf("\n");
293
294 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000295 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000296 __asm__ __volatile__(
297 "ldr q7, [%0, #0] ; "
298 "ldr x19, [%0, #16] ; "
299 "ins v7.d[1], x19 ; "
300 "str q7, [%0, #32] "
301 : : "r"(&block[0]) : "memory", "x19", "v7"
302 );
303 printf("INS v7.d[1],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000304 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000305 showV128(&block[2]); printf("\n");
306
307 /* -- S[0..3] -- */
308
309 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000310 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000311 __asm__ __volatile__(
312 "ldr q7, [%0, #0] ; "
313 "ldr x19, [%0, #16] ; "
314 "ins v7.s[0], w19 ; "
315 "str q7, [%0, #32] "
316 : : "r"(&block[0]) : "memory", "x19", "v7"
317 );
318 printf("INS v7.s[0],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000319 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000320 showV128(&block[2]); printf("\n");
321
322 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000323 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000324 __asm__ __volatile__(
325 "ldr q7, [%0, #0] ; "
326 "ldr x19, [%0, #16] ; "
327 "ins v7.s[1], w19 ; "
328 "str q7, [%0, #32] "
329 : : "r"(&block[0]) : "memory", "x19", "v7"
330 );
331 printf("INS v7.s[1],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000332 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000333 showV128(&block[2]); printf("\n");
334
335 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000336 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000337 __asm__ __volatile__(
338 "ldr q7, [%0, #0] ; "
339 "ldr x19, [%0, #16] ; "
340 "ins v7.s[2], w19 ; "
341 "str q7, [%0, #32] "
342 : : "r"(&block[0]) : "memory", "x19", "v7"
343 );
344 printf("INS v7.s[2],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000345 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000346 showV128(&block[2]); printf("\n");
347
348 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000349 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000350 __asm__ __volatile__(
351 "ldr q7, [%0, #0] ; "
352 "ldr x19, [%0, #16] ; "
353 "ins v7.s[3], w19 ; "
354 "str q7, [%0, #32] "
355 : : "r"(&block[0]) : "memory", "x19", "v7"
356 );
357 printf("INS v7.s[3],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000358 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000359 showV128(&block[2]); printf("\n");
360
361 /* -- H[0..7] -- */
362
363 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000364 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000365 __asm__ __volatile__(
366 "ldr q7, [%0, #0] ; "
367 "ldr x19, [%0, #16] ; "
368 "ins v7.h[0], w19 ; "
369 "str q7, [%0, #32] "
370 : : "r"(&block[0]) : "memory", "x19", "v7"
371 );
372 printf("INS v7.h[0],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000373 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000374 showV128(&block[2]); printf("\n");
375
376 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000377 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000378 __asm__ __volatile__(
379 "ldr q7, [%0, #0] ; "
380 "ldr x19, [%0, #16] ; "
381 "ins v7.h[1], w19 ; "
382 "str q7, [%0, #32] "
383 : : "r"(&block[0]) : "memory", "x19", "v7"
384 );
385 printf("INS v7.h[1],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000386 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000387 showV128(&block[2]); printf("\n");
388
389 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000390 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000391 __asm__ __volatile__(
392 "ldr q7, [%0, #0] ; "
393 "ldr x19, [%0, #16] ; "
394 "ins v7.h[2], w19 ; "
395 "str q7, [%0, #32] "
396 : : "r"(&block[0]) : "memory", "x19", "v7"
397 );
398 printf("INS v7.h[2],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000399 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000400 showV128(&block[2]); printf("\n");
401
402 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000403 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000404 __asm__ __volatile__(
405 "ldr q7, [%0, #0] ; "
406 "ldr x19, [%0, #16] ; "
407 "ins v7.h[3], w19 ; "
408 "str q7, [%0, #32] "
409 : : "r"(&block[0]) : "memory", "x19", "v7"
410 );
411 printf("INS v7.h[3],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000412 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000413 showV128(&block[2]); printf("\n");
414
415 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000416 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000417 __asm__ __volatile__(
418 "ldr q7, [%0, #0] ; "
419 "ldr x19, [%0, #16] ; "
420 "ins v7.h[4], w19 ; "
421 "str q7, [%0, #32] "
422 : : "r"(&block[0]) : "memory", "x19", "v7"
423 );
424 printf("INS v7.h[4],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000425 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000426 showV128(&block[2]); printf("\n");
427
428 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000429 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000430 __asm__ __volatile__(
431 "ldr q7, [%0, #0] ; "
432 "ldr x19, [%0, #16] ; "
433 "ins v7.h[5], w19 ; "
434 "str q7, [%0, #32] "
435 : : "r"(&block[0]) : "memory", "x19", "v7"
436 );
437 printf("INS v7.h[5],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000438 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000439 showV128(&block[2]); printf("\n");
440
441 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000442 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000443 __asm__ __volatile__(
444 "ldr q7, [%0, #0] ; "
445 "ldr x19, [%0, #16] ; "
446 "ins v7.h[6], w19 ; "
447 "str q7, [%0, #32] "
448 : : "r"(&block[0]) : "memory", "x19", "v7"
449 );
450 printf("INS v7.h[6],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000451 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000452 showV128(&block[2]); printf("\n");
453
454 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000455 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000456 __asm__ __volatile__(
457 "ldr q7, [%0, #0] ; "
458 "ldr x19, [%0, #16] ; "
459 "ins v7.h[7], w19 ; "
460 "str q7, [%0, #32] "
461 : : "r"(&block[0]) : "memory", "x19", "v7"
462 );
463 printf("INS v7.h[7],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000464 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000465 showV128(&block[2]); printf("\n");
466
467 /* -- B[0,15] -- */
468
469 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000470 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000471 __asm__ __volatile__(
472 "ldr q7, [%0, #0] ; "
473 "ldr x19, [%0, #16] ; "
474 "ins v7.b[0], w19 ; "
475 "str q7, [%0, #32] "
476 : : "r"(&block[0]) : "memory", "x19", "v7"
477 );
478 printf("INS v7.b[0],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000479 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000480 showV128(&block[2]); printf("\n");
481
482 memset(&block, 0x55, sizeof(block));
sewardjd89499b2014-03-02 12:48:34 +0000483 block[1].u64[0] = randULong();
sewardj94f53cb2014-03-01 11:27:18 +0000484 __asm__ __volatile__(
485 "ldr q7, [%0, #0] ; "
486 "ldr x19, [%0, #16] ; "
487 "ins v7.b[15], w19 ; "
488 "str q7, [%0, #32] "
489 : : "r"(&block[0]) : "memory", "x19", "v7"
490 );
491 printf("INS v7.b[15],x19 ");
sewardjd89499b2014-03-02 12:48:34 +0000492 showV128(&block[0]); printf(" %016llx ", block[1].u64[0]);
sewardj94f53cb2014-03-01 11:27:18 +0000493 showV128(&block[2]); printf("\n");
494}
495
496
497
498void test_SMINV ( void )
499{
500 int i;
501 V128 block[2];
502
503 /* -- 4s -- */
504
505 for (i = 0; i < 10; i++) {
506 memset(&block, 0x55, sizeof(block));
507 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000508 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000509 __asm__ __volatile__(
510 "ldr q7, [%0, #0] ; "
511 "sminv s8, v7.4s ; "
512 "str q8, [%0, #16] "
513 : : "r"(&block[0]) : "memory", "v7", "v8"
514 );
515 printf("SMINV v8, v7.4s ");
516 showV128(&block[0]); printf(" ");
517 showV128(&block[1]); printf("\n");
518 }
519
520 /* -- 8h -- */
521
522 for (i = 0; i < 10; i++) {
523 memset(&block, 0x55, sizeof(block));
524 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000525 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000526 __asm__ __volatile__(
527 "ldr q7, [%0, #0] ; "
528 "sminv h8, v7.8h ; "
529 "str q8, [%0, #16] "
530 : : "r"(&block[0]) : "memory", "v7", "v8"
531 );
532 printf("SMINV h8, v7.8h ");
533 showV128(&block[0]); printf(" ");
534 showV128(&block[1]); printf("\n");
535 }
536
537 /* -- 4h -- */
538
539 for (i = 0; i < 10; i++) {
540 memset(&block, 0x55, sizeof(block));
541 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000542 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000543 __asm__ __volatile__(
544 "ldr q7, [%0, #0] ; "
545 "sminv h8, v7.4h ; "
546 "str q8, [%0, #16] "
547 : : "r"(&block[0]) : "memory", "v7", "v8"
548 );
549 printf("SMINV h8, v7.4h ");
550 showV128(&block[0]); printf(" ");
551 showV128(&block[1]); printf("\n");
552 }
553
554 /* -- 16b -- */
555
556 for (i = 0; i < 10; i++) {
557 memset(&block, 0x55, sizeof(block));
558 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000559 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000560 __asm__ __volatile__(
561 "ldr q7, [%0, #0] ; "
562 "sminv b8, v7.16b ; "
563 "str q8, [%0, #16] "
564 : : "r"(&block[0]) : "memory", "v7", "v8"
565 );
566 printf("SMINV b8, v7.16b ");
567 showV128(&block[0]); printf(" ");
568 showV128(&block[1]); printf("\n");
569 }
570
571 /* -- 8b -- */
572
573 for (i = 0; i < 10; i++) {
574 memset(&block, 0x55, sizeof(block));
575 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000576 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000577 __asm__ __volatile__(
578 "ldr q7, [%0, #0] ; "
579 "sminv b8, v7.8b ; "
580 "str q8, [%0, #16] "
581 : : "r"(&block[0]) : "memory", "v7", "v8"
582 );
583 printf("SMINV b8, v7.8b ");
584 showV128(&block[0]); printf(" ");
585 showV128(&block[1]); printf("\n");
586 }
587
588}
589
590
591void test_SMAXV ( void )
592{
593 int i;
594 V128 block[2];
595
596 /* -- 4s -- */
597
598 for (i = 0; i < 10; i++) {
599 memset(&block, 0x55, sizeof(block));
600 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000601 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000602 __asm__ __volatile__(
603 "ldr q7, [%0, #0] ; "
604 "smaxv s8, v7.4s ; "
605 "str q8, [%0, #16] "
606 : : "r"(&block[0]) : "memory", "v7", "v8"
607 );
608 printf("SMAXV v8, v7.4s ");
609 showV128(&block[0]); printf(" ");
610 showV128(&block[1]); printf("\n");
611 }
612
613 /* -- 8h -- */
614
615 for (i = 0; i < 10; i++) {
616 memset(&block, 0x55, sizeof(block));
617 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000618 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000619 __asm__ __volatile__(
620 "ldr q7, [%0, #0] ; "
621 "smaxv h8, v7.8h ; "
622 "str q8, [%0, #16] "
623 : : "r"(&block[0]) : "memory", "v7", "v8"
624 );
625 printf("SMAXV h8, v7.8h ");
626 showV128(&block[0]); printf(" ");
627 showV128(&block[1]); printf("\n");
628 }
629
630 /* -- 4h -- */
631
632 for (i = 0; i < 10; i++) {
633 memset(&block, 0x55, sizeof(block));
634 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000635 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000636 __asm__ __volatile__(
637 "ldr q7, [%0, #0] ; "
638 "smaxv h8, v7.4h ; "
639 "str q8, [%0, #16] "
640 : : "r"(&block[0]) : "memory", "v7", "v8"
641 );
642 printf("SMAXV h8, v7.4h ");
643 showV128(&block[0]); printf(" ");
644 showV128(&block[1]); printf("\n");
645 }
646
647 /* -- 16b -- */
648
649 for (i = 0; i < 10; i++) {
650 memset(&block, 0x55, sizeof(block));
651 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000652 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000653 __asm__ __volatile__(
654 "ldr q7, [%0, #0] ; "
655 "smaxv b8, v7.16b ; "
656 "str q8, [%0, #16] "
657 : : "r"(&block[0]) : "memory", "v7", "v8"
658 );
659 printf("SMAXV b8, v7.16b ");
660 showV128(&block[0]); printf(" ");
661 showV128(&block[1]); printf("\n");
662 }
663
664 /* -- 8b -- */
665
666 for (i = 0; i < 10; i++) {
667 memset(&block, 0x55, sizeof(block));
668 randV128(&block[0]);
sewardjd89499b2014-03-02 12:48:34 +0000669 randV128(&block[1]);
sewardj94f53cb2014-03-01 11:27:18 +0000670 __asm__ __volatile__(
671 "ldr q7, [%0, #0] ; "
672 "smaxv b8, v7.8b ; "
673 "str q8, [%0, #16] "
674 : : "r"(&block[0]) : "memory", "v7", "v8"
675 );
676 printf("SMAXV b8, v7.8b ");
677 showV128(&block[0]); printf(" ");
678 showV128(&block[1]); printf("\n");
679 }
680
681}
682
/* Defines test_<INSN>_<SUFFIX>(), which runs
      INSN v9.SUFFIX, v7.SUFFIX, v8.SUFFIX
   ITERS times.  On each iteration block[0..2] are filled with 0x55 and
   then randomised; v7, v8 and v9 are loaded from block[0], block[1]
   and block[2] respectively, and v9 is stored back to block[2] after
   the instruction.  Note that the destination register v9 is thereby
   given a defined (random) value before the instruction runs, since it
   can sometimes be an input to the instruction too.  The instruction
   text and the three blocks are then printed on one line. */
#define GEN_BINARY_TEST(INSN,SUFFIX) \
   __attribute__((noinline)) \
   static void test_##INSN##_##SUFFIX ( void ) { \
      Int i; \
      for (i = 0; i < ITERS; i++) { \
         V128 block[3]; \
         memset(block, 0x55, sizeof(block)); \
         randV128(&block[0]); \
         randV128(&block[1]); \
         randV128(&block[2]); \
         __asm__ __volatile__( \
            "ldr q7, [%0, #0] ; " \
            "ldr q8, [%0, #16] ; " \
            "ldr q9, [%0, #32] ; " \
            #INSN " v9." #SUFFIX ", v7." #SUFFIX ", v8." #SUFFIX " ; " \
            "str q9, [%0, #32] " \
            : : "r"(&block[0]) : "memory", "v7", "v8", "v9" \
         ); \
         printf(#INSN " v9." #SUFFIX ", v7." #SUFFIX ", v8." #SUFFIX " "); \
         showV128(&block[0]); printf(" "); \
         showV128(&block[1]); printf(" "); \
         showV128(&block[2]); printf("\n"); \
      } \
   }
709
710
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT>(), which runs
      INSN v8.SUFFIXD, v7.SUFFIXN, #AMOUNT
   ITERS times.  On each iteration block[0..1] are filled with 0x55 and
   then randomised; v7 and v8 are loaded from block[0] and block[1],
   and v8 is stored back to block[1] after the instruction.  Note that
   the destination register v8 is thereby given a defined (random)
   value before the instruction runs, since it can sometimes be an
   input to the instruction too.  The instruction text and both blocks
   are then printed on one line. */
#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
   __attribute__((noinline)) \
   static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( void ) { \
      Int i; \
      for (i = 0; i < ITERS; i++) { \
         V128 block[2]; \
         memset(block, 0x55, sizeof(block)); \
         randV128(&block[0]); \
         randV128(&block[1]); \
         __asm__ __volatile__( \
            "ldr q7, [%0, #0] ; " \
            "ldr q8, [%0, #16] ; " \
            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
            "str q8, [%0, #16] " \
            : : "r"(&block[0]) : "memory", "v7", "v8" \
         ); \
         printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " "); \
         showV128(&block[0]); printf(" "); \
         showV128(&block[1]); printf("\n"); \
      } \
   }
734
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>(), which runs
      INSN v8.SUFFIXD, v7.SUFFIXN
   ITERS times.  On each iteration block[0..1] are filled with 0x55 and
   then randomised; v7 and v8 are loaded from block[0] and block[1],
   and v8 is stored back to block[1] after the instruction.  Note that
   the destination register v8 is thereby given a defined (random)
   value before the instruction runs, since it can sometimes be an
   input to the instruction too.  The instruction text and both blocks
   are then printed on one line.
   NOTE(review): unlike the other generators, the label is printed
   with no trailing separator, so it runs straight into the first hex
   value.  Left as is because it changes program output -- confirm
   whether that is intended. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
   __attribute__((noinline)) \
   static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( void ) { \
      Int i; \
      for (i = 0; i < ITERS; i++) { \
         V128 block[2]; \
         memset(block, 0x55, sizeof(block)); \
         randV128(&block[0]); \
         randV128(&block[1]); \
         __asm__ __volatile__( \
            "ldr q7, [%0, #0] ; " \
            "ldr q8, [%0, #16] ; " \
            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
            "str q8, [%0, #16] " \
            : : "r"(&block[0]) : "memory", "v7", "v8" \
         ); \
         printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN); \
         showV128(&block[0]); printf(" "); \
         showV128(&block[1]); printf("\n"); \
      } \
   }
758
759
760GEN_BINARY_TEST(umax, 4s)
761GEN_BINARY_TEST(umax, 8h)
762GEN_BINARY_TEST(umax, 4h)
763GEN_BINARY_TEST(umax, 16b)
764GEN_BINARY_TEST(umax, 8b)
765
766GEN_BINARY_TEST(umin, 4s)
767GEN_BINARY_TEST(umin, 8h)
768GEN_BINARY_TEST(umin, 4h)
769GEN_BINARY_TEST(umin, 16b)
770GEN_BINARY_TEST(umin, 8b)
771
772GEN_BINARY_TEST(smax, 4s)
773GEN_BINARY_TEST(smax, 8h)
774GEN_BINARY_TEST(smax, 4h)
775GEN_BINARY_TEST(smax, 16b)
776GEN_BINARY_TEST(smax, 8b)
777
778GEN_BINARY_TEST(smin, 4s)
779GEN_BINARY_TEST(smin, 8h)
780GEN_BINARY_TEST(smin, 4h)
781GEN_BINARY_TEST(smin, 16b)
782GEN_BINARY_TEST(smin, 8b)
783
784GEN_BINARY_TEST(add, 2d)
785GEN_BINARY_TEST(add, 4s)
786GEN_BINARY_TEST(add, 2s)
787GEN_BINARY_TEST(add, 8h)
788GEN_BINARY_TEST(add, 4h)
789GEN_BINARY_TEST(add, 16b)
790GEN_BINARY_TEST(add, 8b)
791
792GEN_BINARY_TEST(sub, 2d)
793GEN_BINARY_TEST(sub, 4s)
794GEN_BINARY_TEST(sub, 2s)
795GEN_BINARY_TEST(sub, 8h)
796GEN_BINARY_TEST(sub, 4h)
797GEN_BINARY_TEST(sub, 16b)
798GEN_BINARY_TEST(sub, 8b)
799
800GEN_BINARY_TEST(mul, 4s)
801GEN_BINARY_TEST(mul, 2s)
802GEN_BINARY_TEST(mul, 8h)
803GEN_BINARY_TEST(mul, 4h)
804GEN_BINARY_TEST(mul, 16b)
805GEN_BINARY_TEST(mul, 8b)
806
807GEN_BINARY_TEST(mla, 4s)
808GEN_BINARY_TEST(mla, 2s)
809GEN_BINARY_TEST(mla, 8h)
810GEN_BINARY_TEST(mla, 4h)
811GEN_BINARY_TEST(mla, 16b)
812GEN_BINARY_TEST(mla, 8b)
813
814GEN_BINARY_TEST(mls, 4s)
815GEN_BINARY_TEST(mls, 2s)
816GEN_BINARY_TEST(mls, 8h)
817GEN_BINARY_TEST(mls, 4h)
818GEN_BINARY_TEST(mls, 16b)
819GEN_BINARY_TEST(mls, 8b)
820
821GEN_BINARY_TEST(and, 16b)
822GEN_BINARY_TEST(and, 8b)
823
824GEN_BINARY_TEST(bic, 16b)
825GEN_BINARY_TEST(bic, 8b)
826
827GEN_BINARY_TEST(orr, 16b)
828GEN_BINARY_TEST(orr, 8b)
829
830GEN_BINARY_TEST(orn, 16b)
831GEN_BINARY_TEST(orn, 8b)
832
833GEN_BINARY_TEST(eor, 16b)
834GEN_BINARY_TEST(eor, 8b)
835
836GEN_BINARY_TEST(bsl, 16b)
837GEN_BINARY_TEST(bsl, 8b)
838
839GEN_BINARY_TEST(bit, 16b)
840GEN_BINARY_TEST(bit, 8b)
841
842GEN_BINARY_TEST(bif, 16b)
843GEN_BINARY_TEST(bif, 8b)
844
845GEN_BINARY_TEST(cmeq, 2d)
846GEN_BINARY_TEST(cmeq, 4s)
847GEN_BINARY_TEST(cmeq, 2s)
848GEN_BINARY_TEST(cmeq, 8h)
849GEN_BINARY_TEST(cmeq, 4h)
850GEN_BINARY_TEST(cmeq, 16b)
851GEN_BINARY_TEST(cmeq, 8b)
852
853GEN_BINARY_TEST(cmtst, 2d)
854GEN_BINARY_TEST(cmtst, 4s)
855GEN_BINARY_TEST(cmtst, 2s)
856GEN_BINARY_TEST(cmtst, 8h)
857GEN_BINARY_TEST(cmtst, 4h)
858GEN_BINARY_TEST(cmtst, 16b)
859GEN_BINARY_TEST(cmtst, 8b)
860
861GEN_BINARY_TEST(cmhi, 2d)
862GEN_BINARY_TEST(cmhi, 4s)
863GEN_BINARY_TEST(cmhi, 2s)
864GEN_BINARY_TEST(cmhi, 8h)
865GEN_BINARY_TEST(cmhi, 4h)
866GEN_BINARY_TEST(cmhi, 16b)
867GEN_BINARY_TEST(cmhi, 8b)
868
869GEN_BINARY_TEST(cmgt, 2d)
870GEN_BINARY_TEST(cmgt, 4s)
871GEN_BINARY_TEST(cmgt, 2s)
872GEN_BINARY_TEST(cmgt, 8h)
873GEN_BINARY_TEST(cmgt, 4h)
874GEN_BINARY_TEST(cmgt, 16b)
875GEN_BINARY_TEST(cmgt, 8b)
876
877GEN_BINARY_TEST(cmhs, 2d)
878GEN_BINARY_TEST(cmhs, 4s)
879GEN_BINARY_TEST(cmhs, 2s)
880GEN_BINARY_TEST(cmhs, 8h)
881GEN_BINARY_TEST(cmhs, 4h)
882GEN_BINARY_TEST(cmhs, 16b)
883GEN_BINARY_TEST(cmhs, 8b)
884
885GEN_BINARY_TEST(cmge, 2d)
886GEN_BINARY_TEST(cmge, 4s)
887GEN_BINARY_TEST(cmge, 2s)
888GEN_BINARY_TEST(cmge, 8h)
889GEN_BINARY_TEST(cmge, 4h)
890GEN_BINARY_TEST(cmge, 16b)
891GEN_BINARY_TEST(cmge, 8b)
892
893GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
894GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
895GEN_SHIFT_TEST(ushr, 2d, 2d, 63)
896GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
897GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
898GEN_SHIFT_TEST(sshr, 2d, 2d, 63)
sewardjd89499b2014-03-02 12:48:34 +0000899GEN_SHIFT_TEST(shl, 2d, 2d, 1)
900GEN_SHIFT_TEST(shl, 2d, 2d, 13)
901GEN_SHIFT_TEST(shl, 2d, 2d, 63)
sewardj94f53cb2014-03-01 11:27:18 +0000902
903GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
904GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
905GEN_SHIFT_TEST(ushr, 4s, 4s, 31)
906GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
907GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
908GEN_SHIFT_TEST(sshr, 4s, 4s, 31)
sewardjd89499b2014-03-02 12:48:34 +0000909GEN_SHIFT_TEST(shl, 4s, 4s, 1)
910GEN_SHIFT_TEST(shl, 4s, 4s, 13)
911GEN_SHIFT_TEST(shl, 4s, 4s, 31)
sewardj94f53cb2014-03-01 11:27:18 +0000912
913GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
914GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
915GEN_SHIFT_TEST(ushr, 2s, 2s, 31)
916GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
917GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
918GEN_SHIFT_TEST(sshr, 2s, 2s, 31)
sewardjd89499b2014-03-02 12:48:34 +0000919GEN_SHIFT_TEST(shl, 2s, 2s, 1)
920GEN_SHIFT_TEST(shl, 2s, 2s, 13)
921GEN_SHIFT_TEST(shl, 2s, 2s, 31)
sewardj94f53cb2014-03-01 11:27:18 +0000922
923GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
924GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
925GEN_SHIFT_TEST(ushr, 8h, 8h, 15)
926GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
927GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
928GEN_SHIFT_TEST(sshr, 8h, 8h, 15)
sewardjd89499b2014-03-02 12:48:34 +0000929GEN_SHIFT_TEST(shl, 8h, 8h, 1)
930GEN_SHIFT_TEST(shl, 8h, 8h, 13)
931GEN_SHIFT_TEST(shl, 8h, 8h, 15)
sewardj94f53cb2014-03-01 11:27:18 +0000932
933GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
934GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
935GEN_SHIFT_TEST(ushr, 4h, 4h, 15)
936GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
937GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
938GEN_SHIFT_TEST(sshr, 4h, 4h, 15)
sewardjd89499b2014-03-02 12:48:34 +0000939GEN_SHIFT_TEST(shl, 4h, 4h, 1)
940GEN_SHIFT_TEST(shl, 4h, 4h, 13)
941GEN_SHIFT_TEST(shl, 4h, 4h, 15)
sewardj94f53cb2014-03-01 11:27:18 +0000942
943GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
944GEN_SHIFT_TEST(ushr, 16b, 16b, 7)
945GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
946GEN_SHIFT_TEST(sshr, 16b, 16b, 7)
sewardjd89499b2014-03-02 12:48:34 +0000947GEN_SHIFT_TEST(shl, 16b, 16b, 1)
948GEN_SHIFT_TEST(shl, 16b, 16b, 7)
sewardj94f53cb2014-03-01 11:27:18 +0000949
950GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
951GEN_SHIFT_TEST(ushr, 8b, 8b, 7)
952GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
953GEN_SHIFT_TEST(sshr, 8b, 8b, 7)
sewardjd89499b2014-03-02 12:48:34 +0000954GEN_SHIFT_TEST(shl, 8b, 8b, 1)
955GEN_SHIFT_TEST(shl, 8b, 8b, 7)
sewardj94f53cb2014-03-01 11:27:18 +0000956
957GEN_SHIFT_TEST(ushll, 2d, 2s, 0)
958GEN_SHIFT_TEST(ushll, 2d, 2s, 15)
959GEN_SHIFT_TEST(ushll, 2d, 2s, 31)
960GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
961GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
962GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
963
964GEN_SHIFT_TEST(sshll, 2d, 2s, 0)
965GEN_SHIFT_TEST(sshll, 2d, 2s, 15)
966GEN_SHIFT_TEST(sshll, 2d, 2s, 31)
967GEN_SHIFT_TEST(sshll2, 2d, 4s, 0)
968GEN_SHIFT_TEST(sshll2, 2d, 4s, 15)
969GEN_SHIFT_TEST(sshll2, 2d, 4s, 31)
970
971GEN_UNARY_TEST(xtn, 2s, 2d)
972GEN_UNARY_TEST(xtn2, 4s, 2d)
973GEN_UNARY_TEST(xtn, 4h, 4s)
974GEN_UNARY_TEST(xtn2, 8h, 4s)
sewardj01213b32014-03-07 22:54:19 +0000975GEN_UNARY_TEST(xtn, 8b, 8h)
976GEN_UNARY_TEST(xtn2, 16b, 8h)
sewardj94f53cb2014-03-01 11:27:18 +0000977
sewardjd89499b2014-03-02 12:48:34 +0000978
/* Generate a test that involves one integer register and one vector
   register, without assuming which of the two is the input and which
   is the output.

   Defines test_<TESTNAME>(), which runs the instruction string INSN
   ITERS times.  On each iteration block[0..3] are filled with 0x55 and
   then randomised; q<VECREGNO> is loaded from block[0] and x<INTREGNO>
   from the first 8 bytes of block[1].  After INSN, the vector register
   is stored to block[2] and the integer register to the first 8 bytes
   of block[3].  INSN and all four blocks are then printed on one
   line. */
#define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
   __attribute__((noinline)) \
   static void test_##TESTNAME ( void ) { \
      Int i; \
      for (i = 0; i < ITERS; i++) { \
         V128 block[4]; \
         memset(block, 0x55, sizeof(block)); \
         randV128(&block[0]); \
         randV128(&block[1]); \
         randV128(&block[2]); \
         randV128(&block[3]); \
         __asm__ __volatile__( \
            "ldr q"#VECREGNO", [%0, #0] ; " \
            "ldr x"#INTREGNO", [%0, #16] ; " \
            INSN " ; " \
            "str q"#VECREGNO", [%0, #32] ; " \
            "str x"#INTREGNO", [%0, #48] ; " \
            : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO \
         ); \
         printf(INSN " "); \
         showV128(&block[0]); printf(" "); \
         showV128(&block[1]); printf(" "); \
         showV128(&block[2]); printf(" "); \
         showV128(&block[3]); printf("\n"); \
      } \
   }
1007
1008GEN_ONEINT_ONEVEC_TEST(umov_01, "umov x9, v10.d[0]", 9, 10)
1009GEN_ONEINT_ONEVEC_TEST(umov_02, "umov x9, v10.d[1]", 9, 10)
1010GEN_ONEINT_ONEVEC_TEST(umov_03, "umov w9, v10.s[0]", 9, 10)
1011GEN_ONEINT_ONEVEC_TEST(umov_04, "umov w9, v10.s[3]", 9, 10)
1012GEN_ONEINT_ONEVEC_TEST(umov_05, "umov w9, v10.h[0]", 9, 10)
1013GEN_ONEINT_ONEVEC_TEST(umov_06, "umov w9, v10.h[7]", 9, 10)
1014GEN_ONEINT_ONEVEC_TEST(umov_07, "umov w9, v10.b[0]", 9, 10)
1015GEN_ONEINT_ONEVEC_TEST(umov_08, "umov w9, v10.b[15]", 9, 10)
1016
1017GEN_ONEINT_ONEVEC_TEST(smov_01, "smov x9, v10.s[0]", 9, 10)
1018GEN_ONEINT_ONEVEC_TEST(smov_02, "smov x9, v10.s[3]", 9, 10)
1019
1020GEN_ONEINT_ONEVEC_TEST(smov_03, "smov x9, v10.h[0]", 9, 10)
1021GEN_ONEINT_ONEVEC_TEST(smov_04, "smov x9, v10.h[7]", 9, 10)
1022GEN_ONEINT_ONEVEC_TEST(smov_05, "smov w9, v10.h[0]", 9, 10)
1023GEN_ONEINT_ONEVEC_TEST(smov_06, "smov w9, v10.h[7]", 9, 10)
1024
1025GEN_ONEINT_ONEVEC_TEST(smov_07, "smov x9, v10.b[0]", 9, 10)
1026GEN_ONEINT_ONEVEC_TEST(smov_08, "smov x9, v10.b[15]", 9, 10)
1027GEN_ONEINT_ONEVEC_TEST(smov_09, "smov w9, v10.b[0]", 9, 10)
1028GEN_ONEINT_ONEVEC_TEST(smov_10, "smov w9, v10.b[15]", 9, 10)
1029
/* Generate a test that involves two vector regs, with no assumption
   about which of the two is input and which is output.

     TESTNAME   suffix of the generated function, test_<TESTNAME>
     INSN       string literal holding the instruction(s) under test
     VECREG1NO  number N of the first vector register vN/qN
     VECREG2NO  number N of the second vector register vN/qN

   Each of the ITERS iterations works on a block of four V128s, all
   filled by randV128 (which overwrites the 0x55 memset pattern):

     block[0], block[1]  loaded into register 1 and 2 before INSN
     block[2], block[3]  receive register 1 and 2 after INSN

   INSN and the four blocks are then printed on a single line.  Both
   registers are named in the clobber list. */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( void ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0]); \
        randV128(&block[1]); \
        randV128(&block[2]); \
        randV128(&block[3]); \
        __asm__ __volatile__( \
           "ldr q"#VECREG1NO", [%0, #0] ; " \
           "ldr q"#VECREG2NO", [%0, #16] ; " \
           INSN " ; " \
           "str q"#VECREG1NO", [%0, #32] ; " \
           "str q"#VECREG2NO", [%0, #48] ; " \
           : : "r"(&block[0]) : "memory", "v"#VECREG1NO, "v"#VECREG2NO \
        ); \
        printf(INSN " "); \
        showV128(&block[0]); printf(" "); \
        showV128(&block[1]); printf(" "); \
        showV128(&block[2]); printf(" "); \
        showV128(&block[3]); printf("\n"); \
     } \
  }
1058
1059GEN_TWOVEC_TEST(fcvtn_01, "fcvtn v22.2s, v23.2d", 22, 23)
1060GEN_TWOVEC_TEST(fcvtn_02, "fcvtn2 v22.4s, v23.2d", 22, 23)
1061
1062GEN_UNARY_TEST(neg, 2d, 2d)
1063GEN_UNARY_TEST(neg, 4s, 4s)
1064GEN_UNARY_TEST(neg, 2s, 2s)
1065GEN_UNARY_TEST(neg, 8h, 8h)
1066GEN_UNARY_TEST(neg, 4h, 4h)
1067GEN_UNARY_TEST(neg, 16b, 16b)
1068GEN_UNARY_TEST(neg, 8b, 8b)
1069GEN_BINARY_TEST(fadd, 2d)
1070GEN_BINARY_TEST(fadd, 4s)
1071GEN_BINARY_TEST(fadd, 2s)
1072GEN_BINARY_TEST(fsub, 2d)
1073GEN_BINARY_TEST(fsub, 4s)
1074GEN_BINARY_TEST(fsub, 2s)
1075GEN_BINARY_TEST(fmul, 2d)
1076GEN_BINARY_TEST(fmul, 4s)
1077GEN_BINARY_TEST(fmul, 2s)
1078GEN_BINARY_TEST(fdiv, 2d)
1079GEN_BINARY_TEST(fdiv, 4s)
1080GEN_BINARY_TEST(fdiv, 2s)
1081GEN_BINARY_TEST(fmla, 2d)
1082GEN_BINARY_TEST(fmla, 4s)
1083GEN_BINARY_TEST(fmla, 2s)
1084GEN_BINARY_TEST(fmls, 2d)
1085GEN_BINARY_TEST(fmls, 4s)
1086GEN_BINARY_TEST(fmls, 2s)
1087GEN_BINARY_TEST(fabd, 2d)
1088GEN_BINARY_TEST(fabd, 4s)
1089GEN_BINARY_TEST(fabd, 2s)
1090
/* Generate a test that involves three vector regs, with no assumption
   about which of them are inputs and which are outputs.  INSN may
   also use v16, v17 and v18 as scratch: they are always listed as
   clobbered.

     TESTNAME      suffix of the generated function, test_<TESTNAME>
     INSN          string literal holding the instruction(s) under test
     VECREG{1,2,3}NO  numbers N of the three vector registers vN/qN

   Each of the ITERS iterations works on a block of six V128s, all
   filled by randV128 (which overwrites the 0x55 memset pattern):

     block[0..2]  loaded into registers 1, 2 and 3 before INSN
     block[3..5]  receive registers 1, 2 and 3 after INSN

   INSN and the six blocks are then printed on a single line. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( void ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[6]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0]); \
        randV128(&block[1]); \
        randV128(&block[2]); \
        randV128(&block[3]); \
        randV128(&block[4]); \
        randV128(&block[5]); \
        __asm__ __volatile__( \
           "ldr q"#VECREG1NO", [%0, #0] ; " \
           "ldr q"#VECREG2NO", [%0, #16] ; " \
           "ldr q"#VECREG3NO", [%0, #32] ; " \
           INSN " ; " \
           "str q"#VECREG1NO", [%0, #48] ; " \
           "str q"#VECREG2NO", [%0, #64] ; " \
           "str q"#VECREG3NO", [%0, #80] ; " \
           : : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
             "v16", "v17", "v18" \
        ); \
        printf(INSN " "); \
        showV128(&block[0]); printf(" "); \
        showV128(&block[1]); printf(" "); \
        showV128(&block[2]); printf(" "); \
        showV128(&block[3]); printf(" "); \
        showV128(&block[4]); printf(" "); \
        showV128(&block[5]); printf("\n"); \
     } \
  }
1128
1129GEN_THREEVEC_TEST(add_d_d_d, "add d21, d22, d23", 21, 22, 23)
1130GEN_THREEVEC_TEST(sub_d_d_d, "sub d21, d22, d23", 21, 22, 23)
1131
1132/* overkill -- don't need two vecs, only one */
1133GEN_TWOVEC_TEST(fmov_scalar_imm_01, "fmov d22, #0.125", 22, 23)
1134GEN_TWOVEC_TEST(fmov_scalar_imm_02, "fmov d22, #-4.0", 22, 23)
1135GEN_TWOVEC_TEST(fmov_scalar_imm_03, "fmov d22, #1.0", 22, 23)
1136GEN_TWOVEC_TEST(fmov_scalar_imm_04, "fmov s22, #0.125", 22, 23)
1137GEN_TWOVEC_TEST(fmov_scalar_imm_05, "fmov s22, #-4.0", 22, 23)
1138GEN_TWOVEC_TEST(fmov_scalar_imm_06, "fmov s22, #-1.0", 22, 23)
1139
1140GEN_ONEINT_ONEVEC_TEST(fmov_gen_01, "fmov s7, w15", 15, 7)
1141GEN_ONEINT_ONEVEC_TEST(fmov_gen_02, "fmov d7, x15", 15, 7)
1142GEN_ONEINT_ONEVEC_TEST(fmov_gen_03, "fmov v7.d[1], x15", 15, 7)
1143GEN_ONEINT_ONEVEC_TEST(fmov_gen_04, "fmov w15, s7", 15, 7)
1144GEN_ONEINT_ONEVEC_TEST(fmov_gen_05, "fmov x15, d7", 15, 7)
1145GEN_ONEINT_ONEVEC_TEST(fmov_gen_06, "fmov x15, v7.d[1]", 15, 7)
1146
1147GEN_TWOVEC_TEST(movi_vector_imm_01, "fmov d22, #0.125", 22, 23)
1148GEN_TWOVEC_TEST(movi_vector_imm_02, "fmov d22, #-4.0", 22, 23)
1149GEN_TWOVEC_TEST(movi_vector_imm_03, "fmov d22, #1.0", 22, 23)
1150GEN_TWOVEC_TEST(movi_vector_imm_04, "fmov v22.2d, #0.125", 22, 23)
1151GEN_TWOVEC_TEST(movi_vector_imm_05, "fmov v22.2d, #-4.0", 22, 23)
1152GEN_TWOVEC_TEST(movi_vector_imm_06, "fmov v22.2d, #1.0", 22, 23)
1153
1154GEN_ONEINT_ONEVEC_TEST(sucvtf_01, "scvtf s7, w15", 15, 7)
1155GEN_ONEINT_ONEVEC_TEST(sucvtf_02, "scvtf d7, w15", 15, 7)
1156GEN_ONEINT_ONEVEC_TEST(sucvtf_03, "scvtf s7, x15", 15, 7)
1157GEN_ONEINT_ONEVEC_TEST(sucvtf_04, "scvtf d7, x15", 15, 7)
1158GEN_ONEINT_ONEVEC_TEST(sucvtf_05, "ucvtf s7, w15", 15, 7)
1159GEN_ONEINT_ONEVEC_TEST(sucvtf_06, "ucvtf d7, w15", 15, 7)
1160GEN_ONEINT_ONEVEC_TEST(sucvtf_07, "ucvtf s7, x15", 15, 7)
1161GEN_ONEINT_ONEVEC_TEST(sucvtf_08, "ucvtf d7, x15", 15, 7)
1162
1163GEN_THREEVEC_TEST(fadd_d, "fadd d2, d11, d29", 2, 11, 29)
1164GEN_THREEVEC_TEST(fadd_s, "fadd s2, s11, s29", 2, 11, 29)
1165GEN_THREEVEC_TEST(fsub_d, "fsub d2, d11, d29", 2, 11, 29)
1166GEN_THREEVEC_TEST(fsub_s, "fsub s2, s11, s29", 2, 11, 29)
1167GEN_THREEVEC_TEST(fmul_d, "fmul d2, d11, d29", 2, 11, 29)
1168GEN_THREEVEC_TEST(fmul_s, "fmul s2, s11, s29", 2, 11, 29)
1169GEN_THREEVEC_TEST(fdiv_d, "fdiv d2, d11, d29", 2, 11, 29)
1170GEN_THREEVEC_TEST(fdiv_s, "fdiv s2, s11, s29", 2, 11, 29)
1171GEN_THREEVEC_TEST(fnmul_d, "fnmul d2, d11, d29", 2, 11, 29)
1172GEN_THREEVEC_TEST(fnmul_s, "fnmul s2, s11, s29", 2, 11, 29)
1173
1174GEN_THREEVEC_TEST(fabd_d, "fabd d2, d11, d29", 2, 11, 29)
1175GEN_THREEVEC_TEST(fabd_s, "fabd s2, s11, s29", 2, 11, 29)
1176
1177GEN_TWOVEC_TEST(fmov_d, "fmov d22, d23", 22, 23)
1178GEN_TWOVEC_TEST(fmov_s, "fmov s22, s23", 22, 23)
1179GEN_TWOVEC_TEST(fabs_d, "fabs d22, d23", 22, 23)
1180GEN_TWOVEC_TEST(fabs_s, "fabs s22, s23", 22, 23)
1181GEN_TWOVEC_TEST(fneg_d, "fneg d22, d23", 22, 23)
1182GEN_TWOVEC_TEST(fneg_s, "fneg s22, s23", 22, 23)
1183GEN_TWOVEC_TEST(fsqrt_d, "fsqrt d22, d23", 22, 23)
1184GEN_TWOVEC_TEST(fsqrt_s, "fsqrt s22, s23", 22, 23)
1185
1186GEN_UNARY_TEST(fneg, 2d, 2d)
1187GEN_UNARY_TEST(fneg, 4s, 4s)
1188GEN_UNARY_TEST(fneg, 2s, 2s)
1189GEN_UNARY_TEST(fabs, 2d, 2d)
1190GEN_UNARY_TEST(fabs, 4s, 4s)
1191GEN_UNARY_TEST(fabs, 2s, 2s)
1192
sewardj1092f192014-04-03 23:00:45 +00001193GEN_BINARY_TEST(fcmeq, 2d)
1194GEN_BINARY_TEST(fcmeq, 4s)
1195GEN_BINARY_TEST(fcmeq, 2s)
1196GEN_BINARY_TEST(fcmge, 2d)
1197GEN_BINARY_TEST(fcmge, 4s)
1198GEN_BINARY_TEST(fcmge, 2s)
1199GEN_BINARY_TEST(fcmgt, 2d)
1200GEN_BINARY_TEST(fcmgt, 4s)
1201GEN_BINARY_TEST(fcmgt, 2s)
1202GEN_BINARY_TEST(facge, 2d)
1203GEN_BINARY_TEST(facge, 4s)
1204GEN_BINARY_TEST(facge, 2s)
1205GEN_BINARY_TEST(facgt, 2d)
1206GEN_BINARY_TEST(facgt, 4s)
1207GEN_BINARY_TEST(facgt, 2s)
1208
1209// Uses v15 as the first table entry
1210GEN_THREEVEC_TEST(
1211 tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
1212// and v15 ^ v21 as the second table entry
1213GEN_THREEVEC_TEST(
1214 tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
1215 "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
1216// and v15 ^ v23 as the third table entry
1217GEN_THREEVEC_TEST(
1218 tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
1219 "eor v17.16b, v15.16b, v23.16b ; "
1220 "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
1221 21, 15, 23)
1222// and v21 ^ v23 as the fourth table entry
1223GEN_THREEVEC_TEST(
1224 tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
1225 "eor v17.16b, v15.16b, v23.16b ; "
1226 "eor v18.16b, v21.16b, v23.16b ; "
1227 "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
1228 21, 15, 23)
1229
1230// Same register scheme for tbl .8b, tbx .16b, tbx.8b
1231GEN_THREEVEC_TEST(
1232 tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
1233GEN_THREEVEC_TEST(
1234 tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
1235 "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
1236GEN_THREEVEC_TEST(
1237 tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
1238 "eor v17.16b, v15.16b, v23.16b ; "
1239 "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
1240 21, 15, 23)
1241GEN_THREEVEC_TEST(
1242 tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
1243 "eor v17.16b, v15.16b, v23.16b ; "
1244 "eor v18.16b, v21.16b, v23.16b ; "
1245 "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
1246 21, 15, 23)
1247
1248GEN_THREEVEC_TEST(
1249 tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
1250GEN_THREEVEC_TEST(
1251 tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
1252 "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
1253GEN_THREEVEC_TEST(
1254 tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
1255 "eor v17.16b, v15.16b, v23.16b ; "
1256 "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
1257 21, 15, 23)
1258GEN_THREEVEC_TEST(
1259 tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
1260 "eor v17.16b, v15.16b, v23.16b ; "
1261 "eor v18.16b, v21.16b, v23.16b ; "
1262 "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
1263 21, 15, 23)
1264
1265// Same register scheme for tbx .8b, tbx .16b, tbx.8b
1266GEN_THREEVEC_TEST(
1267 tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
1268GEN_THREEVEC_TEST(
1269 tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
1270 "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
1271GEN_THREEVEC_TEST(
1272 tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
1273 "eor v17.16b, v15.16b, v23.16b ; "
1274 "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
1275 21, 15, 23)
1276GEN_THREEVEC_TEST(
1277 tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
1278 "eor v17.16b, v15.16b, v23.16b ; "
1279 "eor v18.16b, v21.16b, v23.16b ; "
1280 "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
1281 21, 15, 23)
1282
sewardj7c58a422014-04-27 12:03:37 +00001283GEN_TWOVEC_TEST(cmge_zero_2d, "cmge v5.2d, v22.2d, #0", 5, 22)
1284GEN_TWOVEC_TEST(cmge_zero_4s, "cmge v5.4s, v22.4s, #0", 5, 22)
1285GEN_TWOVEC_TEST(cmge_zero_2s, "cmge v5.2s, v22.2s, #0", 5, 22)
1286GEN_TWOVEC_TEST(cmge_zero_8h, "cmge v5.8h, v22.8h, #0", 5, 22)
1287GEN_TWOVEC_TEST(cmge_zero_4h, "cmge v5.4h, v22.4h, #0", 5, 22)
1288GEN_TWOVEC_TEST(cmge_zero_16b, "cmge v5.16b, v22.16b, #0", 5, 22)
1289GEN_TWOVEC_TEST(cmge_zero_8b, "cmge v5.8b, v22.8b, #0", 5, 22)
1290
1291GEN_TWOVEC_TEST(cmgt_zero_2d, "cmgt v5.2d, v22.2d, #0", 5, 22)
1292GEN_TWOVEC_TEST(cmgt_zero_4s, "cmgt v5.4s, v22.4s, #0", 5, 22)
1293GEN_TWOVEC_TEST(cmgt_zero_2s, "cmgt v5.2s, v22.2s, #0", 5, 22)
1294GEN_TWOVEC_TEST(cmgt_zero_8h, "cmgt v5.8h, v22.8h, #0", 5, 22)
1295GEN_TWOVEC_TEST(cmgt_zero_4h, "cmgt v5.4h, v22.4h, #0", 5, 22)
1296GEN_TWOVEC_TEST(cmgt_zero_16b, "cmgt v5.16b, v22.16b, #0", 5, 22)
1297GEN_TWOVEC_TEST(cmgt_zero_8b, "cmgt v5.8b, v22.8b, #0", 5, 22)
1298
1299GEN_TWOVEC_TEST(cmle_zero_2d, "cmle v5.2d, v22.2d, #0", 5, 22)
1300GEN_TWOVEC_TEST(cmle_zero_4s, "cmle v5.4s, v22.4s, #0", 5, 22)
1301GEN_TWOVEC_TEST(cmle_zero_2s, "cmle v5.2s, v22.2s, #0", 5, 22)
1302GEN_TWOVEC_TEST(cmle_zero_8h, "cmle v5.8h, v22.8h, #0", 5, 22)
1303GEN_TWOVEC_TEST(cmle_zero_4h, "cmle v5.4h, v22.4h, #0", 5, 22)
1304GEN_TWOVEC_TEST(cmle_zero_16b, "cmle v5.16b, v22.16b, #0", 5, 22)
1305GEN_TWOVEC_TEST(cmle_zero_8b, "cmle v5.8b, v22.8b, #0", 5, 22)
1306
1307GEN_TWOVEC_TEST(cmeq_zero_2d, "cmeq v5.2d, v22.2d, #0", 5, 22)
1308GEN_TWOVEC_TEST(cmeq_zero_4s, "cmeq v5.4s, v22.4s, #0", 5, 22)
1309GEN_TWOVEC_TEST(cmeq_zero_2s, "cmeq v5.2s, v22.2s, #0", 5, 22)
1310GEN_TWOVEC_TEST(cmeq_zero_8h, "cmeq v5.8h, v22.8h, #0", 5, 22)
1311GEN_TWOVEC_TEST(cmeq_zero_4h, "cmeq v5.4h, v22.4h, #0", 5, 22)
1312GEN_TWOVEC_TEST(cmeq_zero_16b, "cmeq v5.16b, v22.16b, #0", 5, 22)
1313GEN_TWOVEC_TEST(cmeq_zero_8b, "cmeq v5.8b, v22.8b, #0", 5, 22)
1314
1315GEN_TWOVEC_TEST(cmlt_zero_2d, "cmlt v5.2d, v22.2d, #0", 5, 22)
1316GEN_TWOVEC_TEST(cmlt_zero_4s, "cmlt v5.4s, v22.4s, #0", 5, 22)
1317GEN_TWOVEC_TEST(cmlt_zero_2s, "cmlt v5.2s, v22.2s, #0", 5, 22)
1318GEN_TWOVEC_TEST(cmlt_zero_8h, "cmlt v5.8h, v22.8h, #0", 5, 22)
1319GEN_TWOVEC_TEST(cmlt_zero_4h, "cmlt v5.4h, v22.4h, #0", 5, 22)
1320GEN_TWOVEC_TEST(cmlt_zero_16b, "cmlt v5.16b, v22.16b, #0", 5, 22)
1321GEN_TWOVEC_TEST(cmlt_zero_8b, "cmlt v5.8b, v22.8b, #0", 5, 22)
1322
sewardj1092f192014-04-03 23:00:45 +00001323
1324
sewardjd89499b2014-03-02 12:48:34 +00001325/* IMPORTANT: keep the tests in here in the same order as the
1326 implementations are in guest_arm64_toIR.c. */
sewardj94f53cb2014-03-01 11:27:18 +00001327int main ( void )
1328{
1329 assert(sizeof(V128) == 16);
1330
sewardjd89499b2014-03-02 12:48:34 +00001331 printf("BEGIN: FMOV (general)\n");
1332 test_fmov_gen_01();
1333 test_fmov_gen_02();
1334 test_fmov_gen_03();
1335 test_fmov_gen_04();
1336 test_fmov_gen_05();
1337 test_fmov_gen_06();
1338 printf("END: FMOV (general)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001339
sewardjd89499b2014-03-02 12:48:34 +00001340 printf("BEGIN: FMOV (scalar, immediate)\n");
1341 test_fmov_scalar_imm_01();
1342 test_fmov_scalar_imm_02();
1343 test_fmov_scalar_imm_03();
1344 test_fmov_scalar_imm_04();
1345 test_fmov_scalar_imm_05();
1346 test_fmov_scalar_imm_06();
1347 printf("END: FMOV (scalar, immediate)\n\n");
1348
1349 printf("BEGIN: {FMOV,MOVI} (vector, immediate)\n");
1350 test_movi_vector_imm_01();
1351 test_movi_vector_imm_02();
1352 test_movi_vector_imm_03();
1353 test_movi_vector_imm_04();
1354 test_movi_vector_imm_05();
1355 test_movi_vector_imm_06();
1356 printf("END: {FMOV,MOVI} (vector, immediate)\n\n");
1357
1358 printf("BEGIN: {S,U}CVTF (scalar, integer)\n");
1359 test_sucvtf_01();
1360 test_sucvtf_02();
1361 test_sucvtf_03();
1362 test_sucvtf_04();
sewardj7c58a422014-04-27 12:03:37 +00001363 test_sucvtf_05();
sewardjd89499b2014-03-02 12:48:34 +00001364 test_sucvtf_06();
1365 test_sucvtf_07();
1366 test_sucvtf_08();
sewardj7c58a422014-04-27 12:03:37 +00001367 printf("END: {S,U}CVTF (scalar, integer)\n\n");
sewardjd89499b2014-03-02 12:48:34 +00001368
1369 printf("BEGIN: F{ADD,SUB,MUL,DIV,NMUL} (scalar)\n");
1370 test_fadd_d();
1371 test_fadd_s();
1372 test_fsub_d();
1373 test_fsub_s();
1374 test_fmul_d();
1375 test_fmul_s();
1376 test_fdiv_d();
1377 test_fdiv_s();
1378 test_fnmul_d();
1379 test_fnmul_s();
1380 printf("END: F{ADD,SUB,MUL,DIV,NMUL} (scalar)\n\n");
1381
1382 printf("BEGIN: F{MOV,ABS,NEG,SQRT} D/D or S/S\n");
1383 test_fmov_d();
1384 test_fmov_s();
1385 test_fabs_d();
1386 test_fabs_s();
1387 test_fneg_d();
1388 test_fneg_s();
1389 test_fsqrt_d();
1390 test_fsqrt_s();
1391 printf("END: F{MOV,ABS,NEG,SQRT} D/D or S/S\n\n");
1392
1393 printf("BEGIN: F{ABS,NEG} (vector)\n");
1394 test_fabs_2d_2d();
sewardj7c58a422014-04-27 12:03:37 +00001395 test_fabs_4s_4s();
1396 test_fabs_2s_2s();
sewardjd89499b2014-03-02 12:48:34 +00001397 test_fneg_2d_2d();
sewardj7c58a422014-04-27 12:03:37 +00001398 test_fneg_4s_4s();
1399 test_fneg_2s_2s();
1400 printf("END: F{ABS,NEG} (vector)\n\n");
sewardjd89499b2014-03-02 12:48:34 +00001401
1402 printf("FCMP,FCMPE MISSING\n\n");
1403
1404 printf("F{N}M{ADD,SUB} MISSING\n\n");
1405
1406 printf("FCVT{N,P,M,Z}{S,U} (scalar, integer) MISSING\n\n");
1407
1408 printf("FRINT{I,M,P,Z} (scalar) MISSING\n\n");
1409
1410 printf("FCVT (scalar) MISSING\n\n");
1411
1412 printf("BEGIN: FABD (scalar) MISSING\n");
1413 test_fabd_d();
1414 test_fabd_s();
1415 printf("END: FABD (scalar) MISSING\n\n");
1416
1417 printf("{S,U}CVTF (vector, integer) MISSING\n\n");
1418
1419 printf("BEGIN: F{ADD,SUB,MUL,DIV,MLA,MLS,ABD} (vector)\n");
1420 test_fadd_2d();
1421 test_fadd_4s();
1422 test_fadd_2s();
1423 test_fsub_2d();
1424 test_fsub_4s();
1425 test_fsub_2s();
1426 test_fmul_2d();
1427 test_fmul_4s();
1428 test_fmul_2s();
1429 test_fdiv_2d();
1430 test_fdiv_4s();
1431 test_fdiv_2s();
1432 test_fmla_2d();
1433 test_fmla_4s();
1434 test_fmla_2s();
1435 test_fmls_2d();
1436 test_fmls_4s();
1437 test_fmls_2s();
1438 test_fabd_2d();
sewardj7c58a422014-04-27 12:03:37 +00001439 test_fabd_4s();
1440 test_fabd_2s();
1441 printf("END: F{ADD,SUB,MUL,DIV,MLA,MLS,ABD} (vector)\n\n");
sewardjd89499b2014-03-02 12:48:34 +00001442
sewardj1092f192014-04-03 23:00:45 +00001443 printf("BEGIN: FCM{EQ,GE,GT}, FAC{GE,GT} (vector)\n");
1444 test_fcmeq_2d();
1445 test_fcmeq_4s();
1446 test_fcmeq_2s();
1447 test_fcmge_2d();
1448 test_fcmge_4s();
1449 test_fcmge_2s();
1450 test_fcmgt_2d();
1451 test_fcmgt_4s();
1452 test_fcmgt_2s();
1453 test_facge_2d();
1454 test_facge_4s();
1455 test_facge_2s();
1456 test_facgt_2d();
1457 test_facgt_4s();
1458 test_facgt_2s();
1459 printf("END: FCM{EQ,GE,GT}, FAC{GE,GT} (vector)\n");
1460
sewardjd89499b2014-03-02 12:48:34 +00001461 printf("BEGIN: FCVTN (MISSING 16F <- 32F cases)\n");
1462 test_fcvtn_01();
1463 test_fcvtn_02();
1464 printf("END: FCVTN (MISSING 16F <- 32F cases)\n\n");
1465
1466 printf("BEGIN: ADD/SUB (vector)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001467 test_add_2d();
1468 test_add_4s();
1469 test_add_2s();
1470 test_add_8h();
1471 test_add_4h();
sewardj1092f192014-04-03 23:00:45 +00001472 test_add_16b();
1473 test_add_8b();
sewardj94f53cb2014-03-01 11:27:18 +00001474 test_sub_2d();
1475 test_sub_4s();
1476 test_sub_2s();
1477 test_sub_8h();
1478 test_sub_4h();
sewardj1092f192014-04-03 23:00:45 +00001479 test_sub_16b();
1480 test_sub_8b();
1481 printf("END: ADD/SUB (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001482
sewardjd89499b2014-03-02 12:48:34 +00001483 printf("BEGIN: ADD/SUB (scalar)\n");
1484 test_add_d_d_d();
1485 test_sub_d_d_d();
1486 printf("END: ADD/SUB (scalar)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001487
sewardjd89499b2014-03-02 12:48:34 +00001488 printf("BEGIN: MUL/PMUL/MLA/MLS (vector)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001489 test_mul_4s();
1490 test_mul_2s();
1491 test_mul_8h();
1492 test_mul_4h();
sewardj7c58a422014-04-27 12:03:37 +00001493 test_mul_16b();
1494 test_mul_8b();
sewardj94f53cb2014-03-01 11:27:18 +00001495 test_mla_4s();
1496 test_mla_2s();
1497 test_mla_8h();
1498 test_mla_4h();
sewardj7c58a422014-04-27 12:03:37 +00001499 test_mla_16b();
1500 test_mla_8b();
sewardj94f53cb2014-03-01 11:27:18 +00001501 test_mls_4s();
1502 test_mls_2s();
1503 test_mls_8h();
1504 test_mls_4h();
sewardj7c58a422014-04-27 12:03:37 +00001505 test_mls_16b();
1506 test_mls_8b();
1507 printf("END: MUL/PMUL/MLA/MLS (vector) (MISSING PMUL)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001508
sewardjd89499b2014-03-02 12:48:34 +00001509 printf("BEGIN: {S,U}{MIN,MAX} (vector)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001510 test_umax_4s();
1511 test_umax_8h();
1512 test_umax_4h();
1513 test_umax_16b();
1514 test_umax_8b();
1515 test_umin_4s();
1516 test_umin_8h();
1517 test_umin_4h();
1518 test_umin_16b();
1519 test_umin_8b();
1520 test_smax_4s();
1521 test_smax_8h();
1522 test_smax_4h();
1523 test_smax_16b();
1524 test_smax_8b();
1525 test_smin_4s();
1526 test_smin_8h();
1527 test_smin_4h();
1528 test_smin_16b();
1529 test_smin_8b();
sewardjd89499b2014-03-02 12:48:34 +00001530 printf("END: {S,U}{MIN,MAX} (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001531
sewardjd89499b2014-03-02 12:48:34 +00001532 printf("BEGIN: {S,U}{MIN,MAX}V\n");
sewardj94f53cb2014-03-01 11:27:18 +00001533 test_UMINV();
1534 test_UMAXV();
1535 test_SMINV();
1536 test_SMAXV();
sewardjd89499b2014-03-02 12:48:34 +00001537 printf("END: {S,U}{MIN,MAX}V\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001538
sewardjd89499b2014-03-02 12:48:34 +00001539 printf("BEGIN: {AND,BIC,ORR,ORN} (vector)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001540 test_and_16b();
1541 test_and_8b();
1542 test_bic_16b();
1543 test_bic_8b();
1544 test_orr_16b();
1545 test_orr_8b();
1546 test_orn_16b();
1547 test_orn_8b();
sewardjd89499b2014-03-02 12:48:34 +00001548 printf("END: {AND,BIC,ORR,ORN} (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001549
sewardjd89499b2014-03-02 12:48:34 +00001550 printf("BEGIN: CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001551 test_cmeq_2d();
sewardj94f53cb2014-03-01 11:27:18 +00001552 test_cmeq_4s();
1553 test_cmeq_2s();
1554 test_cmeq_8h();
1555 test_cmeq_4h();
1556 test_cmeq_16b();
1557 test_cmeq_8b();
1558 test_cmtst_2d();
1559 test_cmtst_4s();
1560 test_cmtst_2s();
1561 test_cmtst_8h();
1562 test_cmtst_4h();
1563 test_cmtst_16b();
1564 test_cmtst_8b();
1565 test_cmhi_2d();
1566 test_cmhi_4s();
1567 test_cmhi_2s();
1568 test_cmhi_8h();
1569 test_cmhi_4h();
1570 test_cmhi_16b();
1571 test_cmhi_8b();
1572 test_cmgt_2d();
1573 test_cmgt_4s();
1574 test_cmgt_2s();
1575 test_cmgt_8h();
1576 test_cmgt_4h();
1577 test_cmgt_16b();
1578 test_cmgt_8b();
1579 test_cmhs_2d();
1580 test_cmhs_4s();
1581 test_cmhs_2s();
1582 test_cmhs_8h();
1583 test_cmhs_4h();
1584 test_cmhs_16b();
1585 test_cmhs_8b();
1586 test_cmge_2d();
1587 test_cmge_4s();
1588 test_cmge_2s();
1589 test_cmge_8h();
1590 test_cmge_4h();
1591 test_cmge_16b();
1592 test_cmge_8b();
sewardj7c58a422014-04-27 12:03:37 +00001593 test_cmge_zero_2d();
1594 test_cmge_zero_4s();
1595 test_cmge_zero_2s();
1596 test_cmge_zero_8h();
1597 test_cmge_zero_4h();
1598 test_cmge_zero_16b();
1599 test_cmge_zero_8b();
1600 test_cmgt_zero_2d();
1601 test_cmgt_zero_4s();
1602 test_cmgt_zero_2s();
1603 test_cmgt_zero_8h();
1604 test_cmgt_zero_4h();
1605 test_cmgt_zero_16b();
1606 test_cmgt_zero_8b();
1607 test_cmle_zero_2d();
1608 test_cmle_zero_4s();
1609 test_cmle_zero_2s();
1610 test_cmle_zero_8h();
1611 test_cmle_zero_4h();
1612 test_cmle_zero_16b();
1613 test_cmle_zero_8b();
1614 test_cmeq_zero_2d();
1615 test_cmeq_zero_4s();
1616 test_cmeq_zero_2s();
1617 test_cmeq_zero_8h();
1618 test_cmeq_zero_4h();
1619 test_cmeq_zero_16b();
1620 test_cmeq_zero_8b();
1621 test_cmlt_zero_2d();
1622 test_cmlt_zero_4s();
1623 test_cmlt_zero_2s();
1624 test_cmlt_zero_8h();
1625 test_cmlt_zero_4h();
1626 test_cmlt_zero_16b();
1627 test_cmlt_zero_8b();
1628 printf("END: CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001629
sewardjd89499b2014-03-02 12:48:34 +00001630 printf("BEGIN: {EOR,BSL,BIT,BIF} (vector)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001631 test_eor_16b();
1632 test_eor_8b();
1633 test_bsl_16b();
1634 test_bsl_8b();
1635 test_bit_16b();
1636 test_bit_8b();
1637 test_bif_16b();
1638 test_bif_8b();
sewardjd89499b2014-03-02 12:48:34 +00001639 printf("END: {EOR,BSL,BIT,BIF} (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001640
sewardjd89499b2014-03-02 12:48:34 +00001641 printf("BEGIN: {USHR,SSHR,SHL} (vector, immediate)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001642 test_ushr_2d_2d_1();
1643 test_ushr_2d_2d_13();
1644 test_ushr_2d_2d_63();
1645 test_sshr_2d_2d_1();
1646 test_sshr_2d_2d_13();
1647 test_sshr_2d_2d_63();
sewardjd89499b2014-03-02 12:48:34 +00001648 test_shl_2d_2d_1();
1649 test_shl_2d_2d_13();
1650 test_shl_2d_2d_63();
1651
sewardj94f53cb2014-03-01 11:27:18 +00001652 test_ushr_4s_4s_1();
1653 test_ushr_4s_4s_13();
1654 test_ushr_4s_4s_31();
1655 test_sshr_4s_4s_1();
1656 test_sshr_4s_4s_13();
1657 test_sshr_4s_4s_31();
sewardjd89499b2014-03-02 12:48:34 +00001658 test_shl_4s_4s_1();
1659 test_shl_4s_4s_13();
1660 test_shl_4s_4s_31();
sewardj7c58a422014-04-27 12:03:37 +00001661
sewardj94f53cb2014-03-01 11:27:18 +00001662 test_ushr_2s_2s_1();
1663 test_ushr_2s_2s_13();
1664 test_ushr_2s_2s_31();
1665 test_sshr_2s_2s_1();
1666 test_sshr_2s_2s_13();
1667 test_sshr_2s_2s_31();
sewardjd89499b2014-03-02 12:48:34 +00001668 test_shl_2s_2s_1();
1669 test_shl_2s_2s_13();
1670 test_shl_2s_2s_31();
sewardj7c58a422014-04-27 12:03:37 +00001671
sewardj94f53cb2014-03-01 11:27:18 +00001672 test_ushr_8h_8h_1();
1673 test_ushr_8h_8h_13();
1674 test_ushr_8h_8h_15();
1675 test_sshr_8h_8h_1();
1676 test_sshr_8h_8h_13();
1677 test_sshr_8h_8h_15();
sewardjd89499b2014-03-02 12:48:34 +00001678 test_shl_8h_8h_1();
1679 test_shl_8h_8h_13();
1680 test_shl_8h_8h_15();
1681
sewardj94f53cb2014-03-01 11:27:18 +00001682 test_ushr_4h_4h_1();
1683 test_ushr_4h_4h_13();
1684 test_ushr_4h_4h_15();
1685 test_sshr_4h_4h_1();
1686 test_sshr_4h_4h_13();
1687 test_sshr_4h_4h_15();
sewardjd89499b2014-03-02 12:48:34 +00001688 test_shl_4h_4h_1();
1689 test_shl_4h_4h_13();
1690 test_shl_4h_4h_15();
1691
sewardj94f53cb2014-03-01 11:27:18 +00001692 test_ushr_16b_16b_1();
1693 test_ushr_16b_16b_7();
1694 test_sshr_16b_16b_1();
1695 test_sshr_16b_16b_7();
sewardjd89499b2014-03-02 12:48:34 +00001696 test_shl_16b_16b_1();
1697 test_shl_16b_16b_7();
1698
sewardj94f53cb2014-03-01 11:27:18 +00001699 test_ushr_8b_8b_1();
1700 test_ushr_8b_8b_7();
1701 test_sshr_8b_8b_1();
1702 test_sshr_8b_8b_7();
sewardjd89499b2014-03-02 12:48:34 +00001703 test_shl_8b_8b_1();
1704 test_shl_8b_8b_7();
sewardj7c58a422014-04-27 12:03:37 +00001705 printf("END: {USHR,SSHR,SHL} (vector, immediate)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001706
sewardjd89499b2014-03-02 12:48:34 +00001707 printf("BEGIN: {U,S}SHLL{,2}\n");
sewardj94f53cb2014-03-01 11:27:18 +00001708 test_ushll_2d_2s_0();
1709 test_ushll_2d_2s_15();
1710 test_ushll_2d_2s_31();
1711 test_ushll2_2d_4s_0();
1712 test_ushll2_2d_4s_15();
1713 test_ushll2_2d_4s_31();
1714 test_sshll_2d_2s_0();
1715 test_sshll_2d_2s_15();
1716 test_sshll_2d_2s_31();
1717 test_sshll2_2d_4s_0();
1718 test_sshll2_2d_4s_15();
1719 test_sshll2_2d_4s_31();
sewardjd89499b2014-03-02 12:48:34 +00001720 printf("END: {U,S}SHLL{,2} (MISSING h_b and s_h versions)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001721
sewardjd89499b2014-03-02 12:48:34 +00001722 printf("BEGIN: XTN{,2}\n");
sewardj94f53cb2014-03-01 11:27:18 +00001723 test_xtn_2s_2d();
1724 test_xtn2_4s_2d();
1725 test_xtn_4h_4s();
1726 test_xtn2_8h_4s();
sewardj01213b32014-03-07 22:54:19 +00001727 test_xtn_8b_8h();
1728 test_xtn2_16b_8h();
1729 printf("END: XTN{,2}\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001730
sewardjd89499b2014-03-02 12:48:34 +00001731 printf("DUP (element, vector) COMPLETELY MISSING\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001732
sewardjd89499b2014-03-02 12:48:34 +00001733 printf("DUP (general, vector) COMPLETELY MISSING\n\n");
1734
1735 printf("BEGIN: {S,U}MOV\n");
1736 test_umov_01();
1737 test_umov_02();
1738 test_umov_03();
1739 test_umov_04();
1740 test_umov_05();
1741 test_umov_06();
1742 test_umov_07();
1743 test_umov_08();
1744 test_smov_01();
1745 test_smov_02();
1746 test_smov_03();
1747 test_smov_04();
1748 test_smov_05();
1749 test_smov_06();
1750 test_smov_07();
1751 test_smov_08();
1752 test_smov_09();
1753 test_smov_10();
1754 printf("END: {S,U}MOV\n\n");
1755
1756 printf("BEGIN: INS (general)\n");
sewardj94f53cb2014-03-01 11:27:18 +00001757 test_INS_general();
sewardjd89499b2014-03-02 12:48:34 +00001758 printf("END: INS (general)\n\n");
1759
1760 printf("BEGIN: NEG (vector)\n");
1761 test_neg_2d_2d();
1762 test_neg_4s_4s();
1763 test_neg_2s_2s();
1764 test_neg_8h_8h();
1765 test_neg_4h_4h();
sewardj7c58a422014-04-27 12:03:37 +00001766 test_neg_16b_16b();
1767 test_neg_8b_8b();
sewardj8edd6252014-04-28 22:12:39 +00001768 printf("END: NEG (vector)\n\n");
sewardj94f53cb2014-03-01 11:27:18 +00001769
sewardj1092f192014-04-03 23:00:45 +00001770 printf("BEGIN: TBL, TBX\n");
1771 test_tbl_16b_1reg();
1772 test_tbl_16b_2reg();
1773 test_tbl_16b_3reg();
1774 test_tbl_16b_4reg();
1775 test_tbl_8b_1reg();
1776 test_tbl_8b_2reg();
1777 test_tbl_8b_3reg();
1778 test_tbl_8b_4reg();
1779 test_tbx_16b_1reg();
1780 test_tbx_16b_2reg();
1781 test_tbx_16b_3reg();
1782 test_tbx_16b_4reg();
1783 test_tbx_8b_1reg();
1784 test_tbx_8b_2reg();
1785 test_tbx_8b_3reg();
1786 test_tbx_8b_4reg();
1787 printf("END: TBL, TBX\n");
1788
sewardj94f53cb2014-03-01 11:27:18 +00001789 return 0;
1790}
sewardj8edd6252014-04-28 22:12:39 +00001791
1792/*
1793 abs d
1794 abs 2d,4s,2s,8h,4h,16b,8b
1795 add d
1796 add 2d,4s,2s,8h,4h,16b,8b
1797 addhn 2s.2d.2d, 4s.2d.2d, h_from_s and b_from_h (add and get high half)
1798 addp d (add pairs, across)
1799 addp 2d,4s,2s,8h,4h,16b,8b
   addv 4s,8h,4h,16b,8b (reduce across vector)
1801 aesd 16b (aes single round decryption)
1802 aese 16b (aes single round encryption)
1803 aesimc 16b (aes inverse mix columns)
1804 aesmc 16b (aes mix columns)
1805 and 16b,8b
1806
1807 bic 4s,2s,8h,4h (vector, imm)
1808 also movi, mvni, orr
1809
1810 bic 16b,8b (vector,reg) (bit clear)
1811 bif 16b,8b (vector) (bit insert if false)
1812 bit 16b,8b (vector) (bit insert if true)
1813 bsl 16b,8b (vector) (bit select)
1814
1815 cls 4s,2s,8h,4h,16b,8b (count leading sign bits)
1816 clz 4s,2s,8h,4h,16b,8b (count leading zero bits)
1817
1818 cmeq d
1819 cmeq 2d,4s,2s,8h,4h,16b,8b
1820 cmeq_z d
1821 cmeq_z 2d,4s,2s,8h,4h,16b,8b
1822
1823 cmge d
1824 cmge 2d,4s,2s,8h,4h,16b,8b
1825 cmge_z d
1826 cmge_z 2d,4s,2s,8h,4h,16b,8b
1827
1828 cmgt d
1829 cmgt 2d,4s,2s,8h,4h,16b,8b
1830 cmgt_z d
1831 cmgt_z 2d,4s,2s,8h,4h,16b,8b
1832
1833 cmhi d
1834 cmhi 2d,4s,2s,8h,4h,16b,8b
1835
1836 cmhs d
1837 cmhs 2d,4s,2s,8h,4h,16b,8b
1838
1839 cmle_z d
1840 cmle_z 2d,4s,2s,8h,4h,16b,8b
1841
1842 cmlt_z d
1843 cmlt_z 2d,4s,2s,8h,4h,16b,8b
1844
1845 cmtst d
1846 cmtst 2d,4s,2s,8h,4h,16b,8b
1847
1848 cnt 16b,8b (population count per byte)
1849
1850 dup d,s,h,b (vec elem to scalar)
1851 dup 2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
1852 dup 2d,4s,2s,8h,4h,16b,8b (general reg to vector)
1853
1854 eor 16b,8b (vector)
1855 ext 16b,8b,#imm4 (concat 2 vectors, then slice)
1856
1857 fabd d,s
1858 fabd 2d,4s,2s
1859
1860 fabs d,s
1861 fabs 2d,4s,2s
1862
1863 facge s,d (floating abs compare GE)
1864 facge 2d,4s,2s
1865
   facgt s,d (floating abs compare GT)
1867 facgt 2d,4s,2s
1868
1869 fadd d,s
1870 fadd 2d,4s,2s
1871
1872 faddp d,s (floating add pair)
1873 faddp 2d,4s,2s
1874
1875 fccmp d,s (floating point conditional quiet compare)
1876 fccmpe d,s (floating point conditional signaling compare)
1877
1878 fcmeq d,s
1879 fcmeq 2d,4s,2s
1880 fcmeq_z d,s
1881 fcmeq_z 2d,4s,2s
1882
1883 fcmge d,s
1884 fcmge 2d,4s,2s
1885 fcmge_z d,s
1886 fcmge_z 2d,4s,2s
1887
1888 fcmgt d,s
1889 fcmgt 2d,4s,2s
1890 fcmgt_z d,s
1891 fcmgt_z 2d,4s,2s
1892
1893 fcmle_z d,s
1894 fcmle_z 2d,4s,2s
1895
1896 fcmlt_z d,s
1897 fcmlt_z 2d,4s,2s
1898
1899 fcmp d,s (floating point quiet, set flags)
1900 fcmp_z d,s
1901 fcmpe d,s (floating point signaling, set flags)
1902 fcmpe_z d,s
1903
1904 fcsel d,s (fp cond select)
1905
1906 fcvt s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
1907
1908 fcvtas d,s (fcvt to signed int, nearest, ties away)
1909 fcvtas 2d,4s,2s
1910 fcvtas w_s,x_s,w_d,x_d
1911
1912 fcvtau d,s (fcvt to unsigned int, nearest, ties away)
1913 fcvtau 2d,4s,2s
1914 fcvtau w_s,x_s,w_d,x_d
1915
1916 fcvtl{2} 4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
1917
1918 fcvtms d,s (fcvt to signed int, minus inf)
1919 fcvtms 2d,4s,2s
1920 fcvtms w_s,x_s,w_d,x_d
1921
1922 fcvtmu d,s (fcvt to unsigned int, minus inf)
1923 fcvtmu 2d,4s,2s
1924 fcvtmu w_s,x_s,w_d,x_d
1925
1926 fcvtn{2} 4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
1927
1928 fcvtns d,s (fcvt to signed int, nearest)
1929 fcvtns 2d,4s,2s
1930 fcvtns w_s,x_s,w_d,x_d
1931
1932 fcvtnu d,s (fcvt to unsigned int, nearest)
1933 fcvtnu 2d,4s,2s
1934 fcvtnu w_s,x_s,w_d,x_d
1935
1936 fcvtps d,s (fcvt to signed int, plus inf)
1937 fcvtps 2d,4s,2s
1938 fcvtps w_s,x_s,w_d,x_d
1939
1940 fcvtpu d,s (fcvt to unsigned int, plus inf)
1941 fcvtpu 2d,4s,2s
1942 fcvtpu w_s,x_s,w_d,x_d
1943
1944 fcvtxn s_d (fcvt to lower prec narrow, rounding to odd)
1945 fcvtxn 2s_2d,4s_2d
1946
1947 fcvtzs s,d (fcvt to signed fixedpt, to zero) (w/ #fbits)
1948 fcvtzs 2d,4s,2s
1949
1950 fcvtzs s,d (fcvt to signed integer, to zero)
1951 fcvtzs 2d,4s,2s
1952
1953 fcvtzs w_s,x_s,w_d,x_d (fcvt to signed fixedpt, to zero) (w/ #fbits)
1954
1955 fcvtzs w_s,x_s,w_d,x_d (fcvt to signed integer, to zero)
1956
1957 fcvtzu s,d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
1958 fcvtzu 2d,4s,2s
1959
1960 fcvtzu s,d (fcvt to unsigned integer, to zero)
1961 fcvtzu 2d,4s,2s
1962
1963 fcvtzu w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
1964
1965 fcvtzu w_s,x_s,w_d,x_d (fcvt to unsigned integer, to zero)
1966
1967 fdiv d,s
1968 fdiv 2d,4s,2s
1969
1970 fmadd d,s
1971 fnmadd d,s
1972 fnmsub d,s
1973 fnmul d,s
1974
1975 fmax d,s
1976 fmin d,s
1977
1978 fmax 2d,4s,2s
1979 fmin 2d,4s,2s
1980
1981 fmaxnm d,s ("max number")
1982 fminnm d,s
1983
1984 fmaxnm 2d,4s,2s
1985 fminnm 2d,4s,2s
1986
1987 fmaxnmp d_2d,s_2s ("max number pairwise")
1988 fminnmp d_2d,s_2s
1989
1990 fmaxnmp 2d,4s,2s
1991 fminnmp 2d,4s,2s
1992
1993 fmaxnmv s_4s (maxnum across vector)
1994 fminnmv s_4s
1995
1996 fmaxp d_2d,s_2s (max of a pair)
1997 fminp d_2d,s_2s (min of a pair)
1998
1999 fmaxp 2d,4s,2s (max pairwise)
2000 fminp 2d,4s,2s
2001
2002 fmaxv s_4s (max across vector)
2003 fminv s_4s
2004
2005 fmla d_d_d[],s_s_s[] (by element)
2006 fmla 2d_2d_d[],4s_4s_s[],2s_2s_s[]
2007
2008 fmla 2d,4s,2s
2009
2010 fmls d_d_d[],s_s_s[] (by element)
2011 fmls 2d_2d_d[],4s_4s_s[],2s_2s_s[]
2012
2013 fmls 2d,4s,2s
2014
2015 fmov 2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
2016
2017 fmov d_d,s_s
2018
2019 fmov s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
2020
2021 fmov d,s #imm
2022
2023 fmsub d,s
2024
2025 fmul d_d_d[],s_s_s[]
2026 fmul 2d_2d_d[],4s_4s_s[],2s_2s_s[]
2027
2028 fmul 2d,4s,2s
2029 fmul d,s
2030
2031 fmulx d_d_d[],s_s_s[]
2032 fmulx 2d_2d_d[],4s_4s_s[],2s_2s_s[]
2033
2034 fmulx d,s
2035 fmulx 2d,4s,2s
2036
2037 fneg d,s
2038 fneg 2d,4s,2s
2039
2040 frecpe d,s (recip estimate)
2041 frecpe 2d,4s,2s
2042
2043 frecps d,s (recip step)
2044 frecps 2d,4s,2s
2045
2046 frecpx d,s (recip exponent)
2047
2048 frinta 2d,4s,2s (round to integral, nearest away)
2049 frinta d,s
2050
2051 frinti 2d,4s,2s (round to integral, per FPCR)
2052 frinti d,s
2053
2054 frintm 2d,4s,2s (round to integral, minus inf)
2055 frintm d,s
2056
2057 frintn 2d,4s,2s (round to integral, nearest, to even)
2058 frintn d,s
2059
2060 frintp 2d,4s,2s (round to integral, plus inf)
2061 frintp d,s
2062
2063 frintx 2d,4s,2s (round to integral exact, per FPCR)
2064 frintx d,s
2065
2066 frintz 2d,4s,2s (round to integral, zero)
2067 frintz d,s
2068
2069 frsqrte d,s (est)
2070 frsqrte 2d,4s,2s
2071
2072 frsqrts d,s (step)
2073 frsqrts 2d,4s,2s
2074
2075 fsqrt d,s
2076 fsqrt 2d,4s,2s
2077
2078 fsub d,s
2079 fsub 2d,4s,2s
2080
2081 ins d[]_d[],s[]_s[],h[]_h[],b[]_b[]
2082
2083 ins d[]_x, s[]_w, h[]_w, b[]_w
2084
2085 ld1 (multiple 1-element structures to 1/2/3/4 regs)
2086 ld1 (single 1-element structure to one lane of 1 reg)
2087 ld1r (single 1-element structure and rep to all lanes of 1 reg)
2088
2089 ld2 (multiple 2-element structures to 2 regs)
2090 ld2 (single 2-element structure to one lane of 2 regs)
2091 ld2r (single 2-element structure and rep to all lanes of 2 regs)
2092
2093 ld3 (multiple 3-element structures to 3 regs)
2094 ld3 (single 3-element structure to one lane of 3 regs)
2095 ld3r (single 3-element structure and rep to all lanes of 3 regs)
2096
2097 ld4 (multiple 4-element structures to 4 regs)
2098 ld4 (single 4-element structure to one lane of 4 regs)
2099 ld4r (single 4-element structure and rep to all lanes of 4 regs)
2100
2101 ldnp q_q_addr,d_d_addr,s_s_addr (load pair w/ non-temporal hint)
2102 addr = reg + uimm7 * reg_size
2103
2104 ldp q_q_addr,d_d_addr,s_s_addr (load pair)
2105 addr = [Xn|SP],#imm or [Xn|SP,#imm]! or [Xn|SP,#imm]
2106
2107 ldr q,d,s,h,b from addr
2108 addr = [Xn|SP],#imm or [Xn|SP,#imm]! or [Xn|SP,#imm]
2109
2110 ldr q,d,s from pc+#imm19
2111
2112 ldr q,d,s,h,b from addr
2113 addr = [Xn|SP, R <extend> <shift>]
2114
2115 ldur q,d,s,h,b from addr
2116 addr = [Xn|SP,#imm] (unscaled offset)
2117
2118 mla 4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
2119 mla 4s,2s,8h,4h,16b,8b
2120
2121 mls 4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
2122 mls 4s,2s,8h,4h,16b,8b
2123
2124 movi 16b,8b #imm8, LSL #0
2125 movi 8h,4h #imm8, LSL #0 or 8
2126 movi 4s,2s #imm8, LSL #0, 8, 16, 24
2127 movi 4s,2s #imm8, MSL #8 or 16
2128 movi d, #imm64
2129 movi 2d, #imm64
2130
2131 mul 4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
2132 mul 4s,2s,8h,4h,16b,8b
2133
2134 mvni 8h,4h #imm8, LSL #0 or 8
2135 mvni 4s,2s #imm8, LSL #0, 8, 16, 24
2136 mvni 4s,2s #imm8, MSL #8 or 16
2137
2138 neg d
2139 neg 2d,4s,2s,8h,4h,16b,8b
2140
2141 not 16b,8b
2142
2143 orn 16b,8b
2144
2145 orr 8h,4h #imm8, LSL #0 or 8
2146 orr 4s,2s #imm8, LSL #0, 8, 16 or 24
2147
2148 orr 16b,8b
2149
2150 pmul 16b,8b
2151
2152 pmull{2} 8h_8b_8b,8h_16b_16b,1q_1d_1d,1d_2d_2d
2153
2154 raddhn{2} 2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
2155
2156 rbit 16b,8b
2157 rev16 16b,8b
2158 rev32 16b,8b,8h,4h
2159 rev64 16b,8b,8h,4h,4s,2s
2160
2161 rshrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm in 1 .. elem_bits
2162
2163 rsubhn{2} 2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
2164
2165 saba 16b,8b,8h,4h,4s,2s
2166 sabal{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2167
2168 sabd 16b,8b,8h,4h,4s,2s
2169 sabdl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2170
2171 sadalp 4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
2172
2173 saddl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2174
2175 saddlp 4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
2176
2177 saddlv h_16b/8b, s_8h/4h, d_4s
2178
2179 saddw{2} 8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
2180
2181 scvtf d,s _#fbits
2182 scvtf 2d,4s,2s _#fbits
2183
2184 scvtf d,s
2185 scvtf 2d,4s,2s
2186
2187 scvtf s_w, d_w, s_x, d_x, _#fbits
2188 scvtf s_w, d_w, s_x, d_x
2189
2190 sha1c q_s_4s
2191 sha1h s_s
2192 sha1m q_s_4s
2193 sha1p q_s_4s
2194 sha1su0 4s_4s_4s
2195 sha1su1 4s_4s
2196 sha256h2 q_q_4s
2197 sha256h q_q_4s
2198 sha256su0 4s_4s
2199 sha256su1 4s_4s_4s
2200
2201 shadd 16b,8b,8h,4h,4s,2s
2202
2203 shl d_#imm
2204 shl 16b,8b,8h,4h,4s,2s,2d _#imm
2205
2206 shll{2} 8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
2207
2208 shrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm in 1 .. elem_bits
2209
2210 shsub 16b,8b,8h,4h,4s,2s
2211
2212 sli d_#imm
2213 sli 2d,4s,2s,8h,4h,16b,8b _#imm
2214
2215 smax 4s,2s,8h,4h,16b,8b
2216
2217 smaxp 4s,2s,8h,4h,16b,8b
2218
2219 smaxv s_4s,h_8h,h_4h,b_16b,b_8b
2220
2221 smin 4s,2s,8h,4h,16b,8b
2222
2223 sminp 4s,2s,8h,4h,16b,8b
2224
2225 sminv s_4s,h_8h,h_4h,b_16b,b_8b
2226
2227 smlal{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2228 smlal{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2229
2230 smlsl{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2231 smlsl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2232
2233 smov w_b[], w_h[], x_b[], x_h[], x_s[]
2234
2235 smull{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2236 smull{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2237
2238 sqabs d,s,h,b
2239 sqabs 2d,4s,2s,8h,4h,16b,8b
2240
2241 sqadd d,s,h,b
2242 sqadd 2d,4s,2s,8h,4h,16b,8b
2243
2244 sqdmlal d_s_s[], s_h_h[]
2245 sqdmlal{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2246
2247 sqdmlal d_s_s, s_h_h
2248 sqdmlal{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
2249
2250 sqdmlsl d_s_s[], s_h_h[]
2251 sqdmlsl{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2252
2253 sqdmlsl d_s_s, s_h_h
2254 sqdmlsl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
2255
2256 sqdmulh s_s_s[], h_h_h[]
2257 sqdmulh 4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
2258
2259 sqdmulh h,s
2260 sqdmulh 4s,2s,8h,4h
2261
2262 sqdmull d_s_s[], s_h_h[]
2263 sqdmull{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2264
2265 sqdmull d_s_s,s_h_h
2266 sqdmull{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
2267
2268 sqneg d,s,h,b
2269 sqneg 2d,4s,2s,8h,4h,16b,8b
2270
2271 sqrdmulh s_s_s[], h_h_h[]
2272 sqrdmulh 4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
2273
2274 sqrdmulh h,s
2275 sqrdmulh 4s,2s,8h,4h
2276
2277 sqrshl d,s,h,b
2278 sqrshl 2d,4s,2s,8h,4h,16b,8b
2279
2280 sqrshrn s_d, h_s, b_h #imm
2281 sqrshrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2282
2283 sqrshrun s_d, h_s, b_h #imm
2284 sqrshrun{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2285
2286 sqshl d,s,h,b _#imm
2287 sqshl 2d,4s,2s,8h,4h,16b,8b _#imm
2288
2289 sqshl d,s,h,b
2290 sqshl 2d,4s,2s,8h,4h,16b,8b
2291
2292 sqshlu d,s,h,b _#imm
2293 sqshlu 2d,4s,2s,8h,4h,16b,8b _#imm
2294
2295 sqshrn s_d, h_s, b_h #imm
2296 sqshrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2297
2298 sqshrun s_d, h_s, b_h #imm
2299 sqshrun{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2300
2301 sqsub d,s,h,b
2302 sqsub 2d,4s,2s,8h,4h,16b,8b
2303
2304 sqxtn s_d,h_s,b_h
2305 sqxtn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h
2306
2307 sqxtun s_d,h_s,b_h
2308 sqxtun{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h
2309
2310 srhadd 4s,2s,8h,4h,16b,8b
2311
2312 sri d_#imm
2313 sri 2d,4s,2s,8h,4h,16b,8b _#imm
2314
2315 srshl (reg) d
2316 srshl 2d,4s,2s,8h,4h,16b,8b
2317
2318 srshr (imm) d
2319 srshr 2d,4s,2s,8h,4h,16b,8b
2320
2321 srsra (imm) d
2322 srsra 2d,4s,2s,8h,4h,16b,8b
2323
2324 sshl (reg) d
2325 sshl 2d,4s,2s,8h,4h,16b,8b
2326
2327 sshll{2} (imm) 2d_2s/4s, 4s_4h/8h, 8h_8b/16b
2328
2329 sshr (imm) d
2330 sshr 2d,4s,2s,8h,4h,16b,8b
2331
2332 ssra (imm) d
2333 ssra 2d,4s,2s,8h,4h,16b,8b
2334
2335 ssubl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2336
2337 ssubw{2} 8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
2338
2339 st1 (multiple 1-element structures from 1/2/3/4 regs)
2340 st1 (single 1-element structure from 1 lane of 1 reg)
2341
2342 st2 (multiple 2-element structures from 2 regs)
2343 st2 (single 2-element structure from 1 lane of 2 regs)
2344
2345 st3 (multiple 3-element structures from 3 regs)
2346 st3 (single 3-element structure from 1 lane of 3 regs)
2347
2348 st4 (multiple 4-element structures from 4 regs)
2349 st4 (single 4-element structure from one lane of 4 regs)
2350
2351 stnp q_q_addr, d_d_addr, s_s_addr
2352 addr = [Xn|SP, #imm]
2353
2354 stp q_q_addr, d_d_addr, s_s_addr
2355 addr = [Xn|SP], #imm or [Xn|SP, #imm]! or [Xn|SP, #imm]
2356
2357 str q,d,s,h,b_addr
2358 addr = [Xn|SP], #simm or [Xn|SP, #simm]! or [Xn|SP, #pimm]
2359
2360 str q,d,s,h,b_addr
2361 addr = [Xn|SP, R <extend> <shift>]
2362
2363 stur q,d,s,h,b_addr
2364 addr = [Xn|SP,#imm] (unscaled offset)
2365
2366 sub d
2367 sub 2d,4s,2s,8h,4h,16b,8b
2368
2369 subhn{2} 2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
2370
2371 suqadd d,s,h,b
2372 suqadd 2d,4s,2s,8h,4h,16b,8b
2373
2374 tbl 8b_{16b}_8b, 16b_{16b}_16b
2375 tbl 8b_{16b,16b}_8b, 16b_{16b,16b}_16b
2376 tbl 8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
2377 tbl 8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
2378
2379 tbx 8b_{16b}_8b, 16b_{16b}_16b
2380 tbx 8b_{16b,16b}_8b, 16b_{16b,16b}_16b
2381 tbx 8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
2382 tbx 8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
2383
2384 trn1 2d,4s,2s,8h,4h,16b,8b
2385 trn2 2d,4s,2s,8h,4h,16b,8b
2386
2387 uaba 16b,8b,8h,4h,4s,2s
2388 uabal{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2389
2390 uabd 16b,8b,8h,4h,4s,2s
2391 uabdl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2392
2393 uadalp 4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
2394
2395 uaddl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2396
2397 uaddlp 4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
2398
2399 uaddlv h_16b/8b, s_8h/4h, d_4s
2400
2401 uaddw{2} 8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
2402
2403 ucvtf d,s _#fbits
2404 ucvtf 2d,4s,2s _#fbits
2405
2406 ucvtf d,s
2407 ucvtf 2d,4s,2s
2408
2409 ucvtf s_w, d_w, s_x, d_x, _#fbits
2410 ucvtf s_w, d_w, s_x, d_x
2411
2412 uhadd 16b,8b,8h,4h,4s,2s
2413
2414 uhsub 16b,8b,8h,4h,4s,2s
2415
2416 umax 4s,2s,8h,4h,16b,8b
2417
2418 umaxp 4s,2s,8h,4h,16b,8b
2419
2420 umaxv s_4s,h_8h,h_4h,b_16b,b_8b
2421
2422 umin 4s,2s,8h,4h,16b,8b
2423
2424 uminp 4s,2s,8h,4h,16b,8b
2425
2426 uminv s_4s,h_8h,h_4h,b_16b,b_8b
2427
2428 umlal{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2429 umlal{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2430
2431 umlsl{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2432 umlsl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2433
2434 umov w_b[], w_h[], x_b[], x_h[], x_s[]
2435
2436 umull{2} 2d_2s/4s_s[], 4s_4h/8h_h[]
2437 umull{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2438
2439 uqadd d,s,h,b
2440 uqadd 2d,4s,2s,8h,4h,16b,8b
2441
2442 uqrshl d,s,h,b
2443 uqrshl 2d,4s,2s,8h,4h,16b,8b
2444
2445 uqrshrn s_d, h_s, b_h #imm
2446 uqrshrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2447
2448 uqshl d,s,h,b _#imm
2449 uqshl 2d,4s,2s,8h,4h,16b,8b _#imm
2450
2451 uqshl d,s,h,b
2452 uqshl 2d,4s,2s,8h,4h,16b,8b
2453
2454 uqshrn s_d, h_s, b_h #imm
2455 uqshrn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h, #imm
2456
2457 uqsub d,s,h,b
2458 uqsub 2d,4s,2s,8h,4h,16b,8b
2459
2460 uqxtn s_d,h_s,b_h
2461 uqxtn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h
2462
2463 urecpe 4s,2s
2464
2465 urhadd 4s,2s,8h,4h,16b,8b
2466
2467 urshl (reg) d
2468 urshl 2d,4s,2s,8h,4h,16b,8b
2469
2470 urshr (imm) d
2471 urshr 2d,4s,2s,8h,4h,16b,8b
2472
2473 ursqrte 4s,2s
2474
2475 ursra (imm) d
2476 ursra 2d,4s,2s,8h,4h,16b,8b
2477
2478 ushl (reg) d
2479 ushl 2d,4s,2s,8h,4h,16b,8b
2480
2481 ushll{2} (imm) 2d_2s/4s, 4s_4h/8h, 8h_8b/16b
2482
2483 ushr (imm) d
2484 ushr 2d,4s,2s,8h,4h,16b,8b
2485
2486 usqadd d,s,h,b
2487 usqadd 2d,4s,2s,8h,4h,16b,8b
2488
2489 usra (imm) d
2490 usra 2d,4s,2s,8h,4h,16b,8b
2491
2492 usubl{2} 2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
2493
2494 usubw{2} 8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
2495
2496 uzp1 2d,4s,2s,8h,4h,16b,8b
2497 uzp2 2d,4s,2s,8h,4h,16b,8b
2498
2499 xtn{2} 2s/4s_2d, 4h/8h_4s, 8b/16b_8h
2500
2501 zip1 2d,4s,2s,8h,4h,16b,8b
2502 zip2 2d,4s,2s,8h,4h,16b,8b
2503*/