
/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm64_defs.h"

//ZZ UInt arm_hwcaps = 0;


/* --------- Registers. --------- */

/* The usual HReg abstraction.  We use the following classes only:
     X regs (64 bit int)
     D regs (64 bit float, also used for 32 bit float)
     Q regs (128 bit vector)
*/
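
/* Illustrative note (added; not part of the original file): mkHReg
   takes (regNo, class, isVirtual).  Assuming the usual HReg
   conventions, a real x5 and a virtual 64-bit int register number 5
   would be built as:

      HReg real_x5 = mkHReg(5, HRcInt64, False);  // real register x5
      HReg virt_v5 = mkHReg(5, HRcInt64, True);   // virtual register

   hregIsVirtual() in ppHRegARM64 below distinguishes the two when
   printing. */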

void ppHRegARM64 ( HReg reg ) {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 31);
         vex_printf("x%d", r);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM64");
   }
}

static void ppHRegARM64asSreg ( HReg reg ) {
   ppHRegARM64(reg);
   vex_printf("(S-reg)");
}

HReg hregARM64_X0  ( void ) { return mkHReg(0,  HRcInt64, False); }
HReg hregARM64_X1  ( void ) { return mkHReg(1,  HRcInt64, False); }
HReg hregARM64_X2  ( void ) { return mkHReg(2,  HRcInt64, False); }
HReg hregARM64_X3  ( void ) { return mkHReg(3,  HRcInt64, False); }
HReg hregARM64_X4  ( void ) { return mkHReg(4,  HRcInt64, False); }
HReg hregARM64_X5  ( void ) { return mkHReg(5,  HRcInt64, False); }
HReg hregARM64_X6  ( void ) { return mkHReg(6,  HRcInt64, False); }
HReg hregARM64_X7  ( void ) { return mkHReg(7,  HRcInt64, False); }
HReg hregARM64_X8  ( void ) { return mkHReg(8,  HRcInt64, False); }
HReg hregARM64_X9  ( void ) { return mkHReg(9,  HRcInt64, False); }
HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); }
HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); }
HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); }
HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); }
HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); }
HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); }
HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); }
HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); }
HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); }

// Should really use D8 .. D15 for class F64, since they are
// callee-saved.
HReg hregARM64_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM64_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); }
HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); }
HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); }
HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); }
HReg hregARM64_Q19 ( void ) { return mkHReg(19, HRcVec128, False); }
HReg hregARM64_Q20 ( void ) { return mkHReg(20, HRcVec128, False); }

void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr )
{
   Int i = 0;
   *nregs = 26;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));

   // callee-saved ones (22 to 28) are listed first, since we prefer
   // them if they're available
   (*arr)[i++] = hregARM64_X22();
   (*arr)[i++] = hregARM64_X23();
   (*arr)[i++] = hregARM64_X24();
   (*arr)[i++] = hregARM64_X25();
   (*arr)[i++] = hregARM64_X26();
   (*arr)[i++] = hregARM64_X27();
   (*arr)[i++] = hregARM64_X28();

   (*arr)[i++] = hregARM64_X0();
   (*arr)[i++] = hregARM64_X1();
   (*arr)[i++] = hregARM64_X2();
   (*arr)[i++] = hregARM64_X3();
   (*arr)[i++] = hregARM64_X4();
   (*arr)[i++] = hregARM64_X5();
   (*arr)[i++] = hregARM64_X6();
   (*arr)[i++] = hregARM64_X7();
   // X8 is used as a ProfInc temporary, not available to regalloc.
   // X9 is a chaining/spill temporary, not available to regalloc.

   // Do we really need all these?
   //(*arr)[i++] = hregARM64_X10();
   //(*arr)[i++] = hregARM64_X11();
   //(*arr)[i++] = hregARM64_X12();
   //(*arr)[i++] = hregARM64_X13();
   //(*arr)[i++] = hregARM64_X14();
   //(*arr)[i++] = hregARM64_X15();
   // X21 is the guest state pointer, not available to regalloc.

   // vector regs.  Unfortunately not callee-saved.
   (*arr)[i++] = hregARM64_Q16();
   (*arr)[i++] = hregARM64_Q17();
   (*arr)[i++] = hregARM64_Q18();
   (*arr)[i++] = hregARM64_Q19();
   (*arr)[i++] = hregARM64_Q20();

   // F64 regs, all of which are callee-saved
   (*arr)[i++] = hregARM64_D8();
   (*arr)[i++] = hregARM64_D9();
   (*arr)[i++] = hregARM64_D10();
   (*arr)[i++] = hregARM64_D11();
   (*arr)[i++] = hregARM64_D12();
   (*arr)[i++] = hregARM64_D13();

   // unavail: x21 as GSP
   // x8 is used as a ProfInc temporary
   // x9 is used as a spill/reload/chaining/call temporary
   // x30 as LR
   // x31 because dealing with the SP-vs-ZR overloading is too
   // confusing, and we don't need to do so, so let's just avoid
   // the problem
   //
   // Currently, we have 15 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
   //
   // Hence for the allocatable integer registers we have:
   //
   // callee-saved: 22 23 24 25 26 27 28
   // caller-saved: 0 1 2 3 4 5 6 7
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getRegUsage for ARM64Instr_Call too.
   vassert(i == *nregs);
}
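
/* A minimal usage sketch (added; not part of the original file): how
   a caller on the register-allocator side might fetch this table.
   Assumes only the function defined above.

      Int   nregs = 0;
      HReg* regs  = NULL;
      getAllocableRegs_ARM64(&nregs, &regs);
      // nregs is now 26; regs[0..6] are the preferred callee-saved
      // integer registers x22..x28, followed by x0..x7, then
      // q16..q20 and d8..d13.
*/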


/* --------- Condition codes, ARM64 encoding. --------- */

static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
   switch (cond) {
      case ARM64cc_EQ: return "eq";
      case ARM64cc_NE: return "ne";
      case ARM64cc_CS: return "cs";
      case ARM64cc_CC: return "cc";
      case ARM64cc_MI: return "mi";
      case ARM64cc_PL: return "pl";
      case ARM64cc_VS: return "vs";
      case ARM64cc_VC: return "vc";
      case ARM64cc_HI: return "hi";
      case ARM64cc_LS: return "ls";
      case ARM64cc_GE: return "ge";
      case ARM64cc_LT: return "lt";
      case ARM64cc_GT: return "gt";
      case ARM64cc_LE: return "le";
      case ARM64cc_AL: return "al"; // default
      case ARM64cc_NV: return "nv";
      default: vpanic("showARM64CondCode");
   }
}


/* --------- Memory address expressions (amodes). --------- */

ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
   ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
   am->tag = ARM64am_RI9;
   am->ARM64am.RI9.reg = reg;
   am->ARM64am.RI9.simm9 = simm9;
   vassert(-256 <= simm9 && simm9 <= 255);
   return am;
}

ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
   ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
   am->tag = ARM64am_RI12;
   am->ARM64am.RI12.reg = reg;
   am->ARM64am.RI12.uimm12 = uimm12;
   am->ARM64am.RI12.szB = szB;
   vassert(uimm12 >= 0 && uimm12 <= 4095);
   switch (szB) {
      case 1: case 2: case 4: case 8: break;
      default: vassert(0);
   }
   return am;
}

ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
   ARM64AMode* am = LibVEX_Alloc(sizeof(ARM64AMode));
   am->tag = ARM64am_RR;
   am->ARM64am.RR.base = base;
   am->ARM64am.RR.index = index;
   return am;
}
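
/* Illustrative sketch (added; not part of the original file):
   building the three amode forms.  Note that in the RI12 form uimm12
   is counted in units of szB, so the pair (uimm12=2, szB=8) denotes
   byte offset 16 -- see how ppARM64AMode below prints szB * uimm12.

      ARM64AMode* a1 = ARM64AMode_RI9 (hregARM64_X21(), -8);    // -8(x21)
      ARM64AMode* a2 = ARM64AMode_RI12(hregARM64_X21(), 2, 8);  // 16(x21)
      ARM64AMode* a3 = ARM64AMode_RR  (hregARM64_X0(), hregARM64_X1());
*/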

static void ppARM64AMode ( ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         vex_printf("%d(", am->ARM64am.RI9.simm9);
         ppHRegARM64(am->ARM64am.RI9.reg);
         vex_printf(")");
         break;
      case ARM64am_RI12:
         vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
                           * (UInt)am->ARM64am.RI12.uimm12);
         ppHRegARM64(am->ARM64am.RI12.reg);
         vex_printf(")");
         break;
      case ARM64am_RR:
         vex_printf("(");
         ppHRegARM64(am->ARM64am.RR.base);
         vex_printf(",");
         ppHRegARM64(am->ARM64am.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         addHRegUse(u, HRmRead, am->ARM64am.RR.base);
         addHRegUse(u, HRmRead, am->ARM64am.RR.index);
         return;
      default:
         vpanic("addRegUsage_ARM64AMode");
   }
}

static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
         am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
         return;
      default:
         vpanic("mapRegs_ARM64AMode");
   }
}


/* --------- Reg or uimm12<<{0,12} operands --------- */

ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
   ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
   riA->tag = ARM64riA_I12;
   riA->ARM64riA.I12.imm12 = imm12;
   riA->ARM64riA.I12.shift = shift;
   vassert(imm12 < 4096);
   vassert(shift == 0 || shift == 12);
   return riA;
}
ARM64RIA* ARM64RIA_R ( HReg reg ) {
   ARM64RIA* riA = LibVEX_Alloc(sizeof(ARM64RIA));
   riA->tag = ARM64riA_R;
   riA->ARM64riA.R.reg = reg;
   return riA;
}
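
/* Illustrative sketch (added; not part of the original file): the two
   RIA forms.  ARM64RIA_I12(1, 12) denotes the constant 1 << 12 =
   4096, which ppARM64RIA below prints as "#4096".

      ARM64RIA* four_k = ARM64RIA_I12(1, 12);
      ARM64RIA* as_reg = ARM64RIA_R(hregARM64_X3());
*/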

static void ppARM64RIA ( ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
                                 << riA->ARM64riA.I12.shift));
         break;
      case ARM64riA_R:
         ppHRegARM64(riA->ARM64riA.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIA");
   }
}

static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIA");
   }
}


/* --------- Reg or "bitfield" (logic immediate) operands --------- */

ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
   ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
   riL->tag = ARM64riL_I13;
   riL->ARM64riL.I13.bitN = bitN;
   riL->ARM64riL.I13.immR = immR;
   riL->ARM64riL.I13.immS = immS;
   vassert(bitN < 2);
   vassert(immR < 64);
   vassert(immS < 64);
   return riL;
}
ARM64RIL* ARM64RIL_R ( HReg reg ) {
   ARM64RIL* riL = LibVEX_Alloc(sizeof(ARM64RIL));
   riL->tag = ARM64riL_R;
   riL->ARM64riL.R.reg = reg;
   return riL;
}
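
/* Illustrative note (added; not part of the original file): the
   (bitN, immR, immS) triple is the standard ARMv8 "logical immediate"
   encoding.  Assuming the usual decoding -- N=1 selects a 64-bit
   element, immS+1 gives the length of the run of ones, and immR the
   right-rotation -- the 64-bit constant 1 would be built as:

      ARM64RIL* one = ARM64RIL_I13(1, 0, 0);   // printed as #nrs(1,0,0)
*/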

static void ppARM64RIL ( ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         vex_printf("#nrs(%u,%u,%u)",
                    (UInt)riL->ARM64riL.I13.bitN,
                    (UInt)riL->ARM64riL.I13.immR,
                    (UInt)riL->ARM64riL.I13.immS);
         break;
      case ARM64riL_R:
         ppHRegARM64(riL->ARM64riL.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIL");
   }
}

static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIL");
   }
}


/* --------------- Reg or uimm6 operands --------------- */

ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
   ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_I6;
   ri6->ARM64ri6.I6.imm6 = imm6;
   vassert(imm6 > 0 && imm6 < 64);
   return ri6;
}
ARM64RI6* ARM64RI6_R ( HReg reg ) {
   ARM64RI6* ri6 = LibVEX_Alloc(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_R;
   ri6->ARM64ri6.R.reg = reg;
   return ri6;
}
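
/* Illustrative sketch (added; not part of the original file): an
   immediate shift amount of 3 (the legal range is 1..63, per the
   assert above), usable as the right operand of the ARM64Instr_Shift
   constructor defined later in this file.

      ARM64RI6* sh3 = ARM64RI6_I6(3);   // printed as #3
*/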

static void ppARM64RI6 ( ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
         break;
      case ARM64ri6_R:
         ppHRegARM64(ri6->ARM64ri6.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RI6");
   }
}

static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RI6");
   }
}


/* --------- Instructions. --------- */

static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
   switch (op) {
      case ARM64lo_AND: return "and";
      case ARM64lo_OR: return "orr";
      case ARM64lo_XOR: return "eor";
      default: vpanic("showARM64LogicOp");
   }
}

static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
   switch (op) {
      case ARM64sh_SHL: return "lsl";
      case ARM64sh_SHR: return "lsr";
      case ARM64sh_SAR: return "asr";
      default: vpanic("showARM64ShiftOp");
   }
}

static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
   switch (op) {
      case ARM64un_NEG: return "neg";
      case ARM64un_NOT: return "not";
      case ARM64un_CLZ: return "clz";
      default: vpanic("showARM64UnaryOp");
   }
}

static const HChar* showARM64MulOp ( ARM64MulOp op ) {
   switch (op) {
      case ARM64mul_PLAIN: return "mul  ";
      case ARM64mul_ZX:    return "umulh";
      case ARM64mul_SX:    return "smulh";
      default: vpanic("showARM64MulOp");
   }
}

static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
                                     /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
                                     ARM64CvtOp op ) {
   switch (op) {
      case ARM64cvt_F32_I32S:
         *syn = 's'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32S:
         *syn = 's'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64S:
         *syn = 's'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64S:
         *syn = 's'; *fszB = 8; *iszB = 8; break;
      case ARM64cvt_F32_I32U:
         *syn = 'u'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32U:
         *syn = 'u'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64U:
         *syn = 'u'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64U:
         *syn = 'u'; *fszB = 8; *iszB = 8; break;
      default:
         vpanic("characteriseARM64CvtOp");
   }
}
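
/* Worked example (added; not part of the original file): for
   ARM64cvt_F64_I32S the helper above yields syn='s', fszB=8, iszB=4,
   i.e. a conversion between a 64-bit float and a 32-bit signed int.

      HChar syn; UInt fszB, iszB;
      characteriseARM64CvtOp(&syn, &fszB, &iszB, ARM64cvt_F64_I32S);
      // now syn == 's', fszB == 8, iszB == 4
*/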

static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
   switch (op) {
      case ARM64fpb_ADD: return "add";
      case ARM64fpb_SUB: return "sub";
      case ARM64fpb_MUL: return "mul";
      case ARM64fpb_DIV: return "div";
      default: vpanic("showARM64FpBinOp");
   }
}

static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
   switch (op) {
      case ARM64fpu_NEG: return "neg ";
      case ARM64fpu_ABS: return "abs ";
      case ARM64fpu_SQRT: return "sqrt ";
      case ARM64fpu_RINT: return "rinti";
      default: vpanic("showARM64FpUnaryOp");
   }
}

static void showARM64VecBinOp(/*OUT*/const HChar** nm,
                              /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
   switch (op) {
      case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
      case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
      case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
      case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
      case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
      case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
      case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
      case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
      case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
      case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
      case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
      case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
      case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
      case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
      case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
      case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
      case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
      case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
      case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
      case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
      case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
      case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
      case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
      case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
      case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
      case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
      case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
      case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
      case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
      case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
      case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
      case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
      case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
      case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
      case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
      case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
      case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
      case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
      case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
      case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
      case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
      case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
      case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
      case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
      case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
      case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
      case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
      case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
      case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
      case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
      case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
      case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
      case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
      case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
      case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
      case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
      case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
      case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
      case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
      case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
      case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
      case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
      case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
      case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
      case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
      case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
      case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
      case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
      case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
      case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
      case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
      case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
      case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
      case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
      case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
      case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
      case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
      case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
      case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
      case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
      case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
      case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
      case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
      case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
      case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
      case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
      case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
      case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
      case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
      case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
      case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
      case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
      case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
      case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
      case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
      case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
      case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
      case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
      case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
      case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
      case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
      case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
      case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
      case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
      case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
      case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
      case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
      case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
      case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
      case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
      case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
      case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
      case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
      case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
      case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
      case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
      case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
      case ARM64vecb_SSHL64x2: *nm = "sshl"; *ar = "2d"; return;
      case ARM64vecb_SSHL32x4: *nm = "sshl"; *ar = "4s"; return;
      case ARM64vecb_SSHL16x8: *nm = "sshl"; *ar = "8h"; return;
      case ARM64vecb_SSHL8x16: *nm = "sshl"; *ar = "16b"; return;
      case ARM64vecb_USHL64x2: *nm = "ushl"; *ar = "2d"; return;
      case ARM64vecb_USHL32x4: *nm = "ushl"; *ar = "4s"; return;
      case ARM64vecb_USHL16x8: *nm = "ushl"; *ar = "8h"; return;
      case ARM64vecb_USHL8x16: *nm = "ushl"; *ar = "16b"; return;
      case ARM64vecb_SRSHL64x2: *nm = "srshl"; *ar = "2d"; return;
      case ARM64vecb_SRSHL32x4: *nm = "srshl"; *ar = "4s"; return;
      case ARM64vecb_SRSHL16x8: *nm = "srshl"; *ar = "8h"; return;
      case ARM64vecb_SRSHL8x16: *nm = "srshl"; *ar = "16b"; return;
      case ARM64vecb_URSHL64x2: *nm = "urshl"; *ar = "2d"; return;
      case ARM64vecb_URSHL32x4: *nm = "urshl"; *ar = "4s"; return;
      case ARM64vecb_URSHL16x8: *nm = "urshl"; *ar = "8h"; return;
      case ARM64vecb_URSHL8x16: *nm = "urshl"; *ar = "16b"; return;
      default: vpanic("showARM64VecBinOp");
   }
}

static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar,
                                 ARM64VecModifyOp op ) {
   switch (op) {
      case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
      case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
      case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
      case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
      case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
      case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
      case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
      case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
      default: vpanic("showARM64VecModifyOp");
   }
}

static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
{
   switch (op) {
      case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
      case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
      case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
      case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
      case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
      case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
      case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
      case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
      case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
      case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
      case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
      case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
      case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
      case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
      case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
      case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
      case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
      case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
      case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
      case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
      case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
      case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
      case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
      case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
      default: vpanic("showARM64VecUnaryOp");
   }
}

static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
                                   /*OUT*/const HChar** ar,
                                   ARM64VecShiftImmOp op )
{
   switch (op) {
      case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
      case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
      case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
      case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
      case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
      case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
      case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
      case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
      case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
      case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
      case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
      case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
      case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
      case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
      case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
      case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
      case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
      case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
      case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
      case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
      case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
      case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
      case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
      case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
      case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
      case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
      case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
      case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
      case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
      case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
      case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
      case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
      case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
      case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
      default: vpanic("showARM64VecShiftImmOp");
   }
}

static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
   switch (op) {
      case ARM64vecna_XTN: return "xtn ";
      case ARM64vecna_SQXTN: return "sqxtn ";
      case ARM64vecna_UQXTN: return "uqxtn ";
      case ARM64vecna_SQXTUN: return "sqxtun";
      default: vpanic("showARM64VecNarrowOp");
   }
}

ARM64Instr* ARM64Instr_Arith ( HReg dst,
                               HReg argL, ARM64RIA* argR, Bool isAdd ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Arith;
   i->ARM64in.Arith.dst = dst;
   i->ARM64in.Arith.argL = argL;
   i->ARM64in.Arith.argR = argR;
   i->ARM64in.Arith.isAdd = isAdd;
   return i;
}
ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Cmp;
   i->ARM64in.Cmp.argL = argL;
   i->ARM64in.Cmp.argR = argR;
   i->ARM64in.Cmp.is64 = is64;
   return i;
}
ARM64Instr* ARM64Instr_Logic ( HReg dst,
                               HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Logic;
   i->ARM64in.Logic.dst = dst;
   i->ARM64in.Logic.argL = argL;
   i->ARM64in.Logic.argR = argR;
   i->ARM64in.Logic.op = op;
   return i;
}
ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Test;
   i->ARM64in.Test.argL = argL;
   i->ARM64in.Test.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_Shift ( HReg dst,
                               HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Shift;
   i->ARM64in.Shift.dst = dst;
   i->ARM64in.Shift.argL = argL;
   i->ARM64in.Shift.argR = argR;
   i->ARM64in.Shift.op = op;
   return i;
}
ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Unary;
   i->ARM64in.Unary.dst = dst;
   i->ARM64in.Unary.src = src;
   i->ARM64in.Unary.op = op;
   return i;
}
ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_MovI;
   i->ARM64in.MovI.dst = dst;
   i->ARM64in.MovI.src = src;
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return i;
}
ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Imm64;
   i->ARM64in.Imm64.dst = dst;
   i->ARM64in.Imm64.imm64 = imm64;
   return i;
}
ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt64;
   i->ARM64in.LdSt64.isLoad = isLoad;
   i->ARM64in.LdSt64.rD = rD;
   i->ARM64in.LdSt64.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt32;
   i->ARM64in.LdSt32.isLoad = isLoad;
   i->ARM64in.LdSt32.rD = rD;
   i->ARM64in.LdSt32.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt16;
   i->ARM64in.LdSt16.isLoad = isLoad;
   i->ARM64in.LdSt16.rD = rD;
   i->ARM64in.LdSt16.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt8;
   i->ARM64in.LdSt8.isLoad = isLoad;
   i->ARM64in.LdSt8.rD = rD;
   i->ARM64in.LdSt8.amode = amode;
   return i;
}
ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
                                 ARM64CondCode cond, Bool toFastEP ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_XDirect;
   i->ARM64in.XDirect.dstGA = dstGA;
   i->ARM64in.XDirect.amPC = amPC;
   i->ARM64in.XDirect.cond = cond;
   i->ARM64in.XDirect.toFastEP = toFastEP;
   return i;
}
ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
                                ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_XIndir;
   i->ARM64in.XIndir.dstGA = dstGA;
   i->ARM64in.XIndir.amPC = amPC;
   i->ARM64in.XIndir.cond = cond;
   return i;
}
ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
                                   ARM64CondCode cond, IRJumpKind jk ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_XAssisted;
   i->ARM64in.XAssisted.dstGA = dstGA;
   i->ARM64in.XAssisted.amPC = amPC;
   i->ARM64in.XAssisted.cond = cond;
   i->ARM64in.XAssisted.jk = jk;
   return i;
}
ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
                              ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_CSel;
   i->ARM64in.CSel.dst = dst;
   i->ARM64in.CSel.argL = argL;
   i->ARM64in.CSel.argR = argR;
   i->ARM64in.CSel.cond = cond;
   return i;
}
ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, HWord target, Int nArgRegs,
                              RetLoc rloc ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Call;
   i->ARM64in.Call.cond = cond;
   i->ARM64in.Call.target = target;
   i->ARM64in.Call.nArgRegs = nArgRegs;
   i->ARM64in.Call.rloc = rloc;
   vassert(is_sane_RetLoc(rloc));
   return i;
}
extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_AddToSP;
   i->ARM64in.AddToSP.simm = simm;
   vassert(-4096 < simm && simm < 4096);
   vassert(0 == (simm & 0xF));
   return i;
}
extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_FromSP;
   i->ARM64in.FromSP.dst = dst;
   return i;
}
ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
                             ARM64MulOp op ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_Mul;
   i->ARM64in.Mul.dst = dst;
   i->ARM64in.Mul.argL = argL;
   i->ARM64in.Mul.argR = argR;
   i->ARM64in.Mul.op = op;
   return i;
}
ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_LdrEX;
   i->ARM64in.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_StrEX;
   i->ARM64in.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARM64Instr* ARM64Instr_MFence ( void ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_MFence;
   return i;
}
ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStS;
   i->ARM64in.VLdStS.isLoad = isLoad;
   i->ARM64in.VLdStS.sD = sD;
   i->ARM64in.VLdStS.rN = rN;
   i->ARM64in.VLdStS.uimm12 = uimm12;
   vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   return i;
}
ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStD;
   i->ARM64in.VLdStD.isLoad = isLoad;
   i->ARM64in.VLdStD.dD = dD;
   i->ARM64in.VLdStD.rN = rN;
   i->ARM64in.VLdStD.uimm12 = uimm12;
   vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   return i;
}
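/* Illustrative note (added; not part of the original file): unlike
   the RI12 amode, the uimm12 fields of VLdStS/VLdStD are plain byte
   offsets, just constrained by the asserts above (< 16384 and
   4-aligned for S regs; < 32768 and 8-aligned for D regs).  For
   example, a sketch of loading d9 from [x21, #16]:

      ARM64Instr* ld
         = ARM64Instr_VLdStD(True/*isLoad*/,
                             hregARM64_D9(), hregARM64_X21(), 16);
*/
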
ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStQ;
   i->ARM64in.VLdStQ.isLoad = isLoad;
   i->ARM64in.VLdStQ.rQ = rQ;
   i->ARM64in.VLdStQ.rN = rN;
   return i;
}
ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtI2F;
   i->ARM64in.VCvtI2F.how = how;
   i->ARM64in.VCvtI2F.rD = rD;
   i->ARM64in.VCvtI2F.rS = rS;
   return i;
}
ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
                                 UChar armRM ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtF2I;
   i->ARM64in.VCvtF2I.how = how;
   i->ARM64in.VCvtF2I.rD = rD;
   i->ARM64in.VCvtF2I.rS = rS;
   i->ARM64in.VCvtF2I.armRM = armRM;
   vassert(armRM <= 3);
   return i;
}
ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtSD;
   i->ARM64in.VCvtSD.sToD = sToD;
   i->ARM64in.VCvtSD.dst = dst;
   i->ARM64in.VCvtSD.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryD;
   i->ARM64in.VUnaryD.op = op;
   i->ARM64in.VUnaryD.dst = dst;
   i->ARM64in.VUnaryD.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryS;
   i->ARM64in.VUnaryS.op = op;
   i->ARM64in.VUnaryS.dst = dst;
   i->ARM64in.VUnaryS.src = src;
   return i;
}
ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinD;
   i->ARM64in.VBinD.op = op;
   i->ARM64in.VBinD.dst = dst;
   i->ARM64in.VBinD.argL = argL;
   i->ARM64in.VBinD.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinS;
   i->ARM64in.VBinS.op = op;
   i->ARM64in.VBinS.dst = dst;
   i->ARM64in.VBinS.argL = argL;
   i->ARM64in.VBinS.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpD;
   i->ARM64in.VCmpD.argL = argL;
   i->ARM64in.VCmpD.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpS;
   i->ARM64in.VCmpS.argL = argL;
   i->ARM64in.VCmpS.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
                                ARM64CondCode cond, Bool isD ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VFCSel;
   i->ARM64in.VFCSel.dst = dst;
   i->ARM64in.VFCSel.argL = argL;
   i->ARM64in.VFCSel.argR = argR;
   i->ARM64in.VFCSel.cond = cond;
   i->ARM64in.VFCSel.isD = isD;
   return i;
}
ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_FPCR;
   i->ARM64in.FPCR.toFPCR = toFPCR;
   i->ARM64in.FPCR.iReg = iReg;
   return i;
}
ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_FPSR;
   i->ARM64in.FPSR.toFPSR = toFPSR;
   i->ARM64in.FPSR.iReg = iReg;
   return i;
}
ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinV;
   i->ARM64in.VBinV.op = op;
   i->ARM64in.VBinV.dst = dst;
   i->ARM64in.VBinV.argL = argL;
   i->ARM64in.VBinV.argR = argR;
   return i;
}
ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VModifyV;
   i->ARM64in.VModifyV.op = op;
   i->ARM64in.VModifyV.mod = mod;
   i->ARM64in.VModifyV.arg = arg;
   return i;
}
ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryV;
   i->ARM64in.VUnaryV.op = op;
   i->ARM64in.VUnaryV.dst = dst;
   i->ARM64in.VUnaryV.arg = arg;
   return i;
}
ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
                                  UInt dszBlg2, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VNarrowV;
   i->ARM64in.VNarrowV.op = op;
   i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   i->ARM64in.VNarrowV.dst = dst;
   i->ARM64in.VNarrowV.src = src;
   vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   return i;
}
ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
                                    HReg dst, HReg src, UInt amt ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VShiftImmV;
   i->ARM64in.VShiftImmV.op = op;
   i->ARM64in.VShiftImmV.dst = dst;
   i->ARM64in.VShiftImmV.src = src;
   i->ARM64in.VShiftImmV.amt = amt;
   UInt minSh = 0;
   UInt maxSh = 0;
   switch (op) {
      /* For right shifts, the allowed shift amounts are 1 .. lane_size.
         For left shifts, the allowed shift amounts are 0 .. lane_size-1.
      */
      case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
      case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
      case ARM64vecshi_SQSHRUN2SD:
      case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
      case ARM64vecshi_SQRSHRUN2SD:
         minSh = 1; maxSh = 64; break;
      case ARM64vecshi_SHL64x2:
      case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
      case ARM64vecshi_SQSHLU64x2:
         minSh = 0; maxSh = 63; break;
      case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
      case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
      case ARM64vecshi_SQSHRUN4HS:
      case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
      case ARM64vecshi_SQRSHRUN4HS:
         minSh = 1; maxSh = 32; break;
      case ARM64vecshi_SHL32x4:
      case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
      case ARM64vecshi_SQSHLU32x4:
         minSh = 0; maxSh = 31; break;
      case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
      case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
      case ARM64vecshi_SQSHRUN8BH:
      case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
      case ARM64vecshi_SQRSHRUN8BH:
         minSh = 1; maxSh = 16; break;
      case ARM64vecshi_SHL16x8:
      case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
      case ARM64vecshi_SQSHLU16x8:
         minSh = 0; maxSh = 15; break;
      case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
         minSh = 1; maxSh = 8; break;
      case ARM64vecshi_SHL8x16:
      case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
      case ARM64vecshi_SQSHLU8x16:
         minSh = 0; maxSh = 7; break;
      default:
         vassert(0);
   }
   vassert(maxSh > 0);
   vassert(amt >= minSh && amt <= maxSh);
   return i;
}
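/* Worked example (added; not part of the original file), illustrating
   the min/max rule above: for a 4s right shift the legal immediates
   are 1..32, while the corresponding left shift allows only 0..31.
   vDst and vSrc are hypothetical HRcVec128 registers.

      ARM64Instr* sh = ARM64Instr_VShiftImmV(ARM64vecshi_USHR32x4,
                                             vDst, vSrc, 32);  // ok: 1..32
      // ARM64Instr_VShiftImmV(ARM64vecshi_SHL32x4, vDst, vSrc, 32)
      //    would fail the assertion, since SHL32x4 allows only 0..31.
*/
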
ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VExtV;
   i->ARM64in.VExtV.dst = dst;
   i->ARM64in.VExtV.srcLo = srcLo;
   i->ARM64in.VExtV.srcHi = srcHi;
   i->ARM64in.VExtV.amtB = amtB;
   vassert(amtB >= 1 && amtB <= 15);
   return i;
}
ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VImmQ;
   i->ARM64in.VImmQ.rQ = rQ;
   i->ARM64in.VImmQ.imm = imm;
   /* Check that this is something that can actually be emitted. */
   switch (imm) {
      case 0x0000: case 0x0001: case 0x0003:
      case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
         break;
      default:
         vassert(0);
   }
   return i;
}
ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VDfromX;
   i->ARM64in.VDfromX.rD = rD;
   i->ARM64in.VDfromX.rX = rX;
   return i;
}
ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromX;
   i->ARM64in.VQfromX.rQ = rQ;
   i->ARM64in.VQfromX.rXlo = rXlo;
   return i;
}
ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromXX;
   i->ARM64in.VQfromXX.rQ = rQ;
   i->ARM64in.VQfromXX.rXhi = rXhi;
   i->ARM64in.VQfromXX.rXlo = rXlo;
   return i;
}
ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromQ;
   i->ARM64in.VXfromQ.rX = rX;
   i->ARM64in.VXfromQ.rQ = rQ;
   i->ARM64in.VXfromQ.laneNo = laneNo;
   vassert(laneNo <= 1);
   return i;
}
ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromDorS;
   i->ARM64in.VXfromDorS.rX = rX;
   i->ARM64in.VXfromDorS.rDorS = rDorS;
   i->ARM64in.VXfromDorS.fromD = fromD;
   return i;
}
ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_VMov;
   i->ARM64in.VMov.szB = szB;
   i->ARM64in.VMov.dst = dst;
   i->ARM64in.VMov.src = src;
   switch (szB) {
      case 16:
         vassert(hregClass(src) == HRcVec128);
         vassert(hregClass(dst) == HRcVec128);
         break;
      case 8:
         vassert(hregClass(src) == HRcFlt64);
         vassert(hregClass(dst) == HRcFlt64);
         break;
      default:
         vpanic("ARM64Instr_VMov");
   }
   return i;
}
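/* Illustrative sketch (added; not part of the original file): VMov
   pairs the size with the register class -- 16 for Q<-Q, 8 for D<-D
   -- as the asserts above require.

      ARM64Instr* mv = ARM64Instr_VMov(8, hregARM64_D8(), hregARM64_D9());
*/
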
ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
                                 ARM64AMode* amFailAddr ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_EvCheck;
   i->ARM64in.EvCheck.amCounter = amCounter;
   i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   return i;
}
ARM64Instr* ARM64Instr_ProfInc ( void ) {
   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
   i->tag = ARM64in_ProfInc;
   return i;
}

/* ... */

void ppARM64Instr ( const ARM64Instr* i ) {
   switch (i->tag) {
      case ARM64in_Arith:
         vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
         ppHRegARM64(i->ARM64in.Arith.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Arith.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Arith.argR);
         return;
      case ARM64in_Cmp:
         vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
         ppHRegARM64(i->ARM64in.Cmp.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Cmp.argR);
         return;
      case ARM64in_Logic:
         vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
         ppHRegARM64(i->ARM64in.Logic.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Logic.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Logic.argR);
         return;
      case ARM64in_Test:
         vex_printf("tst ");
         ppHRegARM64(i->ARM64in.Test.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
         vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
         ppHRegARM64(i->ARM64in.Shift.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Shift.argL);
         vex_printf(", ");
         ppARM64RI6(i->ARM64in.Shift.argR);
         return;
      case ARM64in_Unary:
         vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
         ppHRegARM64(i->ARM64in.Unary.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Unary.src);
         return;
      case ARM64in_MovI:
         vex_printf("mov ");
         ppHRegARM64(i->ARM64in.MovI.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.MovI.src);
         return;
      case ARM64in_Imm64:
         vex_printf("imm64 ");
         ppHRegARM64(i->ARM64in.Imm64.dst);
         vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
         return;
      case ARM64in_LdSt64:
         if (i->ARM64in.LdSt64.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
         } else {
            vex_printf("str ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
         }
         return;
      case ARM64in_LdSt32:
         if (i->ARM64in.LdSt32.isLoad) {
            vex_printf("ldruw ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
         } else {
            vex_printf("strw ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
         }
         return;
      case ARM64in_LdSt16:
         if (i->ARM64in.LdSt16.isLoad) {
            vex_printf("ldruh ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
         } else {
            vex_printf("strh ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
         }
         return;
      case ARM64in_LdSt8:
         if (i->ARM64in.LdSt8.isLoad) {
            vex_printf("ldrub ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
         } else {
            vex_printf("strb ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
         }
         return;
      case ARM64in_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XDirect.cond));
         vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
         vex_printf("str x9,");
         ppARM64AMode(i->ARM64in.XDirect.amPC);
         vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
                    i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("blr x9 }");
         return;
      case ARM64in_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XIndir.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XIndir.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XIndir.amPC);
         vex_printf("; imm64 x9,$disp_cp_xindir; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XAssisted.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XAssisted.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XAssisted.amPC);
         vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
                    (Int)i->ARM64in.XAssisted.jk);
         vex_printf("imm64 x9,$disp_cp_xassisted; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_CSel:
         vex_printf("csel ");
         ppHRegARM64(i->ARM64in.CSel.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argR);
         vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
         return;
      case ARM64in_Call:
         vex_printf("call%s ",
                    i->ARM64in.Call.cond==ARM64cc_AL
                       ? " " : showARM64CondCode(i->ARM64in.Call.cond));
         vex_printf("0x%lx [nArgRegs=%d, ",
                    i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
         ppRetLoc(i->ARM64in.Call.rloc);
         vex_printf("]");
         return;
      case ARM64in_AddToSP: {
         Int simm = i->ARM64in.AddToSP.simm;
         vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
                                        simm < 0 ? -simm : simm);
         return;
      }
      case ARM64in_FromSP:
         vex_printf("mov ");
         ppHRegARM64(i->ARM64in.FromSP.dst);
         vex_printf(", xsp");
         return;
      case ARM64in_Mul:
         vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
         ppHRegARM64(i->ARM64in.Mul.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Mul.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Mul.argR);
         return;

      case ARM64in_LdrEX: {
         const HChar* sz = " ";
         switch (i->ARM64in.LdrEX.szB) {
            case 1: sz = "b"; break;
            case 2: sz = "h"; break;
            case 4: case 8: break;
            default: vassert(0);
         }
         vex_printf("ldxr%s %c2, [x4]",
                    sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
         return;
      }
      case ARM64in_StrEX: {
         const HChar* sz = " ";
         switch (i->ARM64in.StrEX.szB) {
            case 1: sz = "b"; break;
            case 2: sz = "h"; break;
            case 4: case 8: break;
            default: vassert(0);
         }
         vex_printf("stxr%s w0, %c2, [x4]",
                    sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
         return;
      }
      case ARM64in_MFence:
         vex_printf("(mfence) dsb sy; dmb sy; isb");
         return;
      case ARM64in_VLdStS:
         if (i->ARM64in.VLdStS.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
            vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
            ppHRegARM64(i->ARM64in.VLdStS.rN);
            vex_printf(")");
         } else {
            vex_printf("str ");
            vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
            ppHRegARM64(i->ARM64in.VLdStS.rN);
            vex_printf("), ");
            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
         }
         return;
      case ARM64in_VLdStD:
         if (i->ARM64in.VLdStD.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64(i->ARM64in.VLdStD.dD);
            vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1565 ppHRegARM64(i->ARM64in.VLdStD.rN);
1566 vex_printf(")");
1567 } else {
1568 vex_printf("str ");
1569 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1570 ppHRegARM64(i->ARM64in.VLdStD.rN);
1571 vex_printf("), ");
1572 ppHRegARM64(i->ARM64in.VLdStD.dD);
1573 }
1574 return;
1575 case ARM64in_VLdStQ:
1576 if (i->ARM64in.VLdStQ.isLoad)
1577 vex_printf("ld1.2d {");
1578 else
1579 vex_printf("st1.2d {");
1580 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1581 vex_printf("}, [");
1582 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1583 vex_printf("]");
1584 return;
1585 case ARM64in_VCvtI2F: {
1586 HChar syn = '?';
1587 UInt fszB = 0;
1588 UInt iszB = 0;
1589 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1590 vex_printf("%ccvtf ", syn);
1591 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1592 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1593 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1594 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1595 return;
1596 }
1597 case ARM64in_VCvtF2I: {
1598 HChar syn = '?';
1599 UInt fszB = 0;
1600 UInt iszB = 0;
1601 HChar rmo = '?';
1602 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1603 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1604 if (armRM < 4) rmo = "npmz"[armRM];
1605 vex_printf("fcvt%c%c ", rmo, syn);
1606 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1607 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1608 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1609 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1610 return;
1611 }
1612 case ARM64in_VCvtSD:
1613 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1614 if (i->ARM64in.VCvtSD.sToD) {
1615 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1616 vex_printf(", ");
1617 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1618 } else {
1619 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1620 vex_printf(", ");
1621 ppHRegARM64(i->ARM64in.VCvtSD.src);
1622 }
1623 return;
1624 case ARM64in_VUnaryD:
1625 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1626 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1627 vex_printf(", ");
1628 ppHRegARM64(i->ARM64in.VUnaryD.src);
1629 return;
1630 case ARM64in_VUnaryS:
1631 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1632 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1633 vex_printf(", ");
1634 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1635 return;
1636 case ARM64in_VBinD:
1637 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1638 ppHRegARM64(i->ARM64in.VBinD.dst);
1639 vex_printf(", ");
1640 ppHRegARM64(i->ARM64in.VBinD.argL);
1641 vex_printf(", ");
1642 ppHRegARM64(i->ARM64in.VBinD.argR);
1643 return;
1644 case ARM64in_VBinS:
1645 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1646 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1647 vex_printf(", ");
1648 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1649 vex_printf(", ");
1650 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1651 return;
1652 case ARM64in_VCmpD:
1653 vex_printf("fcmp ");
1654 ppHRegARM64(i->ARM64in.VCmpD.argL);
1655 vex_printf(", ");
1656 ppHRegARM64(i->ARM64in.VCmpD.argR);
1657 return;
1658 case ARM64in_VCmpS:
1659 vex_printf("fcmp ");
1660 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1661 vex_printf(", ");
1662 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1663 return;
sewardje23ec112014-11-15 16:07:14 +00001664 case ARM64in_VFCSel: {
1665 void (*ppHRegARM64fp)(HReg)
1666 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1667 vex_printf("fcsel ");
1668 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1669 vex_printf(", ");
1670 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1671 vex_printf(", ");
1672 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1673 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1674 return;
1675 }
sewardjbbcf1882014-01-12 12:49:10 +00001676 case ARM64in_FPCR:
1677 if (i->ARM64in.FPCR.toFPCR) {
1678 vex_printf("msr fpcr, ");
1679 ppHRegARM64(i->ARM64in.FPCR.iReg);
1680 } else {
1681 vex_printf("mrs ");
1682 ppHRegARM64(i->ARM64in.FPCR.iReg);
1683 vex_printf(", fpcr");
1684 }
1685 return;
sewardj12972182014-08-04 08:09:47 +00001686 case ARM64in_FPSR:
1687 if (i->ARM64in.FPSR.toFPSR) {
1688 vex_printf("msr fpsr, ");
1689 ppHRegARM64(i->ARM64in.FPSR.iReg);
1690 } else {
1691 vex_printf("mrs ");
1692 ppHRegARM64(i->ARM64in.FPSR.iReg);
1693 vex_printf(", fpsr");
1694 }
1695 return;
sewardj606c4ba2014-01-26 19:11:14 +00001696 case ARM64in_VBinV: {
1697 const HChar* nm = "??";
1698 const HChar* ar = "??";
1699 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
sewardj12972182014-08-04 08:09:47 +00001700 vex_printf("%s ", nm);
sewardj606c4ba2014-01-26 19:11:14 +00001701 ppHRegARM64(i->ARM64in.VBinV.dst);
1702 vex_printf(".%s, ", ar);
1703 ppHRegARM64(i->ARM64in.VBinV.argL);
1704 vex_printf(".%s, ", ar);
1705 ppHRegARM64(i->ARM64in.VBinV.argR);
1706 vex_printf(".%s", ar);
1707 return;
1708 }
sewardjf7003bc2014-08-18 12:28:02 +00001709 case ARM64in_VModifyV: {
1710 const HChar* nm = "??";
1711 const HChar* ar = "??";
1712 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1713 vex_printf("%s ", nm);
1714 ppHRegARM64(i->ARM64in.VModifyV.mod);
1715 vex_printf(".%s, ", ar);
1716 ppHRegARM64(i->ARM64in.VModifyV.arg);
1717 vex_printf(".%s", ar);
1718 return;
1719 }
sewardjfab09142014-02-10 10:28:13 +00001720 case ARM64in_VUnaryV: {
1721 const HChar* nm = "??";
1722 const HChar* ar = "??";
1723 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1724 vex_printf("%s ", nm);
1725 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1726 vex_printf(".%s, ", ar);
1727 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1728 vex_printf(".%s", ar);
1729 return;
1730 }
sewardj606c4ba2014-01-26 19:11:14 +00001731 case ARM64in_VNarrowV: {
1732 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1733 const HChar* darr[3] = { "8b", "4h", "2s" };
1734 const HChar* sarr[3] = { "8h", "4s", "2d" };
sewardjecedd982014-08-11 14:02:47 +00001735 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1736 vex_printf("%s ", nm);
sewardj606c4ba2014-01-26 19:11:14 +00001737 ppHRegARM64(i->ARM64in.VNarrowV.dst);
1738 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1739 ppHRegARM64(i->ARM64in.VNarrowV.src);
1740 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1741 return;
1742 }
sewardje520bb32014-02-17 11:00:53 +00001743 case ARM64in_VShiftImmV: {
1744 const HChar* nm = "??";
1745 const HChar* ar = "??";
sewardja6b61f02014-08-17 18:32:14 +00001746 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
sewardje520bb32014-02-17 11:00:53 +00001747 vex_printf("%s ", nm);
1748 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1749 vex_printf(".%s, ", ar);
1750 ppHRegARM64(i->ARM64in.VShiftImmV.src);
1751 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1752 return;
1753 }
sewardjab33a7a2014-06-19 22:20:47 +00001754 case ARM64in_VExtV: {
1755 vex_printf("ext ");
1756 ppHRegARM64(i->ARM64in.VExtV.dst);
1757 vex_printf(".16b, ");
1758 ppHRegARM64(i->ARM64in.VExtV.srcLo);
1759 vex_printf(".16b, ");
1760 ppHRegARM64(i->ARM64in.VExtV.srcHi);
1761 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1762 return;
1763 }
sewardjbbcf1882014-01-12 12:49:10 +00001764 case ARM64in_VImmQ:
1765 vex_printf("qimm ");
1766 ppHRegARM64(i->ARM64in.VImmQ.rQ);
1767 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1768 return;
1769 case ARM64in_VDfromX:
1770 vex_printf("fmov ");
1771 ppHRegARM64(i->ARM64in.VDfromX.rD);
1772 vex_printf(", ");
1773 ppHRegARM64(i->ARM64in.VDfromX.rX);
1774 return;
sewardj12972182014-08-04 08:09:47 +00001775 case ARM64in_VQfromX:
1776 vex_printf("fmov ");
1777 ppHRegARM64(i->ARM64in.VQfromX.rQ);
1778 vex_printf(".d[0], ");
1779 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1780 return;
sewardjbbcf1882014-01-12 12:49:10 +00001781 case ARM64in_VQfromXX:
1782 vex_printf("qFromXX ");
1783 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1784 vex_printf(", ");
1785 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1786 vex_printf(", ");
1787 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1788 return;
1789 case ARM64in_VXfromQ:
sewardj85fbb022014-06-12 13:16:01 +00001790 vex_printf("fmov ");
sewardjbbcf1882014-01-12 12:49:10 +00001791 ppHRegARM64(i->ARM64in.VXfromQ.rX);
1792 vex_printf(", ");
1793 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1794 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1795 return;
sewardj85fbb022014-06-12 13:16:01 +00001796 case ARM64in_VXfromDorS:
1797 vex_printf("fmov ");
1798 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1799 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1800 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1801 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1802 return;
sewardjbbcf1882014-01-12 12:49:10 +00001803 case ARM64in_VMov: {
1804 UChar aux = '?';
1805 switch (i->ARM64in.VMov.szB) {
1806 case 16: aux = 'q'; break;
1807 case 8: aux = 'd'; break;
1808 case 4: aux = 's'; break;
1809 default: break;
1810 }
1811 vex_printf("mov(%c) ", aux);
1812 ppHRegARM64(i->ARM64in.VMov.dst);
1813 vex_printf(", ");
1814 ppHRegARM64(i->ARM64in.VMov.src);
1815 return;
sewardj0ad37a92014-08-29 21:58:03 +00001816 }
sewardjbbcf1882014-01-12 12:49:10 +00001817 case ARM64in_EvCheck:
1818 vex_printf("(evCheck) ldr w9,");
1819 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1820 vex_printf("; subs w9,w9,$1; str w9,");
1821 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1822 vex_printf("; bpl nofail; ldr x9,");
1823 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1824 vex_printf("; br x9; nofail:");
1825 return;
      case ARM64in_ProfInc:
         vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
                    "ldr x8,[x9]; add x8,x8,#1; str x8,[x9]");
         return;
      default:
         vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
         vpanic("ppARM64Instr(1)");
         return;
   }
}
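
/* Illustrative note (not from the original sources): the printers
   above emit debug-trace text only, not reassemblable syntax.  For
   example, assuming ppARM64RIA renders a 12-bit immediate as "#n", an
   ARM64in_Arith add of x21 and 8 into x9 would print roughly as
      add x9, x21, #8
   and the X{Direct,Indir,Assisted} and EvCheck cases print the whole
   multi-insn sequence they will later be emitted as. */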


/* --------- Helpers for register allocation. --------- */

void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   initHRegUsage(u);
   switch (i->tag) {
      case ARM64in_Arith:
         addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
         addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
         addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
         return;
      case ARM64in_Cmp:
         addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
         addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
         return;
      case ARM64in_Logic:
         addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
         addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
         addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
         return;
      case ARM64in_Test:
         addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
         addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
         addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
         addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
         addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
         return;
      case ARM64in_Unary:
         addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
         addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
         return;
      case ARM64in_MovI:
         addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
         addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
         return;
      case ARM64in_Imm64:
         addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
         return;
      case ARM64in_LdSt64:
         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
         if (i->ARM64in.LdSt64.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
         }
         return;
      case ARM64in_LdSt32:
         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
         if (i->ARM64in.LdSt32.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
         }
         return;
      case ARM64in_LdSt16:
         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
         if (i->ARM64in.LdSt16.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
         }
         return;
      case ARM64in_LdSt8:
         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
         if (i->ARM64in.LdSt8.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
         }
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case ARM64in_XDirect:
         addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
         return;
      case ARM64in_XIndir:
         addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
         addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
         return;
      case ARM64in_XAssisted:
         addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
         addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
         return;
      case ARM64in_CSel:
         addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
         addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
         addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
         return;
      case ARM64in_Call:
         /* logic and comments copied/modified from x86 back end */
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be x0 to x7 and the 128-bit vector
            registers in use, q16 .. q20. */
         addHRegUse(u, HRmWrite, hregARM64_X0());
         addHRegUse(u, HRmWrite, hregARM64_X1());
         addHRegUse(u, HRmWrite, hregARM64_X2());
         addHRegUse(u, HRmWrite, hregARM64_X3());
         addHRegUse(u, HRmWrite, hregARM64_X4());
         addHRegUse(u, HRmWrite, hregARM64_X5());
         addHRegUse(u, HRmWrite, hregARM64_X6());
         addHRegUse(u, HRmWrite, hregARM64_X7());
         addHRegUse(u, HRmWrite, hregARM64_Q16());
         addHRegUse(u, HRmWrite, hregARM64_Q17());
         addHRegUse(u, HRmWrite, hregARM64_Q18());
         addHRegUse(u, HRmWrite, hregARM64_Q19());
         addHRegUse(u, HRmWrite, hregARM64_Q20());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on nArgRegs. */
         switch (i->ARM64in.Call.nArgRegs) {
            case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
            case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
            case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
            case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
            case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
            case 0: break;
            default: vpanic("getRegUsage_ARM64:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  However, we reserve x9 for that
            purpose so there's no further complexity here.  Stating x9
            as trashed is pointless since it's not under the control
            of the allocator, but what the hell. */
         addHRegUse(u, HRmWrite, hregARM64_X9());
         return;
      case ARM64in_AddToSP:
         /* Only changes SP, but regalloc doesn't control that, hence
            we don't care. */
         return;
      case ARM64in_FromSP:
         addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
         return;
      case ARM64in_Mul:
         addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
         addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
         addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
         return;
      case ARM64in_LdrEX:
         addHRegUse(u, HRmRead, hregARM64_X4());
         addHRegUse(u, HRmWrite, hregARM64_X2());
         return;
      case ARM64in_StrEX:
         addHRegUse(u, HRmRead, hregARM64_X4());
         addHRegUse(u, HRmWrite, hregARM64_X0());
         addHRegUse(u, HRmRead, hregARM64_X2());
         return;
      case ARM64in_MFence:
         return;
      case ARM64in_VLdStS:
         addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
         if (i->ARM64in.VLdStS.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
         }
         return;
      case ARM64in_VLdStD:
         addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
         if (i->ARM64in.VLdStD.isLoad) {
            addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
         } else {
            addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
         }
         return;
      case ARM64in_VLdStQ:
         addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
         if (i->ARM64in.VLdStQ.isLoad)
            addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
         else
            addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
         return;
      case ARM64in_VCvtI2F:
         addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
         addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
         return;
      case ARM64in_VCvtF2I:
         addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
         addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
         return;
      case ARM64in_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
         return;
      case ARM64in_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
         return;
      case ARM64in_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
         return;
      case ARM64in_VBinD:
         addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
         return;
      case ARM64in_VBinS:
         addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
         return;
      case ARM64in_VCmpD:
         addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
         return;
      case ARM64in_VCmpS:
         addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
         return;
      case ARM64in_VFCSel:
         addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
         addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
         return;
      case ARM64in_FPCR:
         if (i->ARM64in.FPCR.toFPCR)
            addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
         return;
      case ARM64in_FPSR:
         if (i->ARM64in.FPSR.toFPSR)
            addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
         return;
      case ARM64in_VBinV:
         addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
         addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
         return;
      case ARM64in_VModifyV:
         addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
         return;
      case ARM64in_VUnaryV:
         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
         return;
      case ARM64in_VNarrowV:
         addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
         return;
      case ARM64in_VShiftImmV:
         addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
         return;
      case ARM64in_VExtV:
         addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
         addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
         return;
      case ARM64in_VImmQ:
         addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
         return;
      case ARM64in_VDfromX:
         addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
         addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
         return;
      case ARM64in_VQfromX:
         addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
         addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
         return;
      case ARM64in_VQfromXX:
         addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
         addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
         addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
         return;
      case ARM64in_VXfromQ:
         addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
         addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
         return;
      case ARM64in_VXfromDorS:
         addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
         addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
         return;
      case ARM64in_VMov:
         addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
         return;
      case ARM64in_EvCheck:
         /* We expect both amodes only to mention x21, so this is in
            fact pointless, since x21 isn't allocatable, but
            anyway.. */
         addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
         addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
         addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
         return;
      case ARM64in_ProfInc:
         /* Again, pointless to actually state these since neither
            is available to RA. */
         addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
         addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
         return;
      default:
         ppARM64Instr(i);
         vpanic("getRegUsage_ARM64Instr");
   }
}


void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case ARM64in_Arith:
         i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
         i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
         mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
         return;
      case ARM64in_Cmp:
         i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
         mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
         return;
      case ARM64in_Logic:
         i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
         i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
         mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
         return;
      case ARM64in_Test:
         i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
         mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
         i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
         i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
         mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
         return;
      case ARM64in_Unary:
         i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
         i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
         return;
      case ARM64in_MovI:
         i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
         i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
         return;
      case ARM64in_Imm64:
         i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
         return;
      case ARM64in_LdSt64:
         i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
         mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
         return;
      case ARM64in_LdSt32:
         i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
         mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
         return;
      case ARM64in_LdSt16:
         i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
         mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
         return;
      case ARM64in_LdSt8:
         i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
         mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
         return;
      case ARM64in_XDirect:
         mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
         return;
      case ARM64in_XIndir:
         i->ARM64in.XIndir.dstGA
            = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
         mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
         return;
      case ARM64in_XAssisted:
         i->ARM64in.XAssisted.dstGA
            = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
         mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
         return;
      case ARM64in_CSel:
         i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
         i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
         i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
         return;
      case ARM64in_Call:
         return;
      case ARM64in_AddToSP:
         return;
      case ARM64in_FromSP:
         i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
         return;
      case ARM64in_Mul:
         i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
         i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
         i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
         break;
      case ARM64in_LdrEX:
         return;
      case ARM64in_StrEX:
         return;
      case ARM64in_MFence:
         return;
      case ARM64in_VLdStS:
         i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
         i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
         return;
      case ARM64in_VLdStD:
         i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
         i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
         return;
      case ARM64in_VLdStQ:
         i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
         i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
         return;
      case ARM64in_VCvtI2F:
         i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
         i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
         return;
      case ARM64in_VCvtF2I:
         i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
         i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
         return;
      case ARM64in_VCvtSD:
         i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
         i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
         return;
      case ARM64in_VUnaryD:
         i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
         i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
         return;
      case ARM64in_VUnaryS:
         i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
         i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
         return;
      case ARM64in_VBinD:
         i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
         i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
         i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
         return;
      case ARM64in_VBinS:
         i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
         i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
         i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
         return;
      case ARM64in_VCmpD:
         i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
         i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
         return;
      case ARM64in_VCmpS:
         i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
         i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
         return;
      case ARM64in_VFCSel:
         i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
         i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
         i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
         return;
      case ARM64in_FPCR:
         i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
         return;
      case ARM64in_FPSR:
         i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
         return;
      case ARM64in_VBinV:
         i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
         i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
         i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
         return;
      case ARM64in_VModifyV:
         i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
         i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
         return;
      case ARM64in_VUnaryV:
         i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
         i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
         return;
      case ARM64in_VNarrowV:
         i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
         i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
         return;
      case ARM64in_VShiftImmV:
         i->ARM64in.VShiftImmV.dst
            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
         i->ARM64in.VShiftImmV.src
            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
         return;
      case ARM64in_VExtV:
         i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
         i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
         i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
         return;
      case ARM64in_VImmQ:
         i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
         return;
      case ARM64in_VDfromX:
         i->ARM64in.VDfromX.rD
            = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
         i->ARM64in.VDfromX.rX
            = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
         return;
      case ARM64in_VQfromX:
         i->ARM64in.VQfromX.rQ
            = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
         i->ARM64in.VQfromX.rXlo
            = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
         return;
      case ARM64in_VQfromXX:
         i->ARM64in.VQfromXX.rQ
            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
         i->ARM64in.VQfromXX.rXhi
            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
         i->ARM64in.VQfromXX.rXlo
            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
         return;
      case ARM64in_VXfromQ:
         i->ARM64in.VXfromQ.rX
            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
         i->ARM64in.VXfromQ.rQ
            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
         return;
      case ARM64in_VXfromDorS:
         i->ARM64in.VXfromDorS.rX
            = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
         i->ARM64in.VXfromDorS.rDorS
            = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
         return;
      case ARM64in_VMov:
         i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
         i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
         return;
      case ARM64in_EvCheck:
         /* We expect both amodes only to mention x21, so this is in
            fact pointless, since x21 isn't allocatable, but
            anyway.. */
         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
         return;
      case ARM64in_ProfInc:
         /* hardwires x8 and x9 -- nothing to modify. */
         return;
      default:
         ppARM64Instr(i);
         vpanic("mapRegs_ARM64Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
{
   switch (i->tag) {
      case ARM64in_MovI:
         *src = i->ARM64in.MovI.src;
         *dst = i->ARM64in.MovI.dst;
         return True;
      case ARM64in_VMov:
         *src = i->ARM64in.VMov.src;
         *dst = i->ARM64in.VMov.dst;
         return True;
      default:
         break;
   }

   return False;
}


/* Generate arm64 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt64:
         vassert(0 == (offsetB & 7));
         offsetB >>= 3;
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_LdSt64(
                  False/*!isLoad*/,
                  rreg,
                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
               );
         return;
      case HRcFlt64:
         vassert(0 == (offsetB & 7));
         vassert(offsetB >= 0 && offsetB < 32768);
         *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
                                 rreg, hregARM64_X21(), offsetB);
         return;
      case HRcVec128: {
         HReg x21 = hregARM64_X21();   // baseblock
         HReg x9  = hregARM64_X9();    // spill temporary
         vassert(0 == (offsetB & 15)); // check sane alignment
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
         *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genSpill_ARM: unimplemented regclass");
   }
}
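
/* Illustrative example (not from the original sources): spilling a
   vector register, say q17, to baseblock offset 48 cannot use VLdStQ
   directly, because that insn takes no immediate offset.  So the
   two-insn sequence built above comes out, roughly, as
      add x9, x21, #48      // x9 = address of the spill slot
      st1 {v17.2d}, [x9]    // store the 128-bit value
   with x9 usable as scratch precisely because the allocator never
   hands it out. */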

void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt64:
         vassert(0 == (offsetB & 7));
         offsetB >>= 3;
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_LdSt64(
                  True/*isLoad*/,
                  rreg,
                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
               );
         return;
      case HRcFlt64:
         vassert(0 == (offsetB & 7));
         vassert(offsetB >= 0 && offsetB < 32768);
         *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
                                 rreg, hregARM64_X21(), offsetB);
         return;
      case HRcVec128: {
         HReg x21 = hregARM64_X21();   // baseblock
         HReg x9  = hregARM64_X9();    // spill temporary
         vassert(0 == (offsetB & 15)); // check sane alignment
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
         *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genReload_ARM: unimplemented regclass");
   }
}


//ZZ /* Emit an instruction into buf and return the number of bytes used.
//ZZ    Note that buf is not the insn's final place, and therefore it is
//ZZ    imperative to emit position-independent code. */

static inline UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 30);
   return toUChar(n);
}

static inline UChar dregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}

static inline UChar qregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}

#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))

#define X00 BITS4(0,0, 0,0)
#define X01 BITS4(0,0, 0,1)
#define X10 BITS4(0,0, 1,0)
#define X11 BITS4(0,0, 1,1)

#define X000 BITS4(0, 0,0,0)
#define X001 BITS4(0, 0,0,1)
#define X010 BITS4(0, 0,1,0)
#define X011 BITS4(0, 0,1,1)
#define X100 BITS4(0, 1,0,0)
#define X101 BITS4(0, 1,0,1)
#define X110 BITS4(0, 1,1,0)
#define X111 BITS4(0, 1,1,1)

#define X0000 BITS4(0,0,0,0)
#define X0001 BITS4(0,0,0,1)
#define X0010 BITS4(0,0,1,0)
#define X0011 BITS4(0,0,1,1)

#define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))

#define X00000 BITS8(0,0,0, 0,0,0,0,0)
#define X00001 BITS8(0,0,0, 0,0,0,0,1)
#define X00110 BITS8(0,0,0, 0,0,1,1,0)
#define X00111 BITS8(0,0,0, 0,0,1,1,1)
#define X01000 BITS8(0,0,0, 0,1,0,0,0)
#define X10000 BITS8(0,0,0, 1,0,0,0,0)
#define X11000 BITS8(0,0,0, 1,1,0,0,0)
#define X11110 BITS8(0,0,0, 1,1,1,1,0)
#define X11111 BITS8(0,0,0, 1,1,1,1,1)

#define X000000 BITS8(0,0, 0,0,0,0,0,0)
#define X000001 BITS8(0,0, 0,0,0,0,0,1)
#define X000010 BITS8(0,0, 0,0,0,0,1,0)
#define X000011 BITS8(0,0, 0,0,0,0,1,1)
#define X000100 BITS8(0,0, 0,0,0,1,0,0)
#define X000110 BITS8(0,0, 0,0,0,1,1,0)
#define X000111 BITS8(0,0, 0,0,0,1,1,1)
#define X001000 BITS8(0,0, 0,0,1,0,0,0)
#define X001001 BITS8(0,0, 0,0,1,0,0,1)
#define X001010 BITS8(0,0, 0,0,1,0,1,0)
#define X001011 BITS8(0,0, 0,0,1,0,1,1)
#define X001101 BITS8(0,0, 0,0,1,1,0,1)
#define X001110 BITS8(0,0, 0,0,1,1,1,0)
#define X001111 BITS8(0,0, 0,0,1,1,1,1)
#define X010000 BITS8(0,0, 0,1,0,0,0,0)
#define X010001 BITS8(0,0, 0,1,0,0,0,1)
#define X010010 BITS8(0,0, 0,1,0,0,1,0)
#define X010011 BITS8(0,0, 0,1,0,0,1,1)
#define X010101 BITS8(0,0, 0,1,0,1,0,1)
#define X010110 BITS8(0,0, 0,1,0,1,1,0)
#define X010111 BITS8(0,0, 0,1,0,1,1,1)
#define X011001 BITS8(0,0, 0,1,1,0,0,1)
#define X011010 BITS8(0,0, 0,1,1,0,1,0)
#define X011011 BITS8(0,0, 0,1,1,0,1,1)
#define X011101 BITS8(0,0, 0,1,1,1,0,1)
#define X011110 BITS8(0,0, 0,1,1,1,1,0)
#define X011111 BITS8(0,0, 0,1,1,1,1,1)
#define X100001 BITS8(0,0, 1,0,0,0,0,1)
#define X100011 BITS8(0,0, 1,0,0,0,1,1)
#define X100100 BITS8(0,0, 1,0,0,1,0,0)
#define X100101 BITS8(0,0, 1,0,0,1,0,1)
#define X100110 BITS8(0,0, 1,0,0,1,1,0)
#define X100111 BITS8(0,0, 1,0,0,1,1,1)
#define X101101 BITS8(0,0, 1,0,1,1,0,1)
#define X101110 BITS8(0,0, 1,0,1,1,1,0)
#define X110000 BITS8(0,0, 1,1,0,0,0,0)
#define X110001 BITS8(0,0, 1,1,0,0,0,1)
#define X110010 BITS8(0,0, 1,1,0,0,1,0)
#define X110100 BITS8(0,0, 1,1,0,1,0,0)
#define X110101 BITS8(0,0, 1,1,0,1,0,1)
#define X110111 BITS8(0,0, 1,1,0,1,1,1)
#define X111000 BITS8(0,0, 1,1,1,0,0,0)
#define X111001 BITS8(0,0, 1,1,1,0,0,1)
#define X111101 BITS8(0,0, 1,1,1,1,0,1)
#define X111110 BITS8(0,0, 1,1,1,1,1,0)
#define X111111 BITS8(0,0, 1,1,1,1,1,1)

#define X0001000 BITS8(0, 0,0,0,1,0,0,0)
#define X0010000 BITS8(0, 0,0,1,0,0,0,0)
#define X0100000 BITS8(0, 0,1,0,0,0,0,0)
#define X1000000 BITS8(0, 1,0,0,0,0,0,0)

#define X00100000 BITS8(0,0,1,0,0,0,0,0)
#define X00100001 BITS8(0,0,1,0,0,0,0,1)
#define X00100010 BITS8(0,0,1,0,0,0,1,0)
#define X00100011 BITS8(0,0,1,0,0,0,1,1)
#define X01010000 BITS8(0,1,0,1,0,0,0,0)
#define X01010001 BITS8(0,1,0,1,0,0,0,1)
#define X01010100 BITS8(0,1,0,1,0,1,0,0)
#define X01011000 BITS8(0,1,0,1,1,0,0,0)
#define X01100000 BITS8(0,1,1,0,0,0,0,0)
#define X01100001 BITS8(0,1,1,0,0,0,0,1)
#define X01100010 BITS8(0,1,1,0,0,0,1,0)
#define X01100011 BITS8(0,1,1,0,0,0,1,1)
#define X01110000 BITS8(0,1,1,1,0,0,0,0)
#define X01110001 BITS8(0,1,1,1,0,0,0,1)
#define X01110010 BITS8(0,1,1,1,0,0,1,0)
#define X01110011 BITS8(0,1,1,1,0,0,1,1)
#define X01110100 BITS8(0,1,1,1,0,1,0,0)
#define X01110101 BITS8(0,1,1,1,0,1,0,1)
#define X01110110 BITS8(0,1,1,1,0,1,1,0)
#define X01110111 BITS8(0,1,1,1,0,1,1,1)
#define X11000001 BITS8(1,1,0,0,0,0,0,1)
#define X11000011 BITS8(1,1,0,0,0,0,1,1)
#define X11010100 BITS8(1,1,0,1,0,1,0,0)
#define X11010110 BITS8(1,1,0,1,0,1,1,0)
#define X11011000 BITS8(1,1,0,1,1,0,0,0)
#define X11011010 BITS8(1,1,0,1,1,0,1,0)
#define X11011110 BITS8(1,1,0,1,1,1,1,0)
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)


/* --- 4 fields --- */

static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
   vassert(8+19+1+4 == 32);
   vassert(f1 < (1<<8));
   vassert(f2 < (1<<19));
   vassert(f3 < (1<<1));
   vassert(f4 < (1<<4));
   UInt w = 0;
   w = (w << 8)  | f1;
   w = (w << 19) | f2;
   w = (w << 1)  | f3;
   w = (w << 4)  | f4;
   return w;
}

/* --- 5 fields --- */

static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
                                  UInt f3, UInt f4, UInt f5 ) {
   vassert(3+6+2+16+5 == 32);
   vassert(f1 < (1<<3));
   vassert(f2 < (1<<6));
   vassert(f3 < (1<<2));
   vassert(f4 < (1<<16));
   vassert(f5 < (1<<5));
   UInt w = 0;
   w = (w << 3)  | f1;
   w = (w << 6)  | f2;
   w = (w << 2)  | f3;
   w = (w << 16) | f4;
   w = (w << 5)  | f5;
   return w;
}
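
/* Worked example (illustrative): the MOVZ encoding used by
   imm64_to_iregNo below is built with X_3_6_2_16_5.  For
   "movz x9, #0x1234, lsl #16" the fields are X110 (sf=1, opc=10),
   X100101 (move wide immediate), hw=1, imm16=0x1234, rd=9, so the
   instruction word assembles as
      X_3_6_2_16_5(X110, X100101, 1, 0x1234, 9)
   i.e. bits [31:29]=110, [28:23]=100101, [22:21]=hw, [20:5]=imm16,
   [4:0]=rd. */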

/* --- 6 fields --- */

static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
                                    UInt f4, UInt f5, UInt f6 ) {
   vassert(2+6+2+12+5+5 == 32);
   vassert(f1 < (1<<2));
   vassert(f2 < (1<<6));
   vassert(f3 < (1<<2));
   vassert(f4 < (1<<12));
   vassert(f5 < (1<<5));
   vassert(f6 < (1<<5));
   UInt w = 0;
   w = (w << 2)  | f1;
   w = (w << 6)  | f2;
   w = (w << 2)  | f3;
   w = (w << 12) | f4;
   w = (w << 5)  | f5;
   w = (w << 5)  | f6;
   return w;
}

static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
                                   UInt f4, UInt f5, UInt f6 ) {
   vassert(3+8+5+6+5+5 == 32);
   vassert(f1 < (1<<3));
   vassert(f2 < (1<<8));
   vassert(f3 < (1<<5));
   vassert(f4 < (1<<6));
   vassert(f5 < (1<<5));
   vassert(f6 < (1<<5));
   UInt w = 0;
   w = (w << 3) | f1;
   w = (w << 8) | f2;
   w = (w << 5) | f3;
   w = (w << 6) | f4;
   w = (w << 5) | f5;
   w = (w << 5) | f6;
   return w;
}
static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
                                   UInt f4, UInt f5, UInt f6 ) {
   vassert(3+5+8+6+5+5 == 32);
   vassert(f1 < (1<<3));
   vassert(f2 < (1<<5));
   vassert(f3 < (1<<8));
   vassert(f4 < (1<<6));
   vassert(f5 < (1<<5));
   vassert(f6 < (1<<5));
   UInt w = 0;
   w = (w << 3) | f1;
   w = (w << 5) | f2;
   w = (w << 8) | f3;
   w = (w << 6) | f4;
   w = (w << 5) | f5;
   w = (w << 5) | f6;
   return w;
}

static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
                                   UInt f4, UInt f5, UInt f6 ) {
   vassert(3+6+7+6+5+5 == 32);
   vassert(f1 < (1<<3));
   vassert(f2 < (1<<6));
   vassert(f3 < (1<<7));
   vassert(f4 < (1<<6));
   vassert(f5 < (1<<5));
   vassert(f6 < (1<<5));
   UInt w = 0;
   w = (w << 3) | f1;
   w = (w << 6) | f2;
   w = (w << 7) | f3;
   w = (w << 6) | f4;
   w = (w << 5) | f5;
   w = (w << 5) | f6;
   return w;
}

/* --- 7 fields --- */

static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
   vassert(2+6+3+9+2+5+5 == 32);
   vassert(f1 < (1<<2));
   vassert(f2 < (1<<6));
   vassert(f3 < (1<<3));
   vassert(f4 < (1<<9));
   vassert(f5 < (1<<2));
   vassert(f6 < (1<<5));
   vassert(f7 < (1<<5));
   UInt w = 0;
   w = (w << 2) | f1;
   w = (w << 6) | f2;
   w = (w << 3) | f3;
   w = (w << 9) | f4;
   w = (w << 2) | f5;
   w = (w << 5) | f6;
   w = (w << 5) | f7;
   return w;
}

static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
   vassert(3+6+1+6+6+5+5 == 32);
   vassert(f1 < (1<<3));
   vassert(f2 < (1<<6));
   vassert(f3 < (1<<1));
   vassert(f4 < (1<<6));
   vassert(f5 < (1<<6));
   vassert(f6 < (1<<5));
   vassert(f7 < (1<<5));
   UInt w = 0;
   w = (w << 3) | f1;
   w = (w << 6) | f2;
   w = (w << 1) | f3;
   w = (w << 6) | f4;
   w = (w << 6) | f5;
   w = (w << 5) | f6;
   w = (w << 5) | f7;
   return w;
}


//ZZ #define X0000 BITS4(0,0,0,0)
//ZZ #define X0001 BITS4(0,0,0,1)
//ZZ #define X0010 BITS4(0,0,1,0)
//ZZ #define X0011 BITS4(0,0,1,1)
//ZZ #define X0100 BITS4(0,1,0,0)
//ZZ #define X0101 BITS4(0,1,0,1)
//ZZ #define X0110 BITS4(0,1,1,0)
//ZZ #define X0111 BITS4(0,1,1,1)
//ZZ #define X1000 BITS4(1,0,0,0)
//ZZ #define X1001 BITS4(1,0,0,1)
//ZZ #define X1010 BITS4(1,0,1,0)
//ZZ #define X1011 BITS4(1,0,1,1)
//ZZ #define X1100 BITS4(1,1,0,0)
//ZZ #define X1101 BITS4(1,1,0,1)
//ZZ #define X1110 BITS4(1,1,1,0)
//ZZ #define X1111 BITS4(1,1,1,1)
/*
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
    (((zzx0) & 0xF) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
    (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))

#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
*/

/* Get an immediate into a register, using only that register. */
static UInt* imm64_to_iregNo ( UInt* p, Int xD, ULong imm64 )
{
   if (imm64 == 0) {
      // This has to be special-cased, since the logic below
      // will leave the register unchanged in this case.
      // MOVZ xD, #0, LSL #0
      *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
      return p;
   }

   // There must be at least one non-zero halfword.  Find the
   // lowest nonzero such, and use MOVZ to install it and zero
   // out the rest of the register.
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);

   UInt i;
   for (i = 0; i < 4; i++) {
      if (h[i] != 0)
         break;
   }
   vassert(i < 4);

   // MOVZ xD, h[i], LSL (16*i)
   *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);

   // Work on upwards through h[i], using MOVK to stuff in any
   // remaining nonzero elements.
   i++;
   for (; i < 4; i++) {
      if (h[i] == 0)
         continue;
      // MOVK xD, h[i], LSL (16*i)
      *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   }

   return p;
}
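
/* Worked example (illustrative): for imm64 = 0x0000123400005678 the
   halfwords are h[0]=0x5678, h[1]=0, h[2]=0x1234, h[3]=0, so the
   emitted sequence is just
      movz xD, #0x5678, lsl #0
      movk xD, #0x1234, lsl #32
   -- the all-zero halfwords are skipped, which is why a zero
   immediate needs the special case at the top. */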

/* Get an immediate into a register, using only that register, and
   generating exactly 4 instructions, regardless of the value of the
   immediate.  This is used when generating sections of code that need
   to be patched later, so as to guarantee a specific size. */
static UInt* imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
{
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   // Work on upwards through h[i], using MOVK to stuff in the
   // remaining elements.
   UInt i;
   for (i = 0; i < 4; i++) {
      if (i == 0) {
         // MOVZ xD, h[0], LSL (16*0)
         *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
      } else {
         // MOVK xD, h[i], LSL (16*i)
         *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
      }
   }
   return p;
}
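
/* Illustrative note: unlike imm64_to_iregNo, this always emits
   movz + 3 movk, even for halfwords that are zero.  That fixed
   16-byte shape is what allows the chain/unchain machinery to
   overwrite the target address in place later on;
   is_imm64_to_iregNo_EXACTLY4 below checks for exactly this pattern
   before any such patching is attempted. */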

/* Check whether p points at a 4-insn sequence cooked up by
   imm64_to_iregNo_EXACTLY4(). */
static Bool is_imm64_to_iregNo_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
{
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   // Work on upwards through h[i], using MOVK to stuff in the
   // remaining elements.
   UInt i;
   for (i = 0; i < 4; i++) {
      UInt expected;
      if (i == 0) {
         // MOVZ xD, h[0], LSL (16*0)
         expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
      } else {
         // MOVK xD, h[i], LSL (16*i)
         expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
      }
      if (p[i] != expected)
         return False;
   }
   return True;
}


/* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
   rD, using the given amode for the address. */
static UInt* do_load_or_store8 ( UInt* p,
                                 Bool isLoad, UInt wD, ARM64AMode* am )
{
   vassert(wD <= 30);
   if (am->tag == ARM64am_RI9) {
      /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
         LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
      */
      Int simm9 = am->ARM64am.RI9.simm9;
      vassert(-256 <= simm9 && simm9 <= 255);
      UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
                                   simm9 & 0x1FF, X00,
                                   iregNo(am->ARM64am.RI9.reg), wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RI12) {
      /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
         LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
      */
      UInt uimm12 = am->ARM64am.RI12.uimm12;
      UInt scale  = am->ARM64am.RI12.szB;
      vassert(scale == 1); /* failure of this is serious.  Do not ignore. */
      UInt xN = iregNo(am->ARM64am.RI12.reg);
      vassert(xN <= 30);
      UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
                                  uimm12, xN, wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RR) {
      /* STRB Xd, [Xn|SP, Xm]:  00 111 000 001 m 011 0 10 n d
         LDRB Xd, [Xn|SP, Xm]:  00 111 000 011 m 011 0 10 n d
      */
      UInt xN = iregNo(am->ARM64am.RR.base);
      UInt xM = iregNo(am->ARM64am.RR.index);
      vassert(xN <= 30);
      UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
                                 xM, X011010, xN, wD);
      *p++ = instr;
      return p;
   }
   vpanic("do_load_or_store8");
   vassert(0);
}


/* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
   rD, using the given amode for the address. */
static UInt* do_load_or_store16 ( UInt* p,
                                  Bool isLoad, UInt wD, ARM64AMode* am )
{
   vassert(wD <= 30);
   if (am->tag == ARM64am_RI9) {
      /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
         LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
      */
      Int simm9 = am->ARM64am.RI9.simm9;
      vassert(-256 <= simm9 && simm9 <= 255);
      UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
                                   simm9 & 0x1FF, X00,
                                   iregNo(am->ARM64am.RI9.reg), wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RI12) {
      /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
         LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
      */
      UInt uimm12 = am->ARM64am.RI12.uimm12;
      UInt scale  = am->ARM64am.RI12.szB;
      vassert(scale == 2); /* failure of this is serious.  Do not ignore. */
      UInt xN = iregNo(am->ARM64am.RI12.reg);
      vassert(xN <= 30);
      UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
                                  uimm12, xN, wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RR) {
      /* STRH Wd, [Xn|SP, Xm]:  01 111 000 001 m 011 0 10 n d
         LDRH Wd, [Xn|SP, Xm]:  01 111 000 011 m 011 0 10 n d
      */
      UInt xN = iregNo(am->ARM64am.RR.base);
      UInt xM = iregNo(am->ARM64am.RR.index);
      vassert(xN <= 30);
      UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
                                 xM, X011010, xN, wD);
      *p++ = instr;
      return p;
   }
   vpanic("do_load_or_store16");
   vassert(0);
}


/* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
   rD, using the given amode for the address. */
static UInt* do_load_or_store32 ( UInt* p,
                                  Bool isLoad, UInt wD, ARM64AMode* am )
{
   vassert(wD <= 30);
   if (am->tag == ARM64am_RI9) {
      /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
         LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
      */
      Int simm9 = am->ARM64am.RI9.simm9;
      vassert(-256 <= simm9 && simm9 <= 255);
      UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
                                   simm9 & 0x1FF, X00,
                                   iregNo(am->ARM64am.RI9.reg), wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RI12) {
      /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
         LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
      */
      UInt uimm12 = am->ARM64am.RI12.uimm12;
      UInt scale  = am->ARM64am.RI12.szB;
      vassert(scale == 4); /* failure of this is serious.  Do not ignore. */
      UInt xN = iregNo(am->ARM64am.RI12.reg);
      vassert(xN <= 30);
      UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
                                  uimm12, xN, wD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RR) {
      /* STR Wd, [Xn|SP, Xm]:  10 111 000 001 m 011 0 10 n d
         LDR Wd, [Xn|SP, Xm]:  10 111 000 011 m 011 0 10 n d
      */
      UInt xN = iregNo(am->ARM64am.RR.base);
      UInt xM = iregNo(am->ARM64am.RR.index);
      vassert(xN <= 30);
      UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
                                 xM, X011010, xN, wD);
      *p++ = instr;
      return p;
   }
   vpanic("do_load_or_store32");
   vassert(0);
}


/* Generate a 64 bit load or store to/from xD, using the given amode
   for the address. */
static UInt* do_load_or_store64 ( UInt* p,
                                  Bool isLoad, UInt xD, ARM64AMode* am )
{
   /* In all these cases, Rn can't be 31 since that means SP. */
   vassert(xD <= 30);
   if (am->tag == ARM64am_RI9) {
      /* STUR Xd, [Xn|SP + simm9]:  11 111000 000 simm9 00 n d
         LDUR Xd, [Xn|SP + simm9]:  11 111000 010 simm9 00 n d
      */
      Int simm9 = am->ARM64am.RI9.simm9;
      vassert(-256 <= simm9 && simm9 <= 255);
      UInt xN = iregNo(am->ARM64am.RI9.reg);
      vassert(xN <= 30);
      UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
                                   simm9 & 0x1FF, X00, xN, xD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RI12) {
      /* STR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 00 imm12 n d
         LDR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 01 imm12 n d
      */
      UInt uimm12 = am->ARM64am.RI12.uimm12;
      UInt scale  = am->ARM64am.RI12.szB;
      vassert(scale == 8); /* failure of this is serious.  Do not ignore. */
      UInt xN = iregNo(am->ARM64am.RI12.reg);
      vassert(xN <= 30);
      UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
                                  uimm12, xN, xD);
      *p++ = instr;
      return p;
   }
   if (am->tag == ARM64am_RR) {
      /* STR Xd, [Xn|SP, Xm]:  11 111 000 001 m 011 0 10 n d
         LDR Xd, [Xn|SP, Xm]:  11 111 000 011 m 011 0 10 n d
      */
      UInt xN = iregNo(am->ARM64am.RR.base);
      UInt xM = iregNo(am->ARM64am.RR.index);
      vassert(xN <= 30);
      UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
                                 xM, X011010, xN, xD);
      *p++ = instr;
      return p;
   }
   vpanic("do_load_or_store64");
   vassert(0);
}
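

/* Worked example (added commentary, not upstream text): encoding
   STR x9, [x21, #16] through the ARM64am_RI12 path of
   do_load_or_store64 gives uimm12 = 2 (byte offset 16, scale 8),
   xN = 21, xD = 9, hence the word
      11 111001 00 000000000010 10101 01001
   and flipping bits 23:22 from 00 to 01 yields the matching LDR. */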


/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                      UChar* buf, Int nbuf, const ARM64Instr* i,
                      Bool mode64, VexEndness endness_host,
                      const void* disp_cp_chain_me_to_slowEP,
                      const void* disp_cp_chain_me_to_fastEP,
                      const void* disp_cp_xindir,
                      const void* disp_cp_xassisted )
{
   UInt* p = (UInt*)buf;
   vassert(nbuf >= 32);
   vassert(mode64 == True);
   vassert(0 == (((HWord)buf) & 3));

   switch (i->tag) {
      case ARM64in_Arith: {
         UInt      rD   = iregNo(i->ARM64in.Arith.dst);
         UInt      rN   = iregNo(i->ARM64in.Arith.argL);
         ARM64RIA* argR = i->ARM64in.Arith.argR;
         switch (argR->tag) {
            case ARM64riA_I12:
               *p++ = X_2_6_2_12_5_5(
                         i->ARM64in.Arith.isAdd ? X10 : X11,
                         X010001,
                         argR->ARM64riA.I12.shift == 12 ? X01 : X00,
                         argR->ARM64riA.I12.imm12, rN, rD
                      );
               break;
            case ARM64riA_R: {
               UInt rM = iregNo(i->ARM64in.Arith.argR->ARM64riA.R.reg);
               *p++ = X_3_8_5_6_5_5(
                         i->ARM64in.Arith.isAdd ? X100 : X110,
                         X01011000, rM, X000000, rN, rD
                      );
               break;
            }
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_Cmp: {
         UInt      rD   = 31; /* XZR, we are going to discard the result */
         UInt      rN   = iregNo(i->ARM64in.Cmp.argL);
         ARM64RIA* argR = i->ARM64in.Cmp.argR;
         Bool      is64 = i->ARM64in.Cmp.is64;
         switch (argR->tag) {
            case ARM64riA_I12:
               /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
               /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
               *p++ = X_2_6_2_12_5_5(
                         is64 ? X11 : X01, X110001,
                         argR->ARM64riA.I12.shift == 12 ? X01 : X00,
                         argR->ARM64riA.I12.imm12, rN, rD);
               break;
            case ARM64riA_R: {
               /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
               /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
               UInt rM = iregNo(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
               *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
                                    X01011000, rM, X000000, rN, rD);
               break;
            }
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_Logic: {
         UInt      rD   = iregNo(i->ARM64in.Logic.dst);
         UInt      rN   = iregNo(i->ARM64in.Logic.argL);
         ARM64RIL* argR = i->ARM64in.Logic.argR;
         UInt      opc  = 0; /* invalid */
         vassert(rD < 31);
         vassert(rN < 31);
         switch (i->ARM64in.Logic.op) {
            case ARM64lo_OR:  opc = X101; break;
            case ARM64lo_AND: opc = X100; break;
            case ARM64lo_XOR: opc = X110; break;
            default: break;
         }
         vassert(opc != 0);
         switch (argR->tag) {
            case ARM64riL_I13: {
               /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
               /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
               /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
               *p++ = X_3_6_1_6_6_5_5(
                         opc, X100100, argR->ARM64riL.I13.bitN,
                         argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
                         rN, rD
                      );
               break;
            }
            case ARM64riL_R: {
               /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
               /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
               /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
               UInt rM = iregNo(argR->ARM64riL.R.reg);
               vassert(rM < 31);
               *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
               break;
            }
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_Test: {
         UInt      rD   = 31; /* XZR, we are going to discard the result */
         UInt      rN   = iregNo(i->ARM64in.Test.argL);
         ARM64RIL* argR = i->ARM64in.Test.argR;
         switch (argR->tag) {
            case ARM64riL_I13: {
               /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
               *p++ = X_3_6_1_6_6_5_5(
                         X111, X100100, argR->ARM64riL.I13.bitN,
                         argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
                         rN, rD
                      );
               break;
            }
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_Shift: {
         UInt      rD   = iregNo(i->ARM64in.Shift.dst);
         UInt      rN   = iregNo(i->ARM64in.Shift.argL);
         ARM64RI6* argR = i->ARM64in.Shift.argR;
         vassert(rD < 31);
         vassert(rN < 31);
         switch (argR->tag) {
            case ARM64ri6_I6: {
               /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
               /* 110 1001101 sh      63      nn dd   LSR Xd, Xn, sh */
               /* 100 1001101 sh      63      nn dd   ASR Xd, Xn, sh */
               UInt sh = argR->ARM64ri6.I6.imm6;
               vassert(sh > 0 && sh < 64);
               switch (i->ARM64in.Shift.op) {
                  case ARM64sh_SHL:
                     *p++ = X_3_6_1_6_6_5_5(X110, X100110,
                                            1, 64-sh, 63-sh, rN, rD);
                     break;
                  case ARM64sh_SHR:
                     *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
                     break;
                  case ARM64sh_SAR:
                     *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
                     break;
                  default:
                     vassert(0);
               }
               break;
            }
            case ARM64ri6_R: {
               /* 100 1101 0110 mm 001000 nn dd   LSL Xd, Xn, Xm */
               /* 100 1101 0110 mm 001001 nn dd   LSR Xd, Xn, Xm */
               /* 100 1101 0110 mm 001010 nn dd   ASR Xd, Xn, Xm */
               UInt rM = iregNo(argR->ARM64ri6.R.reg);
               vassert(rM < 31);
               UInt subOpc = 0;
               switch (i->ARM64in.Shift.op) {
                  case ARM64sh_SHL: subOpc = X001000; break;
                  case ARM64sh_SHR: subOpc = X001001; break;
                  case ARM64sh_SAR: subOpc = X001010; break;
                  default: vassert(0);
               }
               *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
               break;
            }
            default:
               vassert(0);
         }
         goto done;
      }
      case ARM64in_Unary: {
         UInt rDst = iregNo(i->ARM64in.Unary.dst);
         UInt rSrc = iregNo(i->ARM64in.Unary.src);
         switch (i->ARM64in.Unary.op) {
            case ARM64un_CLZ:
               /* 1 10 1101 0110 00000 00010 0 nn dd   CLZ Xd, Xn */
               /* 1 10 1101 0110 00000 00010 1 nn dd   CLS Xd, Xn (unimp) */
               *p++ = X_3_8_5_6_5_5(X110,
                                    X11010110, X00000, X000100, rSrc, rDst);
               goto done;
            case ARM64un_NEG:
               /* 1 10 01011 000 m 000000 11111 d   NEG Xd,Xm */
               /* 0 10 01011 000 m 000000 11111 d   NEG Wd,Wm (unimp) */
               *p++ = X_3_8_5_6_5_5(X110,
                                    X01011000, rSrc, X000000, X11111, rDst);
               goto done;
            case ARM64un_NOT: {
               /* 1 01 01010 00 1 m 000000 11111 d   MVN Xd,Xm */
               *p++ = X_3_8_5_6_5_5(X101,
                                    X01010001, rSrc, X000000, X11111, rDst);
               goto done;
            }
            default:
               break;
         }
         goto bad;
      }
      case ARM64in_MovI: {
         /* We generate the "preferred form", ORR Xd, XZR, Xm
            101 01010 00 0 m 000000 11111 d
         */
         UInt instr = 0xAA0003E0;
         UInt d     = iregNo(i->ARM64in.MovI.dst);
         UInt m     = iregNo(i->ARM64in.MovI.src);
         *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
         goto done;
      }
      case ARM64in_Imm64: {
         p = imm64_to_iregNo( p, iregNo(i->ARM64in.Imm64.dst),
                              i->ARM64in.Imm64.imm64 );
         goto done;
      }
      case ARM64in_LdSt64: {
         p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
                                 iregNo(i->ARM64in.LdSt64.rD),
                                 i->ARM64in.LdSt64.amode );
         goto done;
      }
      case ARM64in_LdSt32: {
         p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
                                 iregNo(i->ARM64in.LdSt32.rD),
                                 i->ARM64in.LdSt32.amode );
         goto done;
      }
      case ARM64in_LdSt16: {
         p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
                                 iregNo(i->ARM64in.LdSt16.rD),
                                 i->ARM64in.LdSt16.amode );
         goto done;
      }
      case ARM64in_LdSt8: {
         p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
                                iregNo(i->ARM64in.LdSt8.rD),
                                i->ARM64in.LdSt8.amode );
         goto done;
      }

      case ARM64in_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
            vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest PC. */
         /* imm64 x9, dstGA */
         /* str   x9, amPC */
         p = imm64_to_iregNo(p, /*x*/9, i->ARM64in.XDirect.dstGA);
         p = do_load_or_store64(p, False/*!isLoad*/,
                                /*x*/9, i->ARM64in.XDirect.amPC);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (5) below. */
         /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
         /* blr  x9 */
         const void* disp_cp_chain_me
            = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                          : disp_cp_chain_me_to_slowEP;
         p = imm64_to_iregNo_EXACTLY4(p, /*x*/9,
                                      Ptr_to_ULong(disp_cp_chain_me));
         *p++ = 0xD63F0120;
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            vassert(ptmp != NULL);
            delta = delta >> 2;
            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
         }
         goto done;
      }
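
      /* Layout sketch (added commentary, not upstream text): an
         unconditional XDirect block is therefore
            1..4 insns   imm64 x9, dstGA
            1 insn       str x9, amPC
            4 insns      movz/movk x9, disp_cp_chain_me  (EXACTLY4)
            1 insn       blr x9
         and chaining/unchaining only ever rewrites the final five
         instructions. */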

      case ARM64in_XIndir: {
         // XIndir is more or less the same as XAssisted, except
         // we don't have a trc value to hand back, so there's no
         // write to r21.
         /* Use ptmp for backpatching conditional jumps. */
         //UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
            vassert(0); //ATC
//ZZ          vassert(i->ARMin.XIndir.cond != ARMcc_NV);
//ZZ          ptmp = p;
//ZZ          *p++ = 0;
         }

         /* Update the guest PC. */
         /* str r-dstGA, amPC */
         p = do_load_or_store64(p, False/*!isLoad*/,
                                iregNo(i->ARM64in.XIndir.dstGA),
                                i->ARM64in.XIndir.amPC);

         /* imm64 x9, VG_(disp_cp_xindir) */
         /* br x9 */
         p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xindir));
         *p++ = 0xD61F0120; /* br x9 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
            vassert(0); //ATC
//ZZ          Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
//ZZ          vassert(delta > 0 && delta < 40);
//ZZ          vassert((delta & 3) == 0);
//ZZ          UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
//ZZ          vassert(notCond <= 13); /* Neither AL nor NV */
//ZZ          delta = (delta >> 2) - 2;
//ZZ          *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARM64in_XAssisted: {
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in.  I think this can only
            ever happen when VEX is driven by the switchbacker. */
         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
            vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest PC. */
         /* str r-dstGA, amPC */
         p = do_load_or_store64(p, False/*!isLoad*/,
                                iregNo(i->ARM64in.XAssisted.dstGA),
                                i->ARM64in.XAssisted.amPC);

         /* movw r21, $magic_number */
         UInt trcval = 0;
         switch (i->ARM64in.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
            //case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
            case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            //case Ijk_Ret:
            //case Ijk_Call:
            /* fallthrough */
            default:
               ppIRJumpKind(i->ARM64in.XAssisted.jk);
               vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
                      "unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm64_to_iregNo(p, /*x*/21, (ULong)trcval);

         /* imm64 x9, VG_(disp_cp_xassisted) */
         /* br x9 */
         p = imm64_to_iregNo(p, /*x*/9, Ptr_to_ULong(disp_cp_xassisted));
         *p++ = 0xD61F0120; /* br x9 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            vassert(ptmp != NULL);
            delta = delta >> 2;
            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
         }
         goto done;
      }

      case ARM64in_CSel: {
         /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
         UInt dd   = iregNo(i->ARM64in.CSel.dst);
         UInt nn   = iregNo(i->ARM64in.CSel.argL);
         UInt mm   = iregNo(i->ARM64in.CSel.argR);
         UInt cond = (UInt)i->ARM64in.CSel.cond;
         vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
         *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
         goto done;
      }

      case ARM64in_Call: {
         /* We'll use x9 as a scratch register to put the target
            address in. */
         if (i->ARM64in.Call.cond != ARM64cc_AL
             && i->ARM64in.Call.rloc.pri != RLPri_None) {
            /* The call might not happen (it isn't unconditional) and
               it returns a result.  In this case we will need to
               generate a control flow diamond to put 0x555..555 in
               the return register(s) in the case where the call
               doesn't happen.  If this ever becomes necessary, maybe
               copy code from the 32-bit ARM equivalent.  Until that
               day, just give up. */
            goto bad;
         }

         UInt* ptmp = NULL;
         if (i->ARM64in.Call.cond != ARM64cc_AL) {
            /* Create a hole to put a conditional branch in.  We'll
               patch it once we know the branch length. */
            ptmp = p;
            *p++ = 0;
         }

         // x9 = &target
         p = imm64_to_iregNo( (UInt*)p,
                              /*x*/9, (ULong)i->ARM64in.Call.target );
         // blr x9
         *p++ = 0xD63F0120;

         // Patch the hole if necessary
         if (i->ARM64in.Call.cond != ARM64cc_AL) {
            ULong dist = (ULong)(p - ptmp);
            /* imm64_to_iregNo produces between 1 and 4 insns; add to
               that the hole word itself and the BLR.  Hence: */
            vassert(dist >= 3 && dist <= 6);
            vassert(ptmp != NULL);
            // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
            *ptmp = X_8_19_1_4(X01010100, dist, 0,
                               1 ^ (UInt)i->ARM64in.Call.cond);
         } else {
            vassert(ptmp == NULL);
         }

         goto done;
      }
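
      /* Example (added commentary, not upstream text): if the call
         target needs only a single MOVZ, then dist = 3 (the hole, the
         MOVZ, the BLR), and the hole is patched to
            B.<invert(cond)> .+12
         so that a failing condition skips straight past the BLR. */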

      case ARM64in_AddToSP: {
         /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
            11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
         */
         Int simm12 = i->ARM64in.AddToSP.simm;
         vassert(-4096 < simm12 && simm12 < 4096);
         vassert(0 == (simm12 & 0xF));
         if (simm12 >= 0) {
            *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
         } else {
            *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
         }
         goto done;
      }

      case ARM64in_FromSP: {
         /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
         UInt dd = iregNo(i->ARM64in.FromSP.dst);
         vassert(dd < 31);
         *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
         goto done;
      }

      case ARM64in_Mul: {
         /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
            100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
            100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
         */
         UInt dd = iregNo(i->ARM64in.Mul.dst);
         UInt nn = iregNo(i->ARM64in.Mul.argL);
         UInt mm = iregNo(i->ARM64in.Mul.argR);
         vassert(dd < 31 && nn < 31 && mm < 31);
         switch (i->ARM64in.Mul.op) {
            case ARM64mul_ZX:
               *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
               goto done;
            case ARM64mul_SX:
               *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
               goto done;
            case ARM64mul_PLAIN:
               *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
               goto done;
            default:
               vassert(0);
         }
         goto bad;
      }
      case ARM64in_LdrEX: {
         /* 085F7C82   ldxrb w2, [x4]
            485F7C82   ldxrh w2, [x4]
            885F7C82   ldxr  w2, [x4]
            C85F7C82   ldxr  x2, [x4]
         */
         switch (i->ARM64in.LdrEX.szB) {
            case 1: *p++ = 0x085F7C82; goto done;
            case 2: *p++ = 0x485F7C82; goto done;
            case 4: *p++ = 0x885F7C82; goto done;
            case 8: *p++ = 0xC85F7C82; goto done;
            default: break;
         }
         goto bad;
      }
      case ARM64in_StrEX: {
         /* 08007C82   stxrb w0, w2, [x4]
            48007C82   stxrh w0, w2, [x4]
            88007C82   stxr  w0, w2, [x4]
            C8007C82   stxr  w0, x2, [x4]
         */
         switch (i->ARM64in.StrEX.szB) {
            case 1: *p++ = 0x08007C82; goto done;
            case 2: *p++ = 0x48007C82; goto done;
            case 4: *p++ = 0x88007C82; goto done;
            case 8: *p++ = 0xC8007C82; goto done;
            default: break;
         }
         goto bad;
      }
      case ARM64in_MFence: {
         *p++ = 0xD5033F9F; /* DSB sy */
         *p++ = 0xD5033FBF; /* DMB sy */
         *p++ = 0xD5033FDF; /* ISB */
         goto done;
      }
      //case ARM64in_CLREX: {
      //   //ATC, but believed to be correct
      //   goto bad;
      //   *p++ = 0xD5033F5F; /* clrex */
      //   goto done;
      //}
      case ARM64in_VLdStS: {
         /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
            10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
         */
         UInt sD     = dregNo(i->ARM64in.VLdStS.sD);
         UInt rN     = iregNo(i->ARM64in.VLdStS.rN);
         UInt uimm12 = i->ARM64in.VLdStS.uimm12;
         Bool isLD   = i->ARM64in.VLdStS.isLoad;
         vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
         uimm12 >>= 2;
         vassert(uimm12 < (1<<12));
         vassert(sD < 32);
         vassert(rN < 31);
         *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
                               uimm12, rN, sD);
         goto done;
      }
      case ARM64in_VLdStD: {
         /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
            11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
         */
         UInt dD     = dregNo(i->ARM64in.VLdStD.dD);
         UInt rN     = iregNo(i->ARM64in.VLdStD.rN);
         UInt uimm12 = i->ARM64in.VLdStD.uimm12;
         Bool isLD   = i->ARM64in.VLdStD.isLoad;
         vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
         uimm12 >>= 3;
         vassert(uimm12 < (1<<12));
         vassert(dD < 32);
         vassert(rN < 31);
         *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
                               uimm12, rN, dD);
         goto done;
      }
      case ARM64in_VLdStQ: {
         /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
            0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
         */
         UInt rQ = qregNo(i->ARM64in.VLdStQ.rQ);
         UInt rN = iregNo(i->ARM64in.VLdStQ.rN);
         vassert(rQ < 32);
         vassert(rN < 31);
         if (i->ARM64in.VLdStQ.isLoad) {
            *p++ = 0x4C407C00 | (rN << 5) | rQ;
         } else {
            *p++ = 0x4C007C00 | (rN << 5) | rQ;
         }
         goto done;
      }
      case ARM64in_VCvtI2F: {
         /* 31  28    23 21 20 18  15     9 4
            000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
            000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
            100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
            100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
            000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
            000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
            100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
            100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
         */
         UInt rN = iregNo(i->ARM64in.VCvtI2F.rS);
         UInt rD = dregNo(i->ARM64in.VCvtI2F.rD);
         ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
         /* Just handle cases as they show up. */
         switch (how) {
            case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
               break;
            default:
               goto bad; //ATC
         }
         goto done;
      }
      case ARM64in_VCvtF2I: {
         /* 30       23   20 18  15     9 4
            sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
            sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
            ---------------- 01 --------------  FCVTP-------- (round to +inf)
            ---------------- 10 --------------  FCVTM-------- (round to -inf)
            ---------------- 11 --------------  FCVTZ-------- (round to zero)

            Rd is Xd when sf==1, Wd when sf==0
            Fn is Dn when x==1, Sn when x==0
            20:19 carry the rounding mode, using the same encoding as FPCR
         */
         UInt rD = iregNo(i->ARM64in.VCvtF2I.rD);
         UInt rN = dregNo(i->ARM64in.VCvtF2I.rS);
         ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
         UChar armRM = i->ARM64in.VCvtF2I.armRM;
         /* Just handle cases as they show up. */
         switch (how) {
            case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            default:
               goto bad; //ATC
         }
         goto done;
      }
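
      /* Example (added commentary, not upstream text): for
         ARM64cvt_F64_I64S with armRM = 3 (round-to-zero in the FPCR
         encoding), the word assembled above is
            100 11110 01 1 11 000 000000 n d
         that is, FCVTZS Xd, Dn. */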
3861 case ARM64in_VCvtSD: {
3862 /* 31 23 21 16 14 9 4
3863 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
3864 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
3865 Rounding, when dst is smaller than src, is per the FPCR.
3866 */
3867 UInt dd = dregNo(i->ARM64in.VCvtSD.dst);
3868 UInt nn = dregNo(i->ARM64in.VCvtSD.src);
3869 if (i->ARM64in.VCvtSD.sToD) {
3870 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
3871 } else {
3872 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
3873 }
3874 goto done;
3875 }
sewardjbbcf1882014-01-12 12:49:10 +00003876 case ARM64in_VUnaryD: {
3877 /* 31 23 21 16 14 9 4
3878 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
3879 ------------------- 0,1 --------- FABS ------
3880 ------------------- 1,0 --------- FNEG ------
3881 ------------------- 1,1 --------- FQSRT -----
3882 */
3883 UInt dD = dregNo(i->ARM64in.VUnaryD.dst);
3884 UInt dN = dregNo(i->ARM64in.VUnaryD.src);
3885 UInt b16 = 2; /* impossible */
3886 UInt b15 = 2; /* impossible */
3887 switch (i->ARM64in.VUnaryD.op) {
3888 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
3889 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
3890 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
3891 default: break;
3892 }
3893 if (b16 < 2 && b15 < 2) {
3894 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
3895 (b15 << 5) | X10000, dN, dD);
3896 goto done;
3897 }
3898 /*
3899 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
3900 */
3901 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
3902 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
3903 goto done;
3904 }
3905 goto bad;
3906 }
3907 case ARM64in_VUnaryS: {
3908 /* 31 23 21 16 14 9 4
3909 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
3910 ------------------- 0,1 --------- FABS ------
3911 ------------------- 1,0 --------- FNEG ------
3912 ------------------- 1,1 --------- FQSRT -----
3913 */
3914 UInt sD = dregNo(i->ARM64in.VUnaryS.dst);
3915 UInt sN = dregNo(i->ARM64in.VUnaryS.src);
3916 UInt b16 = 2; /* impossible */
3917 UInt b15 = 2; /* impossible */
3918 switch (i->ARM64in.VUnaryS.op) {
3919 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
3920 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
3921 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
3922 default: break;
3923 }
3924 if (b16 < 2 && b15 < 2) {
3925 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
3926 (b15 << 5) | X10000, sN, sD);
3927 goto done;
3928 }
3929 /*
3930 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
3931 */
3932 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
3933 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
3934 goto done;
3935 }
3936 goto bad;
3937 }
3938 case ARM64in_VBinD: {
3939 /* 31 23 20 15 11 9 4
3940 ---------------- 0000 ------ FMUL --------
3941 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
3942 ---------------- 0010 ------ FADD --------
3943 ---------------- 0011 ------ FSUB --------
3944 */
3945 UInt dD = dregNo(i->ARM64in.VBinD.dst);
3946 UInt dN = dregNo(i->ARM64in.VBinD.argL);
3947 UInt dM = dregNo(i->ARM64in.VBinD.argR);
3948 UInt b1512 = 16; /* impossible */
3949 switch (i->ARM64in.VBinD.op) {
3950 case ARM64fpb_DIV: b1512 = X0001; break;
3951 case ARM64fpb_MUL: b1512 = X0000; break;
3952 case ARM64fpb_SUB: b1512 = X0011; break;
3953 case ARM64fpb_ADD: b1512 = X0010; break;
3954 default: goto bad;
3955 }
3956 vassert(b1512 < 16);
3957 *p++
3958 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
3959 goto done;
3960 }
3961 case ARM64in_VBinS: {
3962 /* 31 23 20 15 11 9 4
3963 ---------------- 0000 ------ FMUL --------
3964 000 11110 001 m 0001 10 n d FDIV Dd,Dn,Dm
3965 ---------------- 0010 ------ FADD --------
3966 ---------------- 0011 ------ FSUB --------
3967 */
3968 UInt sD = dregNo(i->ARM64in.VBinS.dst);
3969 UInt sN = dregNo(i->ARM64in.VBinS.argL);
3970 UInt sM = dregNo(i->ARM64in.VBinS.argR);
3971 UInt b1512 = 16; /* impossible */
3972 switch (i->ARM64in.VBinS.op) {
3973 case ARM64fpb_DIV: b1512 = X0001; break;
3974 case ARM64fpb_MUL: b1512 = X0000; break;
3975 case ARM64fpb_SUB: b1512 = X0011; break;
3976 case ARM64fpb_ADD: b1512 = X0010; break;
3977 default: goto bad;
3978 }
3979 vassert(b1512 < 16);
3980 *p++
3981 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
3982 goto done;
3983 }
3984 case ARM64in_VCmpD: {
3985 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
3986 UInt dN = dregNo(i->ARM64in.VCmpD.argL);
3987 UInt dM = dregNo(i->ARM64in.VCmpD.argR);
3988 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
3989 goto done;
3990 }
3991 case ARM64in_VCmpS: {
3992 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
3993 UInt sN = dregNo(i->ARM64in.VCmpS.argL);
3994 UInt sM = dregNo(i->ARM64in.VCmpS.argR);
3995 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
3996 goto done;
3997 }
sewardje23ec112014-11-15 16:07:14 +00003998 case ARM64in_VFCSel: {
3999 /* 31 23 21 20 15 11 9 5
4000 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4001 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4002 */
4003 Bool isD = i->ARM64in.VFCSel.isD;
4004 UInt dd = dregNo(i->ARM64in.VFCSel.dst);
4005 UInt nn = dregNo(i->ARM64in.VFCSel.argL);
4006 UInt mm = dregNo(i->ARM64in.VFCSel.argR);
4007 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4008 vassert(cond < 16);
4009 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4010 mm, (cond << 2) | X000011, nn, dd);
4011 goto done;
4012 }
sewardj606c4ba2014-01-26 19:11:14 +00004013 case ARM64in_FPCR: {
4014 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4015 UInt iReg = iregNo(i->ARM64in.FPCR.iReg);
4016 if (toFPCR) {
4017 /* 0xD51B44 000 Rt MSR fpcr, rT */
4018 *p++ = 0xD51B4400 | (iReg & 0x1F);
4019 goto done;
4020 }
4021 goto bad; // FPCR -> iReg case currently ATC
4022 }
sewardj12972182014-08-04 08:09:47 +00004023 case ARM64in_FPSR: {
4024 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4025 UInt iReg = iregNo(i->ARM64in.FPSR.iReg);
4026 if (toFPSR) {
4027 /* 0xD51B44 001 Rt MSR fpsr, rT */
4028 *p++ = 0xD51B4420 | (iReg & 0x1F);
4029 } else {
4030 /* 0xD53B44 001 Rt MRS rT, fpsr */
4031 *p++ = 0xD53B4420 | (iReg & 0x1F);
4032 }
4033 goto done;
4034 }
sewardj606c4ba2014-01-26 19:11:14 +00004035 case ARM64in_VBinV: {
4036 /* 31 23 20 15 9 4
sewardj93013432014-04-27 12:02:12 +00004037 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4038 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4039 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
sewardj92d0ae32014-04-03 13:48:54 +00004040 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
sewardjf5b08912014-02-06 12:57:58 +00004041
sewardj93013432014-04-27 12:02:12 +00004042 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4043 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4044 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
sewardj92d0ae32014-04-03 13:48:54 +00004045 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
sewardjf5b08912014-02-06 12:57:58 +00004046
sewardj93013432014-04-27 12:02:12 +00004047 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4048 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4049 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
sewardjf5b08912014-02-06 12:57:58 +00004050
sewardj606c4ba2014-01-26 19:11:14 +00004051 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
sewardjecde6972014-02-05 11:01:19 +00004052 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
sewardj606c4ba2014-01-26 19:11:14 +00004053 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
sewardjecde6972014-02-05 11:01:19 +00004054 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
sewardjf5b08912014-02-06 12:57:58 +00004055
sewardj606c4ba2014-01-26 19:11:14 +00004056 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
sewardjecde6972014-02-05 11:01:19 +00004057 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
sewardj606c4ba2014-01-26 19:11:14 +00004058 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
sewardjecde6972014-02-05 11:01:19 +00004059 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
sewardjf5b08912014-02-06 12:57:58 +00004060
sewardj76927e62014-11-17 11:21:21 +00004061 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4062 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4063 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4064 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4065
sewardjfab09142014-02-10 10:28:13 +00004066 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4067 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4068 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4069
4070 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4071 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4072 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
sewardjf5b08912014-02-06 12:57:58 +00004073
sewardj9b1cf5e2014-03-01 11:16:57 +00004074 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4075 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4076 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
sewardjfab09142014-02-10 10:28:13 +00004077
sewardj9b1cf5e2014-03-01 11:16:57 +00004078 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4079 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4080 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
sewardjf5b08912014-02-06 12:57:58 +00004081
sewardjecde6972014-02-05 11:01:19 +00004082 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4083 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
sewardje520bb32014-02-17 11:00:53 +00004084 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4085
sewardj505a27d2014-03-10 10:40:48 +00004086 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4087 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4088 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4089 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4090
sewardj93013432014-04-27 12:02:12 +00004091 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4092 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4093 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4094 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4095
4096 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4097 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4098 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4099 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
sewardj2bd1ffe2014-03-27 18:59:00 +00004100
4101 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4102 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4103
4104 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4105 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4106
4107 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4108 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
sewardj92d0ae32014-04-03 13:48:54 +00004109
4110 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4111
sewardjd96daf62014-06-15 08:17:35 +00004112 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4113 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4114 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4115 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4116
4117 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4118 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4119 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4120 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4121
4122 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4123 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4124 010 01110 10 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4125
4126 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4127 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4128 010 01110 10 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
sewardj168c8bd2014-06-25 13:05:23 +00004129
4130 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
sewardj31b5a952014-06-26 07:41:14 +00004131
4132 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
sewardj6f312d02014-06-28 12:21:37 +00004133
4134 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4135 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4136 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4137
4138 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4139 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4140 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
sewardja5a6b752014-06-30 07:33:56 +00004141
4142 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4143 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4144 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4145 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4146
4147 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4148 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4149 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4150 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4151
4152 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4153 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4154 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4155 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4156
4157 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4158 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4159 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4160 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
sewardj51d012a2014-07-21 09:19:50 +00004161
4162 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4163 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
sewardj54ffa1d2014-07-22 09:27:49 +00004164
4165 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4166 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4167 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4168 011 01110 10 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
sewardj12972182014-08-04 08:09:47 +00004169
4170 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4171 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4172 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4173 011 01110 sz 1 m 010111 n d URQSHL@sz Vd, Vn, Vm
sewardja6b61f02014-08-17 18:32:14 +00004174
4175 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4176 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4177 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4178 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
sewardj606c4ba2014-01-26 19:11:14 +00004179 */
4180 UInt vD = qregNo(i->ARM64in.VBinV.dst);
4181 UInt vN = qregNo(i->ARM64in.VBinV.argL);
4182 UInt vM = qregNo(i->ARM64in.VBinV.argR);
4183 switch (i->ARM64in.VBinV.op) {
4184 case ARM64vecb_ADD64x2:
4185 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4186 break;
sewardjf5b08912014-02-06 12:57:58 +00004187 case ARM64vecb_ADD32x4:
4188 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4189 break;
4190 case ARM64vecb_ADD16x8:
4191 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4192 break;
sewardj92d0ae32014-04-03 13:48:54 +00004193 case ARM64vecb_ADD8x16:
4194 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4195 break;
sewardj606c4ba2014-01-26 19:11:14 +00004196 case ARM64vecb_SUB64x2:
4197 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4198 break;
4199 case ARM64vecb_SUB32x4:
4200 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4201 break;
4202 case ARM64vecb_SUB16x8:
4203 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4204 break;
sewardj92d0ae32014-04-03 13:48:54 +00004205 case ARM64vecb_SUB8x16:
4206 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4207 break;
sewardjf5b08912014-02-06 12:57:58 +00004208 case ARM64vecb_MUL32x4:
4209 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4210 break;
4211 case ARM64vecb_MUL16x8:
4212 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4213 break;
sewardj93013432014-04-27 12:02:12 +00004214 case ARM64vecb_MUL8x16:
4215 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4216 break;
sewardj606c4ba2014-01-26 19:11:14 +00004217 case ARM64vecb_FADD64x2:
4218 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4219 break;
sewardjecde6972014-02-05 11:01:19 +00004220 case ARM64vecb_FADD32x4:
4221 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4222 break;
sewardj606c4ba2014-01-26 19:11:14 +00004223 case ARM64vecb_FSUB64x2:
4224 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4225 break;
sewardjecde6972014-02-05 11:01:19 +00004226 case ARM64vecb_FSUB32x4:
4227 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4228 break;
sewardj606c4ba2014-01-26 19:11:14 +00004229 case ARM64vecb_FMUL64x2:
4230 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4231 break;
sewardjecde6972014-02-05 11:01:19 +00004232 case ARM64vecb_FMUL32x4:
4233 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4234 break;
sewardj606c4ba2014-01-26 19:11:14 +00004235 case ARM64vecb_FDIV64x2:
4236 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4237 break;
sewardjecde6972014-02-05 11:01:19 +00004238 case ARM64vecb_FDIV32x4:
4239 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4240 break;
sewardjf5b08912014-02-06 12:57:58 +00004241
sewardj76927e62014-11-17 11:21:21 +00004242 case ARM64vecb_FMAX64x2:
4243 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4244 break;
4245 case ARM64vecb_FMAX32x4:
4246 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4247 break;
4248 case ARM64vecb_FMIN64x2:
4249 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4250 break;
4251 case ARM64vecb_FMIN32x4:
4252 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4253 break;
4254
sewardjecde6972014-02-05 11:01:19 +00004255 case ARM64vecb_UMAX32x4:
4256 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4257 break;
4258 case ARM64vecb_UMAX16x8:
4259 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4260 break;
sewardjfab09142014-02-10 10:28:13 +00004261 case ARM64vecb_UMAX8x16:
4262 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4263 break;
4264
sewardjecde6972014-02-05 11:01:19 +00004265 case ARM64vecb_UMIN32x4:
4266 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4267 break;
4268 case ARM64vecb_UMIN16x8:
4269 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4270 break;
sewardjfab09142014-02-10 10:28:13 +00004271 case ARM64vecb_UMIN8x16:
4272 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4273 break;
sewardjf5b08912014-02-06 12:57:58 +00004274
4275 case ARM64vecb_SMAX32x4:
4276 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4277 break;
4278 case ARM64vecb_SMAX16x8:
4279 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4280 break;
sewardj9b1cf5e2014-03-01 11:16:57 +00004281 case ARM64vecb_SMAX8x16:
4282 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4283 break;
sewardjfab09142014-02-10 10:28:13 +00004284
sewardjf5b08912014-02-06 12:57:58 +00004285 case ARM64vecb_SMIN32x4:
4286 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4287 break;
4288 case ARM64vecb_SMIN16x8:
4289 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4290 break;
sewardj9b1cf5e2014-03-01 11:16:57 +00004291 case ARM64vecb_SMIN8x16:
4292 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4293 break;
sewardjf5b08912014-02-06 12:57:58 +00004294
sewardjecde6972014-02-05 11:01:19 +00004295 case ARM64vecb_AND:
4296 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4297 break;
sewardje520bb32014-02-17 11:00:53 +00004298 case ARM64vecb_ORR:
4299 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4300 break;
4301 case ARM64vecb_XOR:
4302 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4303 break;
4304
4305 case ARM64vecb_CMEQ64x2:
4306 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4307 break;
sewardj505a27d2014-03-10 10:40:48 +00004308 case ARM64vecb_CMEQ32x4:
4309 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4310 break;
4311 case ARM64vecb_CMEQ16x8:
4312 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4313 break;
4314 case ARM64vecb_CMEQ8x16:
4315 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4316 break;
sewardje520bb32014-02-17 11:00:53 +00004317
sewardj93013432014-04-27 12:02:12 +00004318 case ARM64vecb_CMHI64x2:
4319 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
4320 break;
4321 case ARM64vecb_CMHI32x4:
4322 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
4323 break;
4324 case ARM64vecb_CMHI16x8:
4325 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
4326 break;
4327 case ARM64vecb_CMHI8x16:
4328 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
4329 break;
4330
4331 case ARM64vecb_CMGT64x2:
4332 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
4333 break;
4334 case ARM64vecb_CMGT32x4:
4335 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
4336 break;
4337 case ARM64vecb_CMGT16x8:
4338 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
4339 break;
4340 case ARM64vecb_CMGT8x16:
4341 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
4342 break;
4343
sewardj2bd1ffe2014-03-27 18:59:00 +00004344 case ARM64vecb_FCMEQ64x2:
4345 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4346 break;
4347 case ARM64vecb_FCMEQ32x4:
4348 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4349 break;
4350
4351 case ARM64vecb_FCMGE64x2:
4352 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4353 break;
4354 case ARM64vecb_FCMGE32x4:
4355 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4356 break;
4357
4358 case ARM64vecb_FCMGT64x2:
4359 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4360 break;
4361 case ARM64vecb_FCMGT32x4:
4362 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4363 break;
sewardj92d0ae32014-04-03 13:48:54 +00004364
4365 case ARM64vecb_TBL1:
sewardjd96daf62014-06-15 08:17:35 +00004366 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4367 break;
4368
4369 case ARM64vecb_UZP164x2:
4370 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4371 break;
4372 case ARM64vecb_UZP132x4:
4373 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4374 break;
4375 case ARM64vecb_UZP116x8:
4376 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4377 break;
4378 case ARM64vecb_UZP18x16:
4379 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4380 break;
4381
4382 case ARM64vecb_UZP264x2:
4383 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4384 break;
4385 case ARM64vecb_UZP232x4:
4386 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4387 break;
4388 case ARM64vecb_UZP216x8:
4389 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4390 break;
4391 case ARM64vecb_UZP28x16:
4392 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4393 break;
4394
4395 case ARM64vecb_ZIP132x4:
4396 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4397 break;
4398 case ARM64vecb_ZIP116x8:
4399 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4400 break;
4401 case ARM64vecb_ZIP18x16:
4402 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4403 break;
4404
4405 case ARM64vecb_ZIP232x4:
4406 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4407 break;
4408 case ARM64vecb_ZIP216x8:
4409 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4410 break;
4411 case ARM64vecb_ZIP28x16:
4412 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
sewardj92d0ae32014-04-03 13:48:54 +00004413 break;
4414
sewardj168c8bd2014-06-25 13:05:23 +00004415 case ARM64vecb_PMUL8x16:
4416 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4417 break;
4418
sewardj31b5a952014-06-26 07:41:14 +00004419 case ARM64vecb_PMULL8x8:
4420 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4421 break;
4422
sewardj6f312d02014-06-28 12:21:37 +00004423 case ARM64vecb_UMULL2DSS:
4424 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4425 break;
4426 case ARM64vecb_UMULL4SHH:
4427 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4428 break;
4429 case ARM64vecb_UMULL8HBB:
4430 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4431 break;
4432
4433 case ARM64vecb_SMULL2DSS:
4434 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4435 break;
4436 case ARM64vecb_SMULL4SHH:
4437 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4438 break;
4439 case ARM64vecb_SMULL8HBB:
4440 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4441 break;
4442
            case ARM64vecb_SQADD64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
               break;
            case ARM64vecb_SQADD32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
               break;
            case ARM64vecb_SQADD16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
               break;
            case ARM64vecb_SQADD8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
               break;

            case ARM64vecb_UQADD64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
               break;
            case ARM64vecb_UQADD32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
               break;
            case ARM64vecb_UQADD16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
               break;
            case ARM64vecb_UQADD8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
               break;

            case ARM64vecb_SQSUB64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
               break;
            case ARM64vecb_SQSUB32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
               break;
            case ARM64vecb_SQSUB16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
               break;
            case ARM64vecb_SQSUB8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
               break;

            case ARM64vecb_UQSUB64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
               break;
            case ARM64vecb_UQSUB32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
               break;
            case ARM64vecb_UQSUB16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
               break;
            case ARM64vecb_UQSUB8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
               break;

            case ARM64vecb_SQDMULL2DSS:
               *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
               break;
            case ARM64vecb_SQDMULL4SHH:
               *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
               break;

            case ARM64vecb_SQDMULH32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
               break;
            case ARM64vecb_SQDMULH16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
               break;
            case ARM64vecb_SQRDMULH32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
               break;
            case ARM64vecb_SQRDMULH16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
               break;

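            /* SQDMULH and SQRDMULH differ only in the U bit (X010 vs
               X011); the R form rounds the doubled product before taking
               the high half.  A64 provides these only for 16- and 32-bit
               lanes, which is why there are no 8x16 or 64x2 cases here. */
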
            case ARM64vecb_SQSHL64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
               break;
            case ARM64vecb_SQSHL32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
               break;
            case ARM64vecb_SQSHL16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
               break;
            case ARM64vecb_SQSHL8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
               break;

            case ARM64vecb_SQRSHL64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
               break;
            case ARM64vecb_SQRSHL32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
               break;
            case ARM64vecb_SQRSHL16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
               break;
            case ARM64vecb_SQRSHL8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
               break;

            case ARM64vecb_UQSHL64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
               break;
            case ARM64vecb_UQSHL32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
               break;
            case ARM64vecb_UQSHL16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
               break;
            case ARM64vecb_UQSHL8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
               break;

            case ARM64vecb_UQRSHL64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
               break;
            case ARM64vecb_UQRSHL32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
               break;
            case ARM64vecb_UQRSHL16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
               break;
            case ARM64vecb_UQRSHL8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
               break;

            case ARM64vecb_SSHL64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
               break;
            case ARM64vecb_SSHL32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
               break;
            case ARM64vecb_SSHL16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
               break;
            case ARM64vecb_SSHL8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
               break;

            case ARM64vecb_SRSHL64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
               break;
            case ARM64vecb_SRSHL32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
               break;
            case ARM64vecb_SRSHL16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
               break;
            case ARM64vecb_SRSHL8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
               break;

            case ARM64vecb_USHL64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
               break;
            case ARM64vecb_USHL32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
               break;
            case ARM64vecb_USHL16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
               break;
            case ARM64vecb_USHL8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
               break;

            case ARM64vecb_URSHL64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
               break;
            case ARM64vecb_URSHL32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
               break;
            case ARM64vecb_URSHL16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
               break;
            case ARM64vecb_URSHL8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
               break;

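            /* The eight shift-by-register families above all share the
               6-bit opcode pattern 01 0 R S 1: S set = saturating (Q
               forms), R set = rounding (R forms), with signed vs unsigned
               chosen by the U bit in the leading 3-bit field (X010 vs
               X011). */
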
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_VModifyV: {
         /* 31        23   20    15     9 4
            010 01110 sz 1 00000 001110 n d   SUQADD@sz  Vd, Vn
            011 01110 sz 1 00000 001110 n d   USQADD@sz  Vd, Vn
         */
         UInt vD = qregNo(i->ARM64in.VModifyV.mod);
         UInt vN = qregNo(i->ARM64in.VModifyV.arg);
         switch (i->ARM64in.VModifyV.op) {
            case ARM64vecmo_SUQADD64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_SUQADD32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_SUQADD16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_SUQADD8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_USQADD64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_USQADD32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_USQADD16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
               break;
            case ARM64vecmo_USQADD8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
               break;
            default:
               goto bad;
         }
         goto done;
      }
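      /* Unlike the other unary-style vector cases, SUQADD/USQADD
         accumulate into their destination: the 'mod' register is both
         read and written, which is why these get their own VModifyV kind
         rather than living in VUnaryV. */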
      case ARM64in_VUnaryV: {
         /* 31        23   20    15     9 4
            010 01110 11 1 00000 111110 n d  FABS Vd.2d,  Vn.2d
            010 01110 10 1 00000 111110 n d  FABS Vd.4s,  Vn.4s
            011 01110 11 1 00000 111110 n d  FNEG Vd.2d,  Vn.2d
            011 01110 10 1 00000 111110 n d  FNEG Vd.4s,  Vn.4s
            011 01110 00 1 00000 010110 n d  NOT  Vd.16b, Vn.16b

            010 01110 11 1 00000 101110 n d  ABS  Vd.2d,  Vn.2d
            010 01110 10 1 00000 101110 n d  ABS  Vd.4s,  Vn.4s
            010 01110 01 1 00000 101110 n d  ABS  Vd.8h,  Vn.8h
            010 01110 00 1 00000 101110 n d  ABS  Vd.16b, Vn.16b

            010 01110 10 1 00000 010010 n d  CLS  Vd.4s,  Vn.4s
            010 01110 01 1 00000 010010 n d  CLS  Vd.8h,  Vn.8h
            010 01110 00 1 00000 010010 n d  CLS  Vd.16b, Vn.16b

            011 01110 10 1 00000 010010 n d  CLZ  Vd.4s,  Vn.4s
            011 01110 01 1 00000 010010 n d  CLZ  Vd.8h,  Vn.8h
            011 01110 00 1 00000 010010 n d  CLZ  Vd.16b, Vn.16b

            010 01110 00 1 00000 010110 n d  CNT  Vd.16b, Vn.16b

            011 01110 01 1 00000 010110 n d  RBIT  Vd.16b, Vn.16b
            010 01110 00 1 00000 000110 n d  REV16 Vd.16b, Vn.16b
            011 01110 00 1 00000 000010 n d  REV32 Vd.16b, Vn.16b
            011 01110 01 1 00000 000010 n d  REV32 Vd.8h,  Vn.8h

            010 01110 00 1 00000 000010 n d  REV64 Vd.16b, Vn.16b
            010 01110 01 1 00000 000010 n d  REV64 Vd.8h,  Vn.8h
            010 01110 10 1 00000 000010 n d  REV64 Vd.4s,  Vn.4s

            010 01110 10 1 00001 110010 n d  URECPE  Vd.4s, Vn.4s
            011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
         */
         UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
         UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
         switch (i->ARM64in.VUnaryV.op) {
            case ARM64vecu_FABS64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
               break;
            case ARM64vecu_FABS32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
               break;
            case ARM64vecu_FNEG64x2:
               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
               break;
            case ARM64vecu_FNEG32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
               break;
            case ARM64vecu_NOT:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
               break;
            case ARM64vecu_ABS64x2:
               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
               break;
            case ARM64vecu_ABS32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
               break;
            case ARM64vecu_ABS16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
               break;
            case ARM64vecu_ABS8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
               break;
            case ARM64vecu_CLS32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CLS16x8:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CLS8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CLZ32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CLZ16x8:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CLZ8x16:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
               break;
            case ARM64vecu_CNT8x16:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
               break;
            case ARM64vecu_RBIT:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
               break;
            case ARM64vecu_REV1616B:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
               break;
            case ARM64vecu_REV3216B:
               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
               break;
            case ARM64vecu_REV328H:
               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
               break;
            case ARM64vecu_REV6416B:
               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
               break;
            case ARM64vecu_REV648H:
               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
               break;
            case ARM64vecu_REV644S:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
               break;
            case ARM64vecu_URECPE32x4:
               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
               break;
            case ARM64vecu_URSQRTE32x4:
               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
               break;
            default:
               goto bad;
         }
         goto done;
      }
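      /* Note that URECPE/URSQRTE are the odd ones out in the VUnaryV
         group: their bits [20:16] are X00001, where every other op above
         uses X00000. */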
      case ARM64in_VNarrowV: {
         /* 31        23 21      15     9 4
            000 01110 00 1,00001 001010 n d  XTN Vd.8b, Vn.8h
            000 01110 01 1,00001 001010 n d  XTN Vd.4h, Vn.4s
            000 01110 10 1,00001 001010 n d  XTN Vd.2s, Vn.2d

            001 01110 00 1,00001 001010 n d  SQXTUN Vd.8b, Vn.8h
            001 01110 01 1,00001 001010 n d  SQXTUN Vd.4h, Vn.4s
            001 01110 10 1,00001 001010 n d  SQXTUN Vd.2s, Vn.2d

            000 01110 00 1,00001 010010 n d  SQXTN Vd.8b, Vn.8h
            000 01110 01 1,00001 010010 n d  SQXTN Vd.4h, Vn.4s
            000 01110 10 1,00001 010010 n d  SQXTN Vd.2s, Vn.2d

            001 01110 00 1,00001 010010 n d  UQXTN Vd.8b, Vn.8h
            001 01110 01 1,00001 010010 n d  UQXTN Vd.4h, Vn.4s
            001 01110 10 1,00001 010010 n d  UQXTN Vd.2s, Vn.2d
         */
         UInt vD = qregNo(i->ARM64in.VNarrowV.dst);
         UInt vN = qregNo(i->ARM64in.VNarrowV.src);
         UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
         vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
         switch (i->ARM64in.VNarrowV.op) {
            case ARM64vecna_XTN:
               *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
                                    X00001, X001010, vN, vD);
               goto done;
            case ARM64vecna_SQXTUN:
               *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
                                    X00001, X001010, vN, vD);
               goto done;
            case ARM64vecna_SQXTN:
               *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
                                    X00001, X010010, vN, vD);
               goto done;
            case ARM64vecna_UQXTN:
               *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
                                    X00001, X010010, vN, vD);
               goto done;
            default:
               break;
         }
         goto bad;
      }
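      /* dszBlg2 is log2(destination lane size in bytes): 0/1/2 give .8b,
         .4h and .2s results.  (dszBlg2 << 1) lands in the sz field, so
         e.g. XTN Vd.4h, Vn.4s has dszBlg2 == 1 and the size byte becomes
         X01110001 | (1 << 1) == X01110011, matching the
         "000 01110 01 1" row in the comment above. */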
      case ARM64in_VShiftImmV: {
         /*
            011 011110 immh immb 000001 n d  USHR     Vd.T, Vn.T, #sh
            010 011110 immh immb 000001 n d  SSHR     Vd.T, Vn.T, #sh

            001 011110 immh immb 100101 n d  UQSHRN   ,,#sh
            000 011110 immh immb 100101 n d  SQSHRN   ,,#sh
            001 011110 immh immb 100001 n d  SQSHRUN  ,,#sh

            001 011110 immh immb 100111 n d  UQRSHRN  ,,#sh
            000 011110 immh immb 100111 n d  SQRSHRN  ,,#sh
            001 011110 immh immb 100011 n d  SQRSHRUN ,,#sh

            where immh:immb
               = case T of
                    2d  | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
                    4s  | sh in 1..32 -> let xxxxx  = 32-sh in 01xx:xxx
                    8h  | sh in 1..16 -> let xxxx   = 16-sh in 001x:xxx
                    16b | sh in 1..8  -> let xxx    =  8-sh in 0001:xxx

            010 011110 immh immb 010101 n d  SHL    Vd.T, Vn.T, #sh

            011 011110 immh immb 011101 n d  UQSHL  Vd.T, Vn.T, #sh
            010 011110 immh immb 011101 n d  SQSHL  Vd.T, Vn.T, #sh
            011 011110 immh immb 011001 n d  SQSHLU Vd.T, Vn.T, #sh

            where immh:immb
               = case T of
                    2d  | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
                    4s  | sh in 0..31 -> let xxxxx  = sh in 01xx:xxx
                    8h  | sh in 0..15 -> let xxxx   = sh in 001x:xxx
                    16b | sh in 0..7  -> let xxx    = sh in 0001:xxx
         */
         UInt vD   = qregNo(i->ARM64in.VShiftImmV.dst);
         UInt vN   = qregNo(i->ARM64in.VShiftImmV.src);
         UInt sh   = i->ARM64in.VShiftImmV.amt;
         UInt tmpl = 0; /* invalid */

         const UInt tmpl_USHR
            = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
         const UInt tmpl_SSHR
            = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);

         const UInt tmpl_UQSHRN
            = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
         const UInt tmpl_SQSHRN
            = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
         const UInt tmpl_SQSHRUN
            = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);

         const UInt tmpl_UQRSHRN
            = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
         const UInt tmpl_SQRSHRN
            = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
         const UInt tmpl_SQRSHRUN
            = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);

         const UInt tmpl_SHL
            = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);

         const UInt tmpl_UQSHL
            = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
         const UInt tmpl_SQSHL
            = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
         const UInt tmpl_SQSHLU
            = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);

         switch (i->ARM64in.VShiftImmV.op) {
            case ARM64vecshi_SSHR64x2:    tmpl = tmpl_SSHR;     goto right64x2;
            case ARM64vecshi_USHR64x2:    tmpl = tmpl_USHR;     goto right64x2;
            case ARM64vecshi_SHL64x2:     tmpl = tmpl_SHL;      goto left64x2;
            case ARM64vecshi_UQSHL64x2:   tmpl = tmpl_UQSHL;    goto left64x2;
            case ARM64vecshi_SQSHL64x2:   tmpl = tmpl_SQSHL;    goto left64x2;
            case ARM64vecshi_SQSHLU64x2:  tmpl = tmpl_SQSHLU;   goto left64x2;
            case ARM64vecshi_SSHR32x4:    tmpl = tmpl_SSHR;     goto right32x4;
            case ARM64vecshi_USHR32x4:    tmpl = tmpl_USHR;     goto right32x4;
            case ARM64vecshi_UQSHRN2SD:   tmpl = tmpl_UQSHRN;   goto right32x4;
            case ARM64vecshi_SQSHRN2SD:   tmpl = tmpl_SQSHRN;   goto right32x4;
            case ARM64vecshi_SQSHRUN2SD:  tmpl = tmpl_SQSHRUN;  goto right32x4;
            case ARM64vecshi_UQRSHRN2SD:  tmpl = tmpl_UQRSHRN;  goto right32x4;
            case ARM64vecshi_SQRSHRN2SD:  tmpl = tmpl_SQRSHRN;  goto right32x4;
            case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
            case ARM64vecshi_SHL32x4:     tmpl = tmpl_SHL;      goto left32x4;
            case ARM64vecshi_UQSHL32x4:   tmpl = tmpl_UQSHL;    goto left32x4;
            case ARM64vecshi_SQSHL32x4:   tmpl = tmpl_SQSHL;    goto left32x4;
            case ARM64vecshi_SQSHLU32x4:  tmpl = tmpl_SQSHLU;   goto left32x4;
            case ARM64vecshi_SSHR16x8:    tmpl = tmpl_SSHR;     goto right16x8;
            case ARM64vecshi_USHR16x8:    tmpl = tmpl_USHR;     goto right16x8;
            case ARM64vecshi_UQSHRN4HS:   tmpl = tmpl_UQSHRN;   goto right16x8;
            case ARM64vecshi_SQSHRN4HS:   tmpl = tmpl_SQSHRN;   goto right16x8;
            case ARM64vecshi_SQSHRUN4HS:  tmpl = tmpl_SQSHRUN;  goto right16x8;
            case ARM64vecshi_UQRSHRN4HS:  tmpl = tmpl_UQRSHRN;  goto right16x8;
            case ARM64vecshi_SQRSHRN4HS:  tmpl = tmpl_SQRSHRN;  goto right16x8;
            case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
            case ARM64vecshi_SHL16x8:     tmpl = tmpl_SHL;      goto left16x8;
            case ARM64vecshi_UQSHL16x8:   tmpl = tmpl_UQSHL;    goto left16x8;
            case ARM64vecshi_SQSHL16x8:   tmpl = tmpl_SQSHL;    goto left16x8;
            case ARM64vecshi_SQSHLU16x8:  tmpl = tmpl_SQSHLU;   goto left16x8;
            case ARM64vecshi_SSHR8x16:    tmpl = tmpl_SSHR;     goto right8x16;
            case ARM64vecshi_USHR8x16:    tmpl = tmpl_USHR;     goto right8x16;
            case ARM64vecshi_UQSHRN8BH:   tmpl = tmpl_UQSHRN;   goto right8x16;
            case ARM64vecshi_SQSHRN8BH:   tmpl = tmpl_SQSHRN;   goto right8x16;
            case ARM64vecshi_SQSHRUN8BH:  tmpl = tmpl_SQSHRUN;  goto right8x16;
            case ARM64vecshi_UQRSHRN8BH:  tmpl = tmpl_UQRSHRN;  goto right8x16;
            case ARM64vecshi_SQRSHRN8BH:  tmpl = tmpl_SQRSHRN;  goto right8x16;
            case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
            case ARM64vecshi_SHL8x16:     tmpl = tmpl_SHL;      goto left8x16;
            case ARM64vecshi_UQSHL8x16:   tmpl = tmpl_UQSHL;    goto left8x16;
            case ARM64vecshi_SQSHL8x16:   tmpl = tmpl_SQSHL;    goto left8x16;
            case ARM64vecshi_SQSHLU8x16:  tmpl = tmpl_SQSHLU;   goto left8x16;

            default: break;

            right64x2:
               if (sh >= 1 && sh <= 63) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
                  goto done;
               }
               break;
            right32x4:
               if (sh >= 1 && sh <= 32) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
                  goto done;
               }
               break;
            right16x8:
               if (sh >= 1 && sh <= 16) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
                  goto done;
               }
               break;
            right8x16:
               if (sh >= 1 && sh <= 8) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
                  goto done;
               }
               break;

            left64x2:
               if (sh >= 0 && sh <= 63) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
                  goto done;
               }
               break;
            left32x4:
               if (sh >= 0 && sh <= 31) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
                  goto done;
               }
               break;
            left16x8:
               if (sh >= 0 && sh <= 15) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
                  goto done;
               }
               break;
            left8x16:
               if (sh >= 0 && sh <= 7) {
                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
                  goto done;
               }
               break;
         }
         goto bad;
      }
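      /* Worked example: USHR Vd.8h, Vn.8h, #3 goes via right16x8, so the
         7-bit field is X0010000 | (16-3) == 0011101, ie immh = 0011,
         immb = 101 -- exactly the "8h | sh in 1..16" rule above.  Note
         the emitter is slightly conservative for 2d right shifts: the
         architectural encoding admits sh == 64, but the guard above stops
         at 63. */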
      case ARM64in_VExtV: {
         /*
            011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
            where imm4 = the shift amount, in bytes,
                  Vn is low operand, Vm is high operand
         */
         UInt vD   = qregNo(i->ARM64in.VExtV.dst);
         UInt vN   = qregNo(i->ARM64in.VExtV.srcLo);
         UInt vM   = qregNo(i->ARM64in.VExtV.srcHi);
         UInt imm4 = i->ARM64in.VExtV.amtB;
         vassert(imm4 >= 1 && imm4 <= 15);
         *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
                              X000000 | (imm4 << 1), vN, vD);
         goto done;
      }
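      /* EXT extracts 16 consecutive bytes from the 256-bit concatenation
         Vm:Vn (Vn supplying the low half), starting at byte imm4 of Vn.
         imm4 == 0 is excluded here since that would just copy Vn; with
         srcLo == srcHi and imm4 == 8 it swaps the two 64-bit halves. */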
      case ARM64in_VImmQ: {
         UInt   rQ  = qregNo(i->ARM64in.VImmQ.rQ);
         UShort imm = i->ARM64in.VImmQ.imm;
         vassert(rQ < 32);
         switch (imm) {
            case 0x0000:
               // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
               *p++ = 0x4F000400 | rQ;
               goto done;
            case 0x0001:
               // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
               *p++ = 0x2F00E420 | rQ;
               goto done;
            case 0x0003:
               // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
               *p++ = 0x2F00E460 | rQ;
               goto done;
            case 0x000F:
               // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
               *p++ = 0x2F00E5E0 | rQ;
               goto done;
            case 0x003F:
               // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
               *p++ = 0x2F01E7E0 | rQ;
               goto done;
            case 0x00FF:
               // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
               *p++ = 0x2F07E7E0 | rQ;
               goto done;
            case 0xFFFF:
               // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
               *p++ = 0x6F000400 | rQ;
               goto done;
            default:
               break;
         }
         goto bad; /* no other handled cases right now */
      }
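      /* VImmQ's 16-bit imm is a one-bit-per-byte mask of the 128-bit
         value (bit k set means byte k is 0xFF), so only the handful of
         masks above have single-insn MOVI/MVNI encodings; any other mask
         falls through to the 'bad' (vpanic) path. */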

      case ARM64in_VDfromX: {
         /* INS Vd.D[0], rX
            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
            This isn't wonderful, in the sense that the upper half of
            the vector register stays unchanged and thus the insn is
            data dependent on its output register. */
         UInt dd = dregNo(i->ARM64in.VDfromX.rD);
         UInt xx = iregNo(i->ARM64in.VDfromX.rX);
         vassert(xx < 31);
         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
         goto done;
      }

      case ARM64in_VQfromX: {
         /* FMOV D, X
            1001 1110 0110 0111 0000 00 nn dd   FMOV Vd.D[0], Xn
            I think this zeroes out the top half of the destination, which
            is what we need.  TODO: can we do VDfromX and VQfromXX better? */
         UInt dd = qregNo(i->ARM64in.VQfromX.rQ);
         UInt xx = iregNo(i->ARM64in.VQfromX.rXlo);
         vassert(xx < 31);
         *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
         goto done;
      }

      case ARM64in_VQfromXX: {
         /* What we really generate is a two insn sequence:
               INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
            0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
         */
         UInt qq  = qregNo(i->ARM64in.VQfromXX.rQ);
         UInt xhi = iregNo(i->ARM64in.VQfromXX.rXhi);
         UInt xlo = iregNo(i->ARM64in.VQfromXX.rXlo);
         vassert(xhi < 31 && xlo < 31);
         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
         *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
         goto done;
      }

      case ARM64in_VXfromQ: {
         /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
            010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
         */
         UInt dd     = iregNo(i->ARM64in.VXfromQ.rX);
         UInt nn     = qregNo(i->ARM64in.VXfromQ.rQ);
         UInt laneNo = i->ARM64in.VXfromQ.laneNo;
         vassert(dd < 31);
         vassert(laneNo < 2);
         *p++ = X_3_8_5_6_5_5(X010, X01110000,
                              laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
         goto done;
      }

      case ARM64in_VXfromDorS: {
         /* 000 11110001 00110 000000 n d  FMOV Wd, Sn
            100 11110011 00110 000000 n d  FMOV Xd, Dn
         */
         UInt dd    = iregNo(i->ARM64in.VXfromDorS.rX);
         UInt nn    = dregNo(i->ARM64in.VXfromDorS.rDorS);
         Bool fromD = i->ARM64in.VXfromDorS.fromD;
         vassert(dd < 31);
         *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
                              fromD ? X11110011 : X11110001,
                              X00110, X000000, nn, dd);
         goto done;
      }

      case ARM64in_VMov: {
         /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
            000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
            010 01110 10 1 n 0 00111 n d      MOV Vd.16b, Vn.16b
         */
         HReg rD = i->ARM64in.VMov.dst;
         HReg rN = i->ARM64in.VMov.src;
         switch (i->ARM64in.VMov.szB) {
            case 16: {
               UInt dd = qregNo(rD);
               UInt nn = qregNo(rN);
               *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
               goto done;
            }
            case 8: {
               UInt dd = dregNo(rD);
               UInt nn = dregNo(rN);
               *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
               goto done;
            }
            default:
               break;
         }
         goto bad;
      }
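      /* The 16-byte VMov form is the ORR Vd.16b, Vn.16b, Vn.16b alias
         (note nn appears in both source-register slots), which is how
         A64 spells a whole-register vector MOV. */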
sewardjbbcf1882014-01-12 12:49:10 +00005135
5136 case ARM64in_EvCheck: {
5137 /* The sequence is fixed (canned) except for the two amodes
5138 supplied by the insn. These don't change the length, though.
5139 We generate:
5140 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5141 subs w9, w9, #1
5142 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5143 bpl nofail
5144 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5145 br x9
5146 nofail:
5147 */
5148 UInt* p0 = p;
5149 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5150 i->ARM64in.EvCheck.amCounter);
5151 *p++ = 0x71000529; /* subs w9, w9, #1 */
5152 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5153 i->ARM64in.EvCheck.amCounter);
5154 *p++ = 0x54000065; /* bpl nofail */
5155 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5156 i->ARM64in.EvCheck.amFailAddr);
5157 *p++ = 0xD61F0120; /* br x9 */
5158 /* nofail: */
5159
5160 /* Crosscheck */
sewardj9b769162014-07-24 12:42:03 +00005161 vassert(evCheckSzB_ARM64(endness_host) == (UChar*)p - (UChar*)p0);
sewardjbbcf1882014-01-12 12:49:10 +00005162 goto done;
5163 }
5164
sewardj0ad37a92014-08-29 21:58:03 +00005165 case ARM64in_ProfInc: {
5166 /* We generate:
5167 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5168 expectation that a later call to LibVEX_patchProfCtr
5169 will be used to fill in the immediate fields once the
5170 right value is known.)
5171 imm64-exactly4 x9, 0x6555'7555'8555'9566
5172 ldr x8, [x9]
5173 add x8, x8, #1
5174 str x8, [x9]
5175 */
5176 p = imm64_to_iregNo_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5177 *p++ = 0xF9400128;
5178 *p++ = 0x91000508;
5179 *p++ = 0xF9000128;
5180 /* Tell the caller .. */
5181 vassert(!(*is_profInc));
5182 *is_profInc = True;
5183 goto done;
5184 }
sewardjbbcf1882014-01-12 12:49:10 +00005185
5186 /* ... */
5187 default:
5188 goto bad;
5189 }
5190
5191 bad:
5192 ppARM64Instr(i);
5193 vpanic("emit_ARM64Instr");
5194 /*NOTREACHED*/
5195
5196 done:
5197 vassert(((UChar*)p) - &buf[0] <= 36);
5198 return ((UChar*)p) - &buf[0];
5199}


/* How big is an event check?  See case for ARM64in_EvCheck in
   emit_ARM64Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_ARM64 ( VexEndness endness_host )
{
   return 24;
}
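/* (24 == 6 fixed-length insns of 4 bytes each; per the comment in the
   emitter, the two amodes supplied to ARM64in_EvCheck never change the
   length of the sequence.) */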


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
                                   void* place_to_chain,
                                   const void* disp_cp_chain_me_EXPECTED,
                                   const void* place_to_jump_to )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
        movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
        movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
        movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
        blr  x9
      viz
        <16 bytes generated by imm64_to_iregNo_EXACTLY4>
        D6 3F 01 20
   */
   UInt* p = (UInt*)place_to_chain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_iregNo_EXACTLY4(
              p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   vassert(p[4] == 0xD63F0120);

   /* And what we want to change it to is:
        movw x9, place_to_jump_to[15:0]
        movk x9, place_to_jump_to[31:16], lsl 16
        movk x9, place_to_jump_to[47:32], lsl 32
        movk x9, place_to_jump_to[63:48], lsl 48
        br   x9
      viz
        <16 bytes generated by imm64_to_iregNo_EXACTLY4>
        D6 1F 01 20

      The replacement has the same length as the original.
   */
   (void)imm64_to_iregNo_EXACTLY4(
            p, /*x*/9, Ptr_to_ULong(place_to_jump_to));
   p[4] = 0xD61F0120;

   VexInvalRange vir = {(HWord)p, 20};
   return vir;
}
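/* The 20-byte invalidation range covers the four insns laid down by
   imm64_to_iregNo_EXACTLY4 plus the rewritten BR/BLR, so the caller
   invalidates exactly the patched words from the instruction cache. */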


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
                                     void* place_to_unchain,
                                     const void* place_to_jump_to_EXPECTED,
                                     const void* disp_cp_chain_me )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movw x9, place_to_jump_to_EXPECTED[15:0]
        movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
        movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
        movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
        br   x9
      viz
        <16 bytes generated by imm64_to_iregNo_EXACTLY4>
        D6 1F 01 20
   */
   UInt* p = (UInt*)place_to_unchain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_iregNo_EXACTLY4(
              p, /*x*/9, Ptr_to_ULong(place_to_jump_to_EXPECTED)));
   vassert(p[4] == 0xD61F0120);

   /* And what we want to change it to is:
        movw x9, disp_cp_chain_me_to[15:0]
        movk x9, disp_cp_chain_me_to[31:16], lsl 16
        movk x9, disp_cp_chain_me_to[47:32], lsl 32
        movk x9, disp_cp_chain_me_to[63:48], lsl 48
        blr  x9
      viz
        <16 bytes generated by imm64_to_iregNo_EXACTLY4>
        D6 3F 01 20
   */
   (void)imm64_to_iregNo_EXACTLY4(
            p, /*x*/9, Ptr_to_ULong(disp_cp_chain_me));
   p[4] = 0xD63F0120;

   VexInvalRange vir = {(HWord)p, 20};
   return vir;
}


/* Patch the counter address into a profile inc point, as previously
   created by the ARM64in_ProfInc case for emit_ARM64Instr. */
VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
                                   void*  place_to_patch,
                                   const ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 8);
   vassert(endness_host == VexEndnessLE);
   UInt* p = (UInt*)place_to_patch;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_iregNo_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
   vassert(p[4] == 0xF9400128);
   vassert(p[5] == 0x91000508);
   vassert(p[6] == 0xF9000128);
   imm64_to_iregNo_EXACTLY4(p, /*x*/9,
                            Ptr_to_ULong(location_of_counter));
   VexInvalRange vir = {(HWord)p, 4*4};
   return vir;
}
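/* Only the 16-byte immediate sequence is rewritten here, so the
   invalidation range is 4 insns (4*4 bytes); the ldr/add/str triple that
   follows it is left untouched. */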

/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_defs.c ---*/
/*---------------------------------------------------------------*/