
/*--------------------------------------------------------------------*/
/*--- The JITter proper: register allocation & code improvement   ---*/
/*---                                               vg_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"


/*------------------------------------------------------------*/
/*--- Renamings of frequently-used global functions.       ---*/
/*------------------------------------------------------------*/

#define uInstr1   VG_(newUInstr1)
#define uInstr2   VG_(newUInstr2)
#define uInstr3   VG_(newUInstr3)
#define dis       VG_(disassemble)
#define nameIReg  VG_(nameOfIntReg)
#define nameISize VG_(nameOfIntSize)
#define uLiteral  VG_(setLiteralField)
#define newTemp   VG_(getNewTemp)
#define newShadow VG_(getNewShadow)


/*------------------------------------------------------------*/
/*--- Memory management for the translator.                ---*/
/*------------------------------------------------------------*/

#define N_JITBLOCKS    4
#define N_JITBLOCK_SZ  5000

static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
static Bool  jitstorage_inuse[N_JITBLOCKS];
static Bool  jitstorage_initdone = False;

static __inline__ void jitstorage_initialise ( void )
{
   Int i;
   if (jitstorage_initdone) return;
   jitstorage_initdone = True;
   for (i = 0; i < N_JITBLOCKS; i++)
      jitstorage_inuse[i] = False;
}

void* VG_(jitmalloc) ( Int nbytes )
{
   Int i;
   jitstorage_initialise();
   if (nbytes > N_JITBLOCK_SZ) {
      /* VG_(printf)("too large: %d\n", nbytes); */
      return VG_(malloc)(VG_AR_PRIVATE, nbytes);
   }
   for (i = 0; i < N_JITBLOCKS; i++) {
      if (!jitstorage_inuse[i]) {
         jitstorage_inuse[i] = True;
         /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
         return & jitstorage[i][0];
      }
   }
   VG_(panic)("out of slots in vg_jitmalloc\n");
   return VG_(malloc)(VG_AR_PRIVATE, nbytes);
}

void VG_(jitfree) ( void* ptr )
{
   Int i;
   jitstorage_initialise();
   for (i = 0; i < N_JITBLOCKS; i++) {
      if (ptr == & jitstorage[i][0]) {
         vg_assert(jitstorage_inuse[i]);
         jitstorage_inuse[i] = False;
         return;
      }
   }
   VG_(free)(VG_AR_PRIVATE, ptr);
}
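
/* A minimal usage sketch (illustrative only; the sizes and the Int
   element type are assumptions of the example, not part of the API).
   Requests of at most N_JITBLOCK_SZ bytes come from the static pool
   above; larger ones fall back to VG_(malloc).  Either way,
   VG_(jitfree) routes the pointer back to wherever it came from. */
#if 0
   Int*  small = VG_(jitmalloc)( 100 * sizeof(Int) );  /* pool slot   */
   void* big   = VG_(jitmalloc)( 2 * N_JITBLOCK_SZ );  /* heap-backed */
   /* ... use the buffers during translation ... */
   VG_(jitfree)(small);   /* slot returned to the pool */
   VG_(jitfree)(big);     /* forwarded to VG_(free)    */
#endif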

/*------------------------------------------------------------*/
/*--- Basics                                               ---*/
/*------------------------------------------------------------*/

static UCodeBlock* allocCodeBlock ( void )
{
   UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
   cb->used = cb->size = cb->nextTemp = 0;
   cb->instrs = NULL;
   return cb;
}


static void freeCodeBlock ( UCodeBlock* cb )
{
   if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
   VG_(free)(VG_AR_PRIVATE, cb);
}


/* Ensure there's enough space in a block to add one uinstr. */
static __inline__
void ensureUInstr ( UCodeBlock* cb )
{
   if (cb->used == cb->size) {
      if (cb->instrs == NULL) {
         vg_assert(cb->size == 0);
         vg_assert(cb->used == 0);
         cb->size = 8;
         cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
      } else {
         Int i;
         UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
                                       2 * sizeof(UInstr) * cb->size);
         for (i = 0; i < cb->used; i++)
            instrs2[i] = cb->instrs[i];
         cb->size *= 2;
         VG_(free)(VG_AR_PRIVATE, cb->instrs);
         cb->instrs = instrs2;
      }
   }

   vg_assert(cb->used < cb->size);
}


__inline__
void VG_(emptyUInstr) ( UInstr* u )
{
   u->val1 = u->val2 = u->val3 = 0;
   u->tag1 = u->tag2 = u->tag3 = NoValue;
   u->flags_r = u->flags_w = FlagsEmpty;
   u->jmpkind = JmpBoring;
   u->smc_check = u->signed_widen = False;
   u->lit32 = 0;
   u->opcode = 0;
   u->size = 0;
   u->cond = 0;
   u->extra4b = 0;
}


/* Add an instruction to a ucode block. */
__inline__
void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2,
                       Tag tag3, UInt val3 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->val3   = val3;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->tag3   = tag3;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
   if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->opcode = opcode;
   ui->size   = sz;
}

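
/* An illustrative sketch (not compiled in): how a disassembler might
   use the helpers above to emit ucode for "incl %eax", given some
   in-scope UCodeBlock* cb.  The temp number is whatever newTemp hands
   back; the flag set written by INC (O/S/Z/A/P, but not C) is an
   assumption of this example. */
#if 0
   {
      Int t = newTemp(cb);
      uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t);  /* fetch %eax   */
      uInstr1(cb, INC, 4, TempReg, t);                  /* bump it      */
      VG_(setFlagRW)(&LAST_UINSTR(cb), FlagsEmpty, FlagsOSZAP);
      uInstr2(cb, PUT, 4, TempReg, t, ArchReg, R_EAX);  /* write back   */
   }
#endif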

/* Copy an instruction into the given codeblock. */
static __inline__
void copyUInstr ( UCodeBlock* cb, UInstr* instr )
{
   ensureUInstr(cb);
   cb->instrs[cb->used] = *instr;
   cb->used++;
}


/* Copy auxiliary info from one uinstr to another. */
static __inline__
void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
{
   dst->cond         = src->cond;
   dst->extra4b      = src->extra4b;
   dst->smc_check    = src->smc_check;
   dst->signed_widen = src->signed_widen;
   dst->jmpkind      = src->jmpkind;
   dst->flags_r      = src->flags_r;
   dst->flags_w      = src->flags_w;
}


/* Set the flag R/W sets on a uinstr. */
void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
{
   /* VG_(ppUInstr)(-1,u); */
   vg_assert(fr == (fr & FlagsALL));
   vg_assert(fw == (fw & FlagsALL));
   u->flags_r = fr;
   u->flags_w = fw;
}


/* Set the lit32 field of the most recent uinsn. */
void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
{
   LAST_UINSTR(cb).lit32 = lit32;
}


Bool VG_(anyFlagUse) ( UInstr* u )
{
   return (u->flags_r != FlagsEmpty
           || u->flags_w != FlagsEmpty);
}




/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
   register number.  This effectively defines the order in which real
   registers are allocated.  %ebp is excluded since it is permanently
   reserved for pointing at VG_(baseBlock).  %edi is a general spare
   temp used for Left4 and various misc tag ops.

   Important!  If you change the set of allocatable registers from
   %eax, %ebx, %ecx, %edx, %esi you must change the
   save/restore sequences in vg_helper_smc_check4 to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
   switch (rank) {
#     if 1
      /* Probably the best allocation ordering. */
      case 0: return R_EAX;
      case 1: return R_EBX;
      case 2: return R_ECX;
      case 3: return R_EDX;
      case 4: return R_ESI;
#     else
      /* Contrary; probably the worst.  Helpful for debugging, tho. */
      case 4: return R_EAX;
      case 3: return R_EBX;
      case 2: return R_ECX;
      case 1: return R_EDX;
      case 0: return R_ESI;
#     endif
      default: VG_(panic)("rankToRealRegNo");
   }
}


/*------------------------------------------------------------*/
/*--- Sanity checking uinstrs.                             ---*/
/*------------------------------------------------------------*/

/* This seems as good a place as any to record some important stuff
   about ucode semantics.

   * TempRegs are 32 bits wide.  LOADs of 8/16 bit values into a
     TempReg are defined to zero-extend the loaded value to 32 bits.
     This is needed to make the translation of movzbl et al work
     properly.

   * Similarly, GETs of 8/16 bit ArchRegs are zero-extended.

   * Arithmetic on TempRegs is at the specified size.  For example,
     SUBW t1, t2 has to result in a real 16 bit x86 subtraction
     being emitted -- not a 32 bit one.

   * On some insns we allow the cc bit to be set.  If so, the
     intention is that the simulated machine's %eflags register
     is copied into that of the real machine before the insn,
     and copied back again afterwards.  This means that the
     code generated for that insn must be very careful only to
     update %eflags in the intended way.  This is particularly
     important for the routines referenced by CALL insns.
*/
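
/* For instance (illustrative): because an 8-bit LOAD zero-extends
   into its 32-bit TempReg, "movzbl (%esi),%eax" needs no separate
   widening step -- the byte LOAD followed by a 4-byte PUT already has
   the right semantics.  The signed case (movsbl) does need a WIDEN
   with signed_widen set. */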

/* Meaning of operand kinds is as follows:

   ArchReg  is a register of the simulated CPU, stored in memory,
            in vg_m_state.m_eax .. m_edi.  These values are stored
            using the Intel register encoding.

   RealReg  is a register of the real CPU.  There are VG_MAX_REALREGS
            available for allocation.  As with ArchRegs, these values
            are stored using the Intel register encoding.

   TempReg  is a temporary register used to express the results of
            disassembly.  There is an unlimited supply of them --
            register allocation and spilling eventually assigns them
            to RealRegs.

   SpillNo  is a spill slot number.  The number of required spill
            slots is VG_MAX_PSEUDOS, in general.  Only allowed
            as the ArchReg operand of GET and PUT.

   Lit16    is a signed 16-bit literal value.

   Literal  is a 32-bit literal value.  Each uinstr can only hold
            one of these.

   The disassembled code is expressed purely in terms of ArchReg,
   TempReg and Literal operands.  Eventually, register allocation
   removes all the TempRegs, giving a result using ArchRegs, RealRegs,
   and Literals.  New x86 code can easily be synthesised from this.
   There are carefully designed restrictions on which insns can have
   which operands, intended to make it possible to generate x86 code
   from the result of register allocation on the ucode efficiently and
   without need of any further RealRegs.

   Restrictions on insns (as generated by the disassembler) are as
   follows:

      A=ArchReg   S=SpillNo   T=TempReg   L=Literal   R=RealReg
      N=NoValue

         GETF       T       N       N
         PUTF       T       N       N

         GET        A,S     T       N
         PUT        T       A,S     N
         LOAD       T       T       N
         STORE      T       T       N
         MOV        T,L     T       N
         CMOV       T       T       N
         WIDEN      T       N       N
         JMP        T,L     N       N
         CALLM      L       N       N
         CALLM_S    N       N       N
         CALLM_E    N       N       N
         PUSH,POP   T       N       N
         CLEAR      L       N       N

         AND, OR
                    T       T       N

         ADD, ADC, XOR, SUB, SBB
                    A,L,T   T       N

         SHL, SHR, SAR, ROL, ROR, RCL, RCR
                    L,T     T       N

         NOT, NEG, INC, DEC, CC2VAL, BSWAP
                    T       N       N

         JIFZ       T       L       N

         FPU_R      L       T       N
         FPU_W      L       T       N
         FPU        L       T       N

         LEA1       T       T   (const in a separate field)
         LEA2       T       T       T   (const & shift ditto)

         INCEIP     L       N       N

   and for instrumentation insns:

         LOADV      T       T       N
         STOREV     T,L     T       N
         GETV       A       T       N
         PUTV       T,L     A       N
         GETVF      T       N       N
         PUTVF      T       N       N
         WIDENV     T       N       N
         TESTV      A,T     N       N
         SETV       A,T     N       N
         TAG1       T       N       N
         TAG2       T       T       N

   Before register allocation, S operands should not appear anywhere.
   After register allocation, all T operands should have been
   converted into Rs, and S operands are allowed in GET and PUT --
   denoting spill saves/restores.

   The size field should be 0 for insns for which it is meaningless,
   ie those which do not directly move/operate on data.
*/
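
/* A worked example (illustrative; temp numbers are arbitrary): the
   disassembler might render "addl %eax, %ebx" as

        GETL      %EAX, t0
        GETL      %EBX, t2
        ADDL      t0, t2  (-wOSZACP)
        PUTL      t2, %EBX

   which obeys the table above: GET takes an ArchReg and a TempReg,
   ADD takes (ArchReg|Literal|TempReg) and a TempReg, and PUT takes a
   TempReg and an ArchReg. */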
Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
{
#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
#  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
#  define A1  (u->tag1 == ArchReg)
#  define A2  (u->tag2 == ArchReg)
#  define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
#  define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
#  define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
#  define L1  (u->tag1 == Literal && u->val1 == 0)
#  define L2  (u->tag2 == Literal && u->val2 == 0)
#  define Ls1 (u->tag1 == Lit16)
#  define Ls3 (u->tag3 == Lit16)
#  define N1  (u->tag1 == NoValue)
#  define N2  (u->tag2 == NoValue)
#  define N3  (u->tag3 == NoValue)
#  define SZ4 (u->size == 4)
#  define SZ2 (u->size == 2)
#  define SZ1 (u->size == 1)
#  define SZ0 (u->size == 0)
#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
#  define FLG_WR_MAYBE (u->flags_r == FlagsEmpty && \
                        (u->flags_w == FlagsEmpty || u->flags_w == FlagsZCP))
#  define CC1 (!(CC0))
#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
                      ? (u->size == 4) : True)

   Int n_lits = 0;
   if (u->tag1 == Literal) n_lits++;
   if (u->tag2 == Literal) n_lits++;
   if (u->tag3 == Literal) n_lits++;
   if (n_lits > 1)
      return False;

   switch (u->opcode) {
      case GETF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
      case PUTF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
      case CALLM_S: case CALLM_E:
         return SZ0 && N1 && N2 && N3;
      case INCEIP:
         return SZ0 && CC0 && Ls1 && N2 && N3;
      case LEA1:
         return CC0 && TR1 && TR2 && N3 && SZ4;
      case LEA2:
         return CC0 && TR1 && TR2 && TR3 && SZ4;
      case NOP:
         return SZ0 && CC0 && N1 && N2 && N3;
      case GET:
         return CC0 && AS1 && TR2 && N3;
      case PUT:
         return CC0 && TR1 && AS2 && N3;
      case LOAD: case STORE:
         return CC0 && TR1 && TR2 && N3;
      case MOV:
         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
      case CMOV:
         return CC1 && TR1 && TR2 && N3 && SZ4;
      case JMP:
         return (u->cond==CondAlways ? CC0 : CC1)
                && (TR1 || L1) && N2 && SZ0 && N3;
      case CLEAR:
         return CC0 && Ls1 && N2 && SZ0 && N3;
      case CALLM:
         return SZ0 && Ls1 && N2 && N3;
      case PUSH: case POP:
         return CC0 && TR1 && N2 && N3;
      case AND: case OR:
         return TR1 && TR2 && N3;
      case ADD: case ADC: case XOR: case SUB: case SBB:
         return (A1 || TR1 || L1) && TR2 && N3;
      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
         return (TR1 || L1) && TR2 && N3;
      case NOT: case NEG: case INC: case DEC:
         return TR1 && N2 && N3;
      case BSWAP:
         return TR1 && N2 && N3 && CC0 && SZ4;
      case CC2VAL:
         return CC1 && SZ1 && TR1 && N2 && N3;
      case JIFZ:
         return CC0 && SZ4 && TR1 && L2 && N3;
      case FPU_R: case FPU_W:
         return CC0 && Ls1 && TR2 && N3;
      case FPU:
         return SZ0 && FLG_WR_MAYBE && Ls1 && N2 && N3;
      case LOADV:
         return CC0 && TR1 && TR2 && N3;
      case STOREV:
         return CC0 && (TR1 || L1) && TR2 && N3;
      case GETV:
         return CC0 && A1 && TR2 && N3;
      case PUTV:
         return CC0 && (TR1 || L1) && A2 && N3;
      case GETVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case PUTVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case WIDEN:
         return CC0 && TR1 && N2 && N3;
      case TESTV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case SETV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case TAG1:
         return CC0 && TR1 && N2 && Ls3 && SZ0;
      case TAG2:
         return CC0 && TR1 && TR2 && Ls3 && SZ0;
      default:
         VG_(panic)("vg_saneUInstr: unhandled opcode");
   }
#  undef SZ4_IF_TR1
#  undef CC0
#  undef CC1
#  undef SZ4
#  undef SZ2
#  undef SZ1
#  undef SZ0
#  undef TR1
#  undef TR2
#  undef TR3
#  undef A1
#  undef A2
#  undef AS1
#  undef AS2
#  undef AS3
#  undef L1
#  undef Ls1
#  undef L2
#  undef Ls3
#  undef N1
#  undef N2
#  undef N3
#  undef FLG_RD
#  undef FLG_WR
#  undef FLG_WR_MAYBE
}


/* Sanity checks to do with CALLMs in UCodeBlocks. */
Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
{
   Int  callm   = 0;
   Int  callm_s = 0;
   Int  callm_e = 0;
   Int  callm_ptr, calls_ptr;
   Int  i, j, t;
   Bool incall  = False;

   /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */

   for (i = 0; i < cb->used; i++) {
      switch (cb->instrs[i].opcode) {
         case CALLM:
            if (!incall) return False;
            callm++;
            break;
         case CALLM_S:
            if (incall) return False;
            incall = True;
            callm_s++;
            break;
         case CALLM_E:
            if (!incall) return False;
            incall = False;
            callm_e++;
            break;
         case PUSH: case POP: case CLEAR:
            if (!incall) return False;
            break;
         default:
            break;
      }
   }
   if (incall) return False;
   if (callm != callm_s || callm != callm_e) return False;

   /* Check the sections between CALLM_S and CALLM's.  Ensure that no
      PUSH uinsn pushes any TempReg that any other PUSH in the same
      section pushes.  Ie, check that the TempReg args to PUSHes in
      the section are unique.  If not, the instrumenter generates
      incorrect code for CALLM insns. */

   callm_ptr = 0;

  find_next_CALLM:
   /* Search for the next interval, making calls_ptr .. callm_ptr
      bracket it. */
   while (callm_ptr < cb->used
          && cb->instrs[callm_ptr].opcode != CALLM)
      callm_ptr++;
   if (callm_ptr == cb->used)
      return True;
   vg_assert(cb->instrs[callm_ptr].opcode == CALLM);

   calls_ptr = callm_ptr - 1;
   while (cb->instrs[calls_ptr].opcode != CALLM_S)
      calls_ptr--;
   vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
   vg_assert(calls_ptr >= 0);

   /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */

   /* For each PUSH insn in the interval ... */
   for (i = calls_ptr + 1; i < callm_ptr; i++) {
      if (cb->instrs[i].opcode != PUSH) continue;
      t = cb->instrs[i].val1;
      /* Ensure no later PUSH insns up to callm_ptr push the same
         TempReg.  Return False if any such are found. */
      for (j = i+1; j < callm_ptr; j++) {
         if (cb->instrs[j].opcode == PUSH &&
             cb->instrs[j].val1 == t)
            return False;
      }
   }

   /* This interval is clean.  Keep going ... */
   callm_ptr++;
   goto find_next_CALLM;
}
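
/* For reference, a block that passes these checks brackets each CALLM
   like this (a sketch; the helper address and temp numbers are
   hypothetical):

        CALLM_S
        PUSH   t0
        PUSH   t2        <-- must differ from t0
        CALLM  $0x...    <-- some VG_(helper_...) routine
        POP    t4
        CLEAR  $0x8
        CALLM_E
*/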


/*------------------------------------------------------------*/
/*--- Printing uinstrs.                                    ---*/
/*------------------------------------------------------------*/

Char* VG_(nameCondcode) ( Condcode cond )
{
   switch (cond) {
      case CondO:      return "o";
      case CondNO:     return "no";
      case CondB:      return "b";
      case CondNB:     return "nb";
      case CondZ:      return "z";
      case CondNZ:     return "nz";
      case CondBE:     return "be";
      case CondNBE:    return "nbe";
      case CondS:      return "s";
      case ConsNS:     return "ns";
      case CondP:      return "p";
      case CondNP:     return "np";
      case CondL:      return "l";
      case CondNL:     return "nl";
      case CondLE:     return "le";
      case CondNLE:    return "nle";
      case CondAlways: return "MP"; /* hack! */
      default: VG_(panic)("nameCondcode");
   }
}


static void vg_ppFlagSet ( Char* prefix, FlagSet set )
{
   VG_(printf)("%s", prefix);
   if (set & FlagD) VG_(printf)("D");
   if (set & FlagO) VG_(printf)("O");
   if (set & FlagS) VG_(printf)("S");
   if (set & FlagZ) VG_(printf)("Z");
   if (set & FlagA) VG_(printf)("A");
   if (set & FlagC) VG_(printf)("C");
   if (set & FlagP) VG_(printf)("P");
}


static void ppTempReg ( Int tt )
{
   if ((tt & 1) == 0)
      VG_(printf)("t%d", tt);
   else
      VG_(printf)("q%d", tt-1);
}


static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
{
   UInt tag, val;
   switch (operandNo) {
      case 1: tag = u->tag1; val = u->val1; break;
      case 2: tag = u->tag2; val = u->val2; break;
      case 3: tag = u->tag3; val = u->val3; break;
      default: VG_(panic)("ppUOperand(1)");
   }
   if (tag == Literal) val = u->lit32;

   if (parens) VG_(printf)("(");
   switch (tag) {
      case TempReg: ppTempReg(val); break;
      case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
      case Literal: VG_(printf)("$0x%x", val); break;
      case Lit16:   VG_(printf)("$0x%x", val); break;
      case NoValue: VG_(printf)("NoValue"); break;
      case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
      case SpillNo: VG_(printf)("spill%d", val); break;
      default: VG_(panic)("ppUOperand(2)");
   }
   if (parens) VG_(printf)(")");
}


Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
{
   switch (opc) {
      case ADD:   return (upper ? "ADD"   : "add");
      case ADC:   return (upper ? "ADC"   : "adc");
      case AND:   return (upper ? "AND"   : "and");
      case OR:    return (upper ? "OR"    : "or");
      case XOR:   return (upper ? "XOR"   : "xor");
      case SUB:   return (upper ? "SUB"   : "sub");
      case SBB:   return (upper ? "SBB"   : "sbb");
      case SHL:   return (upper ? "SHL"   : "shl");
      case SHR:   return (upper ? "SHR"   : "shr");
      case SAR:   return (upper ? "SAR"   : "sar");
      case ROL:   return (upper ? "ROL"   : "rol");
      case ROR:   return (upper ? "ROR"   : "ror");
      case RCL:   return (upper ? "RCL"   : "rcl");
      case RCR:   return (upper ? "RCR"   : "rcr");
      case NOT:   return (upper ? "NOT"   : "not");
      case NEG:   return (upper ? "NEG"   : "neg");
      case INC:   return (upper ? "INC"   : "inc");
      case DEC:   return (upper ? "DEC"   : "dec");
      case BSWAP: return (upper ? "BSWAP" : "bswap");
      default: break;
   }
   if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
   switch (opc) {
      case GETVF:   return "GETVF";
      case PUTVF:   return "PUTVF";
      case TAG1:    return "TAG1";
      case TAG2:    return "TAG2";
      case CALLM_S: return "CALLM_S";
      case CALLM_E: return "CALLM_E";
      case INCEIP:  return "INCEIP";
      case LEA1:    return "LEA1";
      case LEA2:    return "LEA2";
      case NOP:     return "NOP";
      case GET:     return "GET";
      case PUT:     return "PUT";
      case GETF:    return "GETF";
      case PUTF:    return "PUTF";
      case LOAD:    return "LD" ;
      case STORE:   return "ST" ;
      case MOV:     return "MOV";
      case CMOV:    return "CMOV";
      case WIDEN:   return "WIDEN";
      case JMP:     return "J" ;
      case JIFZ:    return "JIFZ" ;
      case CALLM:   return "CALLM";
      case PUSH:    return "PUSH" ;
      case POP:     return "POP" ;
      case CLEAR:   return "CLEAR";
      case CC2VAL:  return "CC2VAL";
      case FPU_R:   return "FPU_R";
      case FPU_W:   return "FPU_W";
      case FPU:     return "FPU" ;
      case LOADV:   return "LOADV";
      case STOREV:  return "STOREV";
      case GETV:    return "GETV";
      case PUTV:    return "PUTV";
      case TESTV:   return "TESTV";
      case SETV:    return "SETV";
      default: VG_(panic)("nameUOpcode: unhandled case");
   }
}


void VG_(ppUInstr) ( Int instrNo, UInstr* u )
{
   VG_(printf)("\t%4d: %s", instrNo,
               VG_(nameUOpcode)(True, u->opcode));
   if (u->opcode == JMP || u->opcode == CC2VAL)
      VG_(printf)("%s", VG_(nameCondcode(u->cond)));

   switch (u->size) {
      case 0:  VG_(printf)("o"); break;
      case 1:  VG_(printf)("B"); break;
      case 2:  VG_(printf)("W"); break;
      case 4:  VG_(printf)("L"); break;
      case 8:  VG_(printf)("Q"); break;
      default: VG_(printf)("%d", (Int)u->size); break;
   }

   switch (u->opcode) {

      case TAG1:
         VG_(printf)("\t");
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" )");
         break;

      case TAG2:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" )");
         break;

      case CALLM_S: case CALLM_E:
         break;

      case INCEIP:
         VG_(printf)("\t$%d", u->val1);
         break;

      case LEA2:
         VG_(printf)("\t%d(" , u->lit32);
         ppUOperand(u, 1, 4, False);
         VG_(printf)(",");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(",%d), ", (Int)u->extra4b);
         ppUOperand(u, 3, 4, False);
         break;

      case LEA1:
         VG_(printf)("\t%d" , u->lit32);
         ppUOperand(u, 1, 4, True);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         break;

      case NOP:
         break;

      case FPU_W:
         VG_(printf)("\t0x%x:0x%x, ",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         ppUOperand(u, 2, 4, True);
         break;

      case FPU_R:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, True);
         VG_(printf)(", 0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case FPU:
         VG_(printf)("\t0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case STOREV: case LOADV:
      case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
         break;

      case GETF: case PUTF:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JMP: case CC2VAL:
      case PUSH: case POP: case CLEAR: case CALLM:
         if (u->opcode == JMP) {
            switch (u->jmpkind) {
               case JmpCall:      VG_(printf)("-c"); break;
               case JmpRet:       VG_(printf)("-r"); break;
               case JmpSyscall:   VG_(printf)("-sys"); break;
               case JmpClientReq: VG_(printf)("-cli"); break;
               default: break;
            }
         }
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JIFZ:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case PUTVF: case GETVF:
         VG_(printf)("\t");
         ppUOperand(u, 1, 0, False);
         break;

      case NOT: case NEG: case INC: case DEC: case BSWAP:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case GETV: case PUTV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
         break;

      case WIDEN:
         VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                     u->signed_widen?'s':'z');
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case TESTV: case SETV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      default: VG_(panic)("ppUInstr: unhandled opcode");
   }

   if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
      VG_(printf)("  (");
      if (u->flags_r != FlagsEmpty)
         vg_ppFlagSet("-r", u->flags_r);
      if (u->flags_w != FlagsEmpty)
         vg_ppFlagSet("-w", u->flags_w);
      VG_(printf)(")");
   }
   VG_(printf)("\n");
}


void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
{
   Int i;
   VG_(printf)("\n%s\n", title);
   for (i = 0; i < cb->used; i++)
      if (0 || cb->instrs[i].opcode != NOP)
         VG_(ppUInstr) ( i, &cb->instrs[i] );
   VG_(printf)("\n");
}


/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation               ---*/
/*--- and code improvement.                                ---*/
/*------------------------------------------------------------*/

/* A structure for communicating temp uses, and for indicating
   temp->real register mappings for patchUInstr. */
typedef
   struct {
      Int  realNo;
      Int  tempNo;
      Bool isWrite;
   }
   TempUse;


/* Get the temp uses of a uinstr, parking them in an array supplied by
   the caller, which is assumed to be big enough.  Return the number
   of entries.  Insns which read _and_ write a register wind up
   mentioning it twice.  Entries are placed in the array in program
   order, so that if a reg is read-modified-written, it appears first
   as a read and then as a write.
*/
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{

#  define RD(ono)                               \
      if (mycat(u->tag,ono) == TempReg)         \
         { arr[n].tempNo  = mycat(u->val,ono);  \
           arr[n].isWrite = False; n++; }
#  define WR(ono)                               \
      if (mycat(u->tag,ono) == TempReg)         \
         { arr[n].tempNo  = mycat(u->val,ono);  \
           arr[n].isWrite = True; n++; }

   Int n = 0;
   switch (u->opcode) {
      case LEA1: RD(1); WR(2); break;
      case LEA2: RD(1); RD(2); WR(3); break;

      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
      case FPU_R: case FPU_W: RD(2); break;

      case GETF:  WR(1); break;
      case PUTF:  RD(1); break;

      case GET:   WR(2); break;
      case PUT:   RD(1); break;
      case LOAD:  RD(1); WR(2); break;
      case STORE: RD(1); RD(2); break;
      case MOV:   RD(1); WR(2); break;

      case JMP:   RD(1); break;
      case CLEAR: case CALLM: break;

      case PUSH: RD(1); break;
      case POP:  WR(1); break;

      case TAG2:
      case CMOV:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
         RD(1); RD(2); WR(2); break;

      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         RD(1); RD(2); WR(2); break;

      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
         RD(1); WR(1); break;

      case WIDEN: RD(1); WR(1); break;

      case CC2VAL: WR(1); break;
      case JIFZ: RD(1); break;

      /* These sizes are only ever consulted when the instrumentation
         code is being added, so the following can return
         manifestly-bogus sizes. */
      case LOADV:  RD(1); WR(2); break;
      case STOREV: RD(1); RD(2); break;
      case GETV:   WR(2); break;
      case PUTV:   RD(1); break;
      case TESTV:  RD(1); break;
      case SETV:   WR(1); break;
      case PUTVF:  RD(1); break;
      case GETVF:  WR(1); break;

      default: VG_(panic)("getTempUsage: unhandled opcode");
   }
   return n;

#  undef RD
#  undef WR
}
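
/* Example (reading off the cases above): for "ADD t0, t2" this
   reports three entries, in program order -- a read of t0, a read of
   t2, then a write of t2 -- since ADD reads both operands and
   overwrites the second. */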


/* Change temp regs in u into real regs, as directed by tmap. */
static __inline__
void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
{
   Int i;
   if (u->tag1 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val1) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(1)");
      u->tag1 = RealReg;
      u->val1 = tmap[i].realNo;
   }
   if (u->tag2 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val2) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(2)");
      u->tag2 = RealReg;
      u->val2 = tmap[i].realNo;
   }
   if (u->tag3 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val3) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(3)");
      u->tag3 = RealReg;
      u->val3 = tmap[i].realNo;
   }
}


/* Tedious x86-specific hack which compensates for the fact that the
   register numbers for %ah .. %dh do not correspond to those for %eax
   .. %edx.  It maps a (reg size, reg no) pair to the number of the
   containing 32-bit reg. */
static __inline__
Int containingArchRegOf ( Int sz, Int aregno )
{
   switch (sz) {
      case 4: return aregno;
      case 2: return aregno;
      case 1: return aregno >= 4 ? aregno-4 : aregno;
      default: VG_(panic)("containingArchRegOf");
   }
}
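
/* Example: in the Intel byte-register encoding %ah is number 4 while
   its containing 32-bit register %eax is number 0, so
   containingArchRegOf(1, 4) -- size 1, %ah -- returns 0, and
   containingArchRegOf(1, 0) -- %al -- returns 0 unchanged. */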


/* If u reads an ArchReg, return the number of the containing arch
   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
static __inline__
Int maybe_uinstrReadsArchReg ( UInstr* u )
{
   switch (u->opcode) {
      case GET:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR: case ROL:
      case ROR: case RCL: case RCR:
         if (u->tag1 == ArchReg)
            return containingArchRegOf ( u->size, u->val1 );
         else
            return -1;

      case GETF: case PUTF:
      case CALLM_S: case CALLM_E:
      case INCEIP:
      case LEA1:
      case LEA2:
      case NOP:
      case PUT:
      case LOAD:
      case STORE:
      case MOV:
      case CMOV:
      case JMP:
      case CALLM: case CLEAR: case PUSH: case POP:
      case NOT: case NEG: case INC: case DEC: case BSWAP:
      case CC2VAL:
      case JIFZ:
      case FPU: case FPU_R: case FPU_W:
      case WIDEN:
         return -1;

      default:
         VG_(ppUInstr)(0,u);
         VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
   }
}

static __inline__
Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
{
   Int i, k;
   TempUse tempUse[3];
   k = getTempUsage ( u, &tempUse[0] );
   for (i = 0; i < k; i++)
      if (tempUse[i].tempNo == tempreg)
         return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- ucode improvement.                                   ---*/
/*------------------------------------------------------------*/

/* Improve the code in cb by doing
   -- Redundant ArchReg-fetch elimination
   -- Redundant PUT elimination
   -- Redundant cond-code restore/save elimination
   The overall effect of these is to allow target registers to be
   cached in host registers over multiple target insns.
*/
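
/* A sketch of pass 1 at work (temp numbers illustrative).  In

        GETL %EAX, t0
        ADDL t0, t2
        GETL %EAX, t6

   the second GET is redundant provided t0's live range has ended by
   then, so it is turned into a NOP and t6 is renamed to t0 for the
   rest of the block. */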
static void vg_improve ( UCodeBlock* cb )
{
   Int     i, j, k, m, n, ar, tr, told, actual_areg;
   Int     areg_map[8];
   Bool    annul_put[8];
   TempUse tempUse[3];
   UInstr* u;
   Bool    wr;
   Int*    last_live_before;
   FlagSet future_dead_flags;

   if (cb->nextTemp > 0)
      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
   else
      last_live_before = NULL;


   /* PASS 1: redundant GET elimination.  (Actually, more general than
      that -- eliminates redundant fetches of ArchRegs). */

   /* Find the live-range-ends for all temporaries.  Duplicates code
      in the register allocator :-( */

   for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      k = getTempUsage(u, &tempUse[0]);

      /* For each temp usage ... bwds in program order. */
      for (j = k-1; j >= 0; j--) {
         tr = tempUse[j].tempNo;
         wr = tempUse[j].isWrite;
         if (last_live_before[tr] == -1) {
            vg_assert(tr >= 0 && tr < cb->nextTemp);
            last_live_before[tr] = wr ? (i+1) : i;
         }
      }

   }

#  define BIND_ARCH_TO_TEMP(archreg,tempreg)\
   { Int q;                                           \
     /* Invalidate any old binding(s) to tempreg. */  \
     for (q = 0; q < 8; q++)                          \
        if (areg_map[q] == tempreg) areg_map[q] = -1; \
     /* Add the new binding. */                       \
     areg_map[archreg] = (tempreg);                   \
   }

   /* Set up the A-reg map. */
   for (i = 0; i < 8; i++) areg_map[i] = -1;

   /* Scan insns. */
   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      if (u->opcode == GET && u->size == 4) {
         /* GET; see if it can be annulled. */
         vg_assert(u->tag1 == ArchReg);
         vg_assert(u->tag2 == TempReg);
         ar   = u->val1;
         tr   = u->val2;
         told = areg_map[ar];
         if (told != -1 && last_live_before[told] <= i) {
            /* ar already has an old mapping to told, but that runs
               out here.  Annul this GET, rename tr to told for the
               rest of the block, and extend told's live range to that
               of tr. */
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            n = last_live_before[tr] + 1;
            if (n > cb->used) n = cb->used;
            last_live_before[told] = last_live_before[tr];
            last_live_before[tr] = i-1;
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
                  i, tr, told, i+1, n-1);
            for (m = i+1; m < n; m++) {
               if (cb->instrs[m].tag1 == TempReg
                   && cb->instrs[m].val1 == tr)
                  cb->instrs[m].val1 = told;
               if (cb->instrs[m].tag2 == TempReg
                   && cb->instrs[m].val2 == tr)
                  cb->instrs[m].val2 = told;
            }
            BIND_ARCH_TO_TEMP(ar,told);
         }
         else
            BIND_ARCH_TO_TEMP(ar,tr);
      }
      else if (u->opcode == GET && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val1 );
         areg_map[actual_areg] = -1;
      }
      else if (u->opcode == PUT && u->size == 4) {
         /* PUT; re-establish t -> a binding */
         vg_assert(u->tag1 == TempReg);
         vg_assert(u->tag2 == ArchReg);
         BIND_ARCH_TO_TEMP(u->val2, u->val1);
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         areg_map[actual_areg] = -1;
      } else {

         /* see if insn has an archreg as a read operand; if so try to
            map it. */
         if (u->tag1 == ArchReg && u->size == 4
             && areg_map[u->val1] != -1) {
            switch (u->opcode) {
               case ADD: case SUB: case AND: case OR: case XOR:
               case ADC: case SBB:
               case SHL: case SHR: case SAR: case ROL: case ROR:
               case RCL: case RCR:
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change ArchReg %S to TempReg t%d\n",
                        i, nameIReg(4,u->val1), areg_map[u->val1]);
                  u->tag1 = TempReg;
                  u->val1 = areg_map[u->val1];
                  /* Remember to extend the live range of the TempReg,
                     if necessary. */
                  if (last_live_before[u->val1] < i)
                     last_live_before[u->val1] = i;
                  break;
               default:
                  break;
            }
         }

         /* boring insn; invalidate any mappings to temps it writes */
         k = getTempUsage(u, &tempUse[0]);

         for (j = 0; j < k; j++) {
            wr = tempUse[j].isWrite;
            if (!wr) continue;
            tr = tempUse[j].tempNo;
            for (m = 0; m < 8; m++)
               if (areg_map[m] == tr) areg_map[m] = -1;
         }
      }

   }

#  undef BIND_ARCH_TO_TEMP

   /* PASS 2: redundant PUT elimination.  If doing instrumentation,
      don't annul (delay) puts of %ESP, since the memory check
      machinery always requires the in-memory value of %ESP to be up
      to date.
   */
   for (j = 0; j < 8; j++)
      annul_put[j] = False;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];
      if (u->opcode == NOP) continue;

      if (u->opcode == PUT && u->size == 4) {
         vg_assert(u->tag2 == ArchReg);
         actual_areg = containingArchRegOf ( 4, u->val2 );
         if (annul_put[actual_areg]) {
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            if (VG_(disassemble))
               VG_(printf)("at %d: delete PUT\n", i );
         } else {
            if (!(VG_(clo_instrument) && actual_areg == R_ESP))
               annul_put[actual_areg] = True;
         }
      }
      else if (u->opcode == PUT && u->size != 4) {
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         annul_put[actual_areg] = False;
      }
      else if (u->opcode == JMP || u->opcode == JIFZ
               || u->opcode == CALLM) {
         for (j = 0; j < 8; j++)
            annul_put[j] = False;
      }
      else {
         /* If an instruction reads an ArchReg, the immediately
            preceding PUT cannot be annulled. */
         actual_areg = maybe_uinstrReadsArchReg ( u );
         if (actual_areg != -1)
            annul_put[actual_areg] = False;
      }
   }

   /* PASS 2a: redundant-move elimination.  Given MOV t1, t2 and t1 is
      dead after this point, annul the MOV insn and rename t2 to t1.
      Further modifies the last_live_before map. */

#  if 0
   VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
   for (i = 0; i < cb->nextTemp; i++)
      VG_(printf)("llb[t%d]=%d   ", i, last_live_before[i]);
   VG_(printf)("\n");
#  endif

   for (i = 0; i < cb->used-1; i++) {
      u = &cb->instrs[i];
      if (u->opcode != MOV) continue;
      if (u->tag1 == Literal) continue;
      vg_assert(u->tag1 == TempReg);
      vg_assert(u->tag2 == TempReg);
      if (last_live_before[u->val1] == i) {
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
               i, u->val2, u->val1, i+1, last_live_before[u->val2] );
         for (j = i+1; j <= last_live_before[u->val2]; j++) {
            if (cb->instrs[j].tag1 == TempReg
                && cb->instrs[j].val1 == u->val2)
               cb->instrs[j].val1 = u->val1;
            if (cb->instrs[j].tag2 == TempReg
                && cb->instrs[j].val2 == u->val2)
               cb->instrs[j].val2 = u->val1;
         }
         last_live_before[u->val1] = last_live_before[u->val2];
         last_live_before[u->val2] = i-1;
         u->opcode = NOP;
         u->tag1 = u->tag2 = NoValue;
      }
   }

   /* PASS 3: redundant condition-code restore/save elimination.
      Scan backwards from the end.  future_dead_flags records the set
      of flags which are dead at this point, that is, will be written
      before they are next read.  Earlier uinsns which write flags
      already in future_dead_flags can have their writes annulled.
   */
   future_dead_flags = FlagsEmpty;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* We might never make it to insns beyond this one, so be
         conservative. */
      if (u->opcode == JIFZ || u->opcode == JMP) {
         future_dead_flags = FlagsEmpty;
         continue;
      }

      /* We can annul the flags written by this insn if it writes a
         subset (or eq) of the set of flags known to be dead after
         this insn.  If not, just record the flags also written by
         this insn. */
      if (u->flags_w != FlagsEmpty
          && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
         if (VG_(disassemble)) {
            VG_(printf)("at %d: annul flag write ", i);
            vg_ppFlagSet("", u->flags_w);
            VG_(printf)(" due to later ");
            vg_ppFlagSet("", future_dead_flags);
            VG_(printf)("\n");
         }
         u->flags_w = FlagsEmpty;
      } else {
         future_dead_flags
            = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
      }

      /* If this insn also reads flags, empty out future_dead_flags so
         as to force preceding writes not to be annulled. */
      if (u->flags_r != FlagsEmpty)
         future_dead_flags = FlagsEmpty;
   }

   if (last_live_before)
      VG_(jitfree) ( last_live_before );
}


/*------------------------------------------------------------*/
/*--- The new register allocator.                          ---*/
/*------------------------------------------------------------*/

typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Int live_after;
      /* Becomes dead for the last time after this insn ... */
      Int dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Int spill_no;
      /* Where is it?  VG_NOTHING==in a spill slot; else in reg. */
      Int real_no;
   }
   TempInfo;


/* Take a ucode block and allocate its TempRegs to RealRegs, or put
   them in spill locations, and add spill code, if there are not
   enough real regs.  The usual register allocation deal, in short.

   Important redundancy of representation:

     real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
     to VG_NOTHING if the real reg has no currently assigned TempReg.

     The .real_no field of a TempInfo gives the current RRR for
     this TempReg, or VG_NOTHING if the TempReg is currently
     in memory, in which case it is in the SpillNo denoted by
     spill_no.

   These pieces of information (a fwds-bwds mapping, really) must
   be kept consistent!

   This allocator uses the so-called Second Chance Bin Packing
   algorithm, as described in "Quality and Speed in Linear-scan
   Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
   pp142-151).  It is simple and fast and remarkably good at
   minimising the amount of spill code introduced.
*/
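
/* When every real register is occupied, the loop below frees one by
   spilling: the evicted temp is PUT back to its home slot, and the
   incoming temp, if the insn reads it, is reloaded with a GET -- the
   same opcodes as ArchReg traffic, but with SpillNo operands, which
   the sanity checker permits after register allocation.  A sketch of
   the emitted pair (rank and slot numbers illustrative):

        PUTL   %edx, spill3      (evict current occupant)
        GETL   spill5, %edx      (reload the temp this insn reads)
*/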

static
UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
{
   TempInfo*   temp_info;
   Int         real_to_temp[VG_MAX_REALREGS];
   Bool        is_spill_cand[VG_MAX_REALREGS];
   Int         ss_busy_until_before[VG_MAX_SPILLSLOTS];
   Int         i, j, k, m, r, tno, max_ss_no;
   Bool        wr, defer, isRead, spill_reqd;
   TempUse     tempUse[3];
   UCodeBlock* c2;

   /* Used to denote ... well, "no value" in this fn. */
#  define VG_NOTHING (-2)

   /* Initialise the TempReg info. */
   if (c1->nextTemp > 0)
      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
   else
      temp_info = NULL;

   for (i = 0; i < c1->nextTemp; i++) {
      temp_info[i].live_after  = VG_NOTHING;
      temp_info[i].dead_before = VG_NOTHING;
      temp_info[i].spill_no    = VG_NOTHING;
      /* temp_info[i].real_no is not yet relevant. */
   }

   spill_reqd = False;

   /* Scan fwds to establish live ranges. */

   for (i = 0; i < c1->used; i++) {
      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each temp usage ... fwds in program order */
      for (j = 0; j < k; j++) {
         tno = tempUse[j].tempNo;
         wr  = tempUse[j].isWrite;
         if (wr) {
            /* Writes hold a reg live until after this insn. */
            if (temp_info[tno].live_after == VG_NOTHING)
               temp_info[tno].live_after = i;
            if (temp_info[tno].dead_before < i + 1)
               temp_info[tno].dead_before = i + 1;
         } else {
            /* First use of a tmp should be a write. */
            vg_assert(temp_info[tno].live_after != VG_NOTHING);
            /* Reads only hold it live until before this insn. */
            if (temp_info[tno].dead_before < i)
               temp_info[tno].dead_before = i;
         }
      }
   }

#  if 0
   /* Sanity check on live ranges.  Expensive but correct. */
   for (i = 0; i < c1->nextTemp; i++) {
      vg_assert( (temp_info[i].live_after == VG_NOTHING
                  && temp_info[i].dead_before == VG_NOTHING)
                 || (temp_info[i].live_after != VG_NOTHING
                     && temp_info[i].dead_before != VG_NOTHING) );
   }
#  endif

   /* Do a rank-based allocation of TempRegs to spill slot numbers.
      We put as few as possible values in spill slots, but
      nevertheless need to have an assignment to them just in case. */

   max_ss_no = -1;

   for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
      ss_busy_until_before[i] = 0;

   for (i = 0; i < c1->nextTemp; i++) {

      /* True iff this temp is unused. */
      if (temp_info[i].live_after == VG_NOTHING)
         continue;

      /* Find the lowest-numbered spill slot which is available at the
         start point of this interval, and assign the interval to
         it. */
      for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
         if (ss_busy_until_before[j] <= temp_info[i].live_after)
            break;
      if (j == VG_MAX_SPILLSLOTS) {
         VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
         VG_(panic)("register allocation failed -- out of spill slots");
      }
      ss_busy_until_before[j] = temp_info[i].dead_before;
      temp_info[i].spill_no = j;
      if (j > max_ss_no)
         max_ss_no = j;
   }

   VG_(total_reg_rank) += (max_ss_no+1);

   /* Show live ranges and assigned spill slot nos. */

   if (VG_(disassemble)) {
      VG_(printf)("Live Range Assignments\n");

      for (i = 0; i < c1->nextTemp; i++) {
         if (temp_info[i].live_after == VG_NOTHING)
            continue;
         VG_(printf)(
            "   LR %d is   after %d to before %d   spillno %d\n",
            i,
            temp_info[i].live_after,
            temp_info[i].dead_before,
            temp_info[i].spill_no
         );
      }
   }

   /* Now that we've established a spill slot number for each used
      temporary, we can go ahead and do the core of the "Second-chance
      binpacking" allocation algorithm. */

   /* Resulting code goes here.  We generate it all in a forwards
      pass. */
   c2 = allocCodeBlock();

   /* At the start, no TempRegs are assigned to any real register.
      Correspondingly, all temps claim to be currently resident in
      their spill slots, as computed by the previous two passes. */
   for (i = 0; i < VG_MAX_REALREGS; i++)
      real_to_temp[i] = VG_NOTHING;
   for (i = 0; i < c1->nextTemp; i++)
      temp_info[i].real_no = VG_NOTHING;

   if (VG_(disassemble))
      VG_(printf)("\n");

   /* Process each insn in turn. */
   for (i = 0; i < c1->used; i++) {

      if (c1->instrs[i].opcode == NOP) continue;
      VG_(uinstrs_prealloc)++;

#     if 0
      /* Check map consistency.  Expensive but correct. */
      for (r = 0; r < VG_MAX_REALREGS; r++) {
         if (real_to_temp[r] != VG_NOTHING) {
            tno = real_to_temp[r];
            vg_assert(tno >= 0 && tno < c1->nextTemp);
            vg_assert(temp_info[tno].real_no == r);
         }
      }
      for (tno = 0; tno < c1->nextTemp; tno++) {
         if (temp_info[tno].real_no != VG_NOTHING) {
            r = temp_info[tno].real_no;
            vg_assert(r >= 0 && r < VG_MAX_REALREGS);
            vg_assert(real_to_temp[r] == tno);
         }
      }
#     endif

      if (VG_(disassemble))
         VG_(ppUInstr)(i, &c1->instrs[i]);

      /* First, free up enough real regs for this insn.  This may
         generate spill stores since we may have to evict some TempRegs
         currently in real regs.  Also generates spill loads. */

      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each ***different*** temp mentioned in the insn .... */
      for (j = 0; j < k; j++) {

         /* First check if the temp is mentioned again later; if so,
            ignore this mention.  We only want to process each temp
            used by the insn once, even if it is mentioned more than
            once. */
         defer = False;
         tno = tempUse[j].tempNo;
         for (m = j+1; m < k; m++)
            if (tempUse[m].tempNo == tno)
               defer = True;
         if (defer)
            continue;

         /* Now we're trying to find a register for tempUse[j].tempNo.
            First of all, if it already has a register assigned, we
            don't need to do anything more. */
         if (temp_info[tno].real_no != VG_NOTHING)
            continue;

         /* No luck.  The next thing to do is see if there is a
            currently unassigned register available.  If so, bag it. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (real_to_temp[r] == VG_NOTHING)
               break;
         }
         if (r < VG_MAX_REALREGS) {
            real_to_temp[r]        = tno;
            temp_info[tno].real_no = r;
            continue;
         }

         /* Unfortunately, that didn't pan out either.  So we'll have
            to eject some other unfortunate TempReg into a spill slot
            in order to free up a register.  Of course, we need to be
            careful not to eject some other TempReg needed by this
            insn.

            Select r in 0 .. VG_MAX_REALREGS-1 such that
            real_to_temp[r] is not mentioned in
            tempUse[0 .. k-1].tempNo, since it would be just plain
            wrong to eject some other TempReg which we need to use in
            this insn.

            It is here that it is important to make a good choice of
            register to spill. */

         /* First, mark those regs which are not spill candidates. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            is_spill_cand[r] = True;
            for (m = 0; m < k; m++) {
               if (real_to_temp[r] == tempUse[m].tempNo) {
                  is_spill_cand[r] = False;
                  break;
               }
            }
         }

         /* We can choose any r satisfying is_spill_cand[r].  However,
            try to make a good choice.  First, try and find r such
            that the associated TempReg is already dead. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (is_spill_cand[r] &&
                temp_info[real_to_temp[r]].dead_before <= i)
               goto have_spill_cand;
         }

         /* No spill cand is mapped to a dead TempReg.  Now we really
            _do_ have to generate spill code.  Choose r so that the
            next use of its associated TempReg is as far ahead as
            possible, in the hope that this will minimise the number of
            consequent reloads required.  This is a bit expensive, but
            we don't have to do it very often. */
         {
            Int furthest_r = VG_MAX_REALREGS;
            Int furthest   = 0;
            for (r = 0; r < VG_MAX_REALREGS; r++) {
               if (!is_spill_cand[r]) continue;
               for (m = i+1; m < c1->used; m++)
                  if (uInstrMentionsTempReg(&c1->instrs[m],
                                            real_to_temp[r]))
                     break;
               if (m > furthest) {
                  furthest   = m;
                  furthest_r = r;
               }
            }
            r = furthest_r;
            goto have_spill_cand;
         }

        have_spill_cand:
         if (r == VG_MAX_REALREGS)
            VG_(panic)("new reg alloc: out of registers ?!");

         /* Eject r.  Important refinement: don't bother if the
            associated TempReg is now dead. */
         vg_assert(real_to_temp[r] != VG_NOTHING);
         vg_assert(real_to_temp[r] != tno);
         temp_info[real_to_temp[r]].real_no = VG_NOTHING;
         if (temp_info[real_to_temp[r]].dead_before > i) {
            uInstr2(c2, PUT, 4,
                        RealReg, VG_(rankToRealRegNo)(r),
                        SpillNo, temp_info[real_to_temp[r]].spill_no);
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Decide if tno is read. */
         isRead = False;
         for (m = 0; m < k; m++)
            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
               isRead = True;

         /* If so, generate a spill load. */
         if (isRead) {
            uInstr2(c2, GET, 4,
                        SpillNo, temp_info[tno].spill_no,
                        RealReg, VG_(rankToRealRegNo)(r) );
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Update the forwards and backwards maps. */
         real_to_temp[r]        = tno;
         temp_info[tno].real_no = r;
      }

      /* By this point, all TempRegs mentioned by the insn have been
         brought into real regs.  We now copy the insn to the output
         and use patchUInstr to convert its TempRegs into RealRegs. */
      for (j = 0; j < k; j++)
         tempUse[j].realNo
            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
      copyUInstr(c2, &c1->instrs[i]);
      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);

      if (VG_(disassemble)) {
         VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         VG_(printf)("\n");
      }
   }

   if (temp_info != NULL)
      VG_(jitfree)(temp_info);

   freeCodeBlock(c1);

   if (spill_reqd)
      VG_(translations_needing_spill)++;

   return c2;

#  undef VG_NOTHING

}


/*------------------------------------------------------------*/
/*--- New instrumentation machinery.                       ---*/
/*------------------------------------------------------------*/

static
VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveOR4_TQ;
      case 2: return VgT_ImproveOR2_TQ;
      case 1: return VgT_ImproveOR1_TQ;
      default: VG_(panic)("get_VgT_ImproveOR_TQ");
   }
}


static
VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveAND4_TQ;
      case 2: return VgT_ImproveAND2_TQ;
      case 1: return VgT_ImproveAND1_TQ;
      default: VG_(panic)("get_VgT_ImproveAND_TQ");
   }
}


static
VgTagOp get_VgT_Left ( Int sz )
{
   switch (sz) {
      case 4: return VgT_Left4;
      case 2: return VgT_Left2;
      case 1: return VgT_Left1;
      default: VG_(panic)("get_VgT_Left");
   }
}


static
VgTagOp get_VgT_UifU ( Int sz )
{
   switch (sz) {
      case 4: return VgT_UifU4;
      case 2: return VgT_UifU2;
      case 1: return VgT_UifU1;
      case 0: return VgT_UifU0;
      default: VG_(panic)("get_VgT_UifU");
   }
}


static
VgTagOp get_VgT_DifD ( Int sz )
{
   switch (sz) {
      case 4: return VgT_DifD4;
      case 2: return VgT_DifD2;
      case 1: return VgT_DifD1;
      default: VG_(panic)("get_VgT_DifD");
   }
}


static
VgTagOp get_VgT_PCast ( Int szs, Int szd )
{
   if (szs == 4 && szd == 0) return VgT_PCast40;
   if (szs == 2 && szd == 0) return VgT_PCast20;
   if (szs == 1 && szd == 0) return VgT_PCast10;
   if (szs == 0 && szd == 1) return VgT_PCast01;
   if (szs == 0 && szd == 2) return VgT_PCast02;
   if (szs == 0 && szd == 4) return VgT_PCast04;
   if (szs == 1 && szd == 4) return VgT_PCast14;
   if (szs == 1 && szd == 2) return VgT_PCast12;
   if (szs == 1 && szd == 1) return VgT_PCast11;
   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
   VG_(panic)("get_VgT_PCast");
}


static
VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
{
   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;

   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
   if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;

   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;

   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
   VG_(panic)("get_VgT_Widen");
}
1964
1965/* Pessimally cast the spec'd shadow from one size to another. */
1966static
1967void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
1968{
1969 if (szs == 0 && szd == 0)
1970 return;
1971 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1972 NoValue, 0,
1973 Lit16, get_VgT_PCast(szs,szd));
1974}


/* Create a signed or unsigned widen of the spec'd shadow from one
   size to another.  The only allowed size transitions are 1->2, 1->4
   and 2->4. */
static
void create_Widen ( UCodeBlock* cb, Bool signed_widen,
                    Int szs, Int szd, Int tempreg )
{
   if (szs == szd) return;
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
                        NoValue, 0,
                        Lit16,   get_VgT_Widen(signed_widen,szs,szd));
}


/* Get the condition codes into a new shadow, at the given size. */
static
Int create_GETVF ( UCodeBlock* cb, Int sz )
{
   Int tt = newShadow(cb);
   uInstr1(cb, GETVF, 0, TempReg, tt);
   create_PCast(cb, 0, sz, tt);
   return tt;
}


/* Save the condition codes from the spec'd shadow. */
static
void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
{
   if (sz == 0) {
      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
   } else {
      Int tt = newShadow(cb);
      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
      create_PCast(cb, sz, 0, tt);
      uInstr1(cb, PUTVF, 0, TempReg, tt);
   }
}


/* Do Left on the spec'd shadow. */
static
void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
{
   uInstr3(cb, TAG1, 0,
               TempReg, tempreg,
               NoValue, 0,
               Lit16,   get_VgT_Left(sz));
}


/* Do UifU on ts and td, putting the result in td. */
static
void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
                        Lit16,   get_VgT_UifU(sz));
}


/* Do DifD on ts and td, putting the result in td. */
static
void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
                        Lit16,   get_VgT_DifD(sz));
}


/* Do ImproveAND on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
                        Lit16,   get_VgT_ImproveAND_TQ(sz));
}


/* Do ImproveOR on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
                        Lit16,   get_VgT_ImproveOR_TQ(sz));
}


/* Get the shadow for an operand described by (tag, val).  Emit code
   to do this and return the identity of the shadow holding the
   result.  The result tag is always copied into a new shadow, so it
   can be modified without trashing the original. */
static
Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
                                     Int sz, Int tag, Int val )
{
   Int sh;
   sh = newShadow(cb);
   if (tag == TempReg) {
      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
      return sh;
   }
   if (tag == Literal) {
      uInstr1(cb, SETV, sz, TempReg, sh);
      return sh;
   }
   if (tag == ArchReg) {
      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
      return sh;
   }
   VG_(panic)("getOperandShadow");
}



/* Create and return an instrumented version of cb_in.  Free cb_in
   before returning. */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
   UCodeBlock* cb;
   Int         i, j;
   UInstr*     u_in;
   Int         qs, qd, qt, qtt;
   cb = allocCodeBlock();
   cb->nextTemp = cb_in->nextTemp;

   for (i = 0; i < cb_in->used; i++) {
      qs = qd = qt = qtt = INVALID_TEMPREG;
      u_in = &cb_in->instrs[i];

      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */

      /* VG_(ppUInstr)(0, u_in); */
      switch (u_in->opcode) {

         case NOP:
            break;

         case INCEIP:
            copyUInstr(cb, u_in);
            break;
         /* Loads and stores.  Test the V bits for the address.  24
            Mar 02: since the address is A-checked anyway, there's not
            really much point in doing the V-check too, unless you
            think that you might use addresses which are undefined
            but still addressable.  Hence the V check is made
            optional (controlled by VG_(clo_check_addrVs)).

            The LOADV/STOREV does an addressability check for the
            address. */

         case LOAD:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            }
            uInstr2(cb, LOADV, u_in->size,
                        TempReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;
         case STORE:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
            }
            uInstr2(cb, STOREV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        TempReg, u_in->val2);
            copyUInstr(cb, u_in);
            break;

         /* Moving stuff around.  Make the V bits follow accordingly,
            but don't do anything else. */

         case GET:
            uInstr2(cb, GETV, u_in->size,
                        ArchReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;
         case PUT:
            uInstr2(cb, PUTV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        ArchReg, u_in->val2);
            copyUInstr(cb, u_in);
            break;

         case GETF:
            /* This is not the smartest way to do it, but should work. */
            qd = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;
         case PUTF:
            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         case MOV:
            switch (u_in->tag1) {
               case TempReg:
                  uInstr2(cb, MOV, 4,
                              TempReg, SHADOW(u_in->val1),
                              TempReg, SHADOW(u_in->val2));
                  break;
               case Literal:
                  uInstr1(cb, SETV, u_in->size,
                              TempReg, SHADOW(u_in->val2));
                  break;
               default:
                  VG_(panic)("vg_instrument: MOV");
            }
            copyUInstr(cb, u_in);
            break;

         /* Special case of add, where one of the operands is a literal.
            lea1(t) = t + some literal.
            Therefore: lea1#(qa) = left(qa)
         */
         case LEA1:
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
            create_Left(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* Another form of add.
            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
                                and is 0, 1, 2 or 3.
            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
            Note, subtly, that the shift puts zeroes at the bottom of qt,
            meaning Valid, since the corresponding shift of tt puts
            zeroes at the bottom of the shifted value.
         */
         case LEA2: {
            Int shift;
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            switch (u_in->extra4b) {
               case 1: shift = 0; break;
               case 2: shift = 1; break;
               case 4: shift = 2; break;
               case 8: shift = 3; break;
               default: VG_(panic)( "vg_instrument(LEA2)" );
            }
            qs = SHADOW(u_in->val1);
            qt = SHADOW(u_in->val2);
            qd = SHADOW(u_in->val3);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
            if (shift > 0) {
               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
               uLiteral(cb, shift);
            }
            create_UifU(cb, 4, qs, qd);
            create_Left(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;
         }

         /* inc#/dec#(qd) = qd `UifU` left(qd) = left(qd) */
         case INC: case DEC:
            qd = SHADOW(u_in->val1);
            create_Left(cb, u_in->size, qd);
            if (u_in->flags_w != FlagsEmpty)
               create_PUTVF(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* This is a HACK (approximation :-) */
         /* rcl#/rcr#(qs,qd)
               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
                 eflags# = q0
                 qd      = pcast-0-sz(q0)
            I.e., cast everything down to a single bit, then back up.
            This assumes that any bad bits infect the whole word and
            the eflags.
         */
         case RCL: case RCR:
            vg_assert(u_in->flags_r != FlagsEmpty);
            /* The following assertion looks like it makes sense, but is
               actually wrong.  Consider this:
                  rcll %eax
                  imull %eax, %eax
               The rcll writes O and C but so does the imull, so the O and C
               write of the rcll is annulled by the prior improvement pass.
               Noticed by Kevin Ryde <user42@zip.com.au>
            */
            /* vg_assert(u_in->flags_w != FlagsEmpty); */
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            /* We can safely modify qs; cast it to 0-size. */
            create_PCast(cb, u_in->size, 0, qs);
            qd = SHADOW(u_in->val2);
            create_PCast(cb, u_in->size, 0, qd);
            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
            create_UifU(cb, 0, qs, qd);
            /* qs is now free; reuse it for the flag definedness. */
            qs = create_GETVF(cb, 0);
            create_UifU(cb, 0, qs, qd);
            create_PUTVF(cb, 0, qd);
            create_PCast(cb, 0, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* for OP in shl shr sar rol ror
            (qs is shift count#, qd is value to be OP#d)
            OP(ts,td)
            OP#(qs,qd)
               = pcast-1-sz(qs) `UifU` OP(ts,qd)
            So we apply OP to the tag bits too, and then UifU with
            the shift count# to take account of the possibility of it
            being undefined.

            A bit subtle:
               ROL/ROR rearrange the tag bits as per the value bits.
               SHL/SHR shifts zeroes into the value, and corresponding
                  zeroes indicating Definedness into the tag.
               SAR copies the top bit of the value downwards, and therefore
                  SAR also copies the definedness of the top bit too.
            So in all five cases, we just apply the same op to the tag
            bits as is applied to the value bits.  Neat!
         */
         case SHL:
         case SHR: case SAR:
         case ROL: case ROR: {
            Int t_amount = INVALID_TEMPREG;
            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);

            /* Make qs hold shift-count# and make
               t_amount be a TempReg holding the shift count. */
            if (u_in->tag1 == Literal) {
               t_amount = newTemp(cb);
               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
               uLiteral(cb, u_in->lit32);
               qs = SHADOW(t_amount);
               uInstr1(cb, SETV, 1, TempReg, qs);
            } else {
               t_amount = u_in->val1;
               qs       = SHADOW(u_in->val1);
            }

            uInstr2(cb, u_in->opcode,
                        u_in->size,
                        TempReg, t_amount,
                        TempReg, qd);
            qt = newShadow(cb);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            create_PCast(cb, 1, u_in->size, qt);
            create_UifU(cb, u_in->size, qt, qd);
            copyUInstr(cb, u_in);
            break;
         }

         /* One simple tag operation. */
         case WIDEN:
            vg_assert(u_in->tag1 == TempReg);
            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
                             SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* not#(x) = x (since bitwise independent) */
         case NOT:
            vg_assert(u_in->tag1 == TempReg);
            copyUInstr(cb, u_in);
            break;

         /* neg#(x) = left(x) (derivable from case for SUB) */
         case NEG:
            vg_assert(u_in->tag1 == TempReg);
            create_Left(cb, u_in->size, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* bswap#(x) = bswap(x) */
         case BSWAP:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->size == 4);
            qd = SHADOW(u_in->val1);
            uInstr1(cb, BSWAP, 4, TempReg, qd);
            copyUInstr(cb, u_in);
            break;

         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
         case CC2VAL:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            qt = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* cmov#(qs,qd) = cmov(qs,qd)
            That is, do the cmov of tags using the same flags as for
            the data (obviously).  However, first do a test on the
            validity of the flags.
         */
         case CMOV:
            vg_assert(u_in->size == 4);
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            vg_assert(u_in->flags_w == FlagsEmpty);
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            qt = create_GETVF(cb, 0);
            uInstr1(cb, TESTV, 0, TempReg, qt);
            /* qt should never be referred to again.  Nevertheless
               ... */
            uInstr1(cb, SETV, 0, TempReg, qt);

            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
            LAST_UINSTR(cb).cond    = u_in->cond;
            LAST_UINSTR(cb).flags_r = u_in->flags_r;

            copyUInstr(cb, u_in);
            break;

         /* add#/sub#(qs,qd)
               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
               = left(qs) `UifU` left(qd)
               = left(qs `UifU` qd)
            adc#/sbb#(qs,qd)
               = left(qs `UifU` qd) `UifU` pcast(eflags#)
            Second arg (dest) is TempReg.
            First arg (src) is Literal or TempReg or ArchReg.
         */
         case ADD: case SUB:
         case ADC: case SBB:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            create_Left(cb, u_in->size, qd);
            if (u_in->opcode == ADC || u_in->opcode == SBB) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, u_in->size);
               create_UifU(cb, u_in->size, qt, qd);
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* xor#(qs,qd) = qs `UifU` qd */
         case XOR:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* and#/or#(qs,qd)
               = (qs `UifU` qd) `DifD` improve(vs,qs)
                                `DifD` improve(vd,qd)
            where improve is the relevant one of
                Improve{AND,OR}_TQ
            Use the following steps, with qt as a temp:
               qt = improve(vd,qd)
               qd = qs `UifU` qd
               qd = qt `DifD` qd
               qt = improve(vs,qs)
               qd = qt `DifD` qd
         */
         case AND: case OR:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);
            qs = SHADOW(u_in->val1);
            qt = newShadow(cb);

            /* qt = improve(vd,qd) */
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
            /* qd = qs `UifU` qd */
            create_UifU(cb, u_in->size, qs, qd);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* qt = improve(vs,qs) */
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* So, finally qd is the result tag. */
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* Machinery to do with supporting CALLM.  Copy the start and
            end markers only to make the result easier to read
            (debug); they generate no code and have no effect.
         */
         case CALLM_S: case CALLM_E:
            copyUInstr(cb, u_in);
            break;

         /* Copy PUSH and POP verbatim.  Arg/result absval
            calculations are done when the associated CALL is
            processed.  CLEAR has no effect on absval calculations but
            needs to be copied.
         */
         case PUSH: case POP: case CLEAR:
            copyUInstr(cb, u_in);
            break;

         /* In short:
               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
            We have to decide on a size to do the computation at,
            although the choice doesn't affect correctness.  We will
            do a pcast to the final size anyway, so the only important
            factor is to choose a size which minimises the total
            number of casts needed.  Valgrind: just use size 0,
            regardless.  It may not be very good for performance
            but does simplify matters, mainly by reducing the number
            of different pessimising casts which have to be implemented.
         */
         case CALLM: {
            UInstr* uu;
            Bool res_used;

            /* Now generate the code.  Get the final result absval
               into qt. */
            qt  = newShadow(cb);
            qtt = newShadow(cb);
            uInstr1(cb, SETV, 0, TempReg, qt);
            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != PUSH) continue;
               /* cast via a temporary */
               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
                                   TempReg, qtt);
               create_PCast(cb, uu->size, 0, qtt);
               create_UifU(cb, 0, qtt, qt);
            }
            /* Remembering also that flags read count as inputs. */
            if (u_in->flags_r != FlagsEmpty) {
               qtt = create_GETVF(cb, 0);
               create_UifU(cb, 0, qtt, qt);
            }

            /* qt now holds the result tag.  If any results from the
               call are used, either by fetching with POP or
               implicitly by writing the flags, we copy the result
               absval to the relevant location.  If not used, the call
               must have been for its side effects, so we test qt here
               and now.  Note that this assumes that all values
               removed by POP continue to be live.  So dead args
               *must* be removed with CLEAR, not by POPping them into
               a dummy tempreg.
            */
            res_used = False;
            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != POP) continue;
               /* Cast via a temp. */
               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
               create_PCast(cb, 0, uu->size, qtt);
               uInstr2(cb, MOV, 4, TempReg, qtt,
                                   TempReg, SHADOW(uu->val1));
               res_used = True;
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, 0, qt);
               res_used = True;
            }
            if (!res_used) {
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            copyUInstr(cb, u_in);
            break;
         }
         /* Whew ... */

         case JMP:
            if (u_in->tag1 == TempReg) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            } else {
               vg_assert(u_in->tag1 == Literal);
            }
            if (u_in->cond != CondAlways) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, 0);
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            copyUInstr(cb, u_in);
            break;

         case JIFZ:
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* Emit a check on the address used.  For FPU_R, the value
            loaded into the FPU is checked at the time it is read from
            memory (see synth_fpu_mem_check_actions). */
         case FPU_R: case FPU_W:
            vg_assert(u_in->tag2 == TempReg);
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;

         /* For FPU insns not referencing memory, just copy thru. */
         case FPU:
            copyUInstr(cb, u_in);
            break;

         default:
            VG_(ppUInstr)(0, u_in);
            VG_(panic)( "vg_instrument: unhandled case");

      } /* end of switch (u_in->opcode) */

   } /* end of for loop */

   freeCodeBlock(cb_in);
   return cb;
}

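/* A compiled-out sketch of driving vg_instrument by hand: build a
   one-insn block which loads 4 bytes through t_addr into t_val, and
   instrument it.  When VG_(clo_check_addrVs) is set, the result is
   TESTV/SETV on t_addr's shadow, then a LOADV, then the original
   LOAD.  The function name is hypothetical and nothing calls it. */
#if 0
static UCodeBlock* example_instrument_load ( void )
{
   UCodeBlock* cb     = allocCodeBlock();
   Int         t_addr = newTemp(cb);
   Int         t_val  = newTemp(cb);
   uInstr2(cb, LOAD, 4, TempReg, t_addr, TempReg, t_val);
   /* vg_instrument frees its argument and returns a fresh block. */
   return vg_instrument(cb);
}
#endif
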
2620/*------------------------------------------------------------*/
2621/*--- Clean up mem check instrumentation. ---*/
2622/*------------------------------------------------------------*/
2623
#define VGC_IS_SHADOW(tempreg) (((tempreg) % 2) == 1)
#define VGC_UNDEF ((UChar)100)
#define VGC_VALUE ((UChar)101)

#define NOP_no_msg(uu)                                            \
   do { (uu)->opcode = NOP; } while (False)

#define NOP_tag1_op(uu)                                           \
   do { (uu)->opcode = NOP;                                       \
        if (VG_(disassemble))                                     \
           VG_(printf)("at %d: delete %s due to defd arg\n",      \
                       i, VG_(nameOfTagOp)((uu)->val3));          \
   } while (False)

#define SETV_tag1_op(uu,newsz)                                    \
   do { (uu)->opcode = SETV;                                      \
        (uu)->size = (newsz);                                     \
        (uu)->tag2 = (uu)->tag3 = NoValue;                        \
        if (VG_(disassemble))                                     \
           VG_(printf)("at %d: convert %s to SETV%d "             \
                       "due to defd arg\n",                       \
                       i, VG_(nameOfTagOp)((uu)->val3), (newsz)); \
   } while (False)


/* Run backwards and delete SETVs on shadow temps for which the next
   action is a write.  Needs an env saying whether or not the next
   action is a write.  The supplied UCodeBlock is destructively
   modified.
*/
static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
{
   Bool*   next_is_write;
   Int     i, j, k, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));

   for (i = 0; i < n_temps; i++) next_is_write[i] = True;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* If we're not checking address V bits, there will be a lot of
         GETVs, TAG1s and TAG2s calculating values which are never
         used.  These first three cases get rid of them. */

      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete GETV\n", i);
      } else

      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
          && next_is_write[u->val1]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG1\n", i);
      } else

      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG2\n", i);
      } else

      /* We do the rest of these regardless of whether or not
         addresses are V-checked. */

      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]) {
         /* This MOV is pointless because the target is dead at this
            point.  Delete it. */
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete MOV\n", i);
      } else

      if (u->opcode == SETV) {
         if (u->tag1 == TempReg) {
            vg_assert(VGC_IS_SHADOW(u->val1));
            if (next_is_write[u->val1]) {
               /* This write is pointless, so annul it. */
               u->opcode = NOP;
               u->size = 0;
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete SETV\n", i);
            } else {
               /* This write has a purpose; don't annul it, but do
                  notice that we did it. */
               next_is_write[u->val1] = True;
            }

         }

      } else {
         /* Find out what this insn does to the temps. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = k-1; j >= 0; j--) {
            next_is_write[ tempUse[j].tempNo ]
               = tempUse[j].isWrite;
         }
      }

   }

   VG_(jitfree)(next_is_write);
}

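/* A compiled-out miniature of the backwards scan above, on a toy
   record in which each instruction touches exactly one temp; all
   names are hypothetical.  Scanning backwards, a write to a temp
   whose next action (later in the block, so earlier in the scan) is
   itself a write is dead and can be annulled; otherwise the scan
   records whether this reference reads or writes the temp. */
#if 0
typedef struct { Int tempNo; Bool isWrite; Bool deleted; } ToyRef;

static void toy_delete_dead_writes ( ToyRef* refs, Int n_refs,
                                     Bool* next_is_write /* [n_temps] */ )
{
   Int i;
   /* next_is_write[] starts all True: at the block end every temp is
      dead, which reads as "the next action is a write". */
   for (i = n_refs-1; i >= 0; i--) {
      if (refs[i].isWrite && next_is_write[refs[i].tempNo]) {
         refs[i].deleted = True;   /* dead store: annul it */
         continue;                 /* leave the env unchanged */
      }
      next_is_write[refs[i].tempNo] = refs[i].isWrite;
   }
}
#endif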

/* Run forwards, propagating and using the is-completely-defined
   property.  This removes a lot of redundant tag-munging code.
   Unfortunately it requires intimate knowledge of how each uinstr and
   tagop modifies its arguments, which duplicates the knowledge of
   uinstr tempreg usage already embodied in getTempUsage().
   The supplied UCodeBlock* is modified in-place.

   For each value temp, def[] should hold VGC_VALUE.

   For each shadow temp, def[] may hold 4, 2, 1 or 0 iff that shadow
   is definitely known to be fully defined at that size.  In all
   other circumstances a shadow's def[] entry is VGC_UNDEF, meaning
   possibly undefined.  In cases of doubt, VGC_UNDEF is always safe.
*/
static void vg_propagate_definedness ( UCodeBlock* cb )
{
   UChar*  def;
   Int     i, j, k, t, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
   for (i = 0; i < n_temps; i++)
      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;

   /* Run forwards, detecting and using the all-defined property. */

   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      switch (u->opcode) {

         /* Tag-handling uinstrs. */

         /* Deal with these quickly. */
         case NOP:
         case INCEIP:
            break;

         /* Make a tag defined. */
         case SETV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            def[u->val1] = u->size;
            break;

         /* Check definedness of a tag. */
         case TESTV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               NOP_no_msg(u);
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete TESTV on defd arg\n", i);
            }
            break;

         /* Applies to both values and tags.  Propagate Definedness
            property through copies.  Note that this isn't optional;
            we *have* to do this to keep def[] correct. */
         case MOV:
            vg_assert(u->tag2 == TempReg);
            if (u->tag1 == TempReg) {
               if (VGC_IS_SHADOW(u->val1)) {
                  vg_assert(VGC_IS_SHADOW(u->val2));
                  def[u->val2] = def[u->val1];
               }
            }
            break;

         case PUTV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               u->tag1 = Literal;
               u->val1 = 0;
               /* V bits of 0 mean "defined"; bytes beyond the operand
                  size keep all-1s (undefined) V bits. */
               switch (u->size) {
                  case 4: u->lit32 = 0x00000000; break;
                  case 2: u->lit32 = 0xFFFF0000; break;
                  case 1: u->lit32 = 0xFFFFFF00; break;
                  default: VG_(panic)("vg_cleanup(PUTV)");
               }
               if (VG_(disassemble))
                  VG_(printf)(
                     "at %d: propagate definedness into PUTV\n", i);
            }
            break;

         case STOREV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               u->tag1 = Literal;
               u->val1 = 0;
               switch (u->size) {
                  case 4: u->lit32 = 0x00000000; break;
                  case 2: u->lit32 = 0xFFFF0000; break;
                  case 1: u->lit32 = 0xFFFFFF00; break;
                  default: VG_(panic)("vg_cleanup(STOREV)");
               }
               if (VG_(disassemble))
                  VG_(printf)(
                     "at %d: propagate definedness into STOREV\n", i);
            }
            break;

         /* Nothing interesting we can do with this, I think. */
         case PUTVF:
            break;

         /* Tag handling operations. */
         case TAG2:
            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
            vg_assert(u->tag3 == Lit16);
            /* Ultra-paranoid "type" checking. */
            switch (u->val3) {
               case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
               case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
               case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
                  break;
               default:
                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
                  break;
            }
            switch (u->val3) {
               Int sz;
               case VgT_UifU4:
                  sz = 4; goto do_UifU;
               case VgT_UifU2:
                  sz = 2; goto do_UifU;
               case VgT_UifU1:
                  sz = 1; goto do_UifU;
               case VgT_UifU0:
                  sz = 0; goto do_UifU;
               do_UifU:
                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
                  if (def[u->val1] <= 4) {
                     /* UifU.  The first arg is defined, so result is
                        simply second arg.  Delete this operation. */
                     vg_assert(def[u->val1] == sz);
                     NOP_no_msg(u);
                     if (VG_(disassemble))
                        VG_(printf)(
                           "at %d: delete UifU%d due to defd arg1\n",
                           i, sz);
                  }
                  else
                  if (def[u->val2] <= 4) {
                     /* UifU.  The second arg is defined, so result is
                        simply first arg.  Copy to second. */
                     vg_assert(def[u->val2] == sz);
                     u->opcode = MOV;
                     u->size = 4;
                     u->tag3 = NoValue;
                     def[u->val2] = def[u->val1];
                     if (VG_(disassemble))
                        VG_(printf)(
                           "at %d: change UifU%d to MOV due to defd"
                           " arg2\n",
                           i, sz);
                  }
                  break;
               case VgT_ImproveAND4_TQ:
                  sz = 4; goto do_ImproveAND;
               case VgT_ImproveAND1_TQ:
                  sz = 1; goto do_ImproveAND;
               do_ImproveAND:
                  /* Implements Q = T OR Q.  So if Q is entirely defined,
                     ie all 0s, we get MOV T, Q. */
                  if (def[u->val2] <= 4) {
                     vg_assert(def[u->val2] == sz);
                     u->size = 4; /* Regardless of sz */
                     u->opcode = MOV;
                     u->tag3 = NoValue;
                     def[u->val2] = VGC_UNDEF;
                     if (VG_(disassemble))
                        VG_(printf)(
                            "at %d: change ImproveAND%d_TQ to MOV due "
                            "to defd arg2\n",
                            i, sz);
                  }
                  break;
               default:
                  goto unhandled;
            }
            break;

         case TAG1:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] > 4) break;
            /* We now know that the arg to the op is entirely defined.
               If the op changes the size of the arg, we must replace
               it with a SETV at the new size.  If it doesn't change
               the size, we can delete it completely. */
            switch (u->val3) {
               /* Maintain the same size ... */
               case VgT_Left4:
                  vg_assert(def[u->val1] == 4);
                  NOP_tag1_op(u);
                  break;
               case VgT_PCast11:
                  vg_assert(def[u->val1] == 1);
                  NOP_tag1_op(u);
                  break;
               /* Change size ... */
               case VgT_PCast40:
                  vg_assert(def[u->val1] == 4);
                  SETV_tag1_op(u,0);
                  def[u->val1] = 0;
                  break;
               case VgT_PCast14:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,4);
                  def[u->val1] = 4;
                  break;
               case VgT_PCast12:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,2);
                  def[u->val1] = 2;
                  break;
               case VgT_PCast10:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,0);
                  def[u->val1] = 0;
                  break;
               case VgT_PCast02:
                  vg_assert(def[u->val1] == 0);
                  SETV_tag1_op(u,2);
                  def[u->val1] = 2;
                  break;
               default:
                  goto unhandled;
            }
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete TAG1 %s due to defd arg\n",
                  i, VG_(nameOfTagOp)(u->val3));
            break;

         default:
         unhandled:
            /* We don't know how to handle this uinstr.  Be safe, and
               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
            k = getTempUsage(u, &tempUse[0]);
            vg_assert(k <= 3);
            for (j = 0; j < k; j++) {
               t = tempUse[j].tempNo;
               vg_assert(t >= 0 && t < n_temps);
               if (!tempUse[j].isWrite) {
                  /* t is read; ignore it. */
                  if (0 && VGC_IS_SHADOW(t) && def[t] <= 4)
                     VG_(printf)("ignoring def %d at %s %s\n",
                                 def[t],
                                 VG_(nameUOpcode)(True, u->opcode),
                                 (u->opcode == TAG1 || u->opcode == TAG2)
                                    ? VG_(nameOfTagOp)(u->val3)
                                    : (Char*)"");
               } else {
                  /* t is written; better nullify it. */
                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
               }
            }
      }
   }

   VG_(jitfree)(def);
}
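
/* A compiled-out model of the two VgT_UifU rewrites above.  With V
   bits 0 = defined, UifU behaves like bitwise OR, so if the first
   argument is all-defined (all 0s) the result is just the second
   argument (hence the deletion), and vice versa (hence the rewrite
   to MOV).  The model_UifU4 name is hypothetical. */
#if 0
static UInt model_UifU4 ( UInt q1, UInt q2 )
{
   return q1 | q2;   /* undefined if undefined in either operand */
}
#endif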


/* Top level post-instrumentation cleanup function. */
static void vg_cleanup ( UCodeBlock* cb )
{
   vg_propagate_definedness ( cb );
   vg_delete_redundant_SETVs ( cb );
}


/*------------------------------------------------------------*/
/*--- Main entry point for the JITter. ---*/
/*------------------------------------------------------------*/

/* Translate the basic block beginning at orig_addr, placing the
   translation in a vg_malloc'd block, the address and size of which
   are returned in trans_addr and trans_size.  Length of the original
   block is also returned in orig_size.  If the latter three are NULL,
   this call is being done for debugging purposes, in which case (a)
   throw away the translation once it is made, and (b) produce a load
   of debugging output.
*/
void VG_(translate) ( Addr  orig_addr,
                      UInt* orig_size,
                      Addr* trans_addr,
                      UInt* trans_size )
{
   Int         n_disassembled_bytes, final_code_size;
   Bool        debugging_translation;
   UChar*      final_code;
   UCodeBlock* cb;

   VGP_PUSHCC(VgpTranslate);
   debugging_translation
      = orig_size == NULL || trans_addr == NULL || trans_size == NULL;

   dis = True;
   dis = debugging_translation;

   /* Check if we're being asked to jump to a silly address, and if so
      record an error message before potentially crashing the entire
      system. */
   if (VG_(clo_instrument) && !debugging_translation && !dis) {
      Addr bad_addr;
      Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
      if (!ok) {
         VG_(record_jump_error)(bad_addr);
      }
   }

   /* if (VG_(overall_in_count) >= 4800) dis=True; */
   if (VG_(disassemble))
      VG_(printf)("\n");
   if (0 || dis
       || (VG_(overall_in_count) > 0 &&
           (VG_(overall_in_count) % 1000 == 0))) {
      if (0 && (VG_(clo_verbosity) > 1 || dis))
         VG_(message)(Vg_UserMsg,
            "trans# %d, bb# %lu, in %d, out %d, addr 0x%x",
            VG_(overall_in_count),
            VG_(bbs_done),
            VG_(overall_in_osize), VG_(overall_in_tsize),
            orig_addr );
   }
   cb = allocCodeBlock();

   /* Disassemble this basic block into cb. */
   VGP_PUSHCC(VgpToUCode);
   n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
   VGP_POPCC;
   /* dis=True; */
   /* if (0&& VG_(translations_done) < 617) */
   /*    dis=False; */
   /* Try and improve the code a bit. */
   if (VG_(clo_optimise)) {
      VGP_PUSHCC(VgpImprove);
      vg_improve ( cb );
      if (VG_(disassemble))
         VG_(ppUCodeBlock) ( cb, "Improved code:" );
      VGP_POPCC;
   }
   /* dis=False; */
   /* Add instrumentation code. */
   if (VG_(clo_instrument)) {
      VGP_PUSHCC(VgpInstrument);
      cb = vg_instrument(cb);
      VGP_POPCC;
      if (VG_(disassemble))
         VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
      if (VG_(clo_cleanup)) {
         VGP_PUSHCC(VgpCleanup);
         vg_cleanup(cb);
         VGP_POPCC;
         if (VG_(disassemble))
            VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
      }
   }

   /* Allocate registers. */
   VGP_PUSHCC(VgpRegAlloc);
   cb = vg_do_register_allocation ( cb );
   VGP_POPCC;
   /* dis=False; */
   /*
   if (VG_(disassemble))
      VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
   */

   VGP_PUSHCC(VgpFromUcode);
   /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
      and so must be VG_(jitfree)'d. */
   final_code = VG_(emit_code)(cb, &final_code_size );
   VGP_POPCC;
   freeCodeBlock(cb);

   if (debugging_translation) {
      /* Only done for debugging -- throw away final result. */
      VG_(jitfree)(final_code);
   } else {
      /* Doing it for real -- return values to caller. */
      *orig_size = n_disassembled_bytes;
      *trans_addr = (Addr)final_code;
      *trans_size = final_code_size;
   }
   VGP_POPCC;
}
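
/* A compiled-out usage sketch of the debugging mode described in the
   comment above VG_(translate): passing NULL for the three output
   parameters makes it translate, print a load of debugging output,
   and discard the result.  The function name and address parameter
   are hypothetical. */
#if 0
static void example_debug_translate ( Addr some_code_addr )
{
   VG_(translate)( some_code_addr, NULL, NULL, NULL );
}
#endif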

/*--------------------------------------------------------------------*/
/*--- end vg_translate.c ---*/
/*--------------------------------------------------------------------*/