blob: 73cad5e9bcd6a8414f3eab590c76b1cca196954d [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, an x86 protected-mode emulator
9 designed for debugging and profiling binaries on x86-Unixes.
10
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33
34
35/*------------------------------------------------------------*/
36/*--- Renamings of frequently-used global functions. ---*/
37/*------------------------------------------------------------*/
38
39#define uInstr1 VG_(newUInstr1)
40#define uInstr2 VG_(newUInstr2)
41#define uInstr3 VG_(newUInstr3)
42#define dis VG_(disassemble)
43#define nameIReg VG_(nameOfIntReg)
44#define nameISize VG_(nameOfIntSize)
45#define uLiteral VG_(setLiteralField)
46#define newTemp VG_(getNewTemp)
47#define newShadow VG_(getNewShadow)
48
49
50/*------------------------------------------------------------*/
51/*--- Memory management for the translater. ---*/
52/*------------------------------------------------------------*/
53
54#define N_JITBLOCKS 4
55#define N_JITBLOCK_SZ 5000
56
57static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
58static Bool jitstorage_inuse[N_JITBLOCKS];
59static Bool jitstorage_initdone = False;
60
61static __inline__ void jitstorage_initialise ( void )
62{
63 Int i;
64 if (jitstorage_initdone) return;
65 jitstorage_initdone = True;
66 for (i = 0; i < N_JITBLOCKS; i++)
67 jitstorage_inuse[i] = False;
68}
69
70void* VG_(jitmalloc) ( Int nbytes )
71{
72 Int i;
73 jitstorage_initialise();
74 if (nbytes > N_JITBLOCK_SZ) {
75 /* VG_(printf)("too large: %d\n", nbytes); */
76 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
77 }
78 for (i = 0; i < N_JITBLOCKS; i++) {
79 if (!jitstorage_inuse[i]) {
80 jitstorage_inuse[i] = True;
81 /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
82 return & jitstorage[i][0];
83 }
84 }
85 VG_(panic)("out of slots in vg_jitmalloc\n");
86 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
87}
88
89void VG_(jitfree) ( void* ptr )
90{
91 Int i;
92 jitstorage_initialise();
93 for (i = 0; i < N_JITBLOCKS; i++) {
94 if (ptr == & jitstorage[i][0]) {
95 vg_assert(jitstorage_inuse[i]);
96 jitstorage_inuse[i] = False;
97 return;
98 }
99 }
100 VG_(free)(VG_AR_PRIVATE, ptr);
101}
102
103/*------------------------------------------------------------*/
104/*--- Basics ---*/
105/*------------------------------------------------------------*/
106
107static UCodeBlock* allocCodeBlock ( void )
108{
109 UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
110 cb->used = cb->size = cb->nextTemp = 0;
111 cb->instrs = NULL;
112 return cb;
113}
114
115
116static void freeCodeBlock ( UCodeBlock* cb )
117{
118 if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
119 VG_(free)(VG_AR_PRIVATE, cb);
120}
121
122
123/* Ensure there's enough space in a block to add one uinstr. */
124static __inline__
125void ensureUInstr ( UCodeBlock* cb )
126{
127 if (cb->used == cb->size) {
128 if (cb->instrs == NULL) {
129 vg_assert(cb->size == 0);
130 vg_assert(cb->used == 0);
131 cb->size = 8;
132 cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
133 } else {
134 Int i;
135 UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
136 2 * sizeof(UInstr) * cb->size);
137 for (i = 0; i < cb->used; i++)
138 instrs2[i] = cb->instrs[i];
139 cb->size *= 2;
140 VG_(free)(VG_AR_PRIVATE, cb->instrs);
141 cb->instrs = instrs2;
142 }
143 }
144
145 vg_assert(cb->used < cb->size);
146}
147
148
149__inline__
150void VG_(emptyUInstr) ( UInstr* u )
151{
152 u->val1 = u->val2 = u->val3 = 0;
153 u->tag1 = u->tag2 = u->tag3 = NoValue;
154 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000155 u->jmpkind = JmpBoring;
156 u->smc_check = u->signed_widen = False;
sewardjde4a1d02002-03-22 01:27:54 +0000157 u->lit32 = 0;
158 u->opcode = 0;
159 u->size = 0;
160 u->cond = 0;
161 u->extra4b = 0;
162}
163
164
165/* Add an instruction to a ucode block, and return the index of the
166 instruction. */
167__inline__
168void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
169 Tag tag1, UInt val1,
170 Tag tag2, UInt val2,
171 Tag tag3, UInt val3 )
172{
173 UInstr* ui;
174 ensureUInstr(cb);
175 ui = & cb->instrs[cb->used];
176 cb->used++;
177 VG_(emptyUInstr)(ui);
178 ui->val1 = val1;
179 ui->val2 = val2;
180 ui->val3 = val3;
181 ui->opcode = opcode;
182 ui->tag1 = tag1;
183 ui->tag2 = tag2;
184 ui->tag3 = tag3;
185 ui->size = sz;
186 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
187 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
188 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
189}
190
191
192__inline__
193void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
194 Tag tag1, UInt val1,
195 Tag tag2, UInt val2 )
196{
197 UInstr* ui;
198 ensureUInstr(cb);
199 ui = & cb->instrs[cb->used];
200 cb->used++;
201 VG_(emptyUInstr)(ui);
202 ui->val1 = val1;
203 ui->val2 = val2;
204 ui->opcode = opcode;
205 ui->tag1 = tag1;
206 ui->tag2 = tag2;
207 ui->size = sz;
208 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
209 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
210}
211
212
213__inline__
214void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
215 Tag tag1, UInt val1 )
216{
217 UInstr* ui;
218 ensureUInstr(cb);
219 ui = & cb->instrs[cb->used];
220 cb->used++;
221 VG_(emptyUInstr)(ui);
222 ui->val1 = val1;
223 ui->opcode = opcode;
224 ui->tag1 = tag1;
225 ui->size = sz;
226 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
227}
228
229
230__inline__
231void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
232{
233 UInstr* ui;
234 ensureUInstr(cb);
235 ui = & cb->instrs[cb->used];
236 cb->used++;
237 VG_(emptyUInstr)(ui);
238 ui->opcode = opcode;
239 ui->size = sz;
240}
241
242
243/* Copy an instruction into the given codeblock. */
244static __inline__
245void copyUInstr ( UCodeBlock* cb, UInstr* instr )
246{
247 ensureUInstr(cb);
248 cb->instrs[cb->used] = *instr;
249 cb->used++;
250}
251
252
253/* Copy auxiliary info from one uinstr to another. */
254static __inline__
255void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
256{
257 dst->cond = src->cond;
258 dst->extra4b = src->extra4b;
259 dst->smc_check = src->smc_check;
260 dst->signed_widen = src->signed_widen;
sewardj2e93c502002-04-12 11:12:52 +0000261 dst->jmpkind = src->jmpkind;
sewardjde4a1d02002-03-22 01:27:54 +0000262 dst->flags_r = src->flags_r;
263 dst->flags_w = src->flags_w;
264}
265
266
267/* Set the flag R/W sets on a uinstr. */
268void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
269{
270 /* VG_(ppUInstr)(-1,u); */
271 vg_assert(fr == (fr & FlagsALL));
272 vg_assert(fw == (fw & FlagsALL));
273 u->flags_r = fr;
274 u->flags_w = fw;
275}
276
277
278/* Set the lit32 field of the most recent uinsn. */
279void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
280{
281 LAST_UINSTR(cb).lit32 = lit32;
282}
283
284
285Bool VG_(anyFlagUse) ( UInstr* u )
286{
287 return (u->flags_r != FlagsEmpty
288 || u->flags_w != FlagsEmpty);
289}
290
291
292
293
294/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
295 register number. This effectively defines the order in which real
296 registers are allocated. %ebp is excluded since it is permanently
297 reserved for pointing at VG_(baseBlock). %edi is a general spare
298 temp used for Left4 and various misc tag ops.
299
300 Important! If you change the set of allocatable registers from
301 %eax, %ebx, %ecx, %edx, %esi you must change the
302 save/restore sequences in vg_helper_smc_check4 to match!
303*/
304__inline__ Int VG_(rankToRealRegNo) ( Int rank )
305{
306 switch (rank) {
307# if 1
308 /* Probably the best allocation ordering. */
309 case 0: return R_EAX;
310 case 1: return R_EBX;
311 case 2: return R_ECX;
312 case 3: return R_EDX;
313 case 4: return R_ESI;
314# else
315 /* Contrary; probably the worst. Helpful for debugging, tho. */
316 case 4: return R_EAX;
317 case 3: return R_EBX;
318 case 2: return R_ECX;
319 case 1: return R_EDX;
320 case 0: return R_ESI;
321# endif
322 default: VG_(panic)("rankToRealRegNo");
323 }
324}
325
326
327/*------------------------------------------------------------*/
328/*--- Sanity checking uinstrs. ---*/
329/*------------------------------------------------------------*/
330
331/* This seems as good a place as any to record some important stuff
332 about ucode semantics.
333
334 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
335 TempReg are defined to zero-extend the loaded value to 32 bits.
336 This is needed to make the translation of movzbl et al work
337 properly.
338
339 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
340
341 * Arithmetic on TempRegs is at the specified size. For example,
342 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
343 being emitted -- not a 32 bit one.
344
345 * On some insns we allow the cc bit to be set. If so, the
346 intention is that the simulated machine's %eflags register
347 is copied into that of the real machine before the insn,
348 and copied back again afterwards. This means that the
349 code generated for that insn must be very careful only to
350 update %eflags in the intended way. This is particularly
351 important for the routines referenced by CALL insns.
352*/
353
354/* Meaning of operand kinds is as follows:
355
356 ArchReg is a register of the simulated CPU, stored in memory,
357 in vg_m_state.m_eax .. m_edi. These values are stored
358 using the Intel register encoding.
359
360 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
361 available for allocation. As with ArchRegs, these values
362 are stored using the Intel register encoding.
363
364 TempReg is a temporary register used to express the results of
365 disassembly. There is an unlimited supply of them --
366 register allocation and spilling eventually assigns them
367 to RealRegs.
368
369 SpillNo is a spill slot number. The number of required spill
370 slots is VG_MAX_PSEUDOS, in general. Only allowed
371 as the ArchReg operand of GET and PUT.
372
373 Lit16 is a signed 16-bit literal value.
374
375 Literal is a 32-bit literal value. Each uinstr can only hold
376 one of these.
377
378 The disassembled code is expressed purely in terms of ArchReg,
379 TempReg and Literal operands. Eventually, register allocation
380 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
381 and Literals. New x86 code can easily be synthesised from this.
382 There are carefully designed restrictions on which insns can have
383 which operands, intended to make it possible to generate x86 code
384 from the result of register allocation on the ucode efficiently and
385 without need of any further RealRegs.
386
387 Restrictions on insns (as generated by the disassembler) are as
388 follows:
389
390 A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
391 N=NoValue
392
393 GETF T N N
394 PUTF T N N
395
396 GET A,S T N
397 PUT T A,S N
398 LOAD T T N
399 STORE T T N
400 MOV T,L T N
401 CMOV T T N
402 WIDEN T N N
403 JMP T,L N N
404 CALLM L N N
405 CALLM_S N N N
406 CALLM_E N N N
407 PUSH,POP T N N
408 CLEAR L N N
409
410 AND, OR
411 T T N
412
413 ADD, ADC, XOR, SUB, SBB
414 A,L,T T N
415
416 SHL, SHR, SAR, ROL, ROR, RCL, RCR
417 L,T T N
418
419 NOT, NEG, INC, DEC, CC2VAL, BSWAP
420 T N N
421
422 JIFZ T L N
423
424 FPU_R L T N
425 FPU_W L T N
426 FPU L T N
427
428 LEA1 T T (const in a seperate field)
429 LEA2 T T T (const & shift ditto)
430
431 INCEIP L N N
432
433 and for instrumentation insns:
434
435 LOADV T T N
436 STOREV T,L T N
437 GETV A T N
438 PUTV T,L A N
439 GETVF T N N
440 PUTVF T N N
441 WIDENV T N N
442 TESTV A,T N N
443 SETV A,T N N
444 TAG1 T N N
445 TAG2 T T N
446
447 Before register allocation, S operands should not appear anywhere.
448 After register allocation, all T operands should have been
449 converted into Rs, and S operands are allowed in GET and PUT --
450 denoting spill saves/restores.
451
452 The size field should be 0 for insns for which it is meaningless,
453 ie those which do not directly move/operate on data.
454*/
455Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
456{
457# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
458# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
459# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
460# define A1 (u->tag1 == ArchReg)
461# define A2 (u->tag2 == ArchReg)
462# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
463# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
464# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
465# define L1 (u->tag1 == Literal && u->val1 == 0)
466# define L2 (u->tag2 == Literal && u->val2 == 0)
467# define Ls1 (u->tag1 == Lit16)
468# define Ls3 (u->tag3 == Lit16)
469# define N1 (u->tag1 == NoValue)
470# define N2 (u->tag2 == NoValue)
471# define N3 (u->tag3 == NoValue)
472# define SZ4 (u->size == 4)
473# define SZ2 (u->size == 2)
474# define SZ1 (u->size == 1)
475# define SZ0 (u->size == 0)
476# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
477# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
478# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
sewardj8d32be72002-04-18 02:18:24 +0000479# define FLG_RD_WR_MAYBE \
480 ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) \
481 || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP) \
482 || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
sewardjde4a1d02002-03-22 01:27:54 +0000483# define CC1 (!(CC0))
484# define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
485 ? (u->size == 4) : True)
486
487 Int n_lits = 0;
488 if (u->tag1 == Literal) n_lits++;
489 if (u->tag2 == Literal) n_lits++;
490 if (u->tag3 == Literal) n_lits++;
491 if (n_lits > 1)
492 return False;
493
494 switch (u->opcode) {
495 case GETF:
sewardj8d32be72002-04-18 02:18:24 +0000496 return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
sewardjde4a1d02002-03-22 01:27:54 +0000497 case PUTF:
sewardj8d32be72002-04-18 02:18:24 +0000498 return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
sewardjde4a1d02002-03-22 01:27:54 +0000499 case CALLM_S: case CALLM_E:
500 return SZ0 && N1 && N2 && N3;
501 case INCEIP:
502 return SZ0 && CC0 && Ls1 && N2 && N3;
503 case LEA1:
504 return CC0 && TR1 && TR2 && N3 && SZ4;
505 case LEA2:
506 return CC0 && TR1 && TR2 && TR3 && SZ4;
507 case NOP:
508 return SZ0 && CC0 && N1 && N2 && N3;
509 case GET:
510 return CC0 && AS1 && TR2 && N3;
511 case PUT:
512 return CC0 && TR1 && AS2 && N3;
513 case LOAD: case STORE:
514 return CC0 && TR1 && TR2 && N3;
515 case MOV:
516 return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
517 case CMOV:
518 return CC1 && TR1 && TR2 && N3 && SZ4;
519 case JMP:
520 return (u->cond==CondAlways ? CC0 : CC1)
521 && (TR1 || L1) && N2 && SZ0 && N3;
522 case CLEAR:
523 return CC0 && Ls1 && N2 && SZ0 && N3;
524 case CALLM:
525 return SZ0 && Ls1 && N2 && N3;
526 case PUSH: case POP:
527 return CC0 && TR1 && N2 && N3;
528 case AND: case OR:
529 return TR1 && TR2 && N3;
530 case ADD: case ADC: case XOR: case SUB: case SBB:
531 return (A1 || TR1 || L1) && TR2 && N3;
532 case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
533 return (TR1 || L1) && TR2 && N3;
534 case NOT: case NEG: case INC: case DEC:
535 return TR1 && N2 && N3;
536 case BSWAP:
537 return TR1 && N2 && N3 && CC0 && SZ4;
538 case CC2VAL:
539 return CC1 && SZ1 && TR1 && N2 && N3;
540 case JIFZ:
541 return CC0 && SZ4 && TR1 && L2 && N3;
542 case FPU_R: case FPU_W:
543 return CC0 && Ls1 && TR2 && N3;
544 case FPU:
sewardj8d32be72002-04-18 02:18:24 +0000545 return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
sewardjde4a1d02002-03-22 01:27:54 +0000546 case LOADV:
547 return CC0 && TR1 && TR2 && N3;
548 case STOREV:
549 return CC0 && (TR1 || L1) && TR2 && N3;
550 case GETV:
551 return CC0 && A1 && TR2 && N3;
552 case PUTV:
553 return CC0 && (TR1 || L1) && A2 && N3;
554 case GETVF:
555 return CC0 && TR1 && N2 && N3 && SZ0;
556 case PUTVF:
557 return CC0 && TR1 && N2 && N3 && SZ0;
558 case WIDEN:
559 return CC0 && TR1 && N2 && N3;
560 case TESTV:
561 return CC0 && (A1 || TR1) && N2 && N3;
562 case SETV:
563 return CC0 && (A1 || TR1) && N2 && N3;
564 case TAG1:
565 return CC0 && TR1 && N2 && Ls3 && SZ0;
566 case TAG2:
567 return CC0 && TR1 && TR2 && Ls3 && SZ0;
568 default:
569 VG_(panic)("vg_saneUInstr: unhandled opcode");
570 }
571# undef SZ4_IF_TR1
572# undef CC0
573# undef CC1
574# undef SZ4
575# undef SZ2
576# undef SZ1
577# undef SZ0
578# undef TR1
579# undef TR2
580# undef TR3
581# undef A1
582# undef A2
583# undef AS1
584# undef AS2
585# undef AS3
586# undef L1
587# undef Ls1
588# undef L2
589# undef Ls3
590# undef N1
591# undef N2
592# undef N3
593# undef FLG_RD
594# undef FLG_WR
sewardj8d32be72002-04-18 02:18:24 +0000595# undef FLG_RD_WR_MAYBE
sewardjde4a1d02002-03-22 01:27:54 +0000596}
597
598
599/* Sanity checks to do with CALLMs in UCodeBlocks. */
600Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
601{
602 Int callm = 0;
603 Int callm_s = 0;
604 Int callm_e = 0;
605 Int callm_ptr, calls_ptr;
606 Int i, j, t;
607 Bool incall = False;
608
609 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
610
611 for (i = 0; i < cb->used; i++) {
612 switch (cb->instrs[i].opcode) {
613 case CALLM:
614 if (!incall) return False;
615 callm++;
616 break;
617 case CALLM_S:
618 if (incall) return False;
619 incall = True;
620 callm_s++;
621 break;
622 case CALLM_E:
623 if (!incall) return False;
624 incall = False;
625 callm_e++;
626 break;
627 case PUSH: case POP: case CLEAR:
628 if (!incall) return False;
629 break;
630 default:
631 break;
632 }
633 }
634 if (incall) return False;
635 if (callm != callm_s || callm != callm_e) return False;
636
637 /* Check the sections between CALLM_S and CALLM's. Ensure that no
638 PUSH uinsn pushes any TempReg that any other PUSH in the same
639 section pushes. Ie, check that the TempReg args to PUSHes in
640 the section are unique. If not, the instrumenter generates
641 incorrect code for CALLM insns. */
642
643 callm_ptr = 0;
644
645 find_next_CALLM:
646 /* Search for the next interval, making calls_ptr .. callm_ptr
647 bracket it. */
648 while (callm_ptr < cb->used
649 && cb->instrs[callm_ptr].opcode != CALLM)
650 callm_ptr++;
651 if (callm_ptr == cb->used)
652 return True;
653 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
654
655 calls_ptr = callm_ptr - 1;
656 while (cb->instrs[calls_ptr].opcode != CALLM_S)
657 calls_ptr--;
658 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
659 vg_assert(calls_ptr >= 0);
660
661 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
662
663 /* For each PUSH insn in the interval ... */
664 for (i = calls_ptr + 1; i < callm_ptr; i++) {
665 if (cb->instrs[i].opcode != PUSH) continue;
666 t = cb->instrs[i].val1;
667 /* Ensure no later PUSH insns up to callm_ptr push the same
668 TempReg. Return False if any such are found. */
669 for (j = i+1; j < callm_ptr; j++) {
670 if (cb->instrs[j].opcode == PUSH &&
671 cb->instrs[j].val1 == t)
672 return False;
673 }
674 }
675
676 /* This interval is clean. Keep going ... */
677 callm_ptr++;
678 goto find_next_CALLM;
679}
680
681
682/*------------------------------------------------------------*/
683/*--- Printing uinstrs. ---*/
684/*------------------------------------------------------------*/
685
686Char* VG_(nameCondcode) ( Condcode cond )
687{
688 switch (cond) {
689 case CondO: return "o";
690 case CondNO: return "no";
691 case CondB: return "b";
692 case CondNB: return "nb";
693 case CondZ: return "z";
694 case CondNZ: return "nz";
695 case CondBE: return "be";
696 case CondNBE: return "nbe";
697 case CondS: return "s";
698 case ConsNS: return "ns";
699 case CondP: return "p";
700 case CondNP: return "np";
701 case CondL: return "l";
702 case CondNL: return "nl";
703 case CondLE: return "le";
704 case CondNLE: return "nle";
705 case CondAlways: return "MP"; /* hack! */
706 default: VG_(panic)("nameCondcode");
707 }
708}
709
710
711static void vg_ppFlagSet ( Char* prefix, FlagSet set )
712{
713 VG_(printf)("%s", prefix);
714 if (set & FlagD) VG_(printf)("D");
715 if (set & FlagO) VG_(printf)("O");
716 if (set & FlagS) VG_(printf)("S");
717 if (set & FlagZ) VG_(printf)("Z");
718 if (set & FlagA) VG_(printf)("A");
719 if (set & FlagC) VG_(printf)("C");
720 if (set & FlagP) VG_(printf)("P");
721}
722
723
724static void ppTempReg ( Int tt )
725{
726 if ((tt & 1) == 0)
727 VG_(printf)("t%d", tt);
728 else
729 VG_(printf)("q%d", tt-1);
730}
731
732
733static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
734{
735 UInt tag, val;
736 switch (operandNo) {
737 case 1: tag = u->tag1; val = u->val1; break;
738 case 2: tag = u->tag2; val = u->val2; break;
739 case 3: tag = u->tag3; val = u->val3; break;
740 default: VG_(panic)("ppUOperand(1)");
741 }
742 if (tag == Literal) val = u->lit32;
743
744 if (parens) VG_(printf)("(");
745 switch (tag) {
746 case TempReg: ppTempReg(val); break;
747 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
748 case Literal: VG_(printf)("$0x%x", val); break;
749 case Lit16: VG_(printf)("$0x%x", val); break;
750 case NoValue: VG_(printf)("NoValue"); break;
751 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
752 case SpillNo: VG_(printf)("spill%d", val); break;
753 default: VG_(panic)("ppUOperand(2)");
754 }
755 if (parens) VG_(printf)(")");
756}
757
758
759Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
760{
761 switch (opc) {
762 case ADD: return (upper ? "ADD" : "add");
763 case ADC: return (upper ? "ADC" : "adc");
764 case AND: return (upper ? "AND" : "and");
765 case OR: return (upper ? "OR" : "or");
766 case XOR: return (upper ? "XOR" : "xor");
767 case SUB: return (upper ? "SUB" : "sub");
768 case SBB: return (upper ? "SBB" : "sbb");
769 case SHL: return (upper ? "SHL" : "shl");
770 case SHR: return (upper ? "SHR" : "shr");
771 case SAR: return (upper ? "SAR" : "sar");
772 case ROL: return (upper ? "ROL" : "rol");
773 case ROR: return (upper ? "ROR" : "ror");
774 case RCL: return (upper ? "RCL" : "rcl");
775 case RCR: return (upper ? "RCR" : "rcr");
776 case NOT: return (upper ? "NOT" : "not");
777 case NEG: return (upper ? "NEG" : "neg");
778 case INC: return (upper ? "INC" : "inc");
779 case DEC: return (upper ? "DEC" : "dec");
780 case BSWAP: return (upper ? "BSWAP" : "bswap");
781 default: break;
782 }
783 if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
784 switch (opc) {
785 case GETVF: return "GETVF";
786 case PUTVF: return "PUTVF";
787 case TAG1: return "TAG1";
788 case TAG2: return "TAG2";
789 case CALLM_S: return "CALLM_S";
790 case CALLM_E: return "CALLM_E";
791 case INCEIP: return "INCEIP";
792 case LEA1: return "LEA1";
793 case LEA2: return "LEA2";
794 case NOP: return "NOP";
795 case GET: return "GET";
796 case PUT: return "PUT";
797 case GETF: return "GETF";
798 case PUTF: return "PUTF";
799 case LOAD: return "LD" ;
800 case STORE: return "ST" ;
801 case MOV: return "MOV";
802 case CMOV: return "CMOV";
803 case WIDEN: return "WIDEN";
804 case JMP: return "J" ;
805 case JIFZ: return "JIFZ" ;
806 case CALLM: return "CALLM";
807 case PUSH: return "PUSH" ;
808 case POP: return "POP" ;
809 case CLEAR: return "CLEAR";
810 case CC2VAL: return "CC2VAL";
811 case FPU_R: return "FPU_R";
812 case FPU_W: return "FPU_W";
813 case FPU: return "FPU" ;
814 case LOADV: return "LOADV";
815 case STOREV: return "STOREV";
816 case GETV: return "GETV";
817 case PUTV: return "PUTV";
818 case TESTV: return "TESTV";
819 case SETV: return "SETV";
820 default: VG_(panic)("nameUOpcode: unhandled case");
821 }
822}
823
824
825void VG_(ppUInstr) ( Int instrNo, UInstr* u )
826{
827 VG_(printf)("\t%4d: %s", instrNo,
828 VG_(nameUOpcode)(True, u->opcode));
829 if (u->opcode == JMP || u->opcode == CC2VAL)
830 VG_(printf)("%s", VG_(nameCondcode(u->cond)));
831
832 switch (u->size) {
833 case 0: VG_(printf)("o"); break;
834 case 1: VG_(printf)("B"); break;
835 case 2: VG_(printf)("W"); break;
836 case 4: VG_(printf)("L"); break;
837 case 8: VG_(printf)("Q"); break;
838 default: VG_(printf)("%d", (Int)u->size); break;
839 }
840
841 switch (u->opcode) {
842
843 case TAG1:
844 VG_(printf)("\t");
845 ppUOperand(u, 1, 4, False);
846 VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
847 ppUOperand(u, 1, 4, False);
848 VG_(printf)(" )");
849 break;
850
851 case TAG2:
852 VG_(printf)("\t");
853 ppUOperand(u, 2, 4, False);
854 VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
855 ppUOperand(u, 1, 4, False);
856 VG_(printf)(", ");
857 ppUOperand(u, 2, 4, False);
858 VG_(printf)(" )");
859 break;
860
861 case CALLM_S: case CALLM_E:
862 break;
863
864 case INCEIP:
865 VG_(printf)("\t$%d", u->val1);
866 break;
867
868 case LEA2:
869 VG_(printf)("\t%d(" , u->lit32);
870 ppUOperand(u, 1, 4, False);
871 VG_(printf)(",");
872 ppUOperand(u, 2, 4, False);
873 VG_(printf)(",%d), ", (Int)u->extra4b);
874 ppUOperand(u, 3, 4, False);
875 break;
876
877 case LEA1:
878 VG_(printf)("\t%d" , u->lit32);
879 ppUOperand(u, 1, 4, True);
880 VG_(printf)(", ");
881 ppUOperand(u, 2, 4, False);
882 break;
883
884 case NOP:
885 break;
886
887 case FPU_W:
888 VG_(printf)("\t0x%x:0x%x, ",
889 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
890 ppUOperand(u, 2, 4, True);
891 break;
892
893 case FPU_R:
894 VG_(printf)("\t");
895 ppUOperand(u, 2, 4, True);
896 VG_(printf)(", 0x%x:0x%x",
897 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
898 break;
899
900 case FPU:
901 VG_(printf)("\t0x%x:0x%x",
902 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
903 break;
904
905 case STOREV: case LOADV:
906 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
907 VG_(printf)("\t");
908 ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
909 VG_(printf)(", ");
910 ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
911 break;
912
913 case GETF: case PUTF:
914 VG_(printf)("\t");
915 ppUOperand(u, 1, u->size, False);
916 break;
917
918 case JMP: case CC2VAL:
919 case PUSH: case POP: case CLEAR: case CALLM:
sewardj2e93c502002-04-12 11:12:52 +0000920 if (u->opcode == JMP) {
921 switch (u->jmpkind) {
922 case JmpCall: VG_(printf)("-c"); break;
923 case JmpRet: VG_(printf)("-r"); break;
924 case JmpSyscall: VG_(printf)("-sys"); break;
925 case JmpClientReq: VG_(printf)("-cli"); break;
926 default: break;
927 }
928 }
sewardjde4a1d02002-03-22 01:27:54 +0000929 VG_(printf)("\t");
930 ppUOperand(u, 1, u->size, False);
931 break;
932
933 case JIFZ:
934 VG_(printf)("\t");
935 ppUOperand(u, 1, u->size, False);
936 VG_(printf)(", ");
937 ppUOperand(u, 2, u->size, False);
938 break;
939
940 case PUTVF: case GETVF:
941 VG_(printf)("\t");
942 ppUOperand(u, 1, 0, False);
943 break;
944
945 case NOT: case NEG: case INC: case DEC: case BSWAP:
946 VG_(printf)("\t");
947 ppUOperand(u, 1, u->size, False);
948 break;
949
950 case ADD: case ADC: case AND: case OR:
951 case XOR: case SUB: case SBB:
952 case SHL: case SHR: case SAR:
953 case ROL: case ROR: case RCL: case RCR:
954 VG_(printf)("\t");
955 ppUOperand(u, 1, u->size, False);
956 VG_(printf)(", ");
957 ppUOperand(u, 2, u->size, False);
958 break;
959
960 case GETV: case PUTV:
961 VG_(printf)("\t");
962 ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
963 VG_(printf)(", ");
964 ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
965 break;
966
967 case WIDEN:
968 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
969 u->signed_widen?'s':'z');
970 VG_(printf)("\t");
971 ppUOperand(u, 1, u->size, False);
972 break;
973
974 case TESTV: case SETV:
975 VG_(printf)("\t");
976 ppUOperand(u, 1, u->size, False);
977 break;
978
979 default: VG_(panic)("ppUInstr: unhandled opcode");
980 }
981
982 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
983 VG_(printf)(" (");
984 if (u->flags_r != FlagsEmpty)
985 vg_ppFlagSet("-r", u->flags_r);
986 if (u->flags_w != FlagsEmpty)
987 vg_ppFlagSet("-w", u->flags_w);
988 VG_(printf)(")");
989 }
990 VG_(printf)("\n");
991}
992
993
994void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
995{
996 Int i;
997 VG_(printf)("\n%s\n", title);
998 for (i = 0; i < cb->used; i++)
999 if (0 || cb->instrs[i].opcode != NOP)
1000 VG_(ppUInstr) ( i, &cb->instrs[i] );
1001 VG_(printf)("\n");
1002}
1003
1004
1005/*------------------------------------------------------------*/
1006/*--- uinstr helpers for register allocation ---*/
1007/*--- and code improvement. ---*/
1008/*------------------------------------------------------------*/
1009
1010/* A structure for communicating temp uses, and for indicating
1011 temp->real register mappings for patchUInstr. */
1012typedef
1013 struct {
1014 Int realNo;
1015 Int tempNo;
1016 Bool isWrite;
1017 }
1018 TempUse;
1019
1020
1021/* Get the temp use of a uinstr, parking them in an array supplied by
1022 the caller, which is assumed to be big enough. Return the number
1023 of entries. Insns which read _and_ write a register wind up
1024 mentioning it twice. Entries are placed in the array in program
1025 order, so that if a reg is read-modified-written, it appears first
1026 as a read and then as a write.
1027*/
1028static __inline__
1029Int getTempUsage ( UInstr* u, TempUse* arr )
1030{
1031
1032# define RD(ono) \
1033 if (mycat(u->tag,ono) == TempReg) \
1034 { arr[n].tempNo = mycat(u->val,ono); \
1035 arr[n].isWrite = False; n++; }
1036# define WR(ono) \
1037 if (mycat(u->tag,ono) == TempReg) \
1038 { arr[n].tempNo = mycat(u->val,ono); \
1039 arr[n].isWrite = True; n++; }
1040
1041 Int n = 0;
1042 switch (u->opcode) {
1043 case LEA1: RD(1); WR(2); break;
1044 case LEA2: RD(1); RD(2); WR(3); break;
1045
1046 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
1047 case FPU_R: case FPU_W: RD(2); break;
1048
1049 case GETF: WR(1); break;
1050 case PUTF: RD(1); break;
1051
1052 case GET: WR(2); break;
1053 case PUT: RD(1); break;
1054 case LOAD: RD(1); WR(2); break;
1055 case STORE: RD(1); RD(2); break;
1056 case MOV: RD(1); WR(2); break;
1057
1058 case JMP: RD(1); break;
1059 case CLEAR: case CALLM: break;
1060
1061 case PUSH: RD(1); break;
1062 case POP: WR(1); break;
1063
1064 case TAG2:
1065 case CMOV:
1066 case ADD: case ADC: case AND: case OR:
1067 case XOR: case SUB: case SBB:
1068 RD(1); RD(2); WR(2); break;
1069
1070 case SHL: case SHR: case SAR:
1071 case ROL: case ROR: case RCL: case RCR:
1072 RD(1); RD(2); WR(2); break;
1073
1074 case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
1075 RD(1); WR(1); break;
1076
1077 case WIDEN: RD(1); WR(1); break;
1078
1079 case CC2VAL: WR(1); break;
1080 case JIFZ: RD(1); break;
1081
1082 /* These sizes are only ever consulted when the instrumentation
1083 code is being added, so the following can return
1084 manifestly-bogus sizes. */
1085 case LOADV: RD(1); WR(2); break;
1086 case STOREV: RD(1); RD(2); break;
1087 case GETV: WR(2); break;
1088 case PUTV: RD(1); break;
1089 case TESTV: RD(1); break;
1090 case SETV: WR(1); break;
1091 case PUTVF: RD(1); break;
1092 case GETVF: WR(1); break;
1093
1094 default: VG_(panic)("getTempUsage: unhandled opcode");
1095 }
1096 return n;
1097
1098# undef RD
1099# undef WR
1100}
1101
1102
1103/* Change temp regs in u into real regs, as directed by tmap. */
1104static __inline__
1105void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
1106{
1107 Int i;
1108 if (u->tag1 == TempReg) {
1109 for (i = 0; i < n_tmap; i++)
1110 if (tmap[i].tempNo == u->val1) break;
1111 if (i == n_tmap) VG_(panic)("patchUInstr(1)");
1112 u->tag1 = RealReg;
1113 u->val1 = tmap[i].realNo;
1114 }
1115 if (u->tag2 == TempReg) {
1116 for (i = 0; i < n_tmap; i++)
1117 if (tmap[i].tempNo == u->val2) break;
1118 if (i == n_tmap) VG_(panic)("patchUInstr(2)");
1119 u->tag2 = RealReg;
1120 u->val2 = tmap[i].realNo;
1121 }
1122 if (u->tag3 == TempReg) {
1123 for (i = 0; i < n_tmap; i++)
1124 if (tmap[i].tempNo == u->val3) break;
1125 if (i == n_tmap) VG_(panic)("patchUInstr(3)");
1126 u->tag3 = RealReg;
1127 u->val3 = tmap[i].realNo;
1128 }
1129}
1130
1131
1132/* Tedious x86-specific hack which compensates for the fact that the
1133 register numbers for %ah .. %dh do not correspond to those for %eax
1134 .. %edx. It maps a (reg size, reg no) pair to the number of the
1135 containing 32-bit reg. */
1136static __inline__
1137Int containingArchRegOf ( Int sz, Int aregno )
1138{
1139 switch (sz) {
1140 case 4: return aregno;
1141 case 2: return aregno;
1142 case 1: return aregno >= 4 ? aregno-4 : aregno;
1143 default: VG_(panic)("containingArchRegOf");
1144 }
1145}
1146
1147
1148/* If u reads an ArchReg, return the number of the containing arch
1149 reg. Otherwise return -1. Used in redundant-PUT elimination. */
1150static __inline__
1151Int maybe_uinstrReadsArchReg ( UInstr* u )
1152{
1153 switch (u->opcode) {
1154 case GET:
1155 case ADD: case ADC: case AND: case OR:
1156 case XOR: case SUB: case SBB:
1157 case SHL: case SHR: case SAR: case ROL:
1158 case ROR: case RCL: case RCR:
1159 if (u->tag1 == ArchReg)
1160 return containingArchRegOf ( u->size, u->val1 );
1161 else
1162 return -1;
1163
1164 case GETF: case PUTF:
1165 case CALLM_S: case CALLM_E:
1166 case INCEIP:
1167 case LEA1:
1168 case LEA2:
1169 case NOP:
1170 case PUT:
1171 case LOAD:
1172 case STORE:
1173 case MOV:
1174 case CMOV:
1175 case JMP:
1176 case CALLM: case CLEAR: case PUSH: case POP:
1177 case NOT: case NEG: case INC: case DEC: case BSWAP:
1178 case CC2VAL:
1179 case JIFZ:
1180 case FPU: case FPU_R: case FPU_W:
1181 case WIDEN:
1182 return -1;
1183
1184 default:
1185 VG_(ppUInstr)(0,u);
1186 VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
1187 }
1188}
1189
1190static __inline__
1191Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1192{
1193 Int i, k;
1194 TempUse tempUse[3];
1195 k = getTempUsage ( u, &tempUse[0] );
1196 for (i = 0; i < k; i++)
1197 if (tempUse[i].tempNo == tempreg)
1198 return True;
1199 return False;
1200}
1201
1202
1203/*------------------------------------------------------------*/
1204/*--- ucode improvement. ---*/
1205/*------------------------------------------------------------*/
1206
1207/* Improve the code in cb by doing
1208 -- Redundant ArchReg-fetch elimination
1209 -- Redundant PUT elimination
1210 -- Redundant cond-code restore/save elimination
1211 The overall effect of these is to allow target registers to be
1212 cached in host registers over multiple target insns.
1213*/
1214static void vg_improve ( UCodeBlock* cb )
1215{
1216 Int i, j, k, m, n, ar, tr, told, actual_areg;
1217 Int areg_map[8];
1218 Bool annul_put[8];
1219 TempUse tempUse[3];
1220 UInstr* u;
1221 Bool wr;
1222 Int* last_live_before;
1223 FlagSet future_dead_flags;
1224
1225 if (cb->nextTemp > 0)
1226 last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
1227 else
1228 last_live_before = NULL;
1229
1230
1231 /* PASS 1: redundant GET elimination. (Actually, more general than
1232 that -- eliminates redundant fetches of ArchRegs). */
1233
1234 /* Find the live-range-ends for all temporaries. Duplicates code
1235 in the register allocator :-( */
1236
1237 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1238
1239 for (i = cb->used-1; i >= 0; i--) {
1240 u = &cb->instrs[i];
1241
1242 k = getTempUsage(u, &tempUse[0]);
1243
1244 /* For each temp usage ... bwds in program order. */
1245 for (j = k-1; j >= 0; j--) {
1246 tr = tempUse[j].tempNo;
1247 wr = tempUse[j].isWrite;
1248 if (last_live_before[tr] == -1) {
1249 vg_assert(tr >= 0 && tr < cb->nextTemp);
1250 last_live_before[tr] = wr ? (i+1) : i;
1251 }
1252 }
1253
1254 }
1255
1256# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1257 { Int q; \
1258 /* Invalidate any old binding(s) to tempreg. */ \
1259 for (q = 0; q < 8; q++) \
1260 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1261 /* Add the new binding. */ \
1262 areg_map[archreg] = (tempreg); \
1263 }
1264
1265 /* Set up the A-reg map. */
1266 for (i = 0; i < 8; i++) areg_map[i] = -1;
1267
1268 /* Scan insns. */
1269 for (i = 0; i < cb->used; i++) {
1270 u = &cb->instrs[i];
1271 if (u->opcode == GET && u->size == 4) {
1272 /* GET; see if it can be annulled. */
1273 vg_assert(u->tag1 == ArchReg);
1274 vg_assert(u->tag2 == TempReg);
1275 ar = u->val1;
1276 tr = u->val2;
1277 told = areg_map[ar];
1278 if (told != -1 && last_live_before[told] <= i) {
1279 /* ar already has an old mapping to told, but that runs
1280 out here. Annul this GET, rename tr to told for the
1281 rest of the block, and extend told's live range to that
1282 of tr. */
1283 u->opcode = NOP;
1284 u->tag1 = u->tag2 = NoValue;
1285 n = last_live_before[tr] + 1;
1286 if (n > cb->used) n = cb->used;
1287 last_live_before[told] = last_live_before[tr];
1288 last_live_before[tr] = i-1;
1289 if (VG_(disassemble))
1290 VG_(printf)(
1291 "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
1292 i, tr, told,i+1, n-1);
1293 for (m = i+1; m < n; m++) {
1294 if (cb->instrs[m].tag1 == TempReg
1295 && cb->instrs[m].val1 == tr)
1296 cb->instrs[m].val1 = told;
1297 if (cb->instrs[m].tag2 == TempReg
1298 && cb->instrs[m].val2 == tr)
1299 cb->instrs[m].val2 = told;
1300 }
1301 BIND_ARCH_TO_TEMP(ar,told);
1302 }
1303 else
1304 BIND_ARCH_TO_TEMP(ar,tr);
1305 }
1306 else if (u->opcode == GET && u->size != 4) {
1307 /* Invalidate any mapping for this archreg. */
1308 actual_areg = containingArchRegOf ( u->size, u->val1 );
1309 areg_map[actual_areg] = -1;
1310 }
1311 else if (u->opcode == PUT && u->size == 4) {
1312 /* PUT; re-establish t -> a binding */
1313 vg_assert(u->tag1 == TempReg);
1314 vg_assert(u->tag2 == ArchReg);
1315 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1316 }
1317 else if (u->opcode == PUT && u->size != 4) {
1318 /* Invalidate any mapping for this archreg. */
1319 actual_areg = containingArchRegOf ( u->size, u->val2 );
1320 areg_map[actual_areg] = -1;
1321 } else {
1322
1323 /* see if insn has an archreg as a read operand; if so try to
1324 map it. */
1325 if (u->tag1 == ArchReg && u->size == 4
1326 && areg_map[u->val1] != -1) {
1327 switch (u->opcode) {
1328 case ADD: case SUB: case AND: case OR: case XOR:
1329 case ADC: case SBB:
1330 case SHL: case SHR: case SAR: case ROL: case ROR:
1331 case RCL: case RCR:
1332 if (VG_(disassemble))
1333 VG_(printf)(
1334 "at %d: change ArchReg %S to TempReg t%d\n",
1335 i, nameIReg(4,u->val1), areg_map[u->val1]);
1336 u->tag1 = TempReg;
1337 u->val1 = areg_map[u->val1];
1338 /* Remember to extend the live range of the TempReg,
1339 if necessary. */
1340 if (last_live_before[u->val1] < i)
1341 last_live_before[u->val1] = i;
1342 break;
1343 default:
1344 break;
1345 }
1346 }
1347
1348 /* boring insn; invalidate any mappings to temps it writes */
1349 k = getTempUsage(u, &tempUse[0]);
1350
1351 for (j = 0; j < k; j++) {
1352 wr = tempUse[j].isWrite;
1353 if (!wr) continue;
1354 tr = tempUse[j].tempNo;
1355 for (m = 0; m < 8; m++)
1356 if (areg_map[m] == tr) areg_map[m] = -1;
1357 }
1358 }
1359
1360 }
1361
1362# undef BIND_ARCH_TO_TEMP
1363
1364 /* PASS 2: redundant PUT elimination. If doing instrumentation,
1365 don't annul (delay) puts of %ESP, since the memory check
1366 machinery always requires the in-memory value of %ESP to be up
1367 to date.
1368 */
1369 for (j = 0; j < 8; j++)
1370 annul_put[j] = False;
1371
1372 for (i = cb->used-1; i >= 0; i--) {
1373 u = &cb->instrs[i];
1374 if (u->opcode == NOP) continue;
1375
1376 if (u->opcode == PUT && u->size == 4) {
1377 vg_assert(u->tag2 == ArchReg);
1378 actual_areg = containingArchRegOf ( 4, u->val2 );
1379 if (annul_put[actual_areg]) {
1380 u->opcode = NOP;
1381 u->tag1 = u->tag2 = NoValue;
1382 if (VG_(disassemble))
1383 VG_(printf)("at %d: delete PUT\n", i );
1384 } else {
1385 if (!(VG_(clo_instrument) && actual_areg == R_ESP))
1386 annul_put[actual_areg] = True;
1387 }
1388 }
1389 else if (u->opcode == PUT && u->size != 4) {
1390 actual_areg = containingArchRegOf ( u->size, u->val2 );
1391 annul_put[actual_areg] = False;
1392 }
1393 else if (u->opcode == JMP || u->opcode == JIFZ
1394 || u->opcode == CALLM) {
1395 for (j = 0; j < 8; j++)
1396 annul_put[j] = False;
1397 }
1398 else {
1399 /* If an instruction reads an ArchReg, the immediately
1400 preceding PUT cannot be annulled. */
1401 actual_areg = maybe_uinstrReadsArchReg ( u );
1402 if (actual_areg != -1)
1403 annul_put[actual_areg] = False;
1404 }
1405 }
1406
1407 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1408 dead after this point, annul the MOV insn and rename t2 to t1.
1409 Further modifies the last_live_before map. */
1410
1411# if 0
1412 VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
1413 for (i = 0; i < cb->nextTemp; i++)
1414 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1415 VG_(printf)("\n");
1416# endif
1417
1418 for (i = 0; i < cb->used-1; i++) {
1419 u = &cb->instrs[i];
1420 if (u->opcode != MOV) continue;
1421 if (u->tag1 == Literal) continue;
1422 vg_assert(u->tag1 == TempReg);
1423 vg_assert(u->tag2 == TempReg);
1424 if (last_live_before[u->val1] == i) {
1425 if (VG_(disassemble))
1426 VG_(printf)(
1427 "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
1428 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1429 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1430 if (cb->instrs[j].tag1 == TempReg
1431 && cb->instrs[j].val1 == u->val2)
1432 cb->instrs[j].val1 = u->val1;
1433 if (cb->instrs[j].tag2 == TempReg
1434 && cb->instrs[j].val2 == u->val2)
1435 cb->instrs[j].val2 = u->val1;
1436 }
1437 last_live_before[u->val1] = last_live_before[u->val2];
1438 last_live_before[u->val2] = i-1;
1439 u->opcode = NOP;
1440 u->tag1 = u->tag2 = NoValue;
1441 }
1442 }
1443
1444 /* PASS 3: redundant condition-code restore/save elimination.
1445 Scan backwards from the end. future_dead_flags records the set
1446 of flags which are dead at this point, that is, will be written
1447 before they are next read. Earlier uinsns which write flags
1448 already in future_dead_flags can have their writes annulled.
1449 */
1450 future_dead_flags = FlagsEmpty;
1451
1452 for (i = cb->used-1; i >= 0; i--) {
1453 u = &cb->instrs[i];
1454
1455 /* We might never make it to insns beyond this one, so be
1456 conservative. */
1457 if (u->opcode == JIFZ || u->opcode == JMP) {
1458 future_dead_flags = FlagsEmpty;
1459 continue;
1460 }
1461
1462 /* We can annul the flags written by this insn if it writes a
1463 subset (or eq) of the set of flags known to be dead after
1464 this insn. If not, just record the flags also written by
1465 this insn.*/
1466 if (u->flags_w != FlagsEmpty
1467 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
1468 if (VG_(disassemble)) {
1469 VG_(printf)("at %d: annul flag write ", i);
1470 vg_ppFlagSet("", u->flags_w);
1471 VG_(printf)(" due to later ");
1472 vg_ppFlagSet("", future_dead_flags);
1473 VG_(printf)("\n");
1474 }
1475 u->flags_w = FlagsEmpty;
1476 } else {
1477 future_dead_flags
1478 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1479 }
1480
1481 /* If this insn also reads flags, empty out future_dead_flags so
1482 as to force preceding writes not to be annulled. */
1483 if (u->flags_r != FlagsEmpty)
1484 future_dead_flags = FlagsEmpty;
1485 }
1486
1487 if (last_live_before)
1488 VG_(jitfree) ( last_live_before );
1489}
1490
1491
1492/*------------------------------------------------------------*/
1493/*--- The new register allocator. ---*/
1494/*------------------------------------------------------------*/
1495
1496typedef
1497 struct {
1498 /* Becomes live for the first time after this insn ... */
1499 Int live_after;
1500 /* Becomes dead for the last time after this insn ... */
1501 Int dead_before;
1502 /* The "home" spill slot, if needed. Never changes. */
1503 Int spill_no;
1504 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1505 Int real_no;
1506 }
1507 TempInfo;
1508
1509
1510/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1511 them in spill locations, and add spill code, if there are not
1512 enough real regs. The usual register allocation deal, in short.
1513
1514 Important redundancy of representation:
1515
1516 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1517 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1518
1519 The .real_no field of a TempInfo gives the current RRR for
1520 this TempReg, or VG_NOVALUE if the TempReg is currently
1521 in memory, in which case it is in the SpillNo denoted by
1522 spillno.
1523
1524 These pieces of information (a fwds-bwds mapping, really) must
1525 be kept consistent!
1526
1527 This allocator uses the so-called Second Chance Bin Packing
1528 algorithm, as described in "Quality and Speed in Linear-scan
1529 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1530 pp142-151). It is simple and fast and remarkably good at
1531 minimising the amount of spill code introduced.
1532*/
1533
1534static
1535UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1536{
1537 TempInfo* temp_info;
1538 Int real_to_temp[VG_MAX_REALREGS];
1539 Bool is_spill_cand[VG_MAX_REALREGS];
1540 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1541 Int i, j, k, m, r, tno, max_ss_no;
1542 Bool wr, defer, isRead, spill_reqd;
1543 TempUse tempUse[3];
1544 UCodeBlock* c2;
1545
1546 /* Used to denote ... well, "no value" in this fn. */
1547# define VG_NOTHING (-2)
1548
1549 /* Initialise the TempReg info. */
1550 if (c1->nextTemp > 0)
1551 temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
1552 else
1553 temp_info = NULL;
1554
1555 for (i = 0; i < c1->nextTemp; i++) {
1556 temp_info[i].live_after = VG_NOTHING;
1557 temp_info[i].dead_before = VG_NOTHING;
1558 temp_info[i].spill_no = VG_NOTHING;
1559 /* temp_info[i].real_no is not yet relevant. */
1560 }
1561
1562 spill_reqd = False;
1563
1564 /* Scan fwds to establish live ranges. */
1565
1566 for (i = 0; i < c1->used; i++) {
1567 k = getTempUsage(&c1->instrs[i], &tempUse[0]);
1568 vg_assert(k >= 0 && k <= 3);
1569
1570 /* For each temp usage ... fwds in program order */
1571 for (j = 0; j < k; j++) {
1572 tno = tempUse[j].tempNo;
1573 wr = tempUse[j].isWrite;
1574 if (wr) {
1575 /* Writes hold a reg live until after this insn. */
1576 if (temp_info[tno].live_after == VG_NOTHING)
1577 temp_info[tno].live_after = i;
1578 if (temp_info[tno].dead_before < i + 1)
1579 temp_info[tno].dead_before = i + 1;
1580 } else {
1581 /* First use of a tmp should be a write. */
1582 vg_assert(temp_info[tno].live_after != VG_NOTHING);
1583 /* Reads only hold it live until before this insn. */
1584 if (temp_info[tno].dead_before < i)
1585 temp_info[tno].dead_before = i;
1586 }
1587 }
1588 }
1589
1590# if 0
1591 /* Sanity check on live ranges. Expensive but correct. */
1592 for (i = 0; i < c1->nextTemp; i++) {
1593 vg_assert( (temp_info[i].live_after == VG_NOTHING
1594 && temp_info[i].dead_before == VG_NOTHING)
1595 || (temp_info[i].live_after != VG_NOTHING
1596 && temp_info[i].dead_before != VG_NOTHING) );
1597 }
1598# endif
1599
1600 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1601 We put as few as possible values in spill slots, but
1602 nevertheless need to have an assignment to them just in case. */
1603
1604 max_ss_no = -1;
1605
1606 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
1607 ss_busy_until_before[i] = 0;
1608
1609 for (i = 0; i < c1->nextTemp; i++) {
1610
1611 /* True iff this temp is unused. */
1612 if (temp_info[i].live_after == VG_NOTHING)
1613 continue;
1614
1615 /* Find the lowest-numbered spill slot which is available at the
1616 start point of this interval, and assign the interval to
1617 it. */
1618 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
1619 if (ss_busy_until_before[j] <= temp_info[i].live_after)
1620 break;
1621 if (j == VG_MAX_SPILLSLOTS) {
1622 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
1623 VG_(panic)("register allocation failed -- out of spill slots");
1624 }
1625 ss_busy_until_before[j] = temp_info[i].dead_before;
1626 temp_info[i].spill_no = j;
1627 if (j > max_ss_no)
1628 max_ss_no = j;
1629 }
1630
1631 VG_(total_reg_rank) += (max_ss_no+1);
1632
1633 /* Show live ranges and assigned spill slot nos. */
1634
1635 if (VG_(disassemble)) {
1636 VG_(printf)("Live Range Assignments\n");
1637
1638 for (i = 0; i < c1->nextTemp; i++) {
1639 if (temp_info[i].live_after == VG_NOTHING)
1640 continue;
1641 VG_(printf)(
1642 " LR %d is after %d to before %d spillno %d\n",
1643 i,
1644 temp_info[i].live_after,
1645 temp_info[i].dead_before,
1646 temp_info[i].spill_no
1647 );
1648 }
1649 }
1650
1651 /* Now that we've established a spill slot number for each used
1652 temporary, we can go ahead and do the core of the "Second-chance
1653 binpacking" allocation algorithm. */
1654
1655 /* Resulting code goes here. We generate it all in a forwards
1656 pass. */
1657 c2 = allocCodeBlock();
1658
1659 /* At the start, no TempRegs are assigned to any real register.
1660 Correspondingly, all temps claim to be currently resident in
1661 their spill slots, as computed by the previous two passes. */
1662 for (i = 0; i < VG_MAX_REALREGS; i++)
1663 real_to_temp[i] = VG_NOTHING;
1664 for (i = 0; i < c1->nextTemp; i++)
1665 temp_info[i].real_no = VG_NOTHING;
1666
1667 if (VG_(disassemble))
1668 VG_(printf)("\n");
1669
1670 /* Process each insn in turn. */
1671 for (i = 0; i < c1->used; i++) {
1672
1673 if (c1->instrs[i].opcode == NOP) continue;
1674 VG_(uinstrs_prealloc)++;
1675
1676# if 0
1677 /* Check map consistency. Expensive but correct. */
1678 for (r = 0; r < VG_MAX_REALREGS; r++) {
1679 if (real_to_temp[r] != VG_NOTHING) {
1680 tno = real_to_temp[r];
1681 vg_assert(tno >= 0 && tno < c1->nextTemp);
1682 vg_assert(temp_info[tno].real_no == r);
1683 }
1684 }
1685 for (tno = 0; tno < c1->nextTemp; tno++) {
1686 if (temp_info[tno].real_no != VG_NOTHING) {
1687 r = temp_info[tno].real_no;
1688 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
1689 vg_assert(real_to_temp[r] == tno);
1690 }
1691 }
1692# endif
1693
1694 if (VG_(disassemble))
1695 VG_(ppUInstr)(i, &c1->instrs[i]);
1696
1697 /* First, free up enough real regs for this insn. This may
1698 generate spill stores since we may have to evict some TempRegs
1699 currently in real regs. Also generates spill loads. */
1700
1701 k = getTempUsage(&c1->instrs[i], &tempUse[0]);
1702 vg_assert(k >= 0 && k <= 3);
1703
1704 /* For each ***different*** temp mentioned in the insn .... */
1705 for (j = 0; j < k; j++) {
1706
1707 /* First check if the temp is mentioned again later; if so,
1708 ignore this mention. We only want to process each temp
1709 used by the insn once, even if it is mentioned more than
1710 once. */
1711 defer = False;
1712 tno = tempUse[j].tempNo;
1713 for (m = j+1; m < k; m++)
1714 if (tempUse[m].tempNo == tno)
1715 defer = True;
1716 if (defer)
1717 continue;
1718
1719 /* Now we're trying to find a register for tempUse[j].tempNo.
1720 First of all, if it already has a register assigned, we
1721 don't need to do anything more. */
1722 if (temp_info[tno].real_no != VG_NOTHING)
1723 continue;
1724
1725 /* No luck. The next thing to do is see if there is a
1726 currently unassigned register available. If so, bag it. */
1727 for (r = 0; r < VG_MAX_REALREGS; r++) {
1728 if (real_to_temp[r] == VG_NOTHING)
1729 break;
1730 }
1731 if (r < VG_MAX_REALREGS) {
1732 real_to_temp[r] = tno;
1733 temp_info[tno].real_no = r;
1734 continue;
1735 }
1736
1737 /* Unfortunately, that didn't pan out either. So we'll have
1738 to eject some other unfortunate TempReg into a spill slot
1739 in order to free up a register. Of course, we need to be
1740 careful not to eject some other TempReg needed by this
1741 insn.
1742
1743 Select r in 0 .. VG_MAX_REALREGS-1 such that
1744 real_to_temp[r] is not mentioned in
1745 tempUse[0 .. k-1].tempNo, since it would be just plain
1746 wrong to eject some other TempReg which we need to use in
1747 this insn.
1748
1749 It is here that it is important to make a good choice of
1750 register to spill. */
1751
1752 /* First, mark those regs which are not spill candidates. */
1753 for (r = 0; r < VG_MAX_REALREGS; r++) {
1754 is_spill_cand[r] = True;
1755 for (m = 0; m < k; m++) {
1756 if (real_to_temp[r] == tempUse[m].tempNo) {
1757 is_spill_cand[r] = False;
1758 break;
1759 }
1760 }
1761 }
1762
1763 /* We can choose any r satisfying is_spill_cand[r]. However,
1764 try to make a good choice. First, try and find r such
1765 that the associated TempReg is already dead. */
1766 for (r = 0; r < VG_MAX_REALREGS; r++) {
1767 if (is_spill_cand[r] &&
1768 temp_info[real_to_temp[r]].dead_before <= i)
1769 goto have_spill_cand;
1770 }
1771
1772 /* No spill cand is mapped to a dead TempReg. Now we really
1773 _do_ have to generate spill code. Choose r so that the
1774 next use of its associated TempReg is as far ahead as
1775 possible, in the hope that this will minimise the number of
1776 consequent reloads required. This is a bit expensive, but
1777 we don't have to do it very often. */
1778 {
1779 Int furthest_r = VG_MAX_REALREGS;
1780 Int furthest = 0;
1781 for (r = 0; r < VG_MAX_REALREGS; r++) {
1782 if (!is_spill_cand[r]) continue;
1783 for (m = i+1; m < c1->used; m++)
1784 if (uInstrMentionsTempReg(&c1->instrs[m],
1785 real_to_temp[r]))
1786 break;
1787 if (m > furthest) {
1788 furthest = m;
1789 furthest_r = r;
1790 }
1791 }
1792 r = furthest_r;
1793 goto have_spill_cand;
1794 }
1795
1796 have_spill_cand:
1797 if (r == VG_MAX_REALREGS)
1798 VG_(panic)("new reg alloc: out of registers ?!");
1799
1800 /* Eject r. Important refinement: don't bother if the
1801 associated TempReg is now dead. */
1802 vg_assert(real_to_temp[r] != VG_NOTHING);
1803 vg_assert(real_to_temp[r] != tno);
1804 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
1805 if (temp_info[real_to_temp[r]].dead_before > i) {
1806 uInstr2(c2, PUT, 4,
1807 RealReg, VG_(rankToRealRegNo)(r),
1808 SpillNo, temp_info[real_to_temp[r]].spill_no);
1809 VG_(uinstrs_spill)++;
1810 spill_reqd = True;
1811 if (VG_(disassemble))
1812 VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
1813 }
1814
1815 /* Decide if tno is read. */
1816 isRead = False;
1817 for (m = 0; m < k; m++)
1818 if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
1819 isRead = True;
1820
1821 /* If so, generate a spill load. */
1822 if (isRead) {
1823 uInstr2(c2, GET, 4,
1824 SpillNo, temp_info[tno].spill_no,
1825 RealReg, VG_(rankToRealRegNo)(r) );
1826 VG_(uinstrs_spill)++;
1827 spill_reqd = True;
1828 if (VG_(disassemble))
1829 VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
1830 }
1831
1832 /* Update the forwards and backwards maps. */
1833 real_to_temp[r] = tno;
1834 temp_info[tno].real_no = r;
1835 }
1836
1837 /* By this point, all TempRegs mentioned by the insn have been
1838 bought into real regs. We now copy the insn to the output
1839 and use patchUInstr to convert its rTempRegs into
1840 realregs. */
1841 for (j = 0; j < k; j++)
1842 tempUse[j].realNo
1843 = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
1844 copyUInstr(c2, &c1->instrs[i]);
1845 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
1846
1847 if (VG_(disassemble)) {
1848 VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
1849 VG_(printf)("\n");
1850 }
1851 }
1852
1853 if (temp_info != NULL)
1854 VG_(jitfree)(temp_info);
1855
1856 freeCodeBlock(c1);
1857
1858 if (spill_reqd)
1859 VG_(translations_needing_spill)++;
1860
1861 return c2;
1862
1863# undef VG_NOTHING
1864
1865}
1866
1867
1868/*------------------------------------------------------------*/
1869/*--- New instrumentation machinery. ---*/
1870/*------------------------------------------------------------*/
1871
1872static
1873VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
1874{
1875 switch (sz) {
1876 case 4: return VgT_ImproveOR4_TQ;
1877 case 2: return VgT_ImproveOR2_TQ;
1878 case 1: return VgT_ImproveOR1_TQ;
1879 default: VG_(panic)("get_VgT_ImproveOR_TQ");
1880 }
1881}
1882
1883
1884static
1885VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
1886{
1887 switch (sz) {
1888 case 4: return VgT_ImproveAND4_TQ;
1889 case 2: return VgT_ImproveAND2_TQ;
1890 case 1: return VgT_ImproveAND1_TQ;
1891 default: VG_(panic)("get_VgT_ImproveAND_TQ");
1892 }
1893}
1894
1895
1896static
1897VgTagOp get_VgT_Left ( Int sz )
1898{
1899 switch (sz) {
1900 case 4: return VgT_Left4;
1901 case 2: return VgT_Left2;
1902 case 1: return VgT_Left1;
1903 default: VG_(panic)("get_VgT_Left");
1904 }
1905}
1906
1907
1908static
1909VgTagOp get_VgT_UifU ( Int sz )
1910{
1911 switch (sz) {
1912 case 4: return VgT_UifU4;
1913 case 2: return VgT_UifU2;
1914 case 1: return VgT_UifU1;
1915 case 0: return VgT_UifU0;
1916 default: VG_(panic)("get_VgT_UifU");
1917 }
1918}
1919
1920
1921static
1922VgTagOp get_VgT_DifD ( Int sz )
1923{
1924 switch (sz) {
1925 case 4: return VgT_DifD4;
1926 case 2: return VgT_DifD2;
1927 case 1: return VgT_DifD1;
1928 default: VG_(panic)("get_VgT_DifD");
1929 }
1930}
1931
1932
1933static
1934VgTagOp get_VgT_PCast ( Int szs, Int szd )
1935{
1936 if (szs == 4 && szd == 0) return VgT_PCast40;
1937 if (szs == 2 && szd == 0) return VgT_PCast20;
1938 if (szs == 1 && szd == 0) return VgT_PCast10;
1939 if (szs == 0 && szd == 1) return VgT_PCast01;
1940 if (szs == 0 && szd == 2) return VgT_PCast02;
1941 if (szs == 0 && szd == 4) return VgT_PCast04;
1942 if (szs == 1 && szd == 4) return VgT_PCast14;
1943 if (szs == 1 && szd == 2) return VgT_PCast12;
1944 if (szs == 1 && szd == 1) return VgT_PCast11;
1945 VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
1946 VG_(panic)("get_VgT_PCast");
1947}
1948
1949
1950static
1951VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
1952{
1953 if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
1954 if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
1955
1956 if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
1957 if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
1958
1959 if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
1960 if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
1961
1962 VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
1963 VG_(panic)("get_VgT_Widen");
1964}
1965
1966/* Pessimally cast the spec'd shadow from one size to another. */
1967static
1968void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
1969{
1970 if (szs == 0 && szd == 0)
1971 return;
1972 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1973 NoValue, 0,
1974 Lit16, get_VgT_PCast(szs,szd));
1975}
1976
1977
1978/* Create a signed or unsigned widen of the spec'd shadow from one
1979 size to another. The only allowed size transitions are 1->2, 1->4
1980 and 2->4. */
1981static
1982void create_Widen ( UCodeBlock* cb, Bool signed_widen,
1983 Int szs, Int szd, Int tempreg )
1984{
1985 if (szs == szd) return;
1986 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1987 NoValue, 0,
1988 Lit16, get_VgT_Widen(signed_widen,szs,szd));
1989}
1990
1991
1992/* Get the condition codes into a new shadow, at the given size. */
1993static
1994Int create_GETVF ( UCodeBlock* cb, Int sz )
1995{
1996 Int tt = newShadow(cb);
1997 uInstr1(cb, GETVF, 0, TempReg, tt);
1998 create_PCast(cb, 0, sz, tt);
1999 return tt;
2000}
2001
2002
2003/* Save the condition codes from the spec'd shadow. */
2004static
2005void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
2006{
2007 if (sz == 0) {
2008 uInstr1(cb, PUTVF, 0, TempReg, tempreg);
2009 } else {
2010 Int tt = newShadow(cb);
2011 uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
2012 create_PCast(cb, sz, 0, tt);
2013 uInstr1(cb, PUTVF, 0, TempReg, tt);
2014 }
2015}
2016
2017
2018/* Do Left on the spec'd shadow. */
2019static
2020void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
2021{
2022 uInstr3(cb, TAG1, 0,
2023 TempReg, tempreg,
2024 NoValue, 0,
2025 Lit16, get_VgT_Left(sz));
2026}
2027
2028
2029/* Do UifU on ts and td, putting the result in td. */
2030static
2031void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
2032{
2033 uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
2034 Lit16, get_VgT_UifU(sz));
2035}
2036
2037
2038/* Do DifD on ts and td, putting the result in td. */
2039static
2040void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
2041{
2042 uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
2043 Lit16, get_VgT_DifD(sz));
2044}
2045
2046
2047/* Do HelpAND on value tval and tag tqqq, putting the result in
2048 tqqq. */
2049static
2050void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
2051{
2052 uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
2053 Lit16, get_VgT_ImproveAND_TQ(sz));
2054}
2055
2056
2057/* Do HelpOR on value tval and tag tqqq, putting the result in
2058 tqqq. */
2059static
2060void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
2061{
2062 uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
2063 Lit16, get_VgT_ImproveOR_TQ(sz));
2064}
2065
2066
2067/* Get the shadow for an operand described by (tag, val). Emit code
2068 to do this and return the identity of the shadow holding the
2069 result. The result tag is always copied into a new shadow, so it
2070 can be modified without trashing the original.*/
2071static
2072Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
2073 Int sz, Int tag, Int val )
2074{
2075 Int sh;
2076 sh = newShadow(cb);
2077 if (tag == TempReg) {
2078 uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
2079 return sh;
2080 }
2081 if (tag == Literal) {
2082 uInstr1(cb, SETV, sz, TempReg, sh);
2083 return sh;
2084 }
2085 if (tag == ArchReg) {
2086 uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
2087 return sh;
2088 }
2089 VG_(panic)("getOperandShadow");
2090}
2091
2092
2093
2094/* Create and return an instrumented version of cb_in. Free cb_in
2095 before returning. */
2096static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
2097{
2098 UCodeBlock* cb;
2099 Int i, j;
2100 UInstr* u_in;
2101 Int qs, qd, qt, qtt;
2102 cb = allocCodeBlock();
2103 cb->nextTemp = cb_in->nextTemp;
2104
2105 for (i = 0; i < cb_in->used; i++) {
2106 qs = qd = qt = qtt = INVALID_TEMPREG;
2107 u_in = &cb_in->instrs[i];
2108
2109 /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
2110
2111 /* VG_(ppUInstr)(0, u_in); */
2112 switch (u_in->opcode) {
2113
2114 case NOP:
2115 break;
2116
2117 case INCEIP:
2118 copyUInstr(cb, u_in);
2119 break;
2120
sewardj97ced732002-03-25 00:07:36 +00002121 /* Loads and stores. Test the V bits for the address. 24
2122 Mar 02: since the address is A-checked anyway, there's not
2123 really much point in doing the V-check too, unless you
2124 think that you might use addresses which are undefined but
2125 still addressible. Hence the optionalisation of the V
2126 check.
2127
sewardjde4a1d02002-03-22 01:27:54 +00002128 The LOADV/STOREV does an addressibility check for the
2129 address. */
sewardj97ced732002-03-25 00:07:36 +00002130
sewardjde4a1d02002-03-22 01:27:54 +00002131 case LOAD:
sewardj97ced732002-03-25 00:07:36 +00002132 if (VG_(clo_check_addrVs)) {
2133 uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
2134 uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
2135 }
sewardjde4a1d02002-03-22 01:27:54 +00002136 uInstr2(cb, LOADV, u_in->size,
2137 TempReg, u_in->val1,
2138 TempReg, SHADOW(u_in->val2));
2139 copyUInstr(cb, u_in);
2140 break;
2141 case STORE:
sewardj97ced732002-03-25 00:07:36 +00002142 if (VG_(clo_check_addrVs)) {
2143 uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
2144 uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
2145 }
sewardjde4a1d02002-03-22 01:27:54 +00002146 uInstr2(cb, STOREV, u_in->size,
2147 TempReg, SHADOW(u_in->val1),
2148 TempReg, u_in->val2);
2149 copyUInstr(cb, u_in);
2150 break;
2151
2152 /* Moving stuff around. Make the V bits follow accordingly,
2153 but don't do anything else. */
2154
2155 case GET:
2156 uInstr2(cb, GETV, u_in->size,
2157 ArchReg, u_in->val1,
2158 TempReg, SHADOW(u_in->val2));
2159 copyUInstr(cb, u_in);
2160 break;
2161 case PUT:
2162 uInstr2(cb, PUTV, u_in->size,
2163 TempReg, SHADOW(u_in->val1),
2164 ArchReg, u_in->val2);
2165 copyUInstr(cb, u_in);
2166 break;
2167
2168 case GETF:
2169 /* This is not the smartest way to do it, but should work. */
2170 qd = create_GETVF(cb, u_in->size);
2171 uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
2172 copyUInstr(cb, u_in);
2173 break;
2174 case PUTF:
2175 create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
2176 copyUInstr(cb, u_in);
2177 break;
2178
2179 case MOV:
2180 switch (u_in->tag1) {
2181 case TempReg:
2182 uInstr2(cb, MOV, 4,
2183 TempReg, SHADOW(u_in->val1),
2184 TempReg, SHADOW(u_in->val2));
2185 break;
2186 case Literal:
2187 uInstr1(cb, SETV, u_in->size,
2188 TempReg, SHADOW(u_in->val2));
2189 break;
2190 default:
2191 VG_(panic)("vg_instrument: MOV");
2192 }
2193 copyUInstr(cb, u_in);
2194 break;
2195
2196 /* Special case of add, where one of the operands is a literal.
2197 lea1(t) = t + some literal.
2198 Therefore: lea1#(qa) = left(qa)
2199 */
2200 case LEA1:
2201 vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
2202 qs = SHADOW(u_in->val1);
2203 qd = SHADOW(u_in->val2);
2204 uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
2205 create_Left(cb, u_in->size, qd);
2206 copyUInstr(cb, u_in);
2207 break;
2208
2209 /* Another form of add.
2210 lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
2211 and is 0,1,2 or 3.
2212 lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
2213 Note, subtly, that the shift puts zeroes at the bottom of qt,
2214 meaning Valid, since the corresponding shift of tt puts
2215 zeroes at the bottom of tb.
2216 */
2217 case LEA2: {
2218 Int shift;
2219 vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
2220 switch (u_in->extra4b) {
2221 case 1: shift = 0; break;
2222 case 2: shift = 1; break;
2223 case 4: shift = 2; break;
2224 case 8: shift = 3; break;
2225 default: VG_(panic)( "vg_instrument(LEA2)" );
2226 }
2227 qs = SHADOW(u_in->val1);
2228 qt = SHADOW(u_in->val2);
2229 qd = SHADOW(u_in->val3);
2230 uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
2231 if (shift > 0) {
2232 uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
2233 uLiteral(cb, shift);
2234 }
2235 create_UifU(cb, 4, qs, qd);
2236 create_Left(cb, u_in->size, qd);
2237 copyUInstr(cb, u_in);
2238 break;
2239 }
2240
2241 /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
2242 case INC: case DEC:
2243 qd = SHADOW(u_in->val1);
2244 create_Left(cb, u_in->size, qd);
2245 if (u_in->flags_w != FlagsEmpty)
2246 create_PUTVF(cb, u_in->size, qd);
2247 copyUInstr(cb, u_in);
2248 break;
2249
2250 /* This is a HACK (approximation :-) */
2251 /* rcl#/rcr#(qs,qd)
2252 = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
2253 eflags# = q0
2254 qd =pcast-0-sz(q0)
2255 Ie, cast everything down to a single bit, then back up.
2256 This assumes that any bad bits infect the whole word and
2257 the eflags.
2258 */
2259 case RCL: case RCR:
2260 vg_assert(u_in->flags_r != FlagsEmpty);
2261 /* The following assertion looks like it makes sense, but is
2262 actually wrong. Consider this:
2263 rcll %eax
2264 imull %eax, %eax
2265 The rcll writes O and C but so does the imull, so the O and C
2266 write of the rcll is annulled by the prior improvement pass.
2267 Noticed by Kevin Ryde <user42@zip.com.au>
2268 */
2269 /* vg_assert(u_in->flags_w != FlagsEmpty); */
2270 qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
2271 /* We can safely modify qs; cast it to 0-size. */
2272 create_PCast(cb, u_in->size, 0, qs);
2273 qd = SHADOW(u_in->val2);
2274 create_PCast(cb, u_in->size, 0, qd);
2275 /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
2276 create_UifU(cb, 0, qs, qd);
2277 /* qs is now free; reuse it for the flag definedness. */
2278 qs = create_GETVF(cb, 0);
2279 create_UifU(cb, 0, qs, qd);
2280 create_PUTVF(cb, 0, qd);
2281 create_PCast(cb, 0, u_in->size, qd);
2282 copyUInstr(cb, u_in);
2283 break;
2284
2285 /* for OP in shl shr sar rol ror
2286 (qs is shift count#, qd is value to be OP#d)
2287 OP(ts,td)
2288 OP#(qs,qd)
2289 = pcast-1-sz(qs) `UifU` OP(ts,qd)
2290 So we apply OP to the tag bits too, and then UifU with
2291 the shift count# to take account of the possibility of it
2292 being undefined.
2293
2294 A bit subtle:
2295 ROL/ROR rearrange the tag bits as per the value bits.
2296 SHL/SHR shifts zeroes into the value, and corresponding
2297 zeroes indicating Definedness into the tag.
2298 SAR copies the top bit of the value downwards, and therefore
2299 SAR also copies the definedness of the top bit too.
2300 So in all five cases, we just apply the same op to the tag
2301 bits as is applied to the value bits. Neat!
2302 */
2303 case SHL:
2304 case SHR: case SAR:
2305 case ROL: case ROR: {
2306 Int t_amount = INVALID_TEMPREG;
2307 vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
2308 vg_assert(u_in->tag2 == TempReg);
2309 qd = SHADOW(u_in->val2);
2310
2311 /* Make qs hold shift-count# and make
2312 t_amount be a TempReg holding the shift count. */
2313 if (u_in->tag1 == Literal) {
2314 t_amount = newTemp(cb);
2315 uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
2316 uLiteral(cb, u_in->lit32);
2317 qs = SHADOW(t_amount);
2318 uInstr1(cb, SETV, 1, TempReg, qs);
2319 } else {
2320 t_amount = u_in->val1;
2321 qs = SHADOW(u_in->val1);
2322 }
2323
2324 uInstr2(cb, u_in->opcode,
2325 u_in->size,
2326 TempReg, t_amount,
2327 TempReg, qd);
2328 qt = newShadow(cb);
2329 uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
2330 create_PCast(cb, 1, u_in->size, qt);
2331 create_UifU(cb, u_in->size, qt, qd);
2332 copyUInstr(cb, u_in);
2333 break;
2334 }
2335
2336 /* One simple tag operation. */
2337 case WIDEN:
2338 vg_assert(u_in->tag1 == TempReg);
2339 create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
2340 SHADOW(u_in->val1));
2341 copyUInstr(cb, u_in);
2342 break;
2343
2344 /* not#(x) = x (since bitwise independent) */
2345 case NOT:
2346 vg_assert(u_in->tag1 == TempReg);
2347 copyUInstr(cb, u_in);
2348 break;
2349
2350 /* neg#(x) = left(x) (derivable from case for SUB) */
2351 case NEG:
2352 vg_assert(u_in->tag1 == TempReg);
2353 create_Left(cb, u_in->size, SHADOW(u_in->val1));
2354 copyUInstr(cb, u_in);
2355 break;
2356
2357 /* bswap#(x) = bswap(x) */
2358 case BSWAP:
2359 vg_assert(u_in->tag1 == TempReg);
2360 vg_assert(u_in->size == 4);
2361 qd = SHADOW(u_in->val1);
2362 uInstr1(cb, BSWAP, 4, TempReg, qd);
2363 copyUInstr(cb, u_in);
2364 break;
2365
2366 /* cc2val#(qd) = pcast-0-to-size(eflags#) */
2367 case CC2VAL:
2368 vg_assert(u_in->tag1 == TempReg);
2369 vg_assert(u_in->flags_r != FlagsEmpty);
2370 qt = create_GETVF(cb, u_in->size);
2371 uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
2372 copyUInstr(cb, u_in);
2373 break;
2374
2375 /* cmov#(qs,qd) = cmov(qs,qd)
2376 That is, do the cmov of tags using the same flags as for
2377 the data (obviously). However, first do a test on the
2378 validity of the flags.
2379 */
2380 case CMOV:
2381 vg_assert(u_in->size == 4);
2382 vg_assert(u_in->tag1 == TempReg);
2383 vg_assert(u_in->tag2 == TempReg);
2384 vg_assert(u_in->flags_r != FlagsEmpty);
2385 vg_assert(u_in->flags_w == FlagsEmpty);
2386 qs = SHADOW(u_in->val1);
2387 qd = SHADOW(u_in->val2);
2388 qt = create_GETVF(cb, 0);
2389 uInstr1(cb, TESTV, 0, TempReg, qt);
2390 /* qt should never be referred to again. Nevertheless
2391 ... */
2392 uInstr1(cb, SETV, 0, TempReg, qt);
2393
2394 uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
2395 LAST_UINSTR(cb).cond = u_in->cond;
2396 LAST_UINSTR(cb).flags_r = u_in->flags_r;
2397
2398 copyUInstr(cb, u_in);
2399 break;
2400
2401 /* add#/sub#(qs,qd)
2402 = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
2403 = left(qs) `UifU` left(qd)
2404 = left(qs `UifU` qd)
2405 adc#/sbb#(qs,qd)
2406 = left(qs `UifU` qd) `UifU` pcast(eflags#)
2407 Second arg (dest) is TempReg.
2408 First arg (src) is Literal or TempReg or ArchReg.
2409 */
2410 case ADD: case SUB:
2411 case ADC: case SBB:
2412 qd = SHADOW(u_in->val2);
2413 qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
2414 create_UifU(cb, u_in->size, qs, qd);
2415 create_Left(cb, u_in->size, qd);
2416 if (u_in->opcode == ADC || u_in->opcode == SBB) {
2417 vg_assert(u_in->flags_r != FlagsEmpty);
2418 qt = create_GETVF(cb, u_in->size);
2419 create_UifU(cb, u_in->size, qt, qd);
2420 }
2421 if (u_in->flags_w != FlagsEmpty) {
2422 create_PUTVF(cb, u_in->size, qd);
2423 }
2424 copyUInstr(cb, u_in);
2425 break;
2426
2427 /* xor#(qs,qd) = qs `UifU` qd */
2428 case XOR:
2429 qd = SHADOW(u_in->val2);
2430 qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
2431 create_UifU(cb, u_in->size, qs, qd);
2432 if (u_in->flags_w != FlagsEmpty) {
2433 create_PUTVF(cb, u_in->size, qd);
2434 }
2435 copyUInstr(cb, u_in);
2436 break;
2437
2438 /* and#/or#(qs,qd)
2439 = (qs `UifU` qd) `DifD` improve(vs,qs)
2440 `DifD` improve(vd,qd)
2441 where improve is the relevant one of
2442 Improve{AND,OR}_TQ
2443 Use the following steps, with qt as a temp:
2444 qt = improve(vd,qd)
2445 qd = qs `UifU` qd
2446 qd = qt `DifD` qd
2447 qt = improve(vs,qs)
2448 qd = qt `DifD` qd
2449 */
2450 case AND: case OR:
2451 vg_assert(u_in->tag1 == TempReg);
2452 vg_assert(u_in->tag2 == TempReg);
2453 qd = SHADOW(u_in->val2);
2454 qs = SHADOW(u_in->val1);
2455 qt = newShadow(cb);
2456
2457 /* qt = improve(vd,qd) */
2458 uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
2459 if (u_in->opcode == AND)
2460 create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
2461 else
2462 create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
2463 /* qd = qs `UifU` qd */
2464 create_UifU(cb, u_in->size, qs, qd);
2465 /* qd = qt `DifD` qd */
2466 create_DifD(cb, u_in->size, qt, qd);
2467 /* qt = improve(vs,qs) */
2468 uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
2469 if (u_in->opcode == AND)
2470 create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
2471 else
2472 create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
2473 /* qd = qt `DifD` qd */
2474 create_DifD(cb, u_in->size, qt, qd);
2475 /* So, finally qd is the result tag. */
2476 if (u_in->flags_w != FlagsEmpty) {
2477 create_PUTVF(cb, u_in->size, qd);
2478 }
2479 copyUInstr(cb, u_in);
2480 break;
2481
2482 /* Machinery to do with supporting CALLM. Copy the start and
2483 end markers only to make the result easier to read
2484 (debug); they generate no code and have no effect.
2485 */
2486 case CALLM_S: case CALLM_E:
2487 copyUInstr(cb, u_in);
2488 break;
2489
2490 /* Copy PUSH and POP verbatim. Arg/result absval
2491 calculations are done when the associated CALL is
2492 processed. CLEAR has no effect on absval calculations but
2493 needs to be copied.
2494 */
2495 case PUSH: case POP: case CLEAR:
2496 copyUInstr(cb, u_in);
2497 break;
2498
2499 /* In short:
2500 callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
2501 We have to decide on a size to do the computation at,
2502 although the choice doesn't affect correctness. We will
2503 do a pcast to the final size anyway, so the only important
2504 factor is to choose a size which minimises the total
2505 number of casts needed. Valgrind: just use size 0,
2506 regardless. It may not be very good for performance
2507 but does simplify matters, mainly by reducing the number
2508 of different pessimising casts which have to be implemented.
2509 */
2510 case CALLM: {
2511 UInstr* uu;
2512 Bool res_used;
2513
2514 /* Now generate the code. Get the final result absval
2515 into qt. */
2516 qt = newShadow(cb);
2517 qtt = newShadow(cb);
2518 uInstr1(cb, SETV, 0, TempReg, qt);
2519 for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
2520 uu = & cb_in->instrs[j];
2521 if (uu->opcode != PUSH) continue;
2522 /* cast via a temporary */
2523 uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
2524 TempReg, qtt);
2525 create_PCast(cb, uu->size, 0, qtt);
2526 create_UifU(cb, 0, qtt, qt);
2527 }
2528 /* Remembering also that flags read count as inputs. */
2529 if (u_in->flags_r != FlagsEmpty) {
2530 qtt = create_GETVF(cb, 0);
2531 create_UifU(cb, 0, qtt, qt);
2532 }
2533
2534 /* qt now holds the result tag. If any results from the
2535 call are used, either by fetching with POP or
2536 implicitly by writing the flags, we copy the result
2537 absval to the relevant location. If not used, the call
2538 must have been for its side effects, so we test qt here
2539 and now. Note that this assumes that all values
2540 removed by POP continue to be live. So dead args
2541 *must* be removed with CLEAR, not by POPping them into
2542 a dummy tempreg.
2543 */
2544 res_used = False;
2545 for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
2546 uu = & cb_in->instrs[j];
2547 if (uu->opcode != POP) continue;
2548 /* Cast via a temp. */
2549 uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
2550 create_PCast(cb, 0, uu->size, qtt);
2551 uInstr2(cb, MOV, 4, TempReg, qtt,
2552 TempReg, SHADOW(uu->val1));
2553 res_used = True;
2554 }
2555 if (u_in->flags_w != FlagsEmpty) {
2556 create_PUTVF(cb, 0, qt);
2557 res_used = True;
2558 }
2559 if (!res_used) {
2560 uInstr1(cb, TESTV, 0, TempReg, qt);
2561 /* qt should never be referred to again. Nevertheless
2562 ... */
2563 uInstr1(cb, SETV, 0, TempReg, qt);
2564 }
2565 copyUInstr(cb, u_in);
2566 break;
2567 }
2568 /* Whew ... */
2569
2570 case JMP:
2571 if (u_in->tag1 == TempReg) {
2572 uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
2573 uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
2574 } else {
2575 vg_assert(u_in->tag1 == Literal);
2576 }
2577 if (u_in->cond != CondAlways) {
2578 vg_assert(u_in->flags_r != FlagsEmpty);
2579 qt = create_GETVF(cb, 0);
2580 uInstr1(cb, TESTV, 0, TempReg, qt);
2581 /* qt should never be referred to again. Nevertheless
2582 ... */
2583 uInstr1(cb, SETV, 0, TempReg, qt);
2584 }
2585 copyUInstr(cb, u_in);
2586 break;
2587
2588 case JIFZ:
2589 uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
2590 uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
2591 copyUInstr(cb, u_in);
2592 break;
2593
2594 /* Emit a check on the address used. For FPU_R, the value
2595 loaded into the FPU is checked at the time it is read from
2596 memory (see synth_fpu_mem_check_actions). */
2597 case FPU_R: case FPU_W:
2598 vg_assert(u_in->tag2 == TempReg);
2599 uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
2600 uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
2601 copyUInstr(cb, u_in);
2602 break;
2603
2604 /* For FPU insns not referencing memory, just copy thru. */
2605 case FPU:
2606 copyUInstr(cb, u_in);
2607 break;
2608
2609 default:
2610 VG_(ppUInstr)(0, u_in);
2611 VG_(panic)( "vg_instrument: unhandled case");
2612
2613 } /* end of switch (u_in->opcode) */
2614
2615 } /* end of for loop */
2616
2617 freeCodeBlock(cb_in);
2618 return cb;
2619}
2620
2621/*------------------------------------------------------------*/
2622/*--- Clean up mem check instrumentation. ---*/
2623/*------------------------------------------------------------*/
2624
2625#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
2626#define VGC_UNDEF ((UChar)100)
2627#define VGC_VALUE ((UChar)101)
2628
2629#define NOP_no_msg(uu) \
2630 do { uu->opcode = NOP; } while (False)
2631
2632#define NOP_tag1_op(uu) \
2633 do { uu->opcode = NOP; \
2634 if (VG_(disassemble)) \
2635 VG_(printf)("at %d: delete %s due to defd arg\n", \
2636 i, VG_(nameOfTagOp(u->val3))); \
2637 } while (False)
2638
2639#define SETV_tag1_op(uu,newsz) \
2640 do { uu->opcode = SETV; \
2641 uu->size = newsz; \
2642 uu->tag2 = uu->tag3 = NoValue; \
2643 if (VG_(disassemble)) \
2644 VG_(printf)("at %d: convert %s to SETV%d " \
2645 "due to defd arg\n", \
2646 i, VG_(nameOfTagOp(u->val3)), newsz); \
2647 } while (False)
2648
2649
2650
2651/* Run backwards and delete SETVs on shadow temps for which the next
2652 action is a write. Needs an env saying whether or not the next
2653 action is a write. The supplied UCodeBlock is destructively
2654 modified.
2655*/
2656static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
2657{
2658 Bool* next_is_write;
2659 Int i, j, k, n_temps;
2660 UInstr* u;
2661 TempUse tempUse[3];
2662
2663 n_temps = cb->nextTemp;
2664 if (n_temps == 0) return;
2665
2666 next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
2667
2668 for (i = 0; i < n_temps; i++) next_is_write[i] = True;
2669
2670 for (i = cb->used-1; i >= 0; i--) {
2671 u = &cb->instrs[i];
2672
sewardj97ced732002-03-25 00:07:36 +00002673 /* If we're not checking address V bits, there will be a lot of
2674 GETVs, TAG1s and TAG2s calculating values which are never
2675 used. These first three cases get rid of them. */
2676
2677 if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
2678 && next_is_write[u->val2]
2679 && !VG_(clo_check_addrVs)) {
2680 u->opcode = NOP;
2681 u->size = 0;
2682 if (VG_(disassemble))
2683 VG_(printf)("at %d: delete GETV\n", i);
2684 } else
2685
2686 if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
2687 && next_is_write[u->val1]
2688 && !VG_(clo_check_addrVs)) {
2689 u->opcode = NOP;
2690 u->size = 0;
2691 if (VG_(disassemble))
2692 VG_(printf)("at %d: delete TAG1\n", i);
2693 } else
2694
2695 if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
2696 && next_is_write[u->val2]
2697 && !VG_(clo_check_addrVs)) {
2698 u->opcode = NOP;
2699 u->size = 0;
2700 if (VG_(disassemble))
2701 VG_(printf)("at %d: delete TAG2\n", i);
2702 } else
2703
2704 /* We do the rest of these regardless of whether or not
2705 addresses are V-checked. */
2706
sewardjde4a1d02002-03-22 01:27:54 +00002707 if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
2708 && next_is_write[u->val2]) {
2709 /* This MOV is pointless because the target is dead at this
2710 point. Delete it. */
2711 u->opcode = NOP;
2712 u->size = 0;
2713 if (VG_(disassemble))
2714 VG_(printf)("at %d: delete MOV\n", i);
2715 } else
2716
2717 if (u->opcode == SETV) {
2718 if (u->tag1 == TempReg) {
2719 vg_assert(VGC_IS_SHADOW(u->val1));
2720 if (next_is_write[u->val1]) {
2721 /* This write is pointless, so annul it. */
2722 u->opcode = NOP;
2723 u->size = 0;
2724 if (VG_(disassemble))
2725 VG_(printf)("at %d: delete SETV\n", i);
2726 } else {
2727 /* This write has a purpose; don't annul it, but do
2728 notice that we did it. */
2729 next_is_write[u->val1] = True;
2730 }
2731
2732 }
2733
2734 } else {
2735 /* Find out what this insn does to the temps. */
2736 k = getTempUsage(u, &tempUse[0]);
2737 vg_assert(k <= 3);
2738 for (j = k-1; j >= 0; j--) {
2739 next_is_write[ tempUse[j].tempNo ]
2740 = tempUse[j].isWrite;
2741 }
2742 }
2743
2744 }
2745
2746 VG_(jitfree)(next_is_write);
2747}
2748
2749
2750/* Run forwards, propagating and using the is-completely-defined
2751 property. This removes a lot of redundant tag-munging code.
2752 Unfortunately it requires intimate knowledge of how each uinstr and
2753 tagop modifies its arguments. This duplicates knowledge of uinstr
2754 tempreg uses embodied in getTempUsage(), which is unfortunate.
2755 The supplied UCodeBlock* is modified in-place.
2756
2757 For each value temp, def[] should hold VGC_VALUE.
2758
2759 For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
2760 definitely known to be fully defined at that size. In all other
2761 circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
2762 undefined. In cases of doubt, VGC_UNDEF is always safe.
2763*/
2764static void vg_propagate_definedness ( UCodeBlock* cb )
2765{
2766 UChar* def;
2767 Int i, j, k, t, n_temps;
2768 UInstr* u;
2769 TempUse tempUse[3];
2770
2771 n_temps = cb->nextTemp;
2772 if (n_temps == 0) return;
2773
2774 def = VG_(jitmalloc)(n_temps * sizeof(UChar));
2775 for (i = 0; i < n_temps; i++)
2776 def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
2777
2778 /* Run forwards, detecting and using the all-defined property. */
2779
2780 for (i = 0; i < cb->used; i++) {
2781 u = &cb->instrs[i];
2782 switch (u->opcode) {
2783
2784 /* Tag-handling uinstrs. */
2785
2786 /* Deal with these quickly. */
2787 case NOP:
2788 case INCEIP:
2789 break;
2790
2791 /* Make a tag defined. */
2792 case SETV:
2793 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2794 def[u->val1] = u->size;
2795 break;
2796
2797 /* Check definedness of a tag. */
2798 case TESTV:
2799 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2800 if (def[u->val1] <= 4) {
2801 vg_assert(def[u->val1] == u->size);
2802 NOP_no_msg(u);
2803 if (VG_(disassemble))
2804 VG_(printf)("at %d: delete TESTV on defd arg\n", i);
2805 }
2806 break;
2807
2808 /* Applies to both values and tags. Propagate Definedness
2809 property through copies. Note that this isn't optional;
2810 we *have* to do this to keep def[] correct. */
2811 case MOV:
2812 vg_assert(u->tag2 == TempReg);
2813 if (u->tag1 == TempReg) {
2814 if (VGC_IS_SHADOW(u->val1)) {
2815 vg_assert(VGC_IS_SHADOW(u->val2));
2816 def[u->val2] = def[u->val1];
2817 }
2818 }
2819 break;
2820
2821 case PUTV:
2822 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2823 if (def[u->val1] <= 4) {
2824 vg_assert(def[u->val1] == u->size);
2825 u->tag1 = Literal;
2826 u->val1 = 0;
2827 switch (u->size) {
2828 case 4: u->lit32 = 0x00000000; break;
2829 case 2: u->lit32 = 0xFFFF0000; break;
2830 case 1: u->lit32 = 0xFFFFFF00; break;
2831 default: VG_(panic)("vg_cleanup(PUTV)");
2832 }
2833 if (VG_(disassemble))
2834 VG_(printf)(
2835 "at %d: propagate definedness into PUTV\n", i);
2836 }
2837 break;
2838
2839 case STOREV:
2840 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2841 if (def[u->val1] <= 4) {
2842 vg_assert(def[u->val1] == u->size);
2843 u->tag1 = Literal;
2844 u->val1 = 0;
2845 switch (u->size) {
2846 case 4: u->lit32 = 0x00000000; break;
2847 case 2: u->lit32 = 0xFFFF0000; break;
2848 case 1: u->lit32 = 0xFFFFFF00; break;
2849 default: VG_(panic)("vg_cleanup(STOREV)");
2850 }
2851 if (VG_(disassemble))
2852 VG_(printf)(
2853 "at %d: propagate definedness into STandV\n", i);
2854 }
2855 break;
2856
2857 /* Nothing interesting we can do with this, I think. */
2858 case PUTVF:
2859 break;
2860
2861 /* Tag handling operations. */
2862 case TAG2:
2863 vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
2864 vg_assert(u->tag3 == Lit16);
2865 /* Ultra-paranoid "type" checking. */
2866 switch (u->val3) {
2867 case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
2868 case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
2869 case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
2870 vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
2871 break;
2872 default:
2873 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2874 break;
2875 }
2876 switch (u->val3) {
2877 Int sz;
2878 case VgT_UifU4:
2879 sz = 4; goto do_UifU;
2880 case VgT_UifU2:
2881 sz = 2; goto do_UifU;
2882 case VgT_UifU1:
2883 sz = 1; goto do_UifU;
2884 case VgT_UifU0:
2885 sz = 0; goto do_UifU;
2886 do_UifU:
2887 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2888 vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
2889 if (def[u->val1] <= 4) {
2890 /* UifU. The first arg is defined, so result is
2891 simply second arg. Delete this operation. */
2892 vg_assert(def[u->val1] == sz);
2893 NOP_no_msg(u);
2894 if (VG_(disassemble))
2895 VG_(printf)(
2896 "at %d: delete UifU%d due to defd arg1\n",
2897 i, sz);
2898 }
2899 else
2900 if (def[u->val2] <= 4) {
2901 /* UifU. The second arg is defined, so result is
2902 simply first arg. Copy to second. */
2903 vg_assert(def[u->val2] == sz);
2904 u->opcode = MOV;
2905 u->size = 4;
2906 u->tag3 = NoValue;
2907 def[u->val2] = def[u->val1];
2908 if (VG_(disassemble))
2909 VG_(printf)(
2910 "at %d: change UifU%d to MOV due to defd"
2911 " arg2\n",
2912 i, sz);
2913 }
2914 break;
2915 case VgT_ImproveAND4_TQ:
2916 sz = 4; goto do_ImproveAND;
2917 case VgT_ImproveAND1_TQ:
2918 sz = 1; goto do_ImproveAND;
2919 do_ImproveAND:
2920 /* Implements Q = T OR Q. So if Q is entirely defined,
2921 ie all 0s, we get MOV T, Q. */
2922 if (def[u->val2] <= 4) {
2923 vg_assert(def[u->val2] == sz);
2924 u->size = 4; /* Regardless of sz */
2925 u->opcode = MOV;
2926 u->tag3 = NoValue;
2927 def[u->val2] = VGC_UNDEF;
2928 if (VG_(disassemble))
2929 VG_(printf)(
2930 "at %d: change ImproveAND%d_TQ to MOV due "
2931 "to defd arg2\n",
2932 i, sz);
2933 }
2934 break;
2935 default:
2936 goto unhandled;
2937 }
2938 break;
2939
2940 case TAG1:
2941 vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
2942 if (def[u->val1] > 4) break;
2943 /* We now know that the arg to the op is entirely defined.
2944 If the op changes the size of the arg, we must replace
2945 it with a SETV at the new size. If it doesn't change
2946 the size, we can delete it completely. */
2947 switch (u->val3) {
2948 /* Maintain the same size ... */
2949 case VgT_Left4:
2950 vg_assert(def[u->val1] == 4);
2951 NOP_tag1_op(u);
2952 break;
2953 case VgT_PCast11:
2954 vg_assert(def[u->val1] == 1);
2955 NOP_tag1_op(u);
2956 break;
2957 /* Change size ... */
2958 case VgT_PCast40:
2959 vg_assert(def[u->val1] == 4);
2960 SETV_tag1_op(u,0);
2961 def[u->val1] = 0;
2962 break;
2963 case VgT_PCast14:
2964 vg_assert(def[u->val1] == 1);
2965 SETV_tag1_op(u,4);
2966 def[u->val1] = 4;
2967 break;
2968 case VgT_PCast12:
2969 vg_assert(def[u->val1] == 1);
2970 SETV_tag1_op(u,2);
2971 def[u->val1] = 2;
2972 break;
2973 case VgT_PCast10:
2974 vg_assert(def[u->val1] == 1);
2975 SETV_tag1_op(u,0);
2976 def[u->val1] = 0;
2977 break;
2978 case VgT_PCast02:
2979 vg_assert(def[u->val1] == 0);
2980 SETV_tag1_op(u,2);
2981 def[u->val1] = 2;
2982 break;
2983 default:
2984 goto unhandled;
2985 }
2986 if (VG_(disassemble))
2987 VG_(printf)(
2988 "at %d: delete TAG1 %s due to defd arg\n",
2989 i, VG_(nameOfTagOp(u->val3)));
2990 break;
2991
2992 default:
2993 unhandled:
2994 /* We don't know how to handle this uinstr. Be safe, and
2995 set to VGC_VALUE or VGC_UNDEF all temps written by it. */
2996 k = getTempUsage(u, &tempUse[0]);
2997 vg_assert(k <= 3);
2998 for (j = 0; j < k; j++) {
2999 t = tempUse[j].tempNo;
3000 vg_assert(t >= 0 && t < n_temps);
3001 if (!tempUse[j].isWrite) {
3002 /* t is read; ignore it. */
3003 if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
3004 VG_(printf)("ignoring def %d at %s %s\n",
3005 def[t],
3006 VG_(nameUOpcode)(True, u->opcode),
3007 (u->opcode == TAG1 || u->opcode == TAG2)
3008 ? VG_(nameOfTagOp)(u->val3)
3009 : (Char*)"");
3010 } else {
3011 /* t is written; better nullify it. */
3012 def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
3013 }
3014 }
3015 }
3016 }
3017
3018 VG_(jitfree)(def);
3019}
3020
3021
3022/* Top level post-instrumentation cleanup function. */
3023static void vg_cleanup ( UCodeBlock* cb )
3024{
3025 vg_propagate_definedness ( cb );
3026 vg_delete_redundant_SETVs ( cb );
3027}
3028
3029
3030/*------------------------------------------------------------*/
3031/*--- Main entry point for the JITter. ---*/
3032/*------------------------------------------------------------*/
3033
3034/* Translate the basic block beginning at orig_addr, placing the
3035 translation in a vg_malloc'd block, the address and size of which
3036 are returned in trans_addr and trans_size. Length of the original
3037 block is also returned in orig_size. If the latter three are NULL,
3038 this call is being done for debugging purposes, in which case (a)
3039 throw away the translation once it is made, and (b) produce a load
3040 of debugging output.
3041*/
sewardj1e8cdc92002-04-18 11:37:52 +00003042void VG_(translate) ( ThreadState* tst,
3043 /* Identity of thread needing this block */
3044 Addr orig_addr,
sewardjde4a1d02002-03-22 01:27:54 +00003045 UInt* orig_size,
3046 Addr* trans_addr,
3047 UInt* trans_size )
3048{
3049 Int n_disassembled_bytes, final_code_size;
3050 Bool debugging_translation;
3051 UChar* final_code;
3052 UCodeBlock* cb;
3053
3054 VGP_PUSHCC(VgpTranslate);
3055 debugging_translation
3056 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
3057
3058 dis = True;
3059 dis = debugging_translation;
3060
3061 /* Check if we're being asked to jump to a silly address, and if so
3062 record an error message before potentially crashing the entire
3063 system. */
3064 if (VG_(clo_instrument) && !debugging_translation && !dis) {
3065 Addr bad_addr;
3066 Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
3067 if (!ok) {
sewardj1e8cdc92002-04-18 11:37:52 +00003068 VG_(record_jump_error)(tst, bad_addr);
sewardjde4a1d02002-03-22 01:27:54 +00003069 }
3070 }
3071
3072 /* if (VG_(overall_in_count) >= 4800) dis=True; */
3073 if (VG_(disassemble))
3074 VG_(printf)("\n");
3075 if (0 || dis
3076 || (VG_(overall_in_count) > 0 &&
3077 (VG_(overall_in_count) % 1000 == 0))) {
3078 if (0&& (VG_(clo_verbosity) > 1 || dis))
3079 VG_(message)(Vg_UserMsg,
3080 "trans# %d, bb# %lu, in %d, out %d",
3081 VG_(overall_in_count),
3082 VG_(bbs_done),
3083 VG_(overall_in_osize), VG_(overall_in_tsize),
3084 orig_addr );
3085 }
3086 cb = allocCodeBlock();
3087
3088 /* Disassemble this basic block into cb. */
3089 VGP_PUSHCC(VgpToUCode);
3090 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
3091 VGP_POPCC;
3092 /* dis=True; */
3093 /* if (0&& VG_(translations_done) < 617) */
3094 /* dis=False; */
3095 /* Try and improve the code a bit. */
3096 if (VG_(clo_optimise)) {
3097 VGP_PUSHCC(VgpImprove);
3098 vg_improve ( cb );
3099 if (VG_(disassemble))
3100 VG_(ppUCodeBlock) ( cb, "Improved code:" );
3101 VGP_POPCC;
3102 }
3103 /* dis=False; */
3104 /* Add instrumentation code. */
3105 if (VG_(clo_instrument)) {
3106 VGP_PUSHCC(VgpInstrument);
3107 cb = vg_instrument(cb);
3108 VGP_POPCC;
3109 if (VG_(disassemble))
3110 VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
3111 if (VG_(clo_cleanup)) {
3112 VGP_PUSHCC(VgpCleanup);
3113 vg_cleanup(cb);
3114 VGP_POPCC;
3115 if (VG_(disassemble))
3116 VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
3117 }
3118 }
3119
3120 /* Allocate registers. */
3121 VGP_PUSHCC(VgpRegAlloc);
3122 cb = vg_do_register_allocation ( cb );
3123 VGP_POPCC;
3124 /* dis=False; */
3125 /*
3126 if (VG_(disassemble))
3127 VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
3128 */
3129
3130 VGP_PUSHCC(VgpFromUcode);
3131 /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
3132 and so must be VG_(jitfree)'d. */
3133 final_code = VG_(emit_code)(cb, &final_code_size );
3134 VGP_POPCC;
3135 freeCodeBlock(cb);
3136
3137 if (debugging_translation) {
3138 /* Only done for debugging -- throw away final result. */
3139 VG_(jitfree)(final_code);
3140 } else {
3141 /* Doing it for real -- return values to caller. */
3142 *orig_size = n_disassembled_bytes;
3143 *trans_addr = (Addr)final_code;
3144 *trans_size = final_code_size;
3145 }
3146 VGP_POPCC;
3147}
3148
3149/*--------------------------------------------------------------------*/
3150/*--- end vg_translate.c ---*/
3151/*--------------------------------------------------------------------*/