blob: 76c6ef8a9b7c58282df2b60f786ff7d424e6e419 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, an x86 protected-mode emulator
9 designed for debugging and profiling binaries on x86-Unixes.
10
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33
34
35/*------------------------------------------------------------*/
36/*--- Renamings of frequently-used global functions. ---*/
37/*------------------------------------------------------------*/
38
39#define uInstr1 VG_(newUInstr1)
40#define uInstr2 VG_(newUInstr2)
41#define uInstr3 VG_(newUInstr3)
42#define dis VG_(disassemble)
43#define nameIReg VG_(nameOfIntReg)
44#define nameISize VG_(nameOfIntSize)
45#define uLiteral VG_(setLiteralField)
46#define newTemp VG_(getNewTemp)
47#define newShadow VG_(getNewShadow)
48
49
50/*------------------------------------------------------------*/
/*--- Memory management for the translator.                        ---*/
52/*------------------------------------------------------------*/
53
54#define N_JITBLOCKS 4
55#define N_JITBLOCK_SZ 5000
56
57static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
58static Bool jitstorage_inuse[N_JITBLOCKS];
59static Bool jitstorage_initdone = False;
60
61static __inline__ void jitstorage_initialise ( void )
62{
63 Int i;
64 if (jitstorage_initdone) return;
65 jitstorage_initdone = True;
66 for (i = 0; i < N_JITBLOCKS; i++)
67 jitstorage_inuse[i] = False;
68}
69
70void* VG_(jitmalloc) ( Int nbytes )
71{
72 Int i;
73 jitstorage_initialise();
74 if (nbytes > N_JITBLOCK_SZ) {
75 /* VG_(printf)("too large: %d\n", nbytes); */
76 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
77 }
78 for (i = 0; i < N_JITBLOCKS; i++) {
79 if (!jitstorage_inuse[i]) {
80 jitstorage_inuse[i] = True;
81 /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
82 return & jitstorage[i][0];
83 }
84 }
85 VG_(panic)("out of slots in vg_jitmalloc\n");
86 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
87}
88
89void VG_(jitfree) ( void* ptr )
90{
91 Int i;
92 jitstorage_initialise();
93 for (i = 0; i < N_JITBLOCKS; i++) {
94 if (ptr == & jitstorage[i][0]) {
95 vg_assert(jitstorage_inuse[i]);
96 jitstorage_inuse[i] = False;
97 return;
98 }
99 }
100 VG_(free)(VG_AR_PRIVATE, ptr);
101}
102
103/*------------------------------------------------------------*/
104/*--- Basics ---*/
105/*------------------------------------------------------------*/
106
njn4f9c9342002-04-29 16:03:24 +0000107UCodeBlock* VG_(allocCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +0000108{
109 UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
110 cb->used = cb->size = cb->nextTemp = 0;
111 cb->instrs = NULL;
112 return cb;
113}
114
115
njn4f9c9342002-04-29 16:03:24 +0000116void VG_(freeCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000117{
118 if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
119 VG_(free)(VG_AR_PRIVATE, cb);
120}
121
122
123/* Ensure there's enough space in a block to add one uinstr. */
124static __inline__
125void ensureUInstr ( UCodeBlock* cb )
126{
127 if (cb->used == cb->size) {
128 if (cb->instrs == NULL) {
129 vg_assert(cb->size == 0);
130 vg_assert(cb->used == 0);
131 cb->size = 8;
132 cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
133 } else {
134 Int i;
135 UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
136 2 * sizeof(UInstr) * cb->size);
137 for (i = 0; i < cb->used; i++)
138 instrs2[i] = cb->instrs[i];
139 cb->size *= 2;
140 VG_(free)(VG_AR_PRIVATE, cb->instrs);
141 cb->instrs = instrs2;
142 }
143 }
144
145 vg_assert(cb->used < cb->size);
146}
147
148
149__inline__
150void VG_(emptyUInstr) ( UInstr* u )
151{
152 u->val1 = u->val2 = u->val3 = 0;
153 u->tag1 = u->tag2 = u->tag3 = NoValue;
154 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000155 u->jmpkind = JmpBoring;
156 u->smc_check = u->signed_widen = False;
sewardjde4a1d02002-03-22 01:27:54 +0000157 u->lit32 = 0;
158 u->opcode = 0;
159 u->size = 0;
160 u->cond = 0;
161 u->extra4b = 0;
162}
163
164
165/* Add an instruction to a ucode block, and return the index of the
166 instruction. */
167__inline__
168void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
169 Tag tag1, UInt val1,
170 Tag tag2, UInt val2,
171 Tag tag3, UInt val3 )
172{
173 UInstr* ui;
174 ensureUInstr(cb);
175 ui = & cb->instrs[cb->used];
176 cb->used++;
177 VG_(emptyUInstr)(ui);
178 ui->val1 = val1;
179 ui->val2 = val2;
180 ui->val3 = val3;
181 ui->opcode = opcode;
182 ui->tag1 = tag1;
183 ui->tag2 = tag2;
184 ui->tag3 = tag3;
185 ui->size = sz;
186 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
187 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
188 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
189}
190
191
192__inline__
193void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
194 Tag tag1, UInt val1,
195 Tag tag2, UInt val2 )
196{
197 UInstr* ui;
198 ensureUInstr(cb);
199 ui = & cb->instrs[cb->used];
200 cb->used++;
201 VG_(emptyUInstr)(ui);
202 ui->val1 = val1;
203 ui->val2 = val2;
204 ui->opcode = opcode;
205 ui->tag1 = tag1;
206 ui->tag2 = tag2;
207 ui->size = sz;
208 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
209 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
210}
211
212
213__inline__
214void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
215 Tag tag1, UInt val1 )
216{
217 UInstr* ui;
218 ensureUInstr(cb);
219 ui = & cb->instrs[cb->used];
220 cb->used++;
221 VG_(emptyUInstr)(ui);
222 ui->val1 = val1;
223 ui->opcode = opcode;
224 ui->tag1 = tag1;
225 ui->size = sz;
226 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
227}
228
229
230__inline__
231void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
232{
233 UInstr* ui;
234 ensureUInstr(cb);
235 ui = & cb->instrs[cb->used];
236 cb->used++;
237 VG_(emptyUInstr)(ui);
238 ui->opcode = opcode;
239 ui->size = sz;
240}
241
sewardjde4a1d02002-03-22 01:27:54 +0000242/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000243__inline__
244void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000245{
246 ensureUInstr(cb);
247 cb->instrs[cb->used] = *instr;
248 cb->used++;
249}
250
sewardjde4a1d02002-03-22 01:27:54 +0000251/* Copy auxiliary info from one uinstr to another. */
252static __inline__
253void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
254{
255 dst->cond = src->cond;
256 dst->extra4b = src->extra4b;
257 dst->smc_check = src->smc_check;
258 dst->signed_widen = src->signed_widen;
sewardj2e93c502002-04-12 11:12:52 +0000259 dst->jmpkind = src->jmpkind;
sewardjde4a1d02002-03-22 01:27:54 +0000260 dst->flags_r = src->flags_r;
261 dst->flags_w = src->flags_w;
262}
263
264
265/* Set the flag R/W sets on a uinstr. */
266void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
267{
268 /* VG_(ppUInstr)(-1,u); */
269 vg_assert(fr == (fr & FlagsALL));
270 vg_assert(fw == (fw & FlagsALL));
271 u->flags_r = fr;
272 u->flags_w = fw;
273}
274
275
/* Set the lit32 field of the most recent uinsn.  NOTE(review):
   LAST_UINSTR presumably denotes cb->instrs[cb->used-1] (confirm in
   vg_include.h), so the caller must already have emitted at least
   one uinstr into cb. */
void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
{
   LAST_UINSTR(cb).lit32 = lit32;
}
281
282
283Bool VG_(anyFlagUse) ( UInstr* u )
284{
285 return (u->flags_r != FlagsEmpty
286 || u->flags_w != FlagsEmpty);
287}
288
289
290
291
292/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
293 register number. This effectively defines the order in which real
294 registers are allocated. %ebp is excluded since it is permanently
295 reserved for pointing at VG_(baseBlock). %edi is a general spare
296 temp used for Left4 and various misc tag ops.
297
298 Important! If you change the set of allocatable registers from
299 %eax, %ebx, %ecx, %edx, %esi you must change the
300 save/restore sequences in vg_helper_smc_check4 to match!
301*/
302__inline__ Int VG_(rankToRealRegNo) ( Int rank )
303{
304 switch (rank) {
305# if 1
306 /* Probably the best allocation ordering. */
307 case 0: return R_EAX;
308 case 1: return R_EBX;
309 case 2: return R_ECX;
310 case 3: return R_EDX;
311 case 4: return R_ESI;
312# else
313 /* Contrary; probably the worst. Helpful for debugging, tho. */
314 case 4: return R_EAX;
315 case 3: return R_EBX;
316 case 2: return R_ECX;
317 case 1: return R_EDX;
318 case 0: return R_ESI;
319# endif
320 default: VG_(panic)("rankToRealRegNo");
321 }
322}
323
324
325/*------------------------------------------------------------*/
326/*--- Sanity checking uinstrs. ---*/
327/*------------------------------------------------------------*/
328
329/* This seems as good a place as any to record some important stuff
330 about ucode semantics.
331
332 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
333 TempReg are defined to zero-extend the loaded value to 32 bits.
334 This is needed to make the translation of movzbl et al work
335 properly.
336
337 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
338
339 * Arithmetic on TempRegs is at the specified size. For example,
340 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
341 being emitted -- not a 32 bit one.
342
343 * On some insns we allow the cc bit to be set. If so, the
344 intention is that the simulated machine's %eflags register
345 is copied into that of the real machine before the insn,
346 and copied back again afterwards. This means that the
347 code generated for that insn must be very careful only to
348 update %eflags in the intended way. This is particularly
349 important for the routines referenced by CALL insns.
350*/
351
352/* Meaning of operand kinds is as follows:
353
354 ArchReg is a register of the simulated CPU, stored in memory,
355 in vg_m_state.m_eax .. m_edi. These values are stored
356 using the Intel register encoding.
357
358 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
359 available for allocation. As with ArchRegs, these values
360 are stored using the Intel register encoding.
361
362 TempReg is a temporary register used to express the results of
363 disassembly. There is an unlimited supply of them --
364 register allocation and spilling eventually assigns them
365 to RealRegs.
366
367 SpillNo is a spill slot number. The number of required spill
368 slots is VG_MAX_PSEUDOS, in general. Only allowed
369 as the ArchReg operand of GET and PUT.
370
371 Lit16 is a signed 16-bit literal value.
372
373 Literal is a 32-bit literal value. Each uinstr can only hold
374 one of these.
375
376 The disassembled code is expressed purely in terms of ArchReg,
377 TempReg and Literal operands. Eventually, register allocation
378 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
379 and Literals. New x86 code can easily be synthesised from this.
380 There are carefully designed restrictions on which insns can have
381 which operands, intended to make it possible to generate x86 code
382 from the result of register allocation on the ucode efficiently and
383 without need of any further RealRegs.
384
385 Restrictions on insns (as generated by the disassembler) are as
386 follows:
387
388 A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
389 N=NoValue
390
391 GETF T N N
392 PUTF T N N
393
394 GET A,S T N
395 PUT T A,S N
396 LOAD T T N
397 STORE T T N
398 MOV T,L T N
399 CMOV T T N
400 WIDEN T N N
401 JMP T,L N N
402 CALLM L N N
403 CALLM_S N N N
404 CALLM_E N N N
405 PUSH,POP T N N
406 CLEAR L N N
407
408 AND, OR
409 T T N
410
411 ADD, ADC, XOR, SUB, SBB
412 A,L,T T N
413
414 SHL, SHR, SAR, ROL, ROR, RCL, RCR
415 L,T T N
416
417 NOT, NEG, INC, DEC, CC2VAL, BSWAP
418 T N N
419
420 JIFZ T L N
421
422 FPU_R L T N
423 FPU_W L T N
424 FPU L T N
425
   LEA1      T     T     (const in a separate field)
427 LEA2 T T T (const & shift ditto)
428
429 INCEIP L N N
430
431 and for instrumentation insns:
432
433 LOADV T T N
434 STOREV T,L T N
435 GETV A T N
436 PUTV T,L A N
437 GETVF T N N
438 PUTVF T N N
439 WIDENV T N N
440 TESTV A,T N N
441 SETV A,T N N
442 TAG1 T N N
443 TAG2 T T N
444
445 Before register allocation, S operands should not appear anywhere.
446 After register allocation, all T operands should have been
447 converted into Rs, and S operands are allowed in GET and PUT --
448 denoting spill saves/restores.
449
450 The size field should be 0 for insns for which it is meaningless,
451 ie those which do not directly move/operate on data.
452*/
/* Check that a uinstr is well-formed: that its operand tags, size
   and flag-effects are the ones its opcode permits, in either the
   pre- or post-register-allocation regime (selected by beforeRA).
   Returns False on the first violation found; panics on an opcode
   it does not know about. */
Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
{
   /* Operand-shape predicates for operands 1..3:
      TRn = TempReg before RA, RealReg after RA
      An  = ArchReg;  ASn = ArchReg, or SpillNo once RA has run
      Ln  = 32-bit Literal (value lives in lit32, so valN must be 0)
      Lsn = 16-bit Lit16;  Nn = NoValue
      SZn / CCn / FLG_* constrain the size field and flag effects. */
#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
#  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
#  define A1  (u->tag1 == ArchReg)
#  define A2  (u->tag2 == ArchReg)
#  define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
#  define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
#  define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
#  define L1  (u->tag1 == Literal && u->val1 == 0)
#  define L2  (u->tag2 == Literal && u->val2 == 0)
#  define Ls1 (u->tag1 == Lit16)
#  define Ls3 (u->tag3 == Lit16)
#  define N1  (u->tag1 == NoValue)
#  define N2  (u->tag2 == NoValue)
#  define N3  (u->tag3 == NoValue)
#  define SZ4 (u->size == 4)
#  define SZ2 (u->size == 2)
#  define SZ1 (u->size == 1)
#  define SZ0 (u->size == 0)
#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
   /* FPU insns may leave flags alone, or read-only / write-only the
      Z, C and P flags. */
#  define FLG_RD_WR_MAYBE                                         \
    ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)       \
     || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP)      \
     || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
#  define CC1 (!(CC0))
#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
                      ? (u->size == 4) : True)

   /* At most one operand may be a 32-bit Literal, since there is
      only one lit32 field per uinstr to hold its value. */
   Int n_lits = 0;
   if (u->tag1 == Literal) n_lits++;
   if (u->tag2 == Literal) n_lits++;
   if (u->tag3 == Literal) n_lits++;
   if (n_lits > 1)
      return False;

   switch (u->opcode) {
      case GETF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
      case PUTF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
      case CALLM_S: case CALLM_E:
         return SZ0 && N1 && N2 && N3;
      case INCEIP:
         return SZ0 && CC0 && Ls1 && N2 && N3;
      case LEA1:
         return CC0 && TR1 && TR2 && N3 && SZ4;
      case LEA2:
         return CC0 && TR1 && TR2 && TR3 && SZ4;
      case NOP:
         return SZ0 && CC0 && N1 && N2 && N3;
      case GET:
         return CC0 && AS1 && TR2 && N3;
      case PUT:
         return CC0 && TR1 && AS2 && N3;
      case LOAD: case STORE:
         return CC0 && TR1 && TR2 && N3;
      case MOV:
         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
      case CMOV:
         return CC1 && TR1 && TR2 && N3 && SZ4;
      case JMP:
         /* Unconditional jumps must not touch flags; conditional
            ones must read them. */
         return (u->cond==CondAlways ? CC0 : CC1)
                && (TR1 || L1) && N2 && SZ0 && N3;
      case CLEAR:
         return CC0 && Ls1 && N2 && SZ0 && N3;
      case CALLM:
         return SZ0 && Ls1 && N2 && N3;
      case PUSH: case POP:
         return CC0 && TR1 && N2 && N3;
      case AND: case OR:
         return TR1 && TR2 && N3;
      case ADD: case ADC: case XOR: case SUB: case SBB:
         return (A1 || TR1 || L1) && TR2 && N3;
      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
         return (TR1 || L1) && TR2 && N3;
      case NOT: case NEG: case INC: case DEC:
         return TR1 && N2 && N3;
      case BSWAP:
         return TR1 && N2 && N3 && CC0 && SZ4;
      case CC2VAL:
         return CC1 && SZ1 && TR1 && N2 && N3;
      case JIFZ:
         return CC0 && SZ4 && TR1 && L2 && N3;
      case FPU_R: case FPU_W:
         return CC0 && Ls1 && TR2 && N3;
      case FPU:
         return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
      case LOADV:
         return CC0 && TR1 && TR2 && N3;
      case STOREV:
         return CC0 && (TR1 || L1) && TR2 && N3;
      case GETV:
         return CC0 && A1 && TR2 && N3;
      case PUTV:
         return CC0 && (TR1 || L1) && A2 && N3;
      case GETVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case PUTVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case WIDEN:
         return CC0 && TR1 && N2 && N3;
      case TESTV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case SETV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case TAG1:
         return CC0 && TR1 && N2 && Ls3 && SZ0;
      case TAG2:
         return CC0 && TR1 && TR2 && Ls3 && SZ0;
      default:
         VG_(panic)("vg_saneUInstr: unhandled opcode");
   }
#  undef SZ4_IF_TR1
#  undef CC0
#  undef CC1
#  undef SZ4
#  undef SZ2
#  undef SZ1
#  undef SZ0
#  undef TR1
#  undef TR2
#  undef TR3
#  undef A1
#  undef A2
#  undef AS1
#  undef AS2
#  undef AS3
#  undef L1
#  undef Ls1
#  undef L2
#  undef Ls3
#  undef N1
#  undef N2
#  undef N3
#  undef FLG_RD
#  undef FLG_WR
#  undef FLG_RD_WR_MAYBE
}
595
596
/* Sanity checks to do with CALLMs in UCodeBlocks.  Returns True iff
   (a) CALLM_S/CALLM/CALLM_E appear properly nested and balanced,
   with PUSH/POP/CLEAR only occurring inside a CALLM_S..CALLM_E
   section, and (b) within each section no two PUSHes push the same
   TempReg. */
Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
{
   Int  callm   = 0;
   Int  callm_s = 0;
   Int  callm_e = 0;
   Int  callm_ptr, calls_ptr;
   Int  i, j, t;
   Bool incall  = False;

   /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */

   for (i = 0; i < cb->used; i++) {
      switch (cb->instrs[i].opcode) {
         case CALLM:
            if (!incall) return False;
            callm++;
            break;
         case CALLM_S:
            if (incall) return False;   /* no nesting allowed */
            incall = True;
            callm_s++;
            break;
         case CALLM_E:
            if (!incall) return False;
            incall = False;
            callm_e++;
            break;
         case PUSH: case POP: case CLEAR:
            /* Only legal inside a CALLM_S .. CALLM_E section. */
            if (!incall) return False;
            break;
         default:
            break;
      }
   }
   if (incall) return False;
   if (callm != callm_s || callm != callm_e) return False;

   /* Check the sections between CALLM_S and CALLM's.  Ensure that no
      PUSH uinsn pushes any TempReg that any other PUSH in the same
      section pushes.  Ie, check that the TempReg args to PUSHes in
      the section are unique.  If not, the instrumenter generates
      incorrect code for CALLM insns. */

   callm_ptr = 0;

  find_next_CALLM:
   /* Search for the next interval, making calls_ptr .. callm_ptr
      bracket it.  The balance check above guarantees every CALLM has
      a preceding CALLM_S, so the backward scan below terminates. */
   while (callm_ptr < cb->used
          && cb->instrs[callm_ptr].opcode != CALLM)
      callm_ptr++;
   if (callm_ptr == cb->used)
      return True;
   vg_assert(cb->instrs[callm_ptr].opcode == CALLM);

   calls_ptr = callm_ptr - 1;
   while (cb->instrs[calls_ptr].opcode != CALLM_S)
      calls_ptr--;
   vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
   vg_assert(calls_ptr >= 0);

   /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */

   /* For each PUSH insn in the interval ... */
   for (i = calls_ptr + 1; i < callm_ptr; i++) {
      if (cb->instrs[i].opcode != PUSH) continue;
      t = cb->instrs[i].val1;
      /* Ensure no later PUSH insns up to callm_ptr push the same
         TempReg.  Return False if any such are found. */
      for (j = i+1; j < callm_ptr; j++) {
         if (cb->instrs[j].opcode == PUSH &&
             cb->instrs[j].val1 == t)
            return False;
      }
   }

   /* This interval is clean.  Keep going ... */
   callm_ptr++;
   goto find_next_CALLM;
}
678
679
680/*------------------------------------------------------------*/
681/*--- Printing uinstrs. ---*/
682/*------------------------------------------------------------*/
683
/* Map a condition code to its x86 mnemonic suffix.  CondAlways maps
   to "MP" so that a caller printing "J" followed by this name
   renders an unconditional jump as "JMP" (see VG_(ppUInstr)). */
Char* VG_(nameCondcode) ( Condcode cond )
{
   switch (cond) {
      case CondO:      return "o";
      case CondNO:     return "no";
      case CondB:      return "b";
      case CondNB:     return "nb";
      case CondZ:      return "z";
      case CondNZ:     return "nz";
      case CondBE:     return "be";
      case CondNBE:    return "nbe";
      case CondS:      return "s";
      case ConsNS:     return "ns";  /* NOTE(review): "ConsNS" looks like a
                                        typo for "CondNS", but it must match
                                        the enum's spelling in the header --
                                        confirm there before renaming. */
      case CondP:      return "p";
      case CondNP:     return "np";
      case CondL:      return "l";
      case CondNL:     return "nl";
      case CondLE:     return "le";
      case CondNLE:    return "nle";
      case CondAlways: return "MP"; /* hack! */
      default: VG_(panic)("nameCondcode");
   }
}
707
708
709static void vg_ppFlagSet ( Char* prefix, FlagSet set )
710{
711 VG_(printf)("%s", prefix);
712 if (set & FlagD) VG_(printf)("D");
713 if (set & FlagO) VG_(printf)("O");
714 if (set & FlagS) VG_(printf)("S");
715 if (set & FlagZ) VG_(printf)("Z");
716 if (set & FlagA) VG_(printf)("A");
717 if (set & FlagC) VG_(printf)("C");
718 if (set & FlagP) VG_(printf)("P");
719}
720
721
722static void ppTempReg ( Int tt )
723{
724 if ((tt & 1) == 0)
725 VG_(printf)("t%d", tt);
726 else
727 VG_(printf)("q%d", tt-1);
728}
729
730
/* Print operand `operandNo' (1..3) of uinstr u.  `sz' sizes the
   register name used for RealReg/ArchReg operands; `parens' wraps
   the text in parentheses (memory-operand style). */
static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
{
   UInt tag, val;
   switch (operandNo) {
      case 1: tag = u->tag1; val = u->val1; break;
      case 2: tag = u->tag2; val = u->val2; break;
      case 3: tag = u->tag3; val = u->val3; break;
      default: VG_(panic)("ppUOperand(1)");
   }
   /* 32-bit literals keep their value in lit32, not in valN. */
   if (tag == Literal) val = u->lit32;

   if (parens) VG_(printf)("(");
   switch (tag) {
      case TempReg: ppTempReg(val); break;
      case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
      case Literal: VG_(printf)("$0x%x", val); break;
      case Lit16:   VG_(printf)("$0x%x", val); break;
      case NoValue: VG_(printf)("NoValue"); break;
      /* NOTE(review): "%S" (capital) differs from the "%s" used for
         RealReg above -- confirm VG_(printf) treats %S like %s,
         otherwise this is a format-string typo. */
      case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
      case SpillNo: VG_(printf)("spill%d", val); break;
      default: VG_(panic)("ppUOperand(2)");
   }
   if (parens) VG_(printf)(")");
}
755
756
/* Return a printable name for opcode opc.  The arithmetic/shift
   group has both upper- and lower-case spellings (the lower-case
   form is used when printing generated x86); every other opcode has
   an upper-case name only, and requesting its lower-case form is a
   panic. */
Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
{
   switch (opc) {
      case ADD:   return (upper ? "ADD" : "add");
      case ADC:   return (upper ? "ADC" : "adc");
      case AND:   return (upper ? "AND" : "and");
      case OR:    return (upper ? "OR"  : "or");
      case XOR:   return (upper ? "XOR" : "xor");
      case SUB:   return (upper ? "SUB" : "sub");
      case SBB:   return (upper ? "SBB" : "sbb");
      case SHL:   return (upper ? "SHL" : "shl");
      case SHR:   return (upper ? "SHR" : "shr");
      case SAR:   return (upper ? "SAR" : "sar");
      case ROL:   return (upper ? "ROL" : "rol");
      case ROR:   return (upper ? "ROR" : "ror");
      case RCL:   return (upper ? "RCL" : "rcl");
      case RCR:   return (upper ? "RCR" : "rcr");
      case NOT:   return (upper ? "NOT" : "not");
      case NEG:   return (upper ? "NEG" : "neg");
      case INC:   return (upper ? "INC" : "inc");
      case DEC:   return (upper ? "DEC" : "dec");
      case BSWAP: return (upper ? "BSWAP" : "bswap");
      default: break;
   }
   /* Everything below exists in upper case only. */
   if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
   switch (opc) {
      case GETVF:   return "GETVF";
      case PUTVF:   return "PUTVF";
      case TAG1:    return "TAG1";
      case TAG2:    return "TAG2";
      case CALLM_S: return "CALLM_S";
      case CALLM_E: return "CALLM_E";
      case INCEIP:  return "INCEIP";
      case LEA1:    return "LEA1";
      case LEA2:    return "LEA2";
      case NOP:     return "NOP";
      case GET:     return "GET";
      case PUT:     return "PUT";
      case GETF:    return "GETF";
      case PUTF:    return "PUTF";
      case LOAD:    return "LD" ;
      case STORE:   return "ST" ;
      case MOV:     return "MOV";
      case CMOV:    return "CMOV";
      case WIDEN:   return "WIDEN";
      case JMP:     return "J" ;
      case JIFZ:    return "JIFZ" ;
      case CALLM:   return "CALLM";
      case PUSH:    return "PUSH" ;
      case POP:     return "POP" ;
      case CLEAR:   return "CLEAR";
      case CC2VAL:  return "CC2VAL";
      case FPU_R:   return "FPU_R";
      case FPU_W:   return "FPU_W";
      case FPU:     return "FPU" ;
      case LOADV:   return "LOADV";
      case STOREV:  return "STOREV";
      case GETV:    return "GETV";
      case PUTV:    return "PUTV";
      case TESTV:   return "TESTV";
      case SETV:    return "SETV";
      default: VG_(panic)("nameUOpcode: unhandled case");
   }
}
821
822
/* Print uinstr u, prefixed by its index instrNo: the opcode name
   (with a condition suffix for JMP/CC2VAL and a size letter), then
   the operands in an opcode-specific layout, then any flag
   read/write effects. */
void VG_(ppUInstr) ( Int instrNo, UInstr* u )
{
   VG_(printf)("\t%4d: %s", instrNo,
               VG_(nameUOpcode)(True, u->opcode));
   if (u->opcode == JMP || u->opcode == CC2VAL)
      VG_(printf)("%s", VG_(nameCondcode(u->cond)));

   /* Size suffix: o = sizeless, B/W/L/Q = 1/2/4/8 bytes. */
   switch (u->size) {
      case 0:  VG_(printf)("o"); break;
      case 1:  VG_(printf)("B"); break;
      case 2:  VG_(printf)("W"); break;
      case 4:  VG_(printf)("L"); break;
      case 8:  VG_(printf)("Q"); break;
      default: VG_(printf)("%d", (Int)u->size); break;
   }

   switch (u->opcode) {

      case TAG1:
         /* Operand 1 is both source and destination: t1 = op(t1);
            val3 holds the tag-op number. */
         VG_(printf)("\t");
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" )");
         break;

      case TAG2:
         /* t2 = op(t1, t2): operand 2 is both read and written. */
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" )");
         break;

      case CALLM_S: case CALLM_E:
         /* Markers only; no operands to print. */
         break;

      case INCEIP:
         VG_(printf)("\t$%d", u->val1);
         break;

      case LEA2:
         /* lit32(op1,op2,scale), dst */
         VG_(printf)("\t%d(" , u->lit32);
         ppUOperand(u, 1, 4, False);
         VG_(printf)(",");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(",%d), ", (Int)u->extra4b);
         ppUOperand(u, 3, 4, False);
         break;

      case LEA1:
         /* lit32(op1), dst */
         VG_(printf)("\t%d" , u->lit32);
         ppUOperand(u, 1, 4, True);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         break;

      case NOP:
         break;

      case FPU_W:
         /* val1 packs the two FPU opcode bytes; printed high:low. */
         VG_(printf)("\t0x%x:0x%x, ",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         ppUOperand(u, 2, 4, True);
         break;

      case FPU_R:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, True);
         VG_(printf)(", 0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case FPU:
         VG_(printf)("\t0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case STOREV: case LOADV:
      case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
         /* Two operands; the memory side of loads/stores gets
            parentheses. */
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
         break;

      case GETF: case PUTF:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JMP: case CC2VAL:
      case PUSH: case POP: case CLEAR: case CALLM:
         if (u->opcode == JMP) {
            /* Annotate the jump with the kind of transfer it makes. */
            switch (u->jmpkind) {
               case JmpCall:      VG_(printf)("-c"); break;
               case JmpRet:       VG_(printf)("-r"); break;
               case JmpSyscall:   VG_(printf)("-sys"); break;
               case JmpClientReq: VG_(printf)("-cli"); break;
               default: break;
            }
         }
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JIFZ:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case PUTVF: case GETVF:
         VG_(printf)("\t");
         ppUOperand(u, 1, 0, False);
         break;

      case NOT: case NEG: case INC: case DEC: case BSWAP:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case GETV: case PUTV:
         /* The simulated-register side is always printed at size 4. */
         VG_(printf)("\t");
         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
         break;

      case WIDEN:
         /* Show the source size and whether the widen is signed (s)
            or zero (z) extending. */
         VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                              u->signed_widen?'s':'z');
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case TESTV: case SETV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      default: VG_(panic)("ppUInstr: unhandled opcode");
   }

   /* Trailing flag-effect annotation, e.g. " (-rOSZACP -wOSZACP)". */
   if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
      VG_(printf)(" (");
      if (u->flags_r != FlagsEmpty)
         vg_ppFlagSet("-r", u->flags_r);
      if (u->flags_w != FlagsEmpty)
         vg_ppFlagSet("-w", u->flags_w);
      VG_(printf)(")");
   }
   VG_(printf)("\n");
}
990
991
992void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
993{
994 Int i;
995 VG_(printf)("\n%s\n", title);
996 for (i = 0; i < cb->used; i++)
997 if (0 || cb->instrs[i].opcode != NOP)
998 VG_(ppUInstr) ( i, &cb->instrs[i] );
999 VG_(printf)("\n");
1000}
1001
1002
1003/*------------------------------------------------------------*/
1004/*--- uinstr helpers for register allocation ---*/
1005/*--- and code improvement. ---*/
1006/*------------------------------------------------------------*/
1007
/* A structure for communicating temp uses, and for indicating
   temp->real register mappings for patchUInstr. */
typedef
   struct {
      Int  realNo;    /* real reg this temp maps to; only consulted
                         by patchUInstr (getTempUsage leaves it unset) */
      Int  tempNo;    /* the TempReg number */
      Bool isWrite;   /* True if this use writes the temp, False if
                         it reads it */
   }
   TempUse;
1017
1018
/* Get the temp use of a uinstr, parking them in an array supplied by
   the caller, which is assumed to be big enough.  Return the number
   of entries.  Insns which read _and_ write a register wind up
   mentioning it twice.  Entries are placed in the array in program
   order, so that if a reg is read-modified-written, it appears first
   as a read and then as a write.
*/
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{

   /* RD/WR(ono): record a read/write of operand `ono' iff that
      operand is a TempReg.  `mycat' pastes the operand number onto
      the tag/val field names (tag1/val1 etc).  Note that realNo is
      deliberately left unset here. */
#  define RD(ono) \
      if (mycat(u->tag,ono) == TempReg) \
         { arr[n].tempNo = mycat(u->val,ono); \
           arr[n].isWrite = False; n++; }
#  define WR(ono) \
      if (mycat(u->tag,ono) == TempReg) \
         { arr[n].tempNo = mycat(u->val,ono); \
           arr[n].isWrite = True; n++; }

   Int n = 0;
   switch (u->opcode) {
      case LEA1: RD(1); WR(2); break;
      case LEA2: RD(1); RD(2); WR(3); break;

      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
      case FPU_R: case FPU_W: RD(2); break;

      case GETF:  WR(1); break;
      case PUTF:  RD(1); break;

      case GET:   WR(2); break;
      case PUT:   RD(1); break;
      case LOAD:  RD(1); WR(2); break;
      case STORE: RD(1); RD(2); break;
      case MOV:   RD(1); WR(2); break;

      case JMP:   RD(1); break;
      case CLEAR: case CALLM: break;

      case PUSH: RD(1); break;
      case POP:  WR(1); break;

      /* Two-operand read-modify-write group: op2 = op2 `op` op1. */
      case TAG2:
      case CMOV:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
         RD(1); RD(2); WR(2); break;

      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         RD(1); RD(2); WR(2); break;

      /* One-operand read-modify-write group. */
      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
         RD(1); WR(1); break;

      case WIDEN: RD(1); WR(1); break;

      case CC2VAL: WR(1); break;
      case JIFZ:   RD(1); break;

      /* These sizes are only ever consulted when the instrumentation
         code is being added, so the following can return
         manifestly-bogus sizes. */
      case LOADV:  RD(1); WR(2); break;
      case STOREV: RD(1); RD(2); break;
      case GETV:   WR(2); break;
      case PUTV:   RD(1); break;
      case TESTV:  RD(1); break;
      case SETV:   WR(1); break;
      case PUTVF:  RD(1); break;
      case GETVF:  WR(1); break;

      default: VG_(panic)("getTempUsage: unhandled opcode");
   }
   return n;

#  undef RD
#  undef WR
}
1099
1100
1101/* Change temp regs in u into real regs, as directed by tmap. */
1102static __inline__
1103void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
1104{
1105 Int i;
1106 if (u->tag1 == TempReg) {
1107 for (i = 0; i < n_tmap; i++)
1108 if (tmap[i].tempNo == u->val1) break;
1109 if (i == n_tmap) VG_(panic)("patchUInstr(1)");
1110 u->tag1 = RealReg;
1111 u->val1 = tmap[i].realNo;
1112 }
1113 if (u->tag2 == TempReg) {
1114 for (i = 0; i < n_tmap; i++)
1115 if (tmap[i].tempNo == u->val2) break;
1116 if (i == n_tmap) VG_(panic)("patchUInstr(2)");
1117 u->tag2 = RealReg;
1118 u->val2 = tmap[i].realNo;
1119 }
1120 if (u->tag3 == TempReg) {
1121 for (i = 0; i < n_tmap; i++)
1122 if (tmap[i].tempNo == u->val3) break;
1123 if (i == n_tmap) VG_(panic)("patchUInstr(3)");
1124 u->tag3 = RealReg;
1125 u->val3 = tmap[i].realNo;
1126 }
1127}
1128
1129
1130/* Tedious x86-specific hack which compensates for the fact that the
1131 register numbers for %ah .. %dh do not correspond to those for %eax
1132 .. %edx. It maps a (reg size, reg no) pair to the number of the
1133 containing 32-bit reg. */
1134static __inline__
1135Int containingArchRegOf ( Int sz, Int aregno )
1136{
1137 switch (sz) {
1138 case 4: return aregno;
1139 case 2: return aregno;
1140 case 1: return aregno >= 4 ? aregno-4 : aregno;
1141 default: VG_(panic)("containingArchRegOf");
1142 }
1143}
1144
1145
/* If u reads an ArchReg, return the number of the containing arch
   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
static __inline__
Int maybe_uinstrReadsArchReg ( UInstr* u )
{
   switch (u->opcode) {
      case GET:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR: case ROL:
      case ROR: case RCL: case RCR:
         /* NOTE(review): only operand 1 is inspected; presumably an
            ArchReg source can only occur in that slot for these
            opcodes -- confirm against the uinstr builders. */
         if (u->tag1 == ArchReg)
            return containingArchRegOf ( u->size, u->val1 );
         else
            return -1;

      /* All of these are known never to read an ArchReg operand. */
      case GETF: case PUTF:
      case CALLM_S: case CALLM_E:
      case INCEIP:
      case LEA1:
      case LEA2:
      case NOP:
      case PUT:
      case LOAD:
      case STORE:
      case MOV:
      case CMOV:
      case JMP:
      case CALLM: case CLEAR: case PUSH: case POP:
      case NOT: case NEG: case INC: case DEC: case BSWAP:
      case CC2VAL:
      case JIFZ:
      case FPU: case FPU_R: case FPU_W:
      case WIDEN:
         return -1;

      default:
         VG_(ppUInstr)(0,u);
         VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
   }
}
1187
1188static __inline__
1189Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1190{
1191 Int i, k;
1192 TempUse tempUse[3];
1193 k = getTempUsage ( u, &tempUse[0] );
1194 for (i = 0; i < k; i++)
1195 if (tempUse[i].tempNo == tempreg)
1196 return True;
1197 return False;
1198}
1199
1200
1201/*------------------------------------------------------------*/
1202/*--- ucode improvement. ---*/
1203/*------------------------------------------------------------*/
1204
1205/* Improve the code in cb by doing
1206 -- Redundant ArchReg-fetch elimination
1207 -- Redundant PUT elimination
1208 -- Redundant cond-code restore/save elimination
1209 The overall effect of these is to allow target registers to be
1210 cached in host registers over multiple target insns.
1211*/
static void vg_improve ( UCodeBlock* cb )
{
   Int i, j, k, m, n, ar, tr, told, actual_areg;
   /* areg_map[a]: the TempReg currently known to hold the value of
      32-bit arch reg a, or -1 if no binding is known. */
   Int areg_map[8];
   /* annul_put[a]: True when a later PUT to arch reg a makes an
      earlier PUT redundant (pass 2, scanned backwards). */
   Bool annul_put[8];
   TempUse tempUse[3];
   UInstr* u;
   Bool wr;
   /* For each temp t, the insn index at which t's live range ends
      (-1 while unknown).  NULL when the block mentions no temps, in
      which case it is never indexed. */
   Int* last_live_before;
   FlagSet future_dead_flags;

   if (cb->nextTemp > 0)
      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
   else
      last_live_before = NULL;


   /* PASS 1: redundant GET elimination. (Actually, more general than
      that -- eliminates redundant fetches of ArchRegs). */

   /* Find the live-range-ends for all temporaries. Duplicates code
      in the register allocator :-( */

   for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      k = getTempUsage(u, &tempUse[0]);

      /* For each temp usage ... bwds in program order. */
      for (j = k-1; j >= 0; j--) {
         tr = tempUse[j].tempNo;
         wr = tempUse[j].isWrite;
         /* Scanning backwards, the first mention we see is the last
            in program order; record it once and never overwrite. */
         if (last_live_before[tr] == -1) {
            vg_assert(tr >= 0 && tr < cb->nextTemp);
            last_live_before[tr] = wr ? (i+1) : i;
         }
      }

   }

#  define BIND_ARCH_TO_TEMP(archreg,tempreg)\
   { Int q;                                           \
     /* Invalidate any old binding(s) to tempreg. */  \
     for (q = 0; q < 8; q++)                          \
        if (areg_map[q] == tempreg) areg_map[q] = -1; \
     /* Add the new binding. */                       \
     areg_map[archreg] = (tempreg);                   \
   }

   /* Set up the A-reg map. */
   for (i = 0; i < 8; i++) areg_map[i] = -1;

   /* Scan insns. */
   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      if (u->opcode == GET && u->size == 4) {
         /* GET; see if it can be annulled. */
         vg_assert(u->tag1 == ArchReg);
         vg_assert(u->tag2 == TempReg);
         ar = u->val1;
         tr = u->val2;
         told = areg_map[ar];
         if (told != -1 && last_live_before[told] <= i) {
            /* ar already has an old mapping to told, but that runs
               out here. Annul this GET, rename tr to told for the
               rest of the block, and extend told's live range to that
               of tr. */
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            n = last_live_before[tr] + 1;
            if (n > cb->used) n = cb->used;
            last_live_before[told] = last_live_before[tr];
            last_live_before[tr] = i-1;
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
                  i, tr, told,i+1, n-1);
            /* NOTE(review): only operand slots 1 and 2 are renamed;
               presumably tr cannot occur in slot 3 here -- confirm
               (only LEA2 writes slot 3). */
            for (m = i+1; m < n; m++) {
               if (cb->instrs[m].tag1 == TempReg
                   && cb->instrs[m].val1 == tr)
                 cb->instrs[m].val1 = told;
               if (cb->instrs[m].tag2 == TempReg
                   && cb->instrs[m].val2 == tr)
                 cb->instrs[m].val2 = told;
            }
            BIND_ARCH_TO_TEMP(ar,told);
         }
         else
            BIND_ARCH_TO_TEMP(ar,tr);
      }
      else if (u->opcode == GET && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val1 );
         areg_map[actual_areg] = -1;
      }
      else if (u->opcode == PUT && u->size == 4) {
         /* PUT; re-establish t -> a binding */
         vg_assert(u->tag1 == TempReg);
         vg_assert(u->tag2 == ArchReg);
         BIND_ARCH_TO_TEMP(u->val2, u->val1);
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         areg_map[actual_areg] = -1;
      } else {

         /* see if insn has an archreg as a read operand; if so try to
            map it. */
         if (u->tag1 == ArchReg && u->size == 4
             && areg_map[u->val1] != -1) {
            switch (u->opcode) {
               case ADD: case SUB: case AND: case OR: case XOR:
               case ADC: case SBB:
               case SHL: case SHR: case SAR: case ROL: case ROR:
               case RCL: case RCR:
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change ArchReg %S to TempReg t%d\n",
                        i, nameIReg(4,u->val1), areg_map[u->val1]);
                  u->tag1 = TempReg;
                  u->val1 = areg_map[u->val1];
                  /* Remember to extend the live range of the TempReg,
                     if necessary. */
                  if (last_live_before[u->val1] < i)
                     last_live_before[u->val1] = i;
                  break;
               default:
                  break;
            }
         }

         /* boring insn; invalidate any mappings to temps it writes */
         k = getTempUsage(u, &tempUse[0]);

         for (j = 0; j < k; j++) {
            wr = tempUse[j].isWrite;
            if (!wr) continue;
            tr = tempUse[j].tempNo;
            for (m = 0; m < 8; m++)
               if (areg_map[m] == tr) areg_map[m] = -1;
         }
      }

   }

#  undef BIND_ARCH_TO_TEMP

   /* PASS 2: redundant PUT elimination. If doing instrumentation,
      don't annul (delay) puts of %ESP, since the memory check
      machinery always requires the in-memory value of %ESP to be up
      to date.
   */
   for (j = 0; j < 8; j++)
      annul_put[j] = False;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];
      if (u->opcode == NOP) continue;

      if (u->opcode == PUT && u->size == 4) {
         vg_assert(u->tag2 == ArchReg);
         actual_areg = containingArchRegOf ( 4, u->val2 );
         if (annul_put[actual_areg]) {
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            if (VG_(disassemble))
               VG_(printf)("at %d: delete PUT\n", i );
         } else {
            if (!(VG_(clo_instrument) && actual_areg == R_ESP))
               annul_put[actual_areg] = True;
         }
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Sub-word PUT: keep the preceding full-word PUT alive. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         annul_put[actual_areg] = False;
      }
      else if (u->opcode == JMP || u->opcode == JIFZ
               || u->opcode == CALLM) {
         /* Control transfer: all in-memory reg values must be
            up-to-date, so nothing may be annulled past here. */
         for (j = 0; j < 8; j++)
            annul_put[j] = False;
      }
      else {
         /* If an instruction reads an ArchReg, the immediately
            preceding PUT cannot be annulled. */
         actual_areg = maybe_uinstrReadsArchReg ( u );
         if (actual_areg != -1)
            annul_put[actual_areg] = False;
      }
   }

   /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
      dead after this point, annul the MOV insn and rename t2 to t1.
      Further modifies the last_live_before map. */

#  if 0
   VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
   for (i = 0; i < cb->nextTemp; i++)
     VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
   VG_(printf)("\n");
#  endif

   for (i = 0; i < cb->used-1; i++) {
      u = &cb->instrs[i];
      if (u->opcode != MOV) continue;
      /* MOV of a literal has no source temp to rename. */
      if (u->tag1 == Literal) continue;
      vg_assert(u->tag1 == TempReg);
      vg_assert(u->tag2 == TempReg);
      if (last_live_before[u->val1] == i) {
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
               i, u->val2, u->val1, i+1, last_live_before[u->val2] );
         for (j = i+1; j <= last_live_before[u->val2]; j++) {
            if (cb->instrs[j].tag1 == TempReg
                && cb->instrs[j].val1 == u->val2)
               cb->instrs[j].val1 = u->val1;
            if (cb->instrs[j].tag2 == TempReg
                && cb->instrs[j].val2 == u->val2)
               cb->instrs[j].val2 = u->val1;
         }
         last_live_before[u->val1] = last_live_before[u->val2];
         last_live_before[u->val2] = i-1;
         u->opcode = NOP;
         u->tag1 = u->tag2 = NoValue;
      }
   }

   /* PASS 3: redundant condition-code restore/save elimination.
      Scan backwards from the end. future_dead_flags records the set
      of flags which are dead at this point, that is, will be written
      before they are next read. Earlier uinsns which write flags
      already in future_dead_flags can have their writes annulled.
   */
   future_dead_flags = FlagsEmpty;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* We might never make it to insns beyond this one, so be
         conservative. */
      if (u->opcode == JIFZ || u->opcode == JMP) {
         future_dead_flags = FlagsEmpty;
         continue;
      }

      /* We can annul the flags written by this insn if it writes a
         subset (or eq) of the set of flags known to be dead after
         this insn. If not, just record the flags also written by
         this insn.*/
      if (u->flags_w != FlagsEmpty
          && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
         if (VG_(disassemble)) {
            VG_(printf)("at %d: annul flag write ", i);
            vg_ppFlagSet("", u->flags_w);
            VG_(printf)(" due to later ");
            vg_ppFlagSet("", future_dead_flags);
            VG_(printf)("\n");
         }
         u->flags_w = FlagsEmpty;
      } else {
         future_dead_flags
            = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
      }

      /* If this insn also reads flags, empty out future_dead_flags so
         as to force preceding writes not to be annulled. */
      if (u->flags_r != FlagsEmpty)
         future_dead_flags = FlagsEmpty;
   }

   if (last_live_before)
      VG_(jitfree) ( last_live_before );
}
1488
1489
1490/*------------------------------------------------------------*/
1491/*--- The new register allocator. ---*/
1492/*------------------------------------------------------------*/
1493
/* Per-TempReg live-range and location record used by the register
   allocator below. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Int live_after;
      /* Becomes dead for the last time after this insn ... */
      Int dead_before;
      /* The "home" spill slot, if needed. Never changes. */
      Int spill_no;
      /* Where is it? VG_NOTHING==in a spill slot; else the rank of
         the real reg holding it.  (The original comment said
         VG_NOVALUE, but the code below uses VG_NOTHING.) */
      Int real_no;
   }
   TempInfo;
1506
1507
1508/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1509 them in spill locations, and add spill code, if there are not
1510 enough real regs. The usual register allocation deal, in short.
1511
1512 Important redundancy of representation:
1513
1514 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1515 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1516
1517 The .real_no field of a TempInfo gives the current RRR for
1518 this TempReg, or VG_NOVALUE if the TempReg is currently
1519 in memory, in which case it is in the SpillNo denoted by
1520 spillno.
1521
1522 These pieces of information (a fwds-bwds mapping, really) must
1523 be kept consistent!
1524
1525 This allocator uses the so-called Second Chance Bin Packing
1526 algorithm, as described in "Quality and Speed in Linear-scan
1527 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1528 pp142-151). It is simple and fast and remarkably good at
1529 minimising the amount of spill code introduced.
1530*/
1531
static
UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
{
   TempInfo* temp_info;
   /* Fwds map: rank of real reg -> TempReg it holds (VG_NOTHING if
      free).  Kept consistent with temp_info[].real_no at all times. */
   Int real_to_temp[VG_MAX_REALREGS];
   Bool is_spill_cand[VG_MAX_REALREGS];
   /* ss_busy_until_before[s]: first insn index at which slot s is
      free again (0 == free from the start). */
   Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
   Int i, j, k, m, r, tno, max_ss_no;
   Bool wr, defer, isRead, spill_reqd;
   TempUse tempUse[3];
   UCodeBlock* c2;

   /* Used to denote ... well, "no value" in this fn. */
#  define VG_NOTHING (-2)

   /* Initialise the TempReg info. */
   if (c1->nextTemp > 0)
      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
   else
      temp_info = NULL;

   for (i = 0; i < c1->nextTemp; i++) {
      temp_info[i].live_after = VG_NOTHING;
      temp_info[i].dead_before = VG_NOTHING;
      temp_info[i].spill_no = VG_NOTHING;
      /* temp_info[i].real_no is not yet relevant. */
   }

   spill_reqd = False;

   /* Scan fwds to establish live ranges. */

   for (i = 0; i < c1->used; i++) {
      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each temp usage ... fwds in program order */
      for (j = 0; j < k; j++) {
         tno = tempUse[j].tempNo;
         wr = tempUse[j].isWrite;
         if (wr) {
            /* Writes hold a reg live until after this insn. */
            if (temp_info[tno].live_after == VG_NOTHING)
               temp_info[tno].live_after = i;
            if (temp_info[tno].dead_before < i + 1)
               temp_info[tno].dead_before = i + 1;
         } else {
            /* First use of a tmp should be a write. */
            vg_assert(temp_info[tno].live_after != VG_NOTHING);
            /* Reads only hold it live until before this insn. */
            if (temp_info[tno].dead_before < i)
               temp_info[tno].dead_before = i;
         }
      }
   }

#  if 0
   /* Sanity check on live ranges. Expensive but correct. */
   for (i = 0; i < c1->nextTemp; i++) {
      vg_assert( (temp_info[i].live_after == VG_NOTHING
                  && temp_info[i].dead_before == VG_NOTHING)
                 || (temp_info[i].live_after != VG_NOTHING
                     && temp_info[i].dead_before != VG_NOTHING) );
   }
#  endif

   /* Do a rank-based allocation of TempRegs to spill slot numbers.
      We put as few as possible values in spill slots, but
      nevertheless need to have an assignment to them just in case. */

   max_ss_no = -1;

   for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
      ss_busy_until_before[i] = 0;

   for (i = 0; i < c1->nextTemp; i++) {

      /* True iff this temp is unused. */
      if (temp_info[i].live_after == VG_NOTHING)
         continue;

      /* Find the lowest-numbered spill slot which is available at the
         start point of this interval, and assign the interval to
         it. */
      for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
         if (ss_busy_until_before[j] <= temp_info[i].live_after)
            break;
      if (j == VG_MAX_SPILLSLOTS) {
         VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
         VG_(panic)("register allocation failed -- out of spill slots");
      }
      ss_busy_until_before[j] = temp_info[i].dead_before;
      temp_info[i].spill_no = j;
      if (j > max_ss_no)
         max_ss_no = j;
   }

   VG_(total_reg_rank) += (max_ss_no+1);

   /* Show live ranges and assigned spill slot nos. */

   if (VG_(disassemble)) {
      VG_(printf)("Live Range Assignments\n");

      for (i = 0; i < c1->nextTemp; i++) {
         if (temp_info[i].live_after == VG_NOTHING)
            continue;
         VG_(printf)(
            " LR %d is after %d to before %d spillno %d\n",
            i,
            temp_info[i].live_after,
            temp_info[i].dead_before,
            temp_info[i].spill_no
         );
      }
   }

   /* Now that we've established a spill slot number for each used
      temporary, we can go ahead and do the core of the "Second-chance
      binpacking" allocation algorithm. */

   /* Resulting code goes here. We generate it all in a forwards
      pass. */
   c2 = VG_(allocCodeBlock)();

   /* At the start, no TempRegs are assigned to any real register.
      Correspondingly, all temps claim to be currently resident in
      their spill slots, as computed by the previous two passes. */
   for (i = 0; i < VG_MAX_REALREGS; i++)
      real_to_temp[i] = VG_NOTHING;
   for (i = 0; i < c1->nextTemp; i++)
      temp_info[i].real_no = VG_NOTHING;

   if (VG_(disassemble))
      VG_(printf)("\n");

   /* Process each insn in turn. */
   for (i = 0; i < c1->used; i++) {

      if (c1->instrs[i].opcode == NOP) continue;
      VG_(uinstrs_prealloc)++;

#     if 0
      /* Check map consistency. Expensive but correct. */
      for (r = 0; r < VG_MAX_REALREGS; r++) {
         if (real_to_temp[r] != VG_NOTHING) {
            tno = real_to_temp[r];
            vg_assert(tno >= 0 && tno < c1->nextTemp);
            vg_assert(temp_info[tno].real_no == r);
         }
      }
      for (tno = 0; tno < c1->nextTemp; tno++) {
         if (temp_info[tno].real_no != VG_NOTHING) {
            r = temp_info[tno].real_no;
            vg_assert(r >= 0 && r < VG_MAX_REALREGS);
            vg_assert(real_to_temp[r] == tno);
         }
      }
#     endif

      if (VG_(disassemble))
         VG_(ppUInstr)(i, &c1->instrs[i]);

      /* First, free up enough real regs for this insn. This may
         generate spill stores since we may have to evict some TempRegs
         currently in real regs. Also generates spill loads. */

      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each ***different*** temp mentioned in the insn .... */
      for (j = 0; j < k; j++) {

         /* First check if the temp is mentioned again later; if so,
            ignore this mention. We only want to process each temp
            used by the insn once, even if it is mentioned more than
            once. */
         defer = False;
         tno = tempUse[j].tempNo;
         for (m = j+1; m < k; m++)
            if (tempUse[m].tempNo == tno)
               defer = True;
         if (defer)
            continue;

         /* Now we're trying to find a register for tempUse[j].tempNo.
            First of all, if it already has a register assigned, we
            don't need to do anything more. */
         if (temp_info[tno].real_no != VG_NOTHING)
            continue;

         /* No luck. The next thing to do is see if there is a
            currently unassigned register available. If so, bag it. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (real_to_temp[r] == VG_NOTHING)
               break;
         }
         if (r < VG_MAX_REALREGS) {
            real_to_temp[r] = tno;
            temp_info[tno].real_no = r;
            continue;
         }

         /* Unfortunately, that didn't pan out either. So we'll have
            to eject some other unfortunate TempReg into a spill slot
            in order to free up a register. Of course, we need to be
            careful not to eject some other TempReg needed by this
            insn.

            Select r in 0 .. VG_MAX_REALREGS-1 such that
            real_to_temp[r] is not mentioned in
            tempUse[0 .. k-1].tempNo, since it would be just plain
            wrong to eject some other TempReg which we need to use in
            this insn.

            It is here that it is important to make a good choice of
            register to spill. */

         /* First, mark those regs which are not spill candidates. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            is_spill_cand[r] = True;
            for (m = 0; m < k; m++) {
               if (real_to_temp[r] == tempUse[m].tempNo) {
                  is_spill_cand[r] = False;
                  break;
               }
            }
         }

         /* We can choose any r satisfying is_spill_cand[r]. However,
            try to make a good choice. First, try and find r such
            that the associated TempReg is already dead. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (is_spill_cand[r] &&
                temp_info[real_to_temp[r]].dead_before <= i)
               goto have_spill_cand;
         }

         /* No spill cand is mapped to a dead TempReg. Now we really
            _do_ have to generate spill code. Choose r so that the
            next use of its associated TempReg is as far ahead as
            possible, in the hope that this will minimise the number of
            consequent reloads required. This is a bit expensive, but
            we don't have to do it very often. */
         {
            Int furthest_r = VG_MAX_REALREGS;
            Int furthest = 0;
            for (r = 0; r < VG_MAX_REALREGS; r++) {
               if (!is_spill_cand[r]) continue;
               for (m = i+1; m < c1->used; m++)
                  if (uInstrMentionsTempReg(&c1->instrs[m],
                                            real_to_temp[r]))
                     break;
               if (m > furthest) {
                  furthest = m;
                  furthest_r = r;
               }
            }
            r = furthest_r;
            goto have_spill_cand;
         }

         have_spill_cand:
         if (r == VG_MAX_REALREGS)
            VG_(panic)("new reg alloc: out of registers ?!");

         /* Eject r. Important refinement: don't bother if the
            associated TempReg is now dead. */
         vg_assert(real_to_temp[r] != VG_NOTHING);
         vg_assert(real_to_temp[r] != tno);
         temp_info[real_to_temp[r]].real_no = VG_NOTHING;
         if (temp_info[real_to_temp[r]].dead_before > i) {
            uInstr2(c2, PUT, 4,
                        RealReg, VG_(rankToRealRegNo)(r),
                        SpillNo, temp_info[real_to_temp[r]].spill_no);
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Decide if tno is read. */
         isRead = False;
         for (m = 0; m < k; m++)
            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
               isRead = True;

         /* If so, generate a spill load. */
         if (isRead) {
            uInstr2(c2, GET, 4,
                        SpillNo, temp_info[tno].spill_no,
                        RealReg, VG_(rankToRealRegNo)(r) );
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Update the forwards and backwards maps. */
         real_to_temp[r] = tno;
         temp_info[tno].real_no = r;
      }

      /* By this point, all TempRegs mentioned by the insn have been
         brought into real regs. We now copy the insn to the output
         and use patchUInstr to convert its TempRegs into
         realregs. */
      for (j = 0; j < k; j++)
         tempUse[j].realNo
            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
      VG_(copyUInstr)(c2, &c1->instrs[i]);
      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);

      if (VG_(disassemble)) {
         VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         VG_(printf)("\n");
      }
   }

   if (temp_info != NULL)
      VG_(jitfree)(temp_info);

   VG_(freeCodeBlock)(c1);

   if (spill_reqd)
      VG_(translations_needing_spill)++;

   return c2;

#  undef VG_NOTHING

}
1864
1865
1866/*------------------------------------------------------------*/
1867/*--- New instrumentation machinery. ---*/
1868/*------------------------------------------------------------*/
1869
1870static
1871VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
1872{
1873 switch (sz) {
1874 case 4: return VgT_ImproveOR4_TQ;
1875 case 2: return VgT_ImproveOR2_TQ;
1876 case 1: return VgT_ImproveOR1_TQ;
1877 default: VG_(panic)("get_VgT_ImproveOR_TQ");
1878 }
1879}
1880
1881
1882static
1883VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
1884{
1885 switch (sz) {
1886 case 4: return VgT_ImproveAND4_TQ;
1887 case 2: return VgT_ImproveAND2_TQ;
1888 case 1: return VgT_ImproveAND1_TQ;
1889 default: VG_(panic)("get_VgT_ImproveAND_TQ");
1890 }
1891}
1892
1893
1894static
1895VgTagOp get_VgT_Left ( Int sz )
1896{
1897 switch (sz) {
1898 case 4: return VgT_Left4;
1899 case 2: return VgT_Left2;
1900 case 1: return VgT_Left1;
1901 default: VG_(panic)("get_VgT_Left");
1902 }
1903}
1904
1905
1906static
1907VgTagOp get_VgT_UifU ( Int sz )
1908{
1909 switch (sz) {
1910 case 4: return VgT_UifU4;
1911 case 2: return VgT_UifU2;
1912 case 1: return VgT_UifU1;
1913 case 0: return VgT_UifU0;
1914 default: VG_(panic)("get_VgT_UifU");
1915 }
1916}
1917
1918
1919static
1920VgTagOp get_VgT_DifD ( Int sz )
1921{
1922 switch (sz) {
1923 case 4: return VgT_DifD4;
1924 case 2: return VgT_DifD2;
1925 case 1: return VgT_DifD1;
1926 default: VG_(panic)("get_VgT_DifD");
1927 }
1928}
1929
1930
1931static
1932VgTagOp get_VgT_PCast ( Int szs, Int szd )
1933{
1934 if (szs == 4 && szd == 0) return VgT_PCast40;
1935 if (szs == 2 && szd == 0) return VgT_PCast20;
1936 if (szs == 1 && szd == 0) return VgT_PCast10;
1937 if (szs == 0 && szd == 1) return VgT_PCast01;
1938 if (szs == 0 && szd == 2) return VgT_PCast02;
1939 if (szs == 0 && szd == 4) return VgT_PCast04;
1940 if (szs == 1 && szd == 4) return VgT_PCast14;
1941 if (szs == 1 && szd == 2) return VgT_PCast12;
1942 if (szs == 1 && szd == 1) return VgT_PCast11;
1943 VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
1944 VG_(panic)("get_VgT_PCast");
1945}
1946
1947
1948static
1949VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
1950{
1951 if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
1952 if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
1953
1954 if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
1955 if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
1956
1957 if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
1958 if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
1959
1960 VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
1961 VG_(panic)("get_VgT_Widen");
1962}
1963
1964/* Pessimally cast the spec'd shadow from one size to another. */
1965static
1966void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
1967{
1968 if (szs == 0 && szd == 0)
1969 return;
1970 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1971 NoValue, 0,
1972 Lit16, get_VgT_PCast(szs,szd));
1973}
1974
1975
1976/* Create a signed or unsigned widen of the spec'd shadow from one
1977 size to another. The only allowed size transitions are 1->2, 1->4
1978 and 2->4. */
1979static
1980void create_Widen ( UCodeBlock* cb, Bool signed_widen,
1981 Int szs, Int szd, Int tempreg )
1982{
1983 if (szs == szd) return;
1984 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1985 NoValue, 0,
1986 Lit16, get_VgT_Widen(signed_widen,szs,szd));
1987}
1988
1989
1990/* Get the condition codes into a new shadow, at the given size. */
1991static
1992Int create_GETVF ( UCodeBlock* cb, Int sz )
1993{
1994 Int tt = newShadow(cb);
1995 uInstr1(cb, GETVF, 0, TempReg, tt);
1996 create_PCast(cb, 0, sz, tt);
1997 return tt;
1998}
1999
2000
2001/* Save the condition codes from the spec'd shadow. */
2002static
2003void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
2004{
2005 if (sz == 0) {
2006 uInstr1(cb, PUTVF, 0, TempReg, tempreg);
2007 } else {
2008 Int tt = newShadow(cb);
2009 uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
2010 create_PCast(cb, sz, 0, tt);
2011 uInstr1(cb, PUTVF, 0, TempReg, tt);
2012 }
2013}
2014
2015
2016/* Do Left on the spec'd shadow. */
2017static
2018void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
2019{
2020 uInstr3(cb, TAG1, 0,
2021 TempReg, tempreg,
2022 NoValue, 0,
2023 Lit16, get_VgT_Left(sz));
2024}
2025
2026
2027/* Do UifU on ts and td, putting the result in td. */
2028static
2029void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
2030{
2031 uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
2032 Lit16, get_VgT_UifU(sz));
2033}
2034
2035
2036/* Do DifD on ts and td, putting the result in td. */
2037static
2038void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
2039{
2040 uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
2041 Lit16, get_VgT_DifD(sz));
2042}
2043
2044
2045/* Do HelpAND on value tval and tag tqqq, putting the result in
2046 tqqq. */
2047static
2048void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
2049{
2050 uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
2051 Lit16, get_VgT_ImproveAND_TQ(sz));
2052}
2053
2054
2055/* Do HelpOR on value tval and tag tqqq, putting the result in
2056 tqqq. */
2057static
2058void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
2059{
2060 uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
2061 Lit16, get_VgT_ImproveOR_TQ(sz));
2062}
2063
2064
2065/* Get the shadow for an operand described by (tag, val). Emit code
2066 to do this and return the identity of the shadow holding the
2067 result. The result tag is always copied into a new shadow, so it
2068 can be modified without trashing the original.*/
2069static
2070Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
2071 Int sz, Int tag, Int val )
2072{
2073 Int sh;
2074 sh = newShadow(cb);
2075 if (tag == TempReg) {
2076 uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
2077 return sh;
2078 }
2079 if (tag == Literal) {
2080 uInstr1(cb, SETV, sz, TempReg, sh);
2081 return sh;
2082 }
2083 if (tag == ArchReg) {
2084 uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
2085 return sh;
2086 }
2087 VG_(panic)("getOperandShadow");
2088}
2089
2090
2091
/* Create and return an instrumented version of cb_in.  Free cb_in
   before returning.

   This is the V-bit (definedness) instrumentation pass.  Every value
   TempReg t has a shadow TempReg SHADOW(t) carrying its definedness
   tag.  For each incoming uinstr we emit uinstrs which compute the
   shadows of the results from the shadows of the operands -- via
   SETV/TESTV/GETV/PUTV/LOADV/STOREV and the TAG1/TAG2 helpers
   (create_Left, create_UifU, create_PCast, ...) -- and then copy the
   original uinstr through with VG_(copyUInstr).

   Naming: q* locals hold shadow temp numbers; qs = source shadow,
   qd = destination shadow, qt/qtt = scratch shadows. */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
   UCodeBlock* cb;
   Int i, j;
   UInstr* u_in;
   Int qs, qd, qt, qtt;   /* shadow tempregs; reset per instruction */
   cb = VG_(allocCodeBlock)();
   /* Keep temp numbering in step with cb_in so SHADOW() stays valid. */
   cb->nextTemp = cb_in->nextTemp;

   for (i = 0; i < cb_in->used; i++) {
      qs = qd = qt = qtt = INVALID_TEMPREG;
      u_in = &cb_in->instrs[i];

      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */

      /* VG_(ppUInstr)(0, u_in); */
      switch (u_in->opcode) {

         case NOP:
            break;

         case INCEIP:
            /* No value flow; just keep it. */
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Loads and stores.  Test the V bits for the address.  24
            Mar 02: since the address is A-checked anyway, there's not
            really much point in doing the V-check too, unless you
            think that you might use addresses which are undefined but
            still addressible.  Hence the optionalisation of the V
            check.

            The LOADV/STOREV does an addressibility check for the
            address. */

         case LOAD:
            if (VG_(clo_check_addrVs)) {
               /* TESTV reports use of an undefined address; the SETV
                  afterwards marks it defined to avoid duplicate
                  reports on later uses. */
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            }
            uInstr2(cb, LOADV, u_in->size,
                        TempReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;
         case STORE:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
            }
            uInstr2(cb, STOREV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        TempReg, u_in->val2);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Moving stuff around.  Make the V bits follow accordingly,
            but don't do anything else.  */

         case GET:
            uInstr2(cb, GETV, u_in->size,
                        ArchReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;
         case PUT:
            uInstr2(cb, PUTV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        ArchReg, u_in->val2);
            VG_(copyUInstr)(cb, u_in);
            break;

         case GETF:
            /* This is not the smartest way to do it, but should work. */
            qd = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;
         case PUTF:
            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         case MOV:
            switch (u_in->tag1) {
               case TempReg:
                  /* Shadow follows the value: copy the source tag. */
                  uInstr2(cb, MOV, 4,
                              TempReg, SHADOW(u_in->val1),
                              TempReg, SHADOW(u_in->val2));
                  break;
               case Literal:
                  /* Literal source is fully defined. */
                  uInstr1(cb, SETV, u_in->size,
                              TempReg, SHADOW(u_in->val2));
                  break;
               default:
                  VG_(panic)("vg_instrument: MOV");
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Special case of add, where one of the operands is a literal.
            lea1(t) = t + some literal.
            Therefore: lea1#(qa) = left(qa)
         */
         case LEA1:
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
            create_Left(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Another form of add.
            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
                                and is 0,1,2 or 3.
            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
            Note, subtly, that the shift puts zeroes at the bottom of qt,
            meaning Valid, since the corresponding shift of tt puts
            zeroes at the bottom of tb.
         */
         case LEA2: {
            Int shift;
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            /* extra4b holds the index scale (1/2/4/8); map to a
               shift amount. */
            switch (u_in->extra4b) {
               case 1: shift = 0; break;
               case 2: shift = 1; break;
               case 4: shift = 2; break;
               case 8: shift = 3; break;
               default: VG_(panic)( "vg_instrument(LEA2)" );
            }
            qs = SHADOW(u_in->val1);
            qt = SHADOW(u_in->val2);
            qd = SHADOW(u_in->val3);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
            if (shift > 0) {
               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
               uLiteral(cb, shift);
            }
            create_UifU(cb, 4, qs, qd);
            create_Left(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;
         }

         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
         case INC: case DEC:
            qd = SHADOW(u_in->val1);
            create_Left(cb, u_in->size, qd);
            if (u_in->flags_w != FlagsEmpty)
               create_PUTVF(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* This is a HACK (approximation :-) */
         /* rcl#/rcr#(qs,qd)
               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
                 eflags# = q0
                 qd =pcast-0-sz(q0)
            Ie, cast everything down to a single bit, then back up.
            This assumes that any bad bits infect the whole word and
            the eflags.
         */
         case RCL: case RCR:
            vg_assert(u_in->flags_r != FlagsEmpty);
            /* The following assertion looks like it makes sense, but is
               actually wrong.  Consider this:
                  rcll    %eax
                  imull   %eax, %eax
               The rcll writes O and C but so does the imull, so the O and C
               write of the rcll is annulled by the prior improvement pass.
               Noticed by Kevin Ryde <user42@zip.com.au>
            */
            /* vg_assert(u_in->flags_w != FlagsEmpty); */
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            /* We can safely modify qs; cast it to 0-size. */
            create_PCast(cb, u_in->size, 0, qs);
            qd = SHADOW(u_in->val2);
            create_PCast(cb, u_in->size, 0, qd);
            /* qs is cast-to-0(shift count#), and qd is
               cast-to-0(value#). */
            create_UifU(cb, 0, qs, qd);
            /* qs is now free; reuse it for the flag definedness. */
            qs = create_GETVF(cb, 0);
            create_UifU(cb, 0, qs, qd);
            create_PUTVF(cb, 0, qd);
            create_PCast(cb, 0, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* for OP in shl shr sar rol ror
            (qs is shift count#, qd is value to be OP#d)
            OP(ts,td)
            OP#(qs,qd)
               = pcast-1-sz(qs) `UifU` OP(ts,qd)
            So we apply OP to the tag bits too, and then UifU with
            the shift count# to take account of the possibility of it
            being undefined.

            A bit subtle:
               ROL/ROR rearrange the tag bits as per the value bits.
               SHL/SHR shifts zeroes into the value, and corresponding
                  zeroes indicating Definedness into the tag.
               SAR copies the top bit of the value downwards, and therefore
                  SAR also copies the definedness of the top bit too.
            So in all five cases, we just apply the same op to the tag
            bits as is applied to the value bits.  Neat!
         */
         case SHL:
         case SHR: case SAR:
         case ROL: case ROR: {
            Int t_amount = INVALID_TEMPREG;
            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);

            /* Make qs hold shift-count# and make
               t_amount be a TempReg holding the shift count. */
            if (u_in->tag1 == Literal) {
               t_amount = newTemp(cb);
               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
               uLiteral(cb, u_in->lit32);
               qs = SHADOW(t_amount);
               uInstr1(cb, SETV, 1, TempReg, qs);
            } else {
               t_amount = u_in->val1;
               qs = SHADOW(u_in->val1);
            }

            /* Apply the same shift/rotate to the tag bits (see the
               big comment above for why this is sound). */
            uInstr2(cb, u_in->opcode,
                        u_in->size,
                        TempReg, t_amount,
                        TempReg, qd);
            qt = newShadow(cb);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            create_PCast(cb, 1, u_in->size, qt);
            create_UifU(cb, u_in->size, qt, qd);
            VG_(copyUInstr)(cb, u_in);
            break;
         }

         /* One simple tag operation. */
         case WIDEN:
            vg_assert(u_in->tag1 == TempReg);
            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
                             SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* not#(x) = x (since bitwise independent) */
         case NOT:
            vg_assert(u_in->tag1 == TempReg);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* neg#(x) = left(x) (derivable from case for SUB) */
         case NEG:
            vg_assert(u_in->tag1 == TempReg);
            create_Left(cb, u_in->size, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* bswap#(x) = bswap(x) */
         case BSWAP:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->size == 4);
            qd = SHADOW(u_in->val1);
            uInstr1(cb, BSWAP, 4, TempReg, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
         case CC2VAL:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            qt = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* cmov#(qs,qd) = cmov(qs,qd)
            That is, do the cmov of tags using the same flags as for
            the data (obviously).  However, first do a test on the
            validity of the flags.
         */
         case CMOV:
            vg_assert(u_in->size == 4);
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            vg_assert(u_in->flags_w == FlagsEmpty);
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            qt = create_GETVF(cb, 0);
            uInstr1(cb, TESTV, 0, TempReg, qt);
            /* qt should never be referred to again.  Nevertheless
               ... */
            uInstr1(cb, SETV, 0, TempReg, qt);

            /* Conditionally move the tag, under the same condition
               and flags as the data move. */
            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
            LAST_UINSTR(cb).cond    = u_in->cond;
            LAST_UINSTR(cb).flags_r = u_in->flags_r;

            VG_(copyUInstr)(cb, u_in);
            break;

         /* add#/sub#(qs,qd)
               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
               = left(qs) `UifU` left(qd)
               = left(qs `UifU` qd)
            adc#/sbb#(qs,qd)
               = left(qs `UifU` qd) `UifU` pcast(eflags#)
            Second arg (dest) is TempReg.
            First arg (src) is Literal or TempReg or ArchReg.
         */
         case ADD: case SUB:
         case ADC: case SBB:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            create_Left(cb, u_in->size, qd);
            if (u_in->opcode == ADC || u_in->opcode == SBB) {
               /* Carry-in means the flags' definedness feeds the
                  result too. */
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, u_in->size);
               create_UifU(cb, u_in->size, qt, qd);
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* xor#(qs,qd) = qs `UifU` qd */
         case XOR:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* and#/or#(qs,qd)
               = (qs `UifU` qd) `DifD` improve(vs,qs)
                                `DifD` improve(vd,qd)
            where improve is the relevant one of
                Improve{AND,OR}_TQ
            Use the following steps, with qt as a temp:
               qt = improve(vd,qd)
               qd = qs `UifU` qd
               qd = qt `DifD` qd
               qt = improve(vs,qs)
               qd = qt `DifD` qd
         */
         case AND: case OR:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);
            qs = SHADOW(u_in->val1);
            qt = newShadow(cb);

            /* qt = improve(vd,qd) */
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
            /* qd = qs `UifU` qd */
            create_UifU(cb, u_in->size, qs, qd);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* qt = improve(vs,qs) */
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* So, finally qd is the result tag. */
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Machinery to do with supporting CALLM.  Copy the start and
            end markers only to make the result easier to read
            (debug); they generate no code and have no effect.
         */
         case CALLM_S: case CALLM_E:
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Copy PUSH and POP verbatim.  Arg/result absval
            calculations are done when the associated CALL is
            processed.  CLEAR has no effect on absval calculations but
            needs to be copied.
         */
         case PUSH: case POP: case CLEAR:
            VG_(copyUInstr)(cb, u_in);
            break;

         /* In short:
               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
            We have to decide on a size to do the computation at,
            although the choice doesn't affect correctness.  We will
            do a pcast to the final size anyway, so the only important
            factor is to choose a size which minimises the total
            number of casts needed.  Valgrind: just use size 0,
            regardless.  It may not be very good for performance
            but does simplify matters, mainly by reducing the number
            of different pessimising casts which have to be implemented.
         */
         case CALLM: {
            UInstr* uu;
            Bool res_used;

            /* Now generate the code.  Get the final result absval
               into qt. */
            qt  = newShadow(cb);
            qtt = newShadow(cb);
            uInstr1(cb, SETV, 0, TempReg, qt);
            /* Scan backwards to the matching CALLM_S, folding each
               PUSHed argument's shadow into qt. */
            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != PUSH) continue;
               /* cast via a temporary */
               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
                           TempReg, qtt);
               create_PCast(cb, uu->size, 0, qtt);
               create_UifU(cb, 0, qtt, qt);
            }
            /* Remembering also that flags read count as inputs. */
            if (u_in->flags_r != FlagsEmpty) {
               qtt = create_GETVF(cb, 0);
               create_UifU(cb, 0, qtt, qt);
            }

            /* qt now holds the result tag.  If any results from the
               call are used, either by fetching with POP or
               implicitly by writing the flags, we copy the result
               absval to the relevant location.  If not used, the call
               must have been for its side effects, so we test qt here
               and now.  Note that this assumes that all values
               removed by POP continue to be live.  So dead args
               *must* be removed with CLEAR, not by POPping them into
               a dummy tempreg.
            */
            res_used = False;
            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != POP) continue;
               /* Cast via a temp. */
               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
               create_PCast(cb, 0, uu->size, qtt);
               uInstr2(cb, MOV, 4, TempReg, qtt,
                           TempReg, SHADOW(uu->val1));
               res_used = True;
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, 0, qt);
               res_used = True;
            }
            if (!res_used) {
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            VG_(copyUInstr)(cb, u_in);
            break;
         }
         /* Whew ... */

         case JMP:
            if (u_in->tag1 == TempReg) {
               /* Indirect jump: the target address must be defined. */
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            } else {
               vg_assert(u_in->tag1 == Literal);
            }
            if (u_in->cond != CondAlways) {
               /* Conditional jump: the flags it reads must be
                  defined. */
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, 0);
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         case JIFZ:
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
            uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Emit a check on the address used.  For FPU_R, the value
            loaded into the FPU is checked at the time it is read from
            memory (see synth_fpu_mem_check_actions). */
         case FPU_R: case FPU_W:
            vg_assert(u_in->tag2 == TempReg);
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
            uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* For FPU insns not referencing memory, just copy thru. */
         case FPU:
            VG_(copyUInstr)(cb, u_in);
            break;

         default:
            VG_(ppUInstr)(0, u_in);
            VG_(panic)( "vg_instrument: unhandled case");

      } /* end of switch (u_in->opcode) */

   } /* end of for loop */

   VG_(freeCodeBlock)(cb_in);
   return cb;
}
2618
2619/*------------------------------------------------------------*/
2620/*--- Clean up mem check instrumentation. ---*/
2621/*------------------------------------------------------------*/
2622
/* A shadow (tag) temp is an odd-numbered temp; even-numbered temps
   hold ordinary values.  NOTE(review): this presumes SHADOW() maps a
   value temp to val*2+1 -- confirm against its definition.
   Fix: the argument is now parenthesised, so expressions like
   VGC_IS_SHADOW(t + 1) expand correctly (the old form bound `% 2`
   tighter than `+`). */
#define VGC_IS_SHADOW(tempreg) (((tempreg) % 2) == 1)

/* def[]-array states used by vg_propagate_definedness: a shadow temp
   whose definedness is unknown, and the marker for ordinary value
   temps.  Values 0,1,2,4 mean "known fully defined at that size". */
#define VGC_UNDEF ((UChar)100)
#define VGC_VALUE ((UChar)101)

/* Annul a uinstr, silently. */
#define NOP_no_msg(uu) \
   do { (uu)->opcode = NOP; } while (False)

/* Annul a TAG1 op whose argument is known to be fully defined.
   Fix: the body now consistently uses the (uu) parameter instead of
   reaching for a variable named `u` at the call site.  It still
   deliberately reads the caller's loop index `i` for the diagnostic,
   so it may only be used inside the cleanup loops below. */
#define NOP_tag1_op(uu) \
   do { (uu)->opcode = NOP; \
        if (VG_(disassemble)) \
           VG_(printf)("at %d: delete %s due to defd arg\n", \
                       i, VG_(nameOfTagOp)((uu)->val3)); \
   } while (False)

/* Convert a TAG1 op on a fully-defined argument into a SETV at the
   op's result size.  Same caveat as NOP_tag1_op: relies on the
   caller's `i`. */
#define SETV_tag1_op(uu,newsz) \
   do { (uu)->opcode = SETV; \
        (uu)->size = (newsz); \
        (uu)->tag2 = (uu)->tag3 = NoValue; \
        if (VG_(disassemble)) \
           VG_(printf)("at %d: convert %s to SETV%d " \
                       "due to defd arg\n", \
                       i, VG_(nameOfTagOp)((uu)->val3), (newsz)); \
   } while (False)
2646
2647
2648
/* Run backwards and delete SETVs on shadow temps for which the next
   action is a write.  Needs an env saying whether or not the next
   action is a write.  The supplied UCodeBlock is destructively
   modified.

   This is a backwards dead-store pass over shadow temps only:
   next_is_write[t] == True means temp t's current value is dead
   (the next thing that happens to it, scanning forwards, is a
   write), so a tag-producing instruction targeting t can be
   annulled.  The else-if chain below is order-sensitive: once an
   instruction is annulled it must NOT fall through to the
   getTempUsage() bookkeeping at the bottom, since its reads/writes
   no longer happen. */
static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
{
   Bool* next_is_write;   /* per-temp deadness env, indexed by temp no */
   Int i, j, k, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   /* NOTE(review): jitmalloc result is used unchecked -- presumably
      it aborts internally on OOM; confirm. */
   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));

   /* At the end of the block every temp is dead. */
   for (i = 0; i < n_temps; i++) next_is_write[i] = True;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* If we're not checking address V bits, there will be a lot of
         GETVs, TAG1s and TAG2s calculating values which are never
         used.  These first three cases get rid of them. */

      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
                            && next_is_write[u->val2]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete GETV\n", i);
      } else

      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
                            && next_is_write[u->val1]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG1\n", i);
      } else

      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
                            && next_is_write[u->val2]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG2\n", i);
      } else

      /* We do the rest of these regardless of whether or not
         addresses are V-checked. */

      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
                           && next_is_write[u->val2]) {
         /* This MOV is pointless because the target is dead at this
            point.  Delete it. */
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete MOV\n", i);
      } else

      if (u->opcode == SETV) {
         if (u->tag1 == TempReg) {
            vg_assert(VGC_IS_SHADOW(u->val1));
            if (next_is_write[u->val1]) {
               /* This write is pointless, so annul it. */
               u->opcode = NOP;
               u->size = 0;
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete SETV\n", i);
            } else {
               /* This write has a purpose; don't annul it, but do
                  notice that we did it. */
               next_is_write[u->val1] = True;
            }

         }

      } else {
         /* Find out what this insn does to the temps, and update the
            env: a read makes the temp live (next action is not a
            write), a write makes it dead.  Entries are applied in
            reverse so that for an insn that both reads and writes a
            temp, the read (earlier in tempUse) wins. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = k-1; j >= 0; j--) {
            next_is_write[ tempUse[j].tempNo ]
               = tempUse[j].isWrite;
         }
      }

   }

   VG_(jitfree)(next_is_write);
}
2746
2747
/* Run forwards, propagating and using the is-completely-defined
   property.  This removes a lot of redundant tag-munging code.
   Unfortunately it requires intimate knowledge of how each uinstr and
   tagop modifies its arguments.  This duplicates knowledge of uinstr
   tempreg uses embodied in getTempUsage(), which is unfortunate.
   The supplied UCodeBlock* is modified in-place.

   For each value temp, def[] should hold VGC_VALUE.

   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
   definitely known to be fully defined at that size.  In all other
   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
   undefined.  In cases of doubt, VGC_UNDEF is always safe.
*/
static void vg_propagate_definedness ( UCodeBlock* cb )
{
   UChar* def;            /* per-temp lattice: 0/1/2/4 = fully defined
                             at that size; VGC_UNDEF/VGC_VALUE otherwise */
   Int i, j, k, t, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
   /* Start pessimistically: shadows possibly undefined, values are
      just values. */
   for (i = 0; i < n_temps; i++)
      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;

   /* Run forwards, detecting and using the all-defined property. */

   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      switch (u->opcode) {

      /* Tag-handling uinstrs. */

      /* Deal with these quickly. */
      case NOP:
      case INCEIP:
         break;

      /* Make a tag defined. */
      case SETV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         /* Record "fully defined at this size". */
         def[u->val1] = u->size;
         break;

      /* Check definedness of a tag. */
      case TESTV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            /* Statically known defined: the runtime test can never
               fire, so delete it. */
            vg_assert(def[u->val1] == u->size);
            NOP_no_msg(u);
            if (VG_(disassemble))
               VG_(printf)("at %d: delete TESTV on defd arg\n", i);
         }
         break;

      /* Applies to both values and tags.  Propagate Definedness
         property through copies.  Note that this isn't optional;
         we *have* to do this to keep def[] correct. */
      case MOV:
         vg_assert(u->tag2 == TempReg);
         if (u->tag1 == TempReg) {
            if (VGC_IS_SHADOW(u->val1)) {
               vg_assert(VGC_IS_SHADOW(u->val2));
               def[u->val2] = def[u->val1];
            }
         }
         break;

      case PUTV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            /* Known defined: replace the temp operand with the
               literal all-defined V-bit pattern (0 bits = defined;
               high bits of the narrower sizes stay 1 = undefined). */
            vg_assert(def[u->val1] == u->size);
            u->tag1 = Literal;
            u->val1 = 0;
            switch (u->size) {
               case 4: u->lit32 = 0x00000000; break;
               case 2: u->lit32 = 0xFFFF0000; break;
               case 1: u->lit32 = 0xFFFFFF00; break;
               default: VG_(panic)("vg_cleanup(PUTV)");
            }
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: propagate definedness into PUTV\n", i);
         }
         break;

      case STOREV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            vg_assert(def[u->val1] == u->size);
            u->tag1 = Literal;
            u->val1 = 0;
            switch (u->size) {
               case 4: u->lit32 = 0x00000000; break;
               case 2: u->lit32 = 0xFFFF0000; break;
               case 1: u->lit32 = 0xFFFFFF00; break;
               default: VG_(panic)("vg_cleanup(STOREV)");
            }
            if (VG_(disassemble))
               /* NOTE(review): message says "STandV"; presumably
                  meant "STOREV" -- confirm before relying on it. */
               VG_(printf)(
                  "at %d: propagate definedness into STandV\n", i);
         }
         break;

      /* Nothing interesting we can do with this, I think. */
      case PUTVF:
         break;

      /* Tag handling operations. */
      case TAG2:
         vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
         vg_assert(u->tag3 == Lit16);
         /* Ultra-paranoid "type" checking.  The Improve*_TQ ops take
            a *value* temp as first arg; everything else takes a
            shadow. */
         switch (u->val3) {
            case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
            case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
            case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
               vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
               break;
            default:
               vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
               break;
         }
         switch (u->val3) {
            Int sz;
            case VgT_UifU4:
               sz = 4; goto do_UifU;
            case VgT_UifU2:
               sz = 2; goto do_UifU;
            case VgT_UifU1:
               sz = 1; goto do_UifU;
            case VgT_UifU0:
               sz = 0; goto do_UifU;
            do_UifU:
               vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
               vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
               if (def[u->val1] <= 4) {
                  /* UifU.  The first arg is defined, so result is
                     simply second arg.  Delete this operation. */
                  vg_assert(def[u->val1] == sz);
                  NOP_no_msg(u);
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: delete UifU%d due to defd arg1\n",
                        i, sz);
               }
               else
               if (def[u->val2] <= 4) {
                  /* UifU.  The second arg is defined, so result is
                     simply first arg.  Copy to second. */
                  vg_assert(def[u->val2] == sz);
                  u->opcode = MOV;
                  u->size = 4;
                  u->tag3 = NoValue;
                  def[u->val2] = def[u->val1];
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change UifU%d to MOV due to defd"
                        " arg2\n",
                        i, sz);
               }
               break;
            case VgT_ImproveAND4_TQ:
               sz = 4; goto do_ImproveAND;
            case VgT_ImproveAND1_TQ:
               sz = 1; goto do_ImproveAND;
            do_ImproveAND:
               /* Implements Q = T OR Q.  So if Q is entirely defined,
                  ie all 0s, we get MOV T, Q. */
               /* NOTE(review): only sizes 4 and 1 are special-cased;
                  ImproveAND2_TQ and all ImproveOR ops fall to the
                  outer default, which just pessimises written temps.
                  Presumably those sizes/ops were not worth the
                  effort -- confirm. */
               if (def[u->val2] <= 4) {
                  vg_assert(def[u->val2] == sz);
                  u->size = 4; /* Regardless of sz */
                  u->opcode = MOV;
                  u->tag3 = NoValue;
                  /* The result now holds the value temp's contents,
                     which are not a known-defined tag. */
                  def[u->val2] = VGC_UNDEF;
                  if (VG_(disassemble))
                     VG_(printf)(
                         "at %d: change ImproveAND%d_TQ to MOV due "
                         "to defd arg2\n",
                         i, sz);
               }
               break;
            default:
               goto unhandled;
         }
         break;

      case TAG1:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] > 4) break;
         /* We now know that the arg to the op is entirely defined.
            If the op changes the size of the arg, we must replace
            it with a SETV at the new size.  If it doesn't change
            the size, we can delete it completely. */
         switch (u->val3) {
            /* Maintain the same size ... */
            case VgT_Left4:
               vg_assert(def[u->val1] == 4);
               NOP_tag1_op(u);
               break;
            case VgT_PCast11:
               vg_assert(def[u->val1] == 1);
               NOP_tag1_op(u);
               break;
            /* Change size ... */
            case VgT_PCast40:
               vg_assert(def[u->val1] == 4);
               SETV_tag1_op(u,0);
               def[u->val1] = 0;
               break;
            case VgT_PCast14:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,4);
               def[u->val1] = 4;
               break;
            case VgT_PCast12:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,2);
               def[u->val1] = 2;
               break;
            case VgT_PCast10:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,0);
               def[u->val1] = 0;
               break;
            case VgT_PCast02:
               vg_assert(def[u->val1] == 0);
               SETV_tag1_op(u,2);
               def[u->val1] = 2;
               break;
            default:
               goto unhandled;
         }
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete TAG1 %s due to defd arg\n",
               i, VG_(nameOfTagOp(u->val3)));
         break;

      default:
      unhandled:
         /* We don't know how to handle this uinstr.  Be safe, and
            set to VGC_VALUE or VGC_UNDEF all temps written by it. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = 0; j < k; j++) {
            t = tempUse[j].tempNo;
            vg_assert(t >= 0 && t < n_temps);
            if (!tempUse[j].isWrite) {
               /* t is read; ignore it. */
               if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
                  VG_(printf)("ignoring def %d at %s %s\n",
                              def[t],
                              VG_(nameUOpcode)(True, u->opcode),
                              (u->opcode == TAG1 || u->opcode == TAG2)
                                 ? VG_(nameOfTagOp)(u->val3)
                                 : (Char*)"");
            } else {
               /* t is written; better nullify it. */
               def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
            }
         }
      }
   }

   VG_(jitfree)(def);
}
3018
3019
/* Top level post-instrumentation cleanup function.  Order matters:
   propagating definedness first converts TESTVs/TAG ops on known-
   defined shadows into NOPs/SETVs, which the second pass can then
   delete when their targets are dead. */
static void vg_cleanup ( UCodeBlock* cb )
{
   vg_propagate_definedness ( cb );
   vg_delete_redundant_SETVs ( cb );
}
3026
3027
3028/*------------------------------------------------------------*/
3029/*--- Main entry point for the JITter. ---*/
3030/*------------------------------------------------------------*/
3031
3032/* Translate the basic block beginning at orig_addr, placing the
3033 translation in a vg_malloc'd block, the address and size of which
3034 are returned in trans_addr and trans_size. Length of the original
3035 block is also returned in orig_size. If the latter three are NULL,
3036 this call is being done for debugging purposes, in which case (a)
3037 throw away the translation once it is made, and (b) produce a load
3038 of debugging output.
3039*/
sewardj1e8cdc92002-04-18 11:37:52 +00003040void VG_(translate) ( ThreadState* tst,
3041 /* Identity of thread needing this block */
3042 Addr orig_addr,
sewardjde4a1d02002-03-22 01:27:54 +00003043 UInt* orig_size,
3044 Addr* trans_addr,
3045 UInt* trans_size )
3046{
3047 Int n_disassembled_bytes, final_code_size;
3048 Bool debugging_translation;
3049 UChar* final_code;
3050 UCodeBlock* cb;
3051
3052 VGP_PUSHCC(VgpTranslate);
3053 debugging_translation
3054 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
3055
3056 dis = True;
3057 dis = debugging_translation;
3058
3059 /* Check if we're being asked to jump to a silly address, and if so
3060 record an error message before potentially crashing the entire
3061 system. */
3062 if (VG_(clo_instrument) && !debugging_translation && !dis) {
3063 Addr bad_addr;
3064 Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
3065 if (!ok) {
sewardj1e8cdc92002-04-18 11:37:52 +00003066 VG_(record_jump_error)(tst, bad_addr);
sewardjde4a1d02002-03-22 01:27:54 +00003067 }
3068 }
3069
3070 /* if (VG_(overall_in_count) >= 4800) dis=True; */
3071 if (VG_(disassemble))
3072 VG_(printf)("\n");
3073 if (0 || dis
3074 || (VG_(overall_in_count) > 0 &&
3075 (VG_(overall_in_count) % 1000 == 0))) {
3076 if (0&& (VG_(clo_verbosity) > 1 || dis))
3077 VG_(message)(Vg_UserMsg,
3078 "trans# %d, bb# %lu, in %d, out %d",
3079 VG_(overall_in_count),
3080 VG_(bbs_done),
3081 VG_(overall_in_osize), VG_(overall_in_tsize),
3082 orig_addr );
3083 }
njn4f9c9342002-04-29 16:03:24 +00003084 cb = VG_(allocCodeBlock)();
sewardjde4a1d02002-03-22 01:27:54 +00003085
3086 /* Disassemble this basic block into cb. */
3087 VGP_PUSHCC(VgpToUCode);
3088 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
3089 VGP_POPCC;
3090 /* dis=True; */
3091 /* if (0&& VG_(translations_done) < 617) */
3092 /* dis=False; */
3093 /* Try and improve the code a bit. */
3094 if (VG_(clo_optimise)) {
3095 VGP_PUSHCC(VgpImprove);
3096 vg_improve ( cb );
3097 if (VG_(disassemble))
3098 VG_(ppUCodeBlock) ( cb, "Improved code:" );
3099 VGP_POPCC;
3100 }
3101 /* dis=False; */
3102 /* Add instrumentation code. */
3103 if (VG_(clo_instrument)) {
3104 VGP_PUSHCC(VgpInstrument);
3105 cb = vg_instrument(cb);
3106 VGP_POPCC;
3107 if (VG_(disassemble))
3108 VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
3109 if (VG_(clo_cleanup)) {
3110 VGP_PUSHCC(VgpCleanup);
3111 vg_cleanup(cb);
3112 VGP_POPCC;
3113 if (VG_(disassemble))
3114 VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
3115 }
3116 }
3117
njn4f9c9342002-04-29 16:03:24 +00003118 //VG_(disassemble) = True;
3119
3120 /* Add cache simulation code. */
3121 if (VG_(clo_cachesim)) {
3122 VGP_PUSHCC(VgpCacheInstrument);
3123 cb = VG_(cachesim_instrument)(cb, orig_addr);
3124 VGP_POPCC;
3125 if (VG_(disassemble))
3126 VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
3127 }
3128
3129 //VG_(disassemble) = False;
3130
sewardjde4a1d02002-03-22 01:27:54 +00003131 /* Allocate registers. */
3132 VGP_PUSHCC(VgpRegAlloc);
3133 cb = vg_do_register_allocation ( cb );
3134 VGP_POPCC;
3135 /* dis=False; */
3136 /*
3137 if (VG_(disassemble))
3138 VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
3139 */
3140
3141 VGP_PUSHCC(VgpFromUcode);
3142 /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
3143 and so must be VG_(jitfree)'d. */
3144 final_code = VG_(emit_code)(cb, &final_code_size );
3145 VGP_POPCC;
njn4f9c9342002-04-29 16:03:24 +00003146 VG_(freeCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00003147
3148 if (debugging_translation) {
3149 /* Only done for debugging -- throw away final result. */
3150 VG_(jitfree)(final_code);
3151 } else {
3152 /* Doing it for real -- return values to caller. */
3153 *orig_size = n_disassembled_bytes;
3154 *trans_addr = (Addr)final_code;
3155 *trans_size = final_code_size;
3156 }
3157 VGP_POPCC;
3158}
3159
3160/*--------------------------------------------------------------------*/
3161/*--- end vg_translate.c ---*/
3162/*--------------------------------------------------------------------*/