blob: 0a806944ec3d1dcd8608eb451231d31f84e4dc8f [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, an x86 protected-mode emulator
9 designed for debugging and profiling binaries on x86-Unixes.
10
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33
34
35/*------------------------------------------------------------*/
36/*--- Renamings of frequently-used global functions. ---*/
37/*------------------------------------------------------------*/
38
39#define uInstr1 VG_(newUInstr1)
40#define uInstr2 VG_(newUInstr2)
41#define uInstr3 VG_(newUInstr3)
42#define dis VG_(disassemble)
43#define nameIReg VG_(nameOfIntReg)
44#define nameISize VG_(nameOfIntSize)
45#define uLiteral VG_(setLiteralField)
46#define newTemp VG_(getNewTemp)
47#define newShadow VG_(getNewShadow)
48
49
50/*------------------------------------------------------------*/
51/*--- Memory management for the translater. ---*/
52/*------------------------------------------------------------*/
53
54#define N_JITBLOCKS 4
55#define N_JITBLOCK_SZ 5000
56
57static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
58static Bool jitstorage_inuse[N_JITBLOCKS];
59static Bool jitstorage_initdone = False;
60
61static __inline__ void jitstorage_initialise ( void )
62{
63 Int i;
64 if (jitstorage_initdone) return;
65 jitstorage_initdone = True;
66 for (i = 0; i < N_JITBLOCKS; i++)
67 jitstorage_inuse[i] = False;
68}
69
70void* VG_(jitmalloc) ( Int nbytes )
71{
72 Int i;
73 jitstorage_initialise();
74 if (nbytes > N_JITBLOCK_SZ) {
75 /* VG_(printf)("too large: %d\n", nbytes); */
76 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
77 }
78 for (i = 0; i < N_JITBLOCKS; i++) {
79 if (!jitstorage_inuse[i]) {
80 jitstorage_inuse[i] = True;
81 /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
82 return & jitstorage[i][0];
83 }
84 }
85 VG_(panic)("out of slots in vg_jitmalloc\n");
86 return VG_(malloc)(VG_AR_PRIVATE, nbytes);
87}
88
89void VG_(jitfree) ( void* ptr )
90{
91 Int i;
92 jitstorage_initialise();
93 for (i = 0; i < N_JITBLOCKS; i++) {
94 if (ptr == & jitstorage[i][0]) {
95 vg_assert(jitstorage_inuse[i]);
96 jitstorage_inuse[i] = False;
97 return;
98 }
99 }
100 VG_(free)(VG_AR_PRIVATE, ptr);
101}
102
103/*------------------------------------------------------------*/
104/*--- Basics ---*/
105/*------------------------------------------------------------*/
106
njn4f9c9342002-04-29 16:03:24 +0000107UCodeBlock* VG_(allocCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +0000108{
109 UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
110 cb->used = cb->size = cb->nextTemp = 0;
111 cb->instrs = NULL;
112 return cb;
113}
114
115
njn4f9c9342002-04-29 16:03:24 +0000116void VG_(freeCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000117{
118 if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
119 VG_(free)(VG_AR_PRIVATE, cb);
120}
121
122
123/* Ensure there's enough space in a block to add one uinstr. */
124static __inline__
125void ensureUInstr ( UCodeBlock* cb )
126{
127 if (cb->used == cb->size) {
128 if (cb->instrs == NULL) {
129 vg_assert(cb->size == 0);
130 vg_assert(cb->used == 0);
131 cb->size = 8;
132 cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
133 } else {
134 Int i;
135 UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
136 2 * sizeof(UInstr) * cb->size);
137 for (i = 0; i < cb->used; i++)
138 instrs2[i] = cb->instrs[i];
139 cb->size *= 2;
140 VG_(free)(VG_AR_PRIVATE, cb->instrs);
141 cb->instrs = instrs2;
142 }
143 }
144
145 vg_assert(cb->used < cb->size);
146}
147
148
149__inline__
150void VG_(emptyUInstr) ( UInstr* u )
151{
152 u->val1 = u->val2 = u->val3 = 0;
153 u->tag1 = u->tag2 = u->tag3 = NoValue;
154 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000155 u->jmpkind = JmpBoring;
156 u->smc_check = u->signed_widen = False;
sewardjde4a1d02002-03-22 01:27:54 +0000157 u->lit32 = 0;
158 u->opcode = 0;
159 u->size = 0;
160 u->cond = 0;
161 u->extra4b = 0;
162}
163
164
165/* Add an instruction to a ucode block, and return the index of the
166 instruction. */
167__inline__
168void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
169 Tag tag1, UInt val1,
170 Tag tag2, UInt val2,
171 Tag tag3, UInt val3 )
172{
173 UInstr* ui;
174 ensureUInstr(cb);
175 ui = & cb->instrs[cb->used];
176 cb->used++;
177 VG_(emptyUInstr)(ui);
178 ui->val1 = val1;
179 ui->val2 = val2;
180 ui->val3 = val3;
181 ui->opcode = opcode;
182 ui->tag1 = tag1;
183 ui->tag2 = tag2;
184 ui->tag3 = tag3;
185 ui->size = sz;
186 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
187 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
188 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
189}
190
191
192__inline__
193void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
194 Tag tag1, UInt val1,
195 Tag tag2, UInt val2 )
196{
197 UInstr* ui;
198 ensureUInstr(cb);
199 ui = & cb->instrs[cb->used];
200 cb->used++;
201 VG_(emptyUInstr)(ui);
202 ui->val1 = val1;
203 ui->val2 = val2;
204 ui->opcode = opcode;
205 ui->tag1 = tag1;
206 ui->tag2 = tag2;
207 ui->size = sz;
208 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
209 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
210}
211
212
213__inline__
214void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
215 Tag tag1, UInt val1 )
216{
217 UInstr* ui;
218 ensureUInstr(cb);
219 ui = & cb->instrs[cb->used];
220 cb->used++;
221 VG_(emptyUInstr)(ui);
222 ui->val1 = val1;
223 ui->opcode = opcode;
224 ui->tag1 = tag1;
225 ui->size = sz;
226 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
227}
228
229
230__inline__
231void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
232{
233 UInstr* ui;
234 ensureUInstr(cb);
235 ui = & cb->instrs[cb->used];
236 cb->used++;
237 VG_(emptyUInstr)(ui);
238 ui->opcode = opcode;
239 ui->size = sz;
240}
241
sewardjde4a1d02002-03-22 01:27:54 +0000242/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000243__inline__
244void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000245{
246 ensureUInstr(cb);
247 cb->instrs[cb->used] = *instr;
248 cb->used++;
249}
250
sewardjde4a1d02002-03-22 01:27:54 +0000251/* Copy auxiliary info from one uinstr to another. */
252static __inline__
253void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
254{
255 dst->cond = src->cond;
256 dst->extra4b = src->extra4b;
257 dst->smc_check = src->smc_check;
258 dst->signed_widen = src->signed_widen;
sewardj2e93c502002-04-12 11:12:52 +0000259 dst->jmpkind = src->jmpkind;
sewardjde4a1d02002-03-22 01:27:54 +0000260 dst->flags_r = src->flags_r;
261 dst->flags_w = src->flags_w;
262}
263
264
265/* Set the flag R/W sets on a uinstr. */
266void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
267{
268 /* VG_(ppUInstr)(-1,u); */
269 vg_assert(fr == (fr & FlagsALL));
270 vg_assert(fw == (fw & FlagsALL));
271 u->flags_r = fr;
272 u->flags_w = fw;
273}
274
275
276/* Set the lit32 field of the most recent uinsn. */
277void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
278{
279 LAST_UINSTR(cb).lit32 = lit32;
280}
281
282
283Bool VG_(anyFlagUse) ( UInstr* u )
284{
285 return (u->flags_r != FlagsEmpty
286 || u->flags_w != FlagsEmpty);
287}
288
289
290
291
292/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
293 register number. This effectively defines the order in which real
294 registers are allocated. %ebp is excluded since it is permanently
295 reserved for pointing at VG_(baseBlock). %edi is a general spare
296 temp used for Left4 and various misc tag ops.
297
298 Important! If you change the set of allocatable registers from
299 %eax, %ebx, %ecx, %edx, %esi you must change the
sewardj18d75132002-05-16 11:06:21 +0000300 save/restore sequences in various places to match!
sewardjde4a1d02002-03-22 01:27:54 +0000301*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
   /* See the comment above for why %ebp and %edi never appear here.
      The #if selects between the normal ordering and a reversed one
      that is occasionally useful for debugging the allocator. */
   switch (rank) {
#     if 1
      /* Probably the best allocation ordering. */
      case 0: return R_EAX;
      case 1: return R_EBX;
      case 2: return R_ECX;
      case 3: return R_EDX;
      case 4: return R_ESI;
#     else
      /* Contrary; probably the worst.  Helpful for debugging, tho. */
      case 4: return R_EAX;
      case 3: return R_EBX;
      case 2: return R_ECX;
      case 1: return R_EDX;
      case 0: return R_ESI;
#     endif
      /* rank out of range 0 .. VG_MAX_REALREGS-1. */
      default: VG_(panic)("rankToRealRegNo");
   }
}
323
324
325/*------------------------------------------------------------*/
326/*--- Sanity checking uinstrs. ---*/
327/*------------------------------------------------------------*/
328
329/* This seems as good a place as any to record some important stuff
330 about ucode semantics.
331
332 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
333 TempReg are defined to zero-extend the loaded value to 32 bits.
334 This is needed to make the translation of movzbl et al work
335 properly.
336
337 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
338
339 * Arithmetic on TempRegs is at the specified size. For example,
340 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
341 being emitted -- not a 32 bit one.
342
343 * On some insns we allow the cc bit to be set. If so, the
344 intention is that the simulated machine's %eflags register
345 is copied into that of the real machine before the insn,
346 and copied back again afterwards. This means that the
347 code generated for that insn must be very careful only to
348 update %eflags in the intended way. This is particularly
349 important for the routines referenced by CALL insns.
350*/
351
352/* Meaning of operand kinds is as follows:
353
354 ArchReg is a register of the simulated CPU, stored in memory,
355 in vg_m_state.m_eax .. m_edi. These values are stored
356 using the Intel register encoding.
357
358 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
359 available for allocation. As with ArchRegs, these values
360 are stored using the Intel register encoding.
361
362 TempReg is a temporary register used to express the results of
363 disassembly. There is an unlimited supply of them --
364 register allocation and spilling eventually assigns them
365 to RealRegs.
366
367 SpillNo is a spill slot number. The number of required spill
368 slots is VG_MAX_PSEUDOS, in general. Only allowed
369 as the ArchReg operand of GET and PUT.
370
371 Lit16 is a signed 16-bit literal value.
372
373 Literal is a 32-bit literal value. Each uinstr can only hold
374 one of these.
375
376 The disassembled code is expressed purely in terms of ArchReg,
377 TempReg and Literal operands. Eventually, register allocation
378 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
379 and Literals. New x86 code can easily be synthesised from this.
380 There are carefully designed restrictions on which insns can have
381 which operands, intended to make it possible to generate x86 code
382 from the result of register allocation on the ucode efficiently and
383 without need of any further RealRegs.
384
385 Restrictions on insns (as generated by the disassembler) are as
386 follows:
387
388 A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
389 N=NoValue
390
391 GETF T N N
392 PUTF T N N
393
394 GET A,S T N
395 PUT T A,S N
396 LOAD T T N
397 STORE T T N
398 MOV T,L T N
399 CMOV T T N
400 WIDEN T N N
401 JMP T,L N N
402 CALLM L N N
403 CALLM_S N N N
404 CALLM_E N N N
405 PUSH,POP T N N
406 CLEAR L N N
407
408 AND, OR
409 T T N
410
411 ADD, ADC, XOR, SUB, SBB
412 A,L,T T N
413
414 SHL, SHR, SAR, ROL, ROR, RCL, RCR
415 L,T T N
416
417 NOT, NEG, INC, DEC, CC2VAL, BSWAP
418 T N N
419
420 JIFZ T L N
421
422 FPU_R L T N
423 FPU_W L T N
424 FPU L T N
425
426 LEA1 T T (const in a seperate field)
427 LEA2 T T T (const & shift ditto)
428
429 INCEIP L N N
430
431 and for instrumentation insns:
432
433 LOADV T T N
434 STOREV T,L T N
435 GETV A T N
436 PUTV T,L A N
437 GETVF T N N
438 PUTVF T N N
439 WIDENV T N N
440 TESTV A,T N N
441 SETV A,T N N
442 TAG1 T N N
443 TAG2 T T N
444
445 Before register allocation, S operands should not appear anywhere.
446 After register allocation, all T operands should have been
447 converted into Rs, and S operands are allowed in GET and PUT --
448 denoting spill saves/restores.
449
450 The size field should be 0 for insns for which it is meaningless,
451 ie those which do not directly move/operate on data.
452*/
/* Check the well-formedness of a single uinstr against the operand
   rules tabulated in the big comment above.  beforeRA == True means
   register operands are expected still to be TempRegs; False means
   register allocation has run, so they must be RealRegs and SpillNo
   operands become legal in GET/PUT. */
Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
{
   /* Predicates: TRn/An/ASn/Ln/Lsn/Nn test the kind of operand n;
      SZn tests the size field; CC0/CC1 and the FLG macros test the
      flag read/write sets. */
#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
#  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
#  define A1  (u->tag1 == ArchReg)
#  define A2  (u->tag2 == ArchReg)
#  define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
#  define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
#  define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
#  define L1  (u->tag1 == Literal && u->val1 == 0)
#  define L2  (u->tag2 == Literal && u->val2 == 0)
#  define Ls1 (u->tag1 == Lit16)
#  define Ls3 (u->tag3 == Lit16)
#  define N1  (u->tag1 == NoValue)
#  define N2  (u->tag2 == NoValue)
#  define N3  (u->tag3 == NoValue)
#  define SZ4 (u->size == 4)
#  define SZ2 (u->size == 2)
#  define SZ1 (u->size == 1)
#  define SZ0 (u->size == 0)
#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
#  define FLG_RD_WR_MAYBE                                           \
    ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)         \
     || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP)        \
     || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
#  define CC1 (!(CC0))
#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
                      ? (u->size == 4) : True)

   /* At most one operand may be a 32-bit Literal, since all three
      operands share the single lit32 field. */
   Int n_lits = 0;
   if (u->tag1 == Literal) n_lits++;
   if (u->tag2 == Literal) n_lits++;
   if (u->tag3 == Literal) n_lits++;
   if (n_lits > 1)
      return False;

   switch (u->opcode) {
      case GETF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
      case PUTF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
      case CALLM_S: case CALLM_E:
         return SZ0 && N1 && N2 && N3;
      case INCEIP:
         return SZ0 && CC0 && Ls1 && N2 && N3;
      case LEA1:
         return CC0 && TR1 && TR2 && N3 && SZ4;
      case LEA2:
         return CC0 && TR1 && TR2 && TR3 && SZ4;
      case NOP:
         return SZ0 && CC0 && N1 && N2 && N3;
      case GET:
         return CC0 && AS1 && TR2 && N3;
      case PUT:
         return CC0 && TR1 && AS2 && N3;
      case LOAD: case STORE:
         return CC0 && TR1 && TR2 && N3;
      case MOV:
         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
      case CMOV:
         return CC1 && TR1 && TR2 && N3 && SZ4;
      case JMP:
         /* Unconditional jumps must not touch flags; conditional ones
            must. */
         return (u->cond==CondAlways ? CC0 : CC1)
                && (TR1 || L1) && N2 && SZ0 && N3;
      case CLEAR:
         return CC0 && Ls1 && N2 && SZ0 && N3;
      case CALLM:
         return SZ0 && Ls1 && N2 && N3;
      case PUSH: case POP:
         return CC0 && TR1 && N2 && N3;
      case AND: case OR:
         return TR1 && TR2 && N3;
      case ADD: case ADC: case XOR: case SUB: case SBB:
         return (A1 || TR1 || L1) && TR2 && N3;
      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
         return (TR1 || L1) && TR2 && N3;
      case NOT: case NEG: case INC: case DEC:
         return TR1 && N2 && N3;
      case BSWAP:
         return TR1 && N2 && N3 && CC0 && SZ4;
      case CC2VAL:
         return CC1 && SZ1 && TR1 && N2 && N3;
      case JIFZ:
         return CC0 && SZ4 && TR1 && L2 && N3;
      case FPU_R: case FPU_W:
         return CC0 && Ls1 && TR2 && N3;
      case FPU:
         return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
      case LOADV:
         return CC0 && TR1 && TR2 && N3;
      case STOREV:
         return CC0 && (TR1 || L1) && TR2 && N3;
      case GETV:
         return CC0 && A1 && TR2 && N3;
      case PUTV:
         return CC0 && (TR1 || L1) && A2 && N3;
      case GETVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case PUTVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case WIDEN:
         return CC0 && TR1 && N2 && N3;
      case TESTV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case SETV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case TAG1:
         return CC0 && TR1 && N2 && Ls3 && SZ0;
      case TAG2:
         return CC0 && TR1 && TR2 && Ls3 && SZ0;
      default:
         VG_(panic)("vg_saneUInstr: unhandled opcode");
   }
#  undef SZ4_IF_TR1
#  undef CC0
#  undef CC1
#  undef SZ4
#  undef SZ2
#  undef SZ1
#  undef SZ0
#  undef TR1
#  undef TR2
#  undef TR3
#  undef A1
#  undef A2
#  undef AS1
#  undef AS2
#  undef AS3
#  undef L1
#  undef Ls1
#  undef L2
#  undef Ls3
#  undef N1
#  undef N2
#  undef N3
#  undef FLG_RD
#  undef FLG_WR
#  undef FLG_RD_WR_MAYBE
}
595
596
/* Sanity checks to do with CALLMs in UCodeBlocks.  Returns True iff
   (a) every CALLM_S is matched by a later CALLM_E, every CALLM, PUSH,
   POP and CLEAR lies between such a pair, and the three counts agree;
   and (b) within each CALLM_S .. CALLM interval no two PUSHes push
   the same TempReg. */
Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
{
   Int callm = 0;
   Int callm_s = 0;
   Int callm_e = 0;
   Int callm_ptr, calls_ptr;
   Int i, j, t;
   Bool incall = False;

   /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */

   for (i = 0; i < cb->used; i++) {
      switch (cb->instrs[i].opcode) {
         case CALLM:
            if (!incall) return False;
            callm++;
            break;
         case CALLM_S:
            if (incall) return False;   /* nested CALLM_S */
            incall = True;
            callm_s++;
            break;
         case CALLM_E:
            if (!incall) return False;  /* unmatched CALLM_E */
            incall = False;
            callm_e++;
            break;
         case PUSH: case POP: case CLEAR:
            /* These are only legal inside a CALLM_S..CALLM_E pair. */
            if (!incall) return False;
            break;
         default:
            break;
      }
   }
   if (incall) return False;           /* unterminated CALLM_S */
   if (callm != callm_s || callm != callm_e) return False;

   /* Check the sections between CALLM_S and CALLM's.  Ensure that no
      PUSH uinsn pushes any TempReg that any other PUSH in the same
      section pushes.  Ie, check that the TempReg args to PUSHes in
      the section are unique.  If not, the instrumenter generates
      incorrect code for CALLM insns. */

   callm_ptr = 0;

 find_next_CALLM:
   /* Search for the next interval, making calls_ptr .. callm_ptr
      bracket it. */
   while (callm_ptr < cb->used
          && cb->instrs[callm_ptr].opcode != CALLM)
      callm_ptr++;
   if (callm_ptr == cb->used)
      return True;                     /* no more intervals: all clean */
   vg_assert(cb->instrs[callm_ptr].opcode == CALLM);

   /* Scan backwards for the matching CALLM_S; the pairing checks
      above guarantee one exists before the CALLM. */
   calls_ptr = callm_ptr - 1;
   while (cb->instrs[calls_ptr].opcode != CALLM_S)
      calls_ptr--;
   vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
   vg_assert(calls_ptr >= 0);

   /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */

   /* For each PUSH insn in the interval ... */
   for (i = calls_ptr + 1; i < callm_ptr; i++) {
      if (cb->instrs[i].opcode != PUSH) continue;
      t = cb->instrs[i].val1;
      /* Ensure no later PUSH insns up to callm_ptr push the same
         TempReg.  Return False if any such are found. */
      for (j = i+1; j < callm_ptr; j++) {
         if (cb->instrs[j].opcode == PUSH &&
             cb->instrs[j].val1 == t)
            return False;
      }
   }

   /* This interval is clean.  Keep going ... */
   callm_ptr++;
   goto find_next_CALLM;
}
678
679
680/*------------------------------------------------------------*/
681/*--- Printing uinstrs. ---*/
682/*------------------------------------------------------------*/
683
/* Return the x86 condition-code suffix for cond, as used when
   printing conditional uinstrs ("o", "no", "b", ...).  CondAlways
   deliberately yields "MP" so that an unconditional JMP prints as
   "JMP". */
Char* VG_(nameCondcode) ( Condcode cond )
{
   switch (cond) {
      case CondO: return "o";
      case CondNO: return "no";
      case CondB: return "b";
      case CondNB: return "nb";
      case CondZ: return "z";
      case CondNZ: return "nz";
      case CondBE: return "be";
      case CondNBE: return "nbe";
      case CondS: return "s";
      /* NOTE(review): "ConsNS" looks like a typo for CondNS, but it
         must match the enum as declared in vg_include.h -- confirm
         there before renaming. */
      case ConsNS: return "ns";
      case CondP: return "p";
      case CondNP: return "np";
      case CondL: return "l";
      case CondNL: return "nl";
      case CondLE: return "le";
      case CondNLE: return "nle";
      case CondAlways: return "MP"; /* hack! */
      default: VG_(panic)("nameCondcode");
   }
}
707
708
709static void vg_ppFlagSet ( Char* prefix, FlagSet set )
710{
711 VG_(printf)("%s", prefix);
712 if (set & FlagD) VG_(printf)("D");
713 if (set & FlagO) VG_(printf)("O");
714 if (set & FlagS) VG_(printf)("S");
715 if (set & FlagZ) VG_(printf)("Z");
716 if (set & FlagA) VG_(printf)("A");
717 if (set & FlagC) VG_(printf)("C");
718 if (set & FlagP) VG_(printf)("P");
719}
720
721
722static void ppTempReg ( Int tt )
723{
724 if ((tt & 1) == 0)
725 VG_(printf)("t%d", tt);
726 else
727 VG_(printf)("q%d", tt-1);
728}
729
730
731static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
732{
733 UInt tag, val;
734 switch (operandNo) {
735 case 1: tag = u->tag1; val = u->val1; break;
736 case 2: tag = u->tag2; val = u->val2; break;
737 case 3: tag = u->tag3; val = u->val3; break;
738 default: VG_(panic)("ppUOperand(1)");
739 }
740 if (tag == Literal) val = u->lit32;
741
742 if (parens) VG_(printf)("(");
743 switch (tag) {
744 case TempReg: ppTempReg(val); break;
745 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
746 case Literal: VG_(printf)("$0x%x", val); break;
747 case Lit16: VG_(printf)("$0x%x", val); break;
748 case NoValue: VG_(printf)("NoValue"); break;
749 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
750 case SpillNo: VG_(printf)("spill%d", val); break;
751 default: VG_(panic)("ppUOperand(2)");
752 }
753 if (parens) VG_(printf)(")");
754}
755
756
/* Return the printable name of opcode opc.  upper selects the
   upper-case form; only the arithmetic/shift group has a lower-case
   (x86-assembler-style) variant, so upper must be True for all other
   opcodes. */
Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
{
   switch (opc) {
      case ADD: return (upper ? "ADD" : "add");
      case ADC: return (upper ? "ADC" : "adc");
      case AND: return (upper ? "AND" : "and");
      case OR: return (upper ? "OR" : "or");
      case XOR: return (upper ? "XOR" : "xor");
      case SUB: return (upper ? "SUB" : "sub");
      case SBB: return (upper ? "SBB" : "sbb");
      case SHL: return (upper ? "SHL" : "shl");
      case SHR: return (upper ? "SHR" : "shr");
      case SAR: return (upper ? "SAR" : "sar");
      case ROL: return (upper ? "ROL" : "rol");
      case ROR: return (upper ? "ROR" : "ror");
      case RCL: return (upper ? "RCL" : "rcl");
      case RCR: return (upper ? "RCR" : "rcr");
      case NOT: return (upper ? "NOT" : "not");
      case NEG: return (upper ? "NEG" : "neg");
      case INC: return (upper ? "INC" : "inc");
      case DEC: return (upper ? "DEC" : "dec");
      case BSWAP: return (upper ? "BSWAP" : "bswap");
      default: break;
   }
   /* Everything below exists only in upper-case form. */
   if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
   switch (opc) {
      case GETVF: return "GETVF";
      case PUTVF: return "PUTVF";
      case TAG1: return "TAG1";
      case TAG2: return "TAG2";
      case CALLM_S: return "CALLM_S";
      case CALLM_E: return "CALLM_E";
      case INCEIP: return "INCEIP";
      case LEA1: return "LEA1";
      case LEA2: return "LEA2";
      case NOP: return "NOP";
      case GET: return "GET";
      case PUT: return "PUT";
      case GETF: return "GETF";
      case PUTF: return "PUTF";
      case LOAD: return "LD" ;
      case STORE: return "ST" ;
      case MOV: return "MOV";
      case CMOV: return "CMOV";
      case WIDEN: return "WIDEN";
      case JMP: return "J" ;
      case JIFZ: return "JIFZ" ;
      case CALLM: return "CALLM";
      case PUSH: return "PUSH" ;
      case POP: return "POP" ;
      case CLEAR: return "CLEAR";
      case CC2VAL: return "CC2VAL";
      case FPU_R: return "FPU_R";
      case FPU_W: return "FPU_W";
      case FPU: return "FPU" ;
      case LOADV: return "LOADV";
      case STOREV: return "STOREV";
      case GETV: return "GETV";
      case PUTV: return "PUTV";
      case TESTV: return "TESTV";
      case SETV: return "SETV";
      default: VG_(panic)("nameUOpcode: unhandled case");
   }
}
821
822
/* Pretty-print uinstr *u, prefixed by its index instrNo, roughly as
      <instrNo>: OPCODE<cond><size-suffix> <operands> (-rFLAGS -wFLAGS)
   All output goes via VG_(printf); a trailing newline is emitted. */
void VG_(ppUInstr) ( Int instrNo, UInstr* u )
{
   VG_(printf)("\t%4d: %s", instrNo,
               VG_(nameUOpcode)(True, u->opcode));
   /* Conditional opcodes get their condition-code suffix. */
   if (u->opcode == JMP || u->opcode == CC2VAL)
      VG_(printf)("%s", VG_(nameCondcode(u->cond)));

   /* Size suffix: o = sizeless (0), B/W/L/Q = 1/2/4/8 bytes. */
   switch (u->size) {
      case 0: VG_(printf)("o"); break;
      case 1: VG_(printf)("B"); break;
      case 2: VG_(printf)("W"); break;
      case 4: VG_(printf)("L"); break;
      case 8: VG_(printf)("Q"); break;
      default: VG_(printf)("%d", (Int)u->size); break;
   }

   /* Operand printing is opcode-specific. */
   switch (u->opcode) {

      /* TAG1 is an in-place tag op, so operand 1 is printed twice:
         once as destination, once as source. */
      case TAG1:
         VG_(printf)("\t");
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" )");
         break;

      case TAG2:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" )");
         break;

      case CALLM_S: case CALLM_E:
         break;

      case INCEIP:
         VG_(printf)("\t$%d", u->val1);
         break;

      /* LEA2 prints in AT&T style: disp(base,index,scale), dst. */
      case LEA2:
         VG_(printf)("\t%d(" , u->lit32);
         ppUOperand(u, 1, 4, False);
         VG_(printf)(",");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(",%d), ", (Int)u->extra4b);
         ppUOperand(u, 3, 4, False);
         break;

      case LEA1:
         VG_(printf)("\t%d" , u->lit32);
         ppUOperand(u, 1, 4, True);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         break;

      case NOP:
         break;

      /* FPU opcodes carry two x86 opcode bytes packed into val1. */
      case FPU_W:
         VG_(printf)("\t0x%x:0x%x, ",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         ppUOperand(u, 2, 4, True);
         break;

      case FPU_R:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, True);
         VG_(printf)(", 0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case FPU:
         VG_(printf)("\t0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      /* Two-operand moves: the memory-side operand is parenthesised. */
      case STOREV: case LOADV:
      case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
         break;

      case GETF: case PUTF:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JMP: case CC2VAL:
      case PUSH: case POP: case CLEAR: case CALLM:
         /* Jumps are annotated with their kind, if interesting. */
         if (u->opcode == JMP) {
            switch (u->jmpkind) {
               case JmpCall: VG_(printf)("-c"); break;
               case JmpRet: VG_(printf)("-r"); break;
               case JmpSyscall: VG_(printf)("-sys"); break;
               case JmpClientReq: VG_(printf)("-cli"); break;
               default: break;
            }
         }
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JIFZ:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case PUTVF: case GETVF:
         VG_(printf)("\t");
         ppUOperand(u, 1, 0, False);
         break;

      case NOT: case NEG: case INC: case DEC: case BSWAP:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      /* GETV/PUTV: the ArchReg side always prints at size 4. */
      case GETV: case PUTV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
         break;

      case WIDEN:
         /* e.g. "_Bs" = widen from byte, signed. */
         VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                     u->signed_widen?'s':'z');
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case TESTV: case SETV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      default: VG_(panic)("ppUInstr: unhandled opcode");
   }

   /* Append the flag read/write annotation, if any. */
   if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
      VG_(printf)(" (");
      if (u->flags_r != FlagsEmpty)
         vg_ppFlagSet("-r", u->flags_r);
      if (u->flags_w != FlagsEmpty)
         vg_ppFlagSet("-w", u->flags_w);
      VG_(printf)(")");
   }
   VG_(printf)("\n");
}
990
991
992void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
993{
994 Int i;
995 VG_(printf)("\n%s\n", title);
996 for (i = 0; i < cb->used; i++)
997 if (0 || cb->instrs[i].opcode != NOP)
998 VG_(ppUInstr) ( i, &cb->instrs[i] );
999 VG_(printf)("\n");
1000}
1001
1002
1003/*------------------------------------------------------------*/
1004/*--- uinstr helpers for register allocation ---*/
1005/*--- and code improvement. ---*/
1006/*------------------------------------------------------------*/
1007
/* A structure for communicating temp uses, and for indicating
   temp->real register mappings for patchUInstr. */
typedef
   struct {
      Int realNo;     /* assigned RealReg (meaningful when used as a
                         temp->real map by patchUInstr) */
      Int tempNo;     /* the TempReg mentioned by the uinstr */
      Bool isWrite;   /* True if the mention writes the temp,
                         False if it reads it */
   }
   TempUse;
1017
1018
/* Get the temp use of a uinstr, parking them in an array supplied by
   the caller, which is assumed to be big enough.  Return the number
   of entries.  Insns which read _and_ write a register wind up
   mentioning it twice.  Entries are placed in the array in program
   order, so that if a reg is read-modified-written, it appears first
   as a read and then as a write.
*/
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{

   /* RD(ono)/WR(ono): if operand number ono is a TempReg, append a
      read/write entry for it to arr.  mycat token-pastes the operand
      number onto the tag/val field-name prefix. */
#  define RD(ono) \
      if (mycat(u->tag,ono) == TempReg) \
         { arr[n].tempNo = mycat(u->val,ono); \
           arr[n].isWrite = False; n++; }
#  define WR(ono) \
      if (mycat(u->tag,ono) == TempReg) \
         { arr[n].tempNo = mycat(u->val,ono); \
           arr[n].isWrite = True; n++; }

   Int n = 0;
   switch (u->opcode) {
      case LEA1: RD(1); WR(2); break;
      case LEA2: RD(1); RD(2); WR(3); break;

      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
      case FPU_R: case FPU_W: RD(2); break;

      case GETF: WR(1); break;
      case PUTF: RD(1); break;

      case GET: WR(2); break;
      case PUT: RD(1); break;
      case LOAD: RD(1); WR(2); break;
      case STORE: RD(1); RD(2); break;
      case MOV: RD(1); WR(2); break;

      case JMP: RD(1); break;
      case CLEAR: case CALLM: break;

      case PUSH: RD(1); break;
      case POP: WR(1); break;

      /* Binary read-modify-write ops: operand 2 appears as both a
         read and a write, in that order. */
      case TAG2:
      case CMOV:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
         RD(1); RD(2); WR(2); break;

      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         RD(1); RD(2); WR(2); break;

      /* Unary in-place ops: operand 1 is read then written. */
      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
         RD(1); WR(1); break;

      case WIDEN: RD(1); WR(1); break;

      case CC2VAL: WR(1); break;
      case JIFZ: RD(1); break;

      /* These sizes are only ever consulted when the instrumentation
         code is being added, so the following can return
         manifestly-bogus sizes. */
      case LOADV: RD(1); WR(2); break;
      case STOREV: RD(1); RD(2); break;
      case GETV: WR(2); break;
      case PUTV: RD(1); break;
      case TESTV: RD(1); break;
      case SETV: WR(1); break;
      case PUTVF: RD(1); break;
      case GETVF: WR(1); break;

      default: VG_(panic)("getTempUsage: unhandled opcode");
   }
   return n;

#  undef RD
#  undef WR
}
1099
1100
1101/* Change temp regs in u into real regs, as directed by tmap. */
1102static __inline__
1103void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
1104{
1105 Int i;
1106 if (u->tag1 == TempReg) {
1107 for (i = 0; i < n_tmap; i++)
1108 if (tmap[i].tempNo == u->val1) break;
1109 if (i == n_tmap) VG_(panic)("patchUInstr(1)");
1110 u->tag1 = RealReg;
1111 u->val1 = tmap[i].realNo;
1112 }
1113 if (u->tag2 == TempReg) {
1114 for (i = 0; i < n_tmap; i++)
1115 if (tmap[i].tempNo == u->val2) break;
1116 if (i == n_tmap) VG_(panic)("patchUInstr(2)");
1117 u->tag2 = RealReg;
1118 u->val2 = tmap[i].realNo;
1119 }
1120 if (u->tag3 == TempReg) {
1121 for (i = 0; i < n_tmap; i++)
1122 if (tmap[i].tempNo == u->val3) break;
1123 if (i == n_tmap) VG_(panic)("patchUInstr(3)");
1124 u->tag3 = RealReg;
1125 u->val3 = tmap[i].realNo;
1126 }
1127}
1128
1129
1130/* Tedious x86-specific hack which compensates for the fact that the
1131 register numbers for %ah .. %dh do not correspond to those for %eax
1132 .. %edx. It maps a (reg size, reg no) pair to the number of the
1133 containing 32-bit reg. */
1134static __inline__
1135Int containingArchRegOf ( Int sz, Int aregno )
1136{
1137 switch (sz) {
1138 case 4: return aregno;
1139 case 2: return aregno;
1140 case 1: return aregno >= 4 ? aregno-4 : aregno;
1141 default: VG_(panic)("containingArchRegOf");
1142 }
1143}
1144
1145
1146/* If u reads an ArchReg, return the number of the containing arch
1147 reg. Otherwise return -1. Used in redundant-PUT elimination. */
static __inline__
Int maybe_uinstrReadsArchReg ( UInstr* u )
{
   switch (u->opcode) {
      /* These opcodes may carry an ArchReg in operand 1; if so, that
         is an ArchReg read, and we report its containing 32-bit reg. */
      case GET:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR: case ROL:
      case ROR: case RCL: case RCR:
         if (u->tag1 == ArchReg)
            return containingArchRegOf ( u->size, u->val1 );
         else
            return -1;

      /* All of these are known never to read an ArchReg operand. */
      case GETF: case PUTF:
      case CALLM_S: case CALLM_E:
      case INCEIP:
      case LEA1:
      case LEA2:
      case NOP:
      case PUT:
      case LOAD:
      case STORE:
      case MOV:
      case CMOV:
      case JMP:
      case CALLM: case CLEAR: case PUSH: case POP:
      case NOT: case NEG: case INC: case DEC: case BSWAP:
      case CC2VAL:
      case JIFZ:
      case FPU: case FPU_R: case FPU_W:
      case WIDEN:
         return -1;

      /* Anything else is an opcode this table hasn't been taught
         about; dump the insn and die rather than guess. */
      default:
         VG_(ppUInstr)(0,u);
         VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
   }
}
1187
1188static __inline__
1189Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1190{
1191 Int i, k;
1192 TempUse tempUse[3];
1193 k = getTempUsage ( u, &tempUse[0] );
1194 for (i = 0; i < k; i++)
1195 if (tempUse[i].tempNo == tempreg)
1196 return True;
1197 return False;
1198}
1199
1200
1201/*------------------------------------------------------------*/
1202/*--- ucode improvement. ---*/
1203/*------------------------------------------------------------*/
1204
1205/* Improve the code in cb by doing
1206 -- Redundant ArchReg-fetch elimination
1207 -- Redundant PUT elimination
1208 -- Redundant cond-code restore/save elimination
1209 The overall effect of these is to allow target registers to be
1210 cached in host registers over multiple target insns.
1211*/
static void vg_improve ( UCodeBlock* cb )
{
   Int i, j, k, m, n, ar, tr, told, actual_areg;
   Int areg_map[8];
   Bool annul_put[8];
   TempUse tempUse[3];
   UInstr* u;
   Bool wr;
   /* last_live_before[t]: index related to t's last use, computed
      below; -1 means "not yet seen".  For a final write the value is
      (insn index + 1), for a final read it is the insn index. */
   Int* last_live_before;
   FlagSet future_dead_flags;

   if (cb->nextTemp > 0)
      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
   else
      last_live_before = NULL;


   /* PASS 1: redundant GET elimination.  (Actually, more general than
      that -- eliminates redundant fetches of ArchRegs). */

   /* Find the live-range-ends for all temporaries.  Duplicates code
      in the register allocator :-( */

   for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;

   /* Walk backwards so the first assignment we see for a temp is its
      last use in program order. */
   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      k = getTempUsage(u, &tempUse[0]);

      /* For each temp usage ... bwds in program order. */
      for (j = k-1; j >= 0; j--) {
         tr = tempUse[j].tempNo;
         wr = tempUse[j].isWrite;
         if (last_live_before[tr] == -1) {
            vg_assert(tr >= 0 && tr < cb->nextTemp);
            last_live_before[tr] = wr ? (i+1) : i;
         }
      }

   }

   /* areg_map[a] is the TempReg currently holding the value of arch
      reg a, or -1 if none is known to. */
#  define BIND_ARCH_TO_TEMP(archreg,tempreg)\
   { Int q;                                           \
     /* Invalidate any old binding(s) to tempreg. */  \
     for (q = 0; q < 8; q++)                          \
        if (areg_map[q] == tempreg) areg_map[q] = -1; \
     /* Add the new binding. */                       \
     areg_map[archreg] = (tempreg);                   \
   }

   /* Set up the A-reg map. */
   for (i = 0; i < 8; i++) areg_map[i] = -1;

   /* Scan insns. */
   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      if (u->opcode == GET && u->size == 4) {
         /* GET; see if it can be annulled. */
         vg_assert(u->tag1 == ArchReg);
         vg_assert(u->tag2 == TempReg);
         ar   = u->val1;
         tr   = u->val2;
         told = areg_map[ar];
         if (told != -1 && last_live_before[told] <= i) {
            /* ar already has an old mapping to told, but that runs
               out here.  Annul this GET, rename tr to told for the
               rest of the block, and extend told's live range to that
               of tr.  */
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            n = last_live_before[tr] + 1;
            if (n > cb->used) n = cb->used;
            last_live_before[told] = last_live_before[tr];
            last_live_before[tr] = i-1;
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
                  i, tr, told,i+1, n-1);
            /* Rename tr to told in insns (i+1 .. n-1).  NB: only
               operand fields 1 and 2 are rewritten here. */
            for (m = i+1; m < n; m++) {
               if (cb->instrs[m].tag1 == TempReg
                   && cb->instrs[m].val1 == tr)
                  cb->instrs[m].val1 = told;
               if (cb->instrs[m].tag2 == TempReg
                   && cb->instrs[m].val2 == tr)
                  cb->instrs[m].val2 = told;
            }
            BIND_ARCH_TO_TEMP(ar,told);
         }
         else
            BIND_ARCH_TO_TEMP(ar,tr);
      }
      else if (u->opcode == GET && u->size != 4) {
         /* Invalidate any mapping for this archreg.  */
         actual_areg = containingArchRegOf ( u->size, u->val1 );
         areg_map[actual_areg] = -1;
      }
      else if (u->opcode == PUT && u->size == 4) {
         /* PUT; re-establish t -> a binding */
         vg_assert(u->tag1 == TempReg);
         vg_assert(u->tag2 == ArchReg);
         BIND_ARCH_TO_TEMP(u->val2, u->val1);
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         areg_map[actual_areg] = -1;
      } else {

         /* see if insn has an archreg as a read operand; if so try to
            map it. */
         if (u->tag1 == ArchReg && u->size == 4
             && areg_map[u->val1] != -1) {
            switch (u->opcode) {
               case ADD: case SUB: case AND: case OR: case XOR:
               case ADC: case SBB:
               case SHL: case SHR: case SAR: case ROL: case ROR:
               case RCL: case RCR:
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change ArchReg %S to TempReg t%d\n",
                        i, nameIReg(4,u->val1), areg_map[u->val1]);
                  u->tag1 = TempReg;
                  u->val1 = areg_map[u->val1];
                  /* Remember to extend the live range of the TempReg,
                     if necessary. */
                  if (last_live_before[u->val1] < i)
                     last_live_before[u->val1] = i;
                  break;
               default:
                  break;
            }
         }

         /* boring insn; invalidate any mappings to temps it writes */
         k = getTempUsage(u, &tempUse[0]);

         for (j = 0; j < k; j++) {
            wr = tempUse[j].isWrite;
            if (!wr) continue;
            tr = tempUse[j].tempNo;
            for (m = 0; m < 8; m++)
               if (areg_map[m] == tr) areg_map[m] = -1;
         }
      }

   }

#  undef BIND_ARCH_TO_TEMP

   /* PASS 2: redundant PUT elimination.  Don't annul (delay) puts of
      %ESP, since the memory check machinery always requires the
      in-memory value of %ESP to be up to date.  Although this isn't
      actually required by other analyses (cache simulation), it's
      simplest to be consistent for all end-uses. */

   /* annul_put[a]: a later PUT to arch reg a makes an earlier PUT to
      a redundant, so it can be deleted.  Scanned backwards. */
   for (j = 0; j < 8; j++)
      annul_put[j] = False;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];
      if (u->opcode == NOP) continue;

      if (u->opcode == PUT && u->size == 4) {
         vg_assert(u->tag2 == ArchReg);
         actual_areg = containingArchRegOf ( 4, u->val2 );
         if (annul_put[actual_areg]) {
            vg_assert(actual_areg != R_ESP);
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            if (VG_(disassemble))
               VG_(printf)("at %d: delete PUT\n", i );
         } else {
            if (actual_areg != R_ESP)
               annul_put[actual_areg] = True;
         }
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Sub-word PUT: don't try to annul across it. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         annul_put[actual_areg] = False;
      }
      else if (u->opcode == JMP || u->opcode == JIFZ
               || u->opcode == CALLM) {
         /* Control flow may leave the block here, so every pending
            PUT must be considered observable. */
         for (j = 0; j < 8; j++)
            annul_put[j] = False;
      }
      else {
         /* If an instruction reads an ArchReg, the immediately
            preceding PUT cannot be annulled. */
         actual_areg = maybe_uinstrReadsArchReg ( u );
         if (actual_areg != -1)
            annul_put[actual_areg] = False;
      }
   }

   /* PASS 2a: redundant-move elimination.  Given MOV t1, t2 and t1 is
      dead after this point, annul the MOV insn and rename t2 to t1.
      Further modifies the last_live_before map. */

#  if 0
   VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
   for (i = 0; i < cb->nextTemp; i++)
      VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
   VG_(printf)("\n");
#  endif

   for (i = 0; i < cb->used-1; i++) {
      u = &cb->instrs[i];
      if (u->opcode != MOV) continue;
      if (u->tag1 == Literal) continue;
      vg_assert(u->tag1 == TempReg);
      vg_assert(u->tag2 == TempReg);
      if (last_live_before[u->val1] == i) {
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
               i, u->val2, u->val1, i+1, last_live_before[u->val2] );
         /* Rename the destination temp back to the (now dead) source
            temp in all later insns that mention it. */
         for (j = i+1; j <= last_live_before[u->val2]; j++) {
            if (cb->instrs[j].tag1 == TempReg
                && cb->instrs[j].val1 == u->val2)
               cb->instrs[j].val1 = u->val1;
            if (cb->instrs[j].tag2 == TempReg
                && cb->instrs[j].val2 == u->val2)
               cb->instrs[j].val2 = u->val1;
         }
         last_live_before[u->val1] = last_live_before[u->val2];
         last_live_before[u->val2] = i-1;
         u->opcode = NOP;
         u->tag1 = u->tag2 = NoValue;
      }
   }

   /* PASS 3: redundant condition-code restore/save elimination.
      Scan backwards from the end.  future_dead_flags records the set
      of flags which are dead at this point, that is, will be written
      before they are next read.  Earlier uinsns which write flags
      already in future_dead_flags can have their writes annulled.
   */
   future_dead_flags = FlagsEmpty;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* We might never make it to insns beyond this one, so be
         conservative. */
      if (u->opcode == JIFZ || u->opcode == JMP) {
         future_dead_flags = FlagsEmpty;
         continue;
      }

      /* We can annul the flags written by this insn if it writes a
         subset (or eq) of the set of flags known to be dead after
         this insn.  If not, just record the flags also written by
         this insn.*/
      if (u->flags_w != FlagsEmpty
          && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
         if (VG_(disassemble)) {
            VG_(printf)("at %d: annul flag write ", i);
            vg_ppFlagSet("", u->flags_w);
            VG_(printf)(" due to later ");
            vg_ppFlagSet("", future_dead_flags);
            VG_(printf)("\n");
         }
         u->flags_w = FlagsEmpty;
      } else {
         future_dead_flags
            = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
      }

      /* If this insn also reads flags, empty out future_dead_flags so
         as to force preceding writes not to be annulled. */
      if (u->flags_r != FlagsEmpty)
         future_dead_flags = FlagsEmpty;
   }

   if (last_live_before)
      VG_(jitfree) ( last_live_before );
}
1489
1490
1491/*------------------------------------------------------------*/
1492/*--- The new register allocator. ---*/
1493/*------------------------------------------------------------*/
1494
/* Per-TempReg bookkeeping for the register allocator below. */
typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Int live_after;
      /* Becomes dead for the last time after this insn ... */
      Int dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Int spill_no;
      /* Where is it?  VG_NOTHING==in a spill slot; else the rank of
         the real reg holding it.  (VG_NOTHING is defined locally in
         vg_do_register_allocation.) */
      Int real_no;
   }
   TempInfo;
1507
1508
1509/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1510 them in spill locations, and add spill code, if there are not
1511 enough real regs. The usual register allocation deal, in short.
1512
1513 Important redundancy of representation:
1514
   real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
   to VG_NOTHING if the real reg has no currently assigned TempReg.

   The .real_no field of a TempInfo gives the current RRR for
   this TempReg, or VG_NOTHING if the TempReg is currently
   in memory, in which case it is in the spill slot denoted by
   spill_no.
1522
1523 These pieces of information (a fwds-bwds mapping, really) must
1524 be kept consistent!
1525
1526 This allocator uses the so-called Second Chance Bin Packing
1527 algorithm, as described in "Quality and Speed in Linear-scan
1528 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1529 pp142-151). It is simple and fast and remarkably good at
1530 minimising the amount of spill code introduced.
1531*/
1532
static
UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
{
   TempInfo*   temp_info;
   Int         real_to_temp[VG_MAX_REALREGS];
   Bool        is_spill_cand[VG_MAX_REALREGS];
   Int         ss_busy_until_before[VG_MAX_SPILLSLOTS];
   Int         i, j, k, m, r, tno, max_ss_no;
   Bool        wr, defer, isRead, spill_reqd;
   TempUse     tempUse[3];
   UCodeBlock* c2;

   /* Used to denote ... well, "no value" in this fn. */
#  define VG_NOTHING (-2)

   /* Initialise the TempReg info.  */
   if (c1->nextTemp > 0)
      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
   else
      temp_info = NULL;

   for (i = 0; i < c1->nextTemp; i++) {
      temp_info[i].live_after = VG_NOTHING;
      temp_info[i].dead_before = VG_NOTHING;
      temp_info[i].spill_no = VG_NOTHING;
      /* temp_info[i].real_no is not yet relevant. */
   }

   spill_reqd = False;

   /* Scan fwds to establish live ranges. */

   for (i = 0; i < c1->used; i++) {
      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each temp usage ... fwds in program order */
      for (j = 0; j < k; j++) {
         tno = tempUse[j].tempNo;
         wr  = tempUse[j].isWrite;
         if (wr) {
            /* Writes hold a reg live until after this insn. */
            if (temp_info[tno].live_after == VG_NOTHING)
               temp_info[tno].live_after = i;
            if (temp_info[tno].dead_before < i + 1)
               temp_info[tno].dead_before = i + 1;
         } else {
            /* First use of a tmp should be a write. */
            vg_assert(temp_info[tno].live_after != VG_NOTHING);
            /* Reads only hold it live until before this insn. */
            if (temp_info[tno].dead_before < i)
               temp_info[tno].dead_before = i;
         }
      }
   }

#  if 0
   /* Sanity check on live ranges.  Expensive but correct. */
   for (i = 0; i < c1->nextTemp; i++) {
      vg_assert( (temp_info[i].live_after == VG_NOTHING
                  && temp_info[i].dead_before == VG_NOTHING)
                 || (temp_info[i].live_after != VG_NOTHING
                     && temp_info[i].dead_before != VG_NOTHING) );
   }
#  endif

   /* Do a rank-based allocation of TempRegs to spill slot numbers.
      We put as few as possible values in spill slots, but
      nevertheless need to have an assignment to them just in case. */

   max_ss_no = -1;

   /* ss_busy_until_before[s]: slot s is free for any interval that
      starts at or after this insn number. */
   for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
      ss_busy_until_before[i] = 0;

   for (i = 0; i < c1->nextTemp; i++) {

      /* True iff this temp is unused. */
      if (temp_info[i].live_after == VG_NOTHING)
         continue;

      /* Find the lowest-numbered spill slot which is available at the
         start point of this interval, and assign the interval to
         it. */
      for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
         if (ss_busy_until_before[j] <= temp_info[i].live_after)
            break;
      if (j == VG_MAX_SPILLSLOTS) {
         VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
         VG_(panic)("register allocation failed -- out of spill slots");
      }
      ss_busy_until_before[j] = temp_info[i].dead_before;
      temp_info[i].spill_no = j;
      if (j > max_ss_no)
         max_ss_no = j;
   }

   VG_(total_reg_rank) += (max_ss_no+1);

   /* Show live ranges and assigned spill slot nos. */

   if (VG_(disassemble)) {
      VG_(printf)("Live Range Assignments\n");

      for (i = 0; i < c1->nextTemp; i++) {
         if (temp_info[i].live_after == VG_NOTHING)
            continue;
         VG_(printf)(
            " LR %d is after %d to before %d spillno %d\n",
            i,
            temp_info[i].live_after,
            temp_info[i].dead_before,
            temp_info[i].spill_no
         );
      }
   }

   /* Now that we've established a spill slot number for each used
      temporary, we can go ahead and do the core of the "Second-chance
      binpacking" allocation algorithm. */

   /* Resulting code goes here.  We generate it all in a forwards
      pass. */
   c2 = VG_(allocCodeBlock)();

   /* At the start, no TempRegs are assigned to any real register.
      Correspondingly, all temps claim to be currently resident in
      their spill slots, as computed by the previous two passes. */
   for (i = 0; i < VG_MAX_REALREGS; i++)
      real_to_temp[i] = VG_NOTHING;
   for (i = 0; i < c1->nextTemp; i++)
      temp_info[i].real_no = VG_NOTHING;

   if (VG_(disassemble))
      VG_(printf)("\n");

   /* Process each insn in turn. */
   for (i = 0; i < c1->used; i++) {

      if (c1->instrs[i].opcode == NOP) continue;
      VG_(uinstrs_prealloc)++;

#     if 0
      /* Check map consistency.  Expensive but correct. */
      for (r = 0; r < VG_MAX_REALREGS; r++) {
         if (real_to_temp[r] != VG_NOTHING) {
            tno = real_to_temp[r];
            vg_assert(tno >= 0 && tno < c1->nextTemp);
            vg_assert(temp_info[tno].real_no == r);
         }
      }
      for (tno = 0; tno < c1->nextTemp; tno++) {
         if (temp_info[tno].real_no != VG_NOTHING) {
            r = temp_info[tno].real_no;
            vg_assert(r >= 0 && r < VG_MAX_REALREGS);
            vg_assert(real_to_temp[r] == tno);
         }
      }
#     endif

      if (VG_(disassemble))
         VG_(ppUInstr)(i, &c1->instrs[i]);

      /* First, free up enough real regs for this insn.  This may
         generate spill stores since we may have to evict some TempRegs
         currently in real regs.  Also generates spill loads. */

      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each ***different*** temp mentioned in the insn .... */
      for (j = 0; j < k; j++) {

         /* First check if the temp is mentioned again later; if so,
            ignore this mention.  We only want to process each temp
            used by the insn once, even if it is mentioned more than
            once. */
         defer = False;
         tno = tempUse[j].tempNo;
         for (m = j+1; m < k; m++)
            if (tempUse[m].tempNo == tno)
               defer = True;
         if (defer)
            continue;

         /* Now we're trying to find a register for tempUse[j].tempNo.
            First of all, if it already has a register assigned, we
            don't need to do anything more. */
         if (temp_info[tno].real_no != VG_NOTHING)
            continue;

         /* No luck.  The next thing to do is see if there is a
            currently unassigned register available.  If so, bag it. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (real_to_temp[r] == VG_NOTHING)
               break;
         }
         if (r < VG_MAX_REALREGS) {
            real_to_temp[r] = tno;
            temp_info[tno].real_no = r;
            continue;
         }

         /* Unfortunately, that didn't pan out either.  So we'll have
            to eject some other unfortunate TempReg into a spill slot
            in order to free up a register.  Of course, we need to be
            careful not to eject some other TempReg needed by this
            insn.

            Select r in 0 .. VG_MAX_REALREGS-1 such that
            real_to_temp[r] is not mentioned in
            tempUse[0 .. k-1].tempNo, since it would be just plain
            wrong to eject some other TempReg which we need to use in
            this insn.

            It is here that it is important to make a good choice of
            register to spill.  */

         /* First, mark those regs which are not spill candidates. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            is_spill_cand[r] = True;
            for (m = 0; m < k; m++) {
               if (real_to_temp[r] == tempUse[m].tempNo) {
                  is_spill_cand[r] = False;
                  break;
               }
            }
         }

         /* We can choose any r satisfying is_spill_cand[r].  However,
            try to make a good choice.  First, try and find r such
            that the associated TempReg is already dead. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (is_spill_cand[r] &&
                temp_info[real_to_temp[r]].dead_before <= i)
               goto have_spill_cand;
         }

         /* No spill cand is mapped to a dead TempReg.  Now we really
            _do_ have to generate spill code.  Choose r so that the
            next use of its associated TempReg is as far ahead as
            possible, in the hope that this will minimise the number of
            consequent reloads required.  This is a bit expensive, but
            we don't have to do it very often. */
         {
            Int furthest_r = VG_MAX_REALREGS;
            Int furthest = 0;
            for (r = 0; r < VG_MAX_REALREGS; r++) {
               if (!is_spill_cand[r]) continue;
               /* m ends up as the index of the next insn mentioning
                  this reg's temp, or c1->used if there is none. */
               for (m = i+1; m < c1->used; m++)
                  if (uInstrMentionsTempReg(&c1->instrs[m],
                                            real_to_temp[r]))
                     break;
               if (m > furthest) {
                  furthest = m;
                  furthest_r = r;
               }
            }
            r = furthest_r;
            goto have_spill_cand;
         }

         have_spill_cand:
         if (r == VG_MAX_REALREGS)
            VG_(panic)("new reg alloc: out of registers ?!");

         /* Eject r.  Important refinement: don't bother if the
            associated TempReg is now dead. */
         vg_assert(real_to_temp[r] != VG_NOTHING);
         vg_assert(real_to_temp[r] != tno);
         temp_info[real_to_temp[r]].real_no = VG_NOTHING;
         if (temp_info[real_to_temp[r]].dead_before > i) {
            /* Spill store: PUT the evicted value into its home slot. */
            uInstr2(c2, PUT, 4,
                        RealReg, VG_(rankToRealRegNo)(r),
                        SpillNo, temp_info[real_to_temp[r]].spill_no);
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Decide if tno is read. */
         isRead = False;
         for (m = 0; m < k; m++)
            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
               isRead = True;

         /* If so, generate a spill load. */
         if (isRead) {
            uInstr2(c2, GET, 4,
                        SpillNo, temp_info[tno].spill_no,
                        RealReg, VG_(rankToRealRegNo)(r) );
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Update the forwards and backwards maps. */
         real_to_temp[r] = tno;
         temp_info[tno].real_no = r;
      }

      /* By this point, all TempRegs mentioned by the insn have been
         bought into real regs.  We now copy the insn to the output
         and use patchUInstr to convert its rTempRegs into
         realregs. */
      for (j = 0; j < k; j++)
         tempUse[j].realNo
            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
      VG_(copyUInstr)(c2, &c1->instrs[i]);
      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);

      if (VG_(disassemble)) {
         VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         VG_(printf)("\n");
      }
   }

   if (temp_info != NULL)
      VG_(jitfree)(temp_info);

   /* Ownership note: c1 is consumed here; the caller receives c2. */
   VG_(freeCodeBlock)(c1);

   if (spill_reqd)
      VG_(translations_needing_spill)++;

   return c2;

#  undef VG_NOTHING

}
1865
1866
1867/*------------------------------------------------------------*/
1868/*--- New instrumentation machinery. ---*/
1869/*------------------------------------------------------------*/
1870
1871static
1872VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
1873{
1874 switch (sz) {
1875 case 4: return VgT_ImproveOR4_TQ;
1876 case 2: return VgT_ImproveOR2_TQ;
1877 case 1: return VgT_ImproveOR1_TQ;
1878 default: VG_(panic)("get_VgT_ImproveOR_TQ");
1879 }
1880}
1881
1882
1883static
1884VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
1885{
1886 switch (sz) {
1887 case 4: return VgT_ImproveAND4_TQ;
1888 case 2: return VgT_ImproveAND2_TQ;
1889 case 1: return VgT_ImproveAND1_TQ;
1890 default: VG_(panic)("get_VgT_ImproveAND_TQ");
1891 }
1892}
1893
1894
1895static
1896VgTagOp get_VgT_Left ( Int sz )
1897{
1898 switch (sz) {
1899 case 4: return VgT_Left4;
1900 case 2: return VgT_Left2;
1901 case 1: return VgT_Left1;
1902 default: VG_(panic)("get_VgT_Left");
1903 }
1904}
1905
1906
1907static
1908VgTagOp get_VgT_UifU ( Int sz )
1909{
1910 switch (sz) {
1911 case 4: return VgT_UifU4;
1912 case 2: return VgT_UifU2;
1913 case 1: return VgT_UifU1;
1914 case 0: return VgT_UifU0;
1915 default: VG_(panic)("get_VgT_UifU");
1916 }
1917}
1918
1919
1920static
1921VgTagOp get_VgT_DifD ( Int sz )
1922{
1923 switch (sz) {
1924 case 4: return VgT_DifD4;
1925 case 2: return VgT_DifD2;
1926 case 1: return VgT_DifD1;
1927 default: VG_(panic)("get_VgT_DifD");
1928 }
1929}
1930
1931
1932static
1933VgTagOp get_VgT_PCast ( Int szs, Int szd )
1934{
1935 if (szs == 4 && szd == 0) return VgT_PCast40;
1936 if (szs == 2 && szd == 0) return VgT_PCast20;
1937 if (szs == 1 && szd == 0) return VgT_PCast10;
1938 if (szs == 0 && szd == 1) return VgT_PCast01;
1939 if (szs == 0 && szd == 2) return VgT_PCast02;
1940 if (szs == 0 && szd == 4) return VgT_PCast04;
1941 if (szs == 1 && szd == 4) return VgT_PCast14;
1942 if (szs == 1 && szd == 2) return VgT_PCast12;
1943 if (szs == 1 && szd == 1) return VgT_PCast11;
1944 VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
1945 VG_(panic)("get_VgT_PCast");
1946}
1947
1948
1949static
1950VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
1951{
1952 if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
1953 if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
1954
1955 if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
1956 if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
1957
1958 if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
1959 if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
1960
1961 VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
1962 VG_(panic)("get_VgT_Widen");
1963}
1964
1965/* Pessimally cast the spec'd shadow from one size to another. */
static
void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
{
   /* A 0 -> 0 cast is an identity; emit nothing. */
   if (szs == 0 && szd == 0)
      return;
   /* Emit a single TAG1 op carrying the appropriate PCast variant. */
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
                        NoValue, 0,
                        Lit16, get_VgT_PCast(szs,szd));
}
1975
1976
1977/* Create a signed or unsigned widen of the spec'd shadow from one
1978 size to another. The only allowed size transitions are 1->2, 1->4
1979 and 2->4. */
static
void create_Widen ( UCodeBlock* cb, Bool signed_widen,
                    Int szs, Int szd, Int tempreg )
{
   /* Widening to the same size is an identity; emit nothing. */
   if (szs == szd) return;
   /* Emit a single TAG1 op carrying the appropriate widen variant. */
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
                        NoValue, 0,
                        Lit16, get_VgT_Widen(signed_widen,szs,szd));
}
1989
1990
1991/* Get the condition codes into a new shadow, at the given size. */
static
Int create_GETVF ( UCodeBlock* cb, Int sz )
{
   /* Fetch the flags' V bits into a fresh shadow (size 0), then
      pessimally cast them up to the requested size.  Returns the
      TempReg number of the new shadow. */
   Int tt = newShadow(cb);
   uInstr1(cb, GETVF, 0, TempReg, tt);
   create_PCast(cb, 0, sz, tt);
   return tt;
}
2000
2001
2002/* Save the condition codes from the spec'd shadow. */
2003static
2004void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
2005{
2006 if (sz == 0) {
2007 uInstr1(cb, PUTVF, 0, TempReg, tempreg);
2008 } else {
2009 Int tt = newShadow(cb);
2010 uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
2011 create_PCast(cb, sz, 0, tt);
2012 uInstr1(cb, PUTVF, 0, TempReg, tt);
2013 }
2014}
2015
2016
2017/* Do Left on the spec'd shadow. */
static
void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
{
   /* Emit a single TAG1 op applying the size-appropriate Left
      operation to tempreg in place. */
   uInstr3(cb, TAG1, 0,
               TempReg, tempreg,
               NoValue, 0,
               Lit16, get_VgT_Left(sz));
}
2026
2027
2028/* Do UifU on ts and td, putting the result in td. */
static
void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   /* Emit a TAG2 op: td := UifU(ts, td), at the given size. */
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
               Lit16, get_VgT_UifU(sz));
}
2035
2036
2037/* Do DifD on ts and td, putting the result in td. */
static
void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   /* Emit a TAG2 op: td := DifD(ts, td), at the given size. */
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
               Lit16, get_VgT_DifD(sz));
}
2044
2045
2046/* Do HelpAND on value tval and tag tqqq, putting the result in
2047 tqqq. */
static
void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   /* Emit a TAG2 op: tqqq := ImproveAND(tval, tqqq), at the given size. */
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
               Lit16, get_VgT_ImproveAND_TQ(sz));
}
2054
2055
2056/* Do HelpOR on value tval and tag tqqq, putting the result in
2057 tqqq. */
static
void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   /* Emit a TAG2 op: tqqq := ImproveOR(tval, tqqq), at the given size. */
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
               Lit16, get_VgT_ImproveOR_TQ(sz));
}
2064
2065
2066/* Get the shadow for an operand described by (tag, val). Emit code
2067 to do this and return the identity of the shadow holding the
2068 result. The result tag is always copied into a new shadow, so it
2069 can be modified without trashing the original.*/
2070static
2071Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
2072 Int sz, Int tag, Int val )
2073{
2074 Int sh;
2075 sh = newShadow(cb);
2076 if (tag == TempReg) {
2077 uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
2078 return sh;
2079 }
2080 if (tag == Literal) {
2081 uInstr1(cb, SETV, sz, TempReg, sh);
2082 return sh;
2083 }
2084 if (tag == ArchReg) {
2085 uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
2086 return sh;
2087 }
2088 VG_(panic)("getOperandShadow");
2089}
2090
2091
2092
/* Create and return an instrumented version of cb_in.  Free cb_in
   before returning.

   The instrumentation adds V-bit (definedness) tracking: each value
   temp t is paired with a shadow temp SHADOW(t) carrying its V bits.
   The locals qs/qd/qt/qtt name the shadows of the source, dest and
   scratch operands of the uinstr currently being processed.  For
   each input uinstr we first emit the shadow-manipulation uinstrs,
   then copy the original uinstr through with VG_(copyUInstr). */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
   UCodeBlock* cb;
   Int         i, j;
   UInstr*     u_in;
   Int         qs, qd, qt, qtt;
   cb = VG_(allocCodeBlock)();
   cb->nextTemp = cb_in->nextTemp;

   for (i = 0; i < cb_in->used; i++) {
      qs = qd = qt = qtt = INVALID_TEMPREG;
      u_in = &cb_in->instrs[i];

      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */

      /* VG_(ppUInstr)(0, u_in); */
      switch (u_in->opcode) {

         case NOP:
            break;

         case INCEIP:
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Loads and stores.  Test the V bits for the address.  24
            Mar 02: since the address is A-checked anyway, there's not
            really much point in doing the V-check too, unless you
            think that you might use addresses which are undefined but
            still addressible.  Hence the optionalisation of the V
            check.

            The LOADV/STOREV does an addressibility check for the
            address. */

         case LOAD:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            }
            uInstr2(cb, LOADV, u_in->size,
                        TempReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;
         case STORE:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
            }
            uInstr2(cb, STOREV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        TempReg, u_in->val2);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Moving stuff around.  Make the V bits follow accordingly,
            but don't do anything else.  */

         case GET:
            uInstr2(cb, GETV, u_in->size,
                        ArchReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;
         case PUT:
            uInstr2(cb, PUTV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        ArchReg, u_in->val2);
            VG_(copyUInstr)(cb, u_in);
            break;

         case GETF:
            /* This is not the smartest way to do it, but should work. */
            qd = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;
         case PUTF:
            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         case MOV:
            switch (u_in->tag1) {
               case TempReg:
                  /* temp-to-temp move: shadow follows the value. */
                  uInstr2(cb, MOV, 4,
                              TempReg, SHADOW(u_in->val1),
                              TempReg, SHADOW(u_in->val2));
                  break;
               case Literal:
                  /* literal is fully defined: mark dest shadow valid. */
                  uInstr1(cb, SETV, u_in->size,
                              TempReg, SHADOW(u_in->val2));
                  break;
               default:
                  VG_(panic)("vg_instrument: MOV");
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Special case of add, where one of the operands is a literal.
            lea1(t) = t + some literal.
            Therefore: lea1#(qa) = left(qa)
         */
         case LEA1:
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
            create_Left(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Another form of add.
            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
                                and is 0,1,2 or 3.
            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
            Note, subtly, that the shift puts zeroes at the bottom of qt,
            meaning Valid, since the corresponding shift of tt puts
            zeroes at the bottom of tb.
         */
         case LEA2: {
            Int shift;
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            switch (u_in->extra4b) {
               case 1: shift = 0; break;
               case 2: shift = 1; break;
               case 4: shift = 2; break;
               case 8: shift = 3; break;
               default: VG_(panic)( "vg_instrument(LEA2)" );
            }
            qs = SHADOW(u_in->val1);
            qt = SHADOW(u_in->val2);
            qd = SHADOW(u_in->val3);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
            if (shift > 0) {
               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
               uLiteral(cb, shift);
            }
            create_UifU(cb, 4, qs, qd);
            create_Left(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;
         }

         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
         case INC: case DEC:
            qd = SHADOW(u_in->val1);
            create_Left(cb, u_in->size, qd);
            if (u_in->flags_w != FlagsEmpty)
               create_PUTVF(cb, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* This is a HACK (approximation :-) */
         /* rcl#/rcr#(qs,qd)
               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
                 eflags# = q0
                 qd =pcast-0-sz(q0)
            Ie, cast everything down to a single bit, then back up.
            This assumes that any bad bits infect the whole word and
            the eflags.
         */
         case RCL: case RCR:
            vg_assert(u_in->flags_r != FlagsEmpty);
            /* The following assertion looks like it makes sense, but is
               actually wrong.  Consider this:
                  rcll    %eax
                  imull   %eax, %eax
               The rcll writes O and C but so does the imull, so the O and C
               write of the rcll is annulled by the prior improvement pass.
               Noticed by Kevin Ryde <user42@zip.com.au>
            */
            /* vg_assert(u_in->flags_w != FlagsEmpty); */
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            /* We can safely modify qs; cast it to 0-size. */
            create_PCast(cb, u_in->size, 0, qs);
            qd = SHADOW(u_in->val2);
            create_PCast(cb, u_in->size, 0, qd);
            /* qs is cast-to-0(shift count#), and qd is
               cast-to-0(value#). */
            create_UifU(cb, 0, qs, qd);
            /* qs is now free; reuse it for the flag definedness. */
            qs = create_GETVF(cb, 0);
            create_UifU(cb, 0, qs, qd);
            create_PUTVF(cb, 0, qd);
            create_PCast(cb, 0, u_in->size, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* for OP in shl shr sar rol ror
            (qs is shift count#, qd is value to be OP#d)
            OP(ts,td)
            OP#(qs,qd)
               = pcast-1-sz(qs) `UifU` OP(ts,qd)
            So we apply OP to the tag bits too, and then UifU with
            the shift count# to take account of the possibility of it
            being undefined.

            A bit subtle:
               ROL/ROR rearrange the tag bits as per the value bits.
               SHL/SHR shifts zeroes into the value, and corresponding
                  zeroes indicating Definedness into the tag.
               SAR copies the top bit of the value downwards, and therefore
                  SAR also copies the definedness of the top bit too.
            So in all five cases, we just apply the same op to the tag
            bits as is applied to the value bits.  Neat!
         */
         case SHL:
         case SHR: case SAR:
         case ROL: case ROR: {
            Int t_amount = INVALID_TEMPREG;
            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);

            /* Make qs hold shift-count# and make
               t_amount be a TempReg holding the shift count. */
            if (u_in->tag1 == Literal) {
               t_amount = newTemp(cb);
               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
               uLiteral(cb, u_in->lit32);
               qs = SHADOW(t_amount);
               uInstr1(cb, SETV, 1, TempReg, qs);
            } else {
               t_amount = u_in->val1;
               qs = SHADOW(u_in->val1);
            }

            uInstr2(cb, u_in->opcode,
                        u_in->size,
                        TempReg, t_amount,
                        TempReg, qd);
            qt = newShadow(cb);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            create_PCast(cb, 1, u_in->size, qt);
            create_UifU(cb, u_in->size, qt, qd);
            VG_(copyUInstr)(cb, u_in);
            break;
         }

         /* One simple tag operation. */
         case WIDEN:
            vg_assert(u_in->tag1 == TempReg);
            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
                             SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* not#(x) = x (since bitwise independent) */
         case NOT:
            vg_assert(u_in->tag1 == TempReg);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* neg#(x) = left(x) (derivable from case for SUB) */
         case NEG:
            vg_assert(u_in->tag1 == TempReg);
            create_Left(cb, u_in->size, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* bswap#(x) = bswap(x) */
         case BSWAP:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->size == 4);
            qd = SHADOW(u_in->val1);
            uInstr1(cb, BSWAP, 4, TempReg, qd);
            VG_(copyUInstr)(cb, u_in);
            break;

         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
         case CC2VAL:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            qt = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* cmov#(qs,qd) = cmov(qs,qd)
            That is, do the cmov of tags using the same flags as for
            the data (obviously).  However, first do a test on the
            validity of the flags.
         */
         case CMOV:
            vg_assert(u_in->size == 4);
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            vg_assert(u_in->flags_w == FlagsEmpty);
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            qt = create_GETVF(cb, 0);
            uInstr1(cb, TESTV, 0, TempReg, qt);
            /* qt should never be referred to again.  Nevertheless
               ... */
            uInstr1(cb, SETV, 0, TempReg, qt);

            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
            LAST_UINSTR(cb).cond    = u_in->cond;
            LAST_UINSTR(cb).flags_r = u_in->flags_r;

            VG_(copyUInstr)(cb, u_in);
            break;

         /* add#/sub#(qs,qd)
               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
               = left(qs) `UifU` left(qd)
               = left(qs `UifU` qd)
            adc#/sbb#(qs,qd)
               = left(qs `UifU` qd) `UifU` pcast(eflags#)
            Second arg (dest) is TempReg.
            First arg (src) is Literal or TempReg or ArchReg.
         */
         case ADD: case SUB:
         case ADC: case SBB:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            create_Left(cb, u_in->size, qd);
            if (u_in->opcode == ADC || u_in->opcode == SBB) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, u_in->size);
               create_UifU(cb, u_in->size, qt, qd);
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* xor#(qs,qd) = qs `UifU` qd */
         case XOR:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* and#/or#(qs,qd)
               = (qs `UifU` qd) `DifD` improve(vs,qs)
                                `DifD` improve(vd,qd)
            where improve is the relevant one of
                Improve{AND,OR}_TQ
            Use the following steps, with qt as a temp:
               qt = improve(vd,qd)
               qd = qs `UifU` qd
               qd = qt `DifD` qd
               qt = improve(vs,qs)
               qd = qt `DifD` qd
         */
         case AND: case OR:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);
            qs = SHADOW(u_in->val1);
            qt = newShadow(cb);

            /* qt = improve(vd,qd) */
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
            /* qd = qs `UifU` qd */
            create_UifU(cb, u_in->size, qs, qd);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* qt = improve(vs,qs) */
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* So, finally qd is the result tag. */
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Machinery to do with supporting CALLM.  Copy the start and
            end markers only to make the result easier to read
            (debug); they generate no code and have no effect.
         */
         case CALLM_S: case CALLM_E:
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Copy PUSH and POP verbatim.  Arg/result absval
            calculations are done when the associated CALL is
            processed.  CLEAR has no effect on absval calculations but
            needs to be copied.
         */
         case PUSH: case POP: case CLEAR:
            VG_(copyUInstr)(cb, u_in);
            break;

         /* In short:
               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
            We have to decide on a size to do the computation at,
            although the choice doesn't affect correctness.  We will
            do a pcast to the final size anyway, so the only important
            factor is to choose a size which minimises the total
            number of casts needed.  Valgrind: just use size 0,
            regardless.  It may not be very good for performance
            but does simplify matters, mainly by reducing the number
            of different pessimising casts which have to be implemented.
         */
         case CALLM: {
            UInstr* uu;
            Bool res_used;

            /* Now generate the code.  Get the final result absval
               into qt. */
            qt  = newShadow(cb);
            qtt = newShadow(cb);
            uInstr1(cb, SETV, 0, TempReg, qt);
            /* Scan backwards to the matching CALLM_S, UifU-ing in the
               shadow of every PUSHed argument. */
            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != PUSH) continue;
               /* cast via a temporary */
               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
                                   TempReg, qtt);
               create_PCast(cb, uu->size, 0, qtt);
               create_UifU(cb, 0, qtt, qt);
            }
            /* Remembering also that flags read count as inputs. */
            if (u_in->flags_r != FlagsEmpty) {
               qtt = create_GETVF(cb, 0);
               create_UifU(cb, 0, qtt, qt);
            }

            /* qt now holds the result tag.  If any results from the
               call are used, either by fetching with POP or
               implicitly by writing the flags, we copy the result
               absval to the relevant location.  If not used, the call
               must have been for its side effects, so we test qt here
               and now.  Note that this assumes that all values
               removed by POP continue to be live.  So dead args
               *must* be removed with CLEAR, not by POPping them into
               a dummy tempreg.
            */
            res_used = False;
            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != POP) continue;
               /* Cast via a temp. */
               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
               create_PCast(cb, 0, uu->size, qtt);
               uInstr2(cb, MOV, 4, TempReg, qtt,
                                   TempReg, SHADOW(uu->val1));
               res_used = True;
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, 0, qt);
               res_used = True;
            }
            if (!res_used) {
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            VG_(copyUInstr)(cb, u_in);
            break;
         }
         /* Whew ... */

         case JMP:
            if (u_in->tag1 == TempReg) {
               /* Computed jump: target address must be defined. */
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            } else {
               vg_assert(u_in->tag1 == Literal);
            }
            if (u_in->cond != CondAlways) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, 0);
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            VG_(copyUInstr)(cb, u_in);
            break;

         case JIFZ:
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
            uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* Emit a check on the address used.  For FPU_R, the value
            loaded into the FPU is checked at the time it is read from
            memory (see synth_fpu_mem_check_actions). */
         case FPU_R: case FPU_W:
            vg_assert(u_in->tag2 == TempReg);
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
            uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
            VG_(copyUInstr)(cb, u_in);
            break;

         /* For FPU insns not referencing memory, just copy thru. */
         case FPU:
            VG_(copyUInstr)(cb, u_in);
            break;

         default:
            VG_(ppUInstr)(0, u_in);
            VG_(panic)( "vg_instrument: unhandled case");

      } /* end of switch (u_in->opcode) */

   } /* end of for loop */

   VG_(freeCodeBlock)(cb_in);
   return cb;
}
2619
2620/*------------------------------------------------------------*/
2621/*--- Clean up mem check instrumentation. ---*/
2622/*------------------------------------------------------------*/
2623
/* A temp's shadow is its odd-numbered partner: value temps are even,
   shadow temps odd.  Argument fully parenthesized for macro hygiene. */
#define VGC_IS_SHADOW(tempreg) (((tempreg) % 2) == 1)

/* def[] markers used by vg_propagate_definedness: shadow temp of
   unknown definedness, and ordinary (non-shadow) value temp. */
#define VGC_UNDEF ((UChar)100)
#define VGC_VALUE ((UChar)101)

/* Annul a uinstr without any tracing output. */
#define NOP_no_msg(uu) \
   do { (uu)->opcode = NOP; } while (False)

/* Annul a TAG1 op whose argument is known to be fully defined.
   NB: deliberately references the loop index `i' from the call
   site (the cleanup passes below).  Previously the body also read
   `u' directly instead of the (uu) parameter; all call sites pass
   `u', so using (uu) is behaviourally identical but hygienic. */
#define NOP_tag1_op(uu) \
   do { (uu)->opcode = NOP; \
        if (VG_(disassemble)) \
           VG_(printf)("at %d: delete %s due to defd arg\n", \
                       i, VG_(nameOfTagOp)((uu)->val3)); \
   } while (False)

/* Convert a TAG1 op into SETV of size (newsz), for when its argument
   is known to be fully defined.  Same `i' caveat as above. */
#define SETV_tag1_op(uu,newsz) \
   do { (uu)->opcode = SETV; \
        (uu)->size = (newsz); \
        (uu)->tag2 = (uu)->tag3 = NoValue; \
        if (VG_(disassemble)) \
           VG_(printf)("at %d: convert %s to SETV%d " \
                       "due to defd arg\n", \
                       i, VG_(nameOfTagOp)((uu)->val3), (newsz)); \
   } while (False)
2647
2648
2649
/* Run backwards and delete SETVs on shadow temps for which the next
   action is a write.  Needs an env saying whether or not the next
   action is a write.  The supplied UCodeBlock is destructively
   modified.

   This is a backwards liveness pass over shadow temps:
   next_is_write[t] == True means the next use of t (in forwards
   order) overwrites it, so any value computed into t here is dead. */
static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
{
   Bool*   next_is_write;
   Int     i, j, k, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));

   /* Initially pessimistic: at block end, everything counts as
      about-to-be-written (ie, dead). */
   for (i = 0; i < n_temps; i++) next_is_write[i] = True;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* If we're not checking address V bits, there will be a lot of
         GETVs, TAG1s and TAG2s calculating values which are never
         used.  These first three cases get rid of them. */

      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
                            && next_is_write[u->val2]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete GETV\n", i);
      } else

      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
                            && next_is_write[u->val1]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG1\n", i);
      } else

      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
                            && next_is_write[u->val2]
                            && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG2\n", i);
      } else

      /* We do the rest of these regardless of whether or not
         addresses are V-checked. */

      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
                           && next_is_write[u->val2]) {
         /* This MOV is pointless because the target is dead at this
            point.  Delete it. */
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete MOV\n", i);
      } else

      if (u->opcode == SETV) {
         if (u->tag1 == TempReg) {
            vg_assert(VGC_IS_SHADOW(u->val1));
            if (next_is_write[u->val1]) {
               /* This write is pointless, so annul it. */
               u->opcode = NOP;
               u->size = 0;
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete SETV\n", i);
            } else {
               /* This write has a purpose; don't annul it, but do
                  notice that we did it. */
               next_is_write[u->val1] = True;
            }

         }

      } else {
         /* Find out what this insn does to the temps.  Walk the uses
            backwards so the earliest use (a read, if any) wins when a
            temp is both read and written by this insn. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = k-1; j >= 0; j--) {
            next_is_write[ tempUse[j].tempNo ]
               = tempUse[j].isWrite;
         }
      }

   }

   VG_(jitfree)(next_is_write);
}
2747
2748
/* Run forwards, propagating and using the is-completely-defined
   property.  This removes a lot of redundant tag-munging code.
   Unfortunately it requires intimate knowledge of how each uinstr and
   tagop modifies its arguments.  This duplicates knowledge of uinstr
   tempreg uses embodied in getTempUsage(), which is unfortunate.
   The supplied UCodeBlock* is modified in-place.

   For each value temp, def[] should hold VGC_VALUE.

   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
   definitely known to be fully defined at that size.  In all other
   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
   undefined.  In cases of doubt, VGC_UNDEF is always safe.
*/
static void vg_propagate_definedness ( UCodeBlock* cb )
{
   UChar*  def;
   Int     i, j, k, t, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
   /* Shadows start out possibly-undefined; value temps are plain
      values throughout. */
   for (i = 0; i < n_temps; i++)
      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;

   /* Run forwards, detecting and using the all-defined property. */

   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      switch (u->opcode) {

      /* Tag-handling uinstrs. */

      /* Deal with these quickly. */
      case NOP:
      case INCEIP:
         break;

      /* Make a tag defined. */
      case SETV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         def[u->val1] = u->size;
         break;

      /* Check definedness of a tag. */
      case TESTV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            /* Known fully defined: the test can never fire; drop it. */
            vg_assert(def[u->val1] == u->size);
            NOP_no_msg(u);
            if (VG_(disassemble))
               VG_(printf)("at %d: delete TESTV on defd arg\n", i);
         }
         break;

      /* Applies to both values and tags.  Propagate Definedness
         property through copies.  Note that this isn't optional;
         we *have* to do this to keep def[] correct. */
      case MOV:
         vg_assert(u->tag2 == TempReg);
         if (u->tag1 == TempReg) {
            if (VGC_IS_SHADOW(u->val1)) {
               vg_assert(VGC_IS_SHADOW(u->val2));
               def[u->val2] = def[u->val1];
            }
         }
         break;

      case PUTV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            /* Source fully defined: replace the temp operand with the
               all-defined V-bit literal for this size (0 bits ==
               defined; unused high bytes are all-1s == undefined). */
            vg_assert(def[u->val1] == u->size);
            u->tag1 = Literal;
            u->val1 = 0;
            switch (u->size) {
               case 4: u->lit32 = 0x00000000; break;
               case 2: u->lit32 = 0xFFFF0000; break;
               case 1: u->lit32 = 0xFFFFFF00; break;
               default: VG_(panic)("vg_cleanup(PUTV)");
            }
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: propagate definedness into PUTV\n", i);
         }
         break;

      case STOREV:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] <= 4) {
            /* Same literal substitution as the PUTV case above. */
            vg_assert(def[u->val1] == u->size);
            u->tag1 = Literal;
            u->val1 = 0;
            switch (u->size) {
               case 4: u->lit32 = 0x00000000; break;
               case 2: u->lit32 = 0xFFFF0000; break;
               case 1: u->lit32 = 0xFFFFFF00; break;
               default: VG_(panic)("vg_cleanup(STOREV)");
            }
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: propagate definedness into STandV\n", i);
         }
         break;

      /* Nothing interesting we can do with this, I think. */
      case PUTVF:
         break;

      /* Tag handling operations. */
      case TAG2:
         vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
         vg_assert(u->tag3 == Lit16);
         /* Ultra-paranoid "type" checking.  Improve*_TQ ops take a
            value temp as arg1; all other tag ops take a shadow. */
         switch (u->val3) {
            case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
            case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
            case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
               vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
               break;
            default:
               vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
               break;
         }
         switch (u->val3) {
            Int sz;
            case VgT_UifU4:
               sz = 4; goto do_UifU;
            case VgT_UifU2:
               sz = 2; goto do_UifU;
            case VgT_UifU1:
               sz = 1; goto do_UifU;
            case VgT_UifU0:
               sz = 0; goto do_UifU;
            do_UifU:
               vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
               vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
               if (def[u->val1] <= 4) {
                  /* UifU.  The first arg is defined, so result is
                     simply second arg.  Delete this operation. */
                  vg_assert(def[u->val1] == sz);
                  NOP_no_msg(u);
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: delete UifU%d due to defd arg1\n",
                        i, sz);
               }
               else
               if (def[u->val2] <= 4) {
                  /* UifU.  The second arg is defined, so result is
                     simply first arg.  Copy to second. */
                  vg_assert(def[u->val2] == sz);
                  u->opcode = MOV;
                  u->size = 4;
                  u->tag3 = NoValue;
                  def[u->val2] = def[u->val1];
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change UifU%d to MOV due to defd"
                        " arg2\n",
                        i, sz);
               }
               break;
            case VgT_ImproveAND4_TQ:
               sz = 4; goto do_ImproveAND;
            case VgT_ImproveAND1_TQ:
               sz = 1; goto do_ImproveAND;
            do_ImproveAND:
               /* Implements Q = T OR Q.  So if Q is entirely defined,
                  ie all 0s, we get MOV T, Q. */
               if (def[u->val2] <= 4) {
                  vg_assert(def[u->val2] == sz);
                  u->size = 4; /* Regardless of sz */
                  u->opcode = MOV;
                  u->tag3 = NoValue;
                  /* Result now contains value bits, not a known-defined
                     tag, so conservatively mark it undefined. */
                  def[u->val2] = VGC_UNDEF;
                  if (VG_(disassemble))
                      VG_(printf)(
                         "at %d: change ImproveAND%d_TQ to MOV due "
                         "to defd arg2\n",
                         i, sz);
               }
               break;
            default:
               goto unhandled;
         }
         break;

      case TAG1:
         vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
         if (def[u->val1] > 4) break;
         /* We now know that the arg to the op is entirely defined.
            If the op changes the size of the arg, we must replace
            it with a SETV at the new size.  If it doesn't change
            the size, we can delete it completely. */
         switch (u->val3) {
            /* Maintain the same size ... */
            case VgT_Left4:
               vg_assert(def[u->val1] == 4);
               NOP_tag1_op(u);
               break;
            case VgT_PCast11:
               vg_assert(def[u->val1] == 1);
               NOP_tag1_op(u);
               break;
            /* Change size ... */
            case VgT_PCast40:
               vg_assert(def[u->val1] == 4);
               SETV_tag1_op(u,0);
               def[u->val1] = 0;
               break;
            case VgT_PCast14:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,4);
               def[u->val1] = 4;
               break;
            case VgT_PCast12:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,2);
               def[u->val1] = 2;
               break;
            case VgT_PCast10:
               vg_assert(def[u->val1] == 1);
               SETV_tag1_op(u,0);
               def[u->val1] = 0;
               break;
            case VgT_PCast02:
               vg_assert(def[u->val1] == 0);
               SETV_tag1_op(u,2);
               def[u->val1] = 2;
               break;
            default:
               goto unhandled;
         }
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete TAG1 %s due to defd arg\n",
               i, VG_(nameOfTagOp(u->val3)));
         break;

      default:
      unhandled:
         /* We don't know how to handle this uinstr.  Be safe, and
            set to VGC_VALUE or VGC_UNDEF all temps written by it. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = 0; j < k; j++) {
            t = tempUse[j].tempNo;
            vg_assert(t >= 0 && t < n_temps);
            if (!tempUse[j].isWrite) {
               /* t is read; ignore it. */
               if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
                  VG_(printf)("ignoring def %d at %s %s\n",
                              def[t],
                              VG_(nameUOpcode)(True, u->opcode),
                              (u->opcode == TAG1 || u->opcode == TAG2)
                                 ? VG_(nameOfTagOp)(u->val3)
                                 : (Char*)"");
            } else {
               /* t is written; better nullify it. */
               def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
            }
         }
      }
   }

   VG_(jitfree)(def);
}
3019
3020
/* Top level post-instrumentation cleanup function.  Runs the
   forwards definedness-propagation pass first, since it converts
   tag ops on known-defined shadows into SETVs/NOPs, which the
   backwards dead-SETV pass can then delete if their results are
   never used.  The pass ordering therefore matters. */
static void vg_cleanup ( UCodeBlock* cb )
{
   vg_propagate_definedness ( cb );
   vg_delete_redundant_SETVs ( cb );
}
3027
3028
3029/*------------------------------------------------------------*/
3030/*--- Main entry point for the JITter. ---*/
3031/*------------------------------------------------------------*/
3032
3033/* Translate the basic block beginning at orig_addr, placing the
3034 translation in a vg_malloc'd block, the address and size of which
3035 are returned in trans_addr and trans_size. Length of the original
3036 block is also returned in orig_size. If the latter three are NULL,
3037 this call is being done for debugging purposes, in which case (a)
3038 throw away the translation once it is made, and (b) produce a load
3039 of debugging output.
3040*/
sewardj1e8cdc92002-04-18 11:37:52 +00003041void VG_(translate) ( ThreadState* tst,
3042 /* Identity of thread needing this block */
3043 Addr orig_addr,
sewardjde4a1d02002-03-22 01:27:54 +00003044 UInt* orig_size,
3045 Addr* trans_addr,
3046 UInt* trans_size )
3047{
3048 Int n_disassembled_bytes, final_code_size;
3049 Bool debugging_translation;
3050 UChar* final_code;
3051 UCodeBlock* cb;
3052
3053 VGP_PUSHCC(VgpTranslate);
3054 debugging_translation
3055 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
3056
3057 dis = True;
3058 dis = debugging_translation;
3059
3060 /* Check if we're being asked to jump to a silly address, and if so
3061 record an error message before potentially crashing the entire
3062 system. */
3063 if (VG_(clo_instrument) && !debugging_translation && !dis) {
3064 Addr bad_addr;
3065 Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
3066 if (!ok) {
sewardj1e8cdc92002-04-18 11:37:52 +00003067 VG_(record_jump_error)(tst, bad_addr);
sewardjde4a1d02002-03-22 01:27:54 +00003068 }
3069 }
3070
3071 /* if (VG_(overall_in_count) >= 4800) dis=True; */
3072 if (VG_(disassemble))
3073 VG_(printf)("\n");
3074 if (0 || dis
3075 || (VG_(overall_in_count) > 0 &&
3076 (VG_(overall_in_count) % 1000 == 0))) {
3077 if (0&& (VG_(clo_verbosity) > 1 || dis))
3078 VG_(message)(Vg_UserMsg,
3079 "trans# %d, bb# %lu, in %d, out %d",
3080 VG_(overall_in_count),
3081 VG_(bbs_done),
3082 VG_(overall_in_osize), VG_(overall_in_tsize),
3083 orig_addr );
3084 }
njn4f9c9342002-04-29 16:03:24 +00003085 cb = VG_(allocCodeBlock)();
sewardjde4a1d02002-03-22 01:27:54 +00003086
3087 /* Disassemble this basic block into cb. */
sewardj671ff542002-05-07 09:25:30 +00003088 /* VGP_PUSHCC(VgpToUCode); */
sewardjde4a1d02002-03-22 01:27:54 +00003089 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
sewardj671ff542002-05-07 09:25:30 +00003090 /* VGP_POPCC; */
sewardjde4a1d02002-03-22 01:27:54 +00003091 /* dis=True; */
3092 /* if (0&& VG_(translations_done) < 617) */
3093 /* dis=False; */
3094 /* Try and improve the code a bit. */
3095 if (VG_(clo_optimise)) {
sewardj671ff542002-05-07 09:25:30 +00003096 /* VGP_PUSHCC(VgpImprove); */
sewardjde4a1d02002-03-22 01:27:54 +00003097 vg_improve ( cb );
3098 if (VG_(disassemble))
3099 VG_(ppUCodeBlock) ( cb, "Improved code:" );
sewardj671ff542002-05-07 09:25:30 +00003100 /* VGP_POPCC; */
sewardjde4a1d02002-03-22 01:27:54 +00003101 }
3102 /* dis=False; */
3103 /* Add instrumentation code. */
3104 if (VG_(clo_instrument)) {
sewardj671ff542002-05-07 09:25:30 +00003105 /* VGP_PUSHCC(VgpInstrument); */
sewardjde4a1d02002-03-22 01:27:54 +00003106 cb = vg_instrument(cb);
sewardj671ff542002-05-07 09:25:30 +00003107 /* VGP_POPCC; */
sewardjde4a1d02002-03-22 01:27:54 +00003108 if (VG_(disassemble))
3109 VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
3110 if (VG_(clo_cleanup)) {
sewardj671ff542002-05-07 09:25:30 +00003111 /* VGP_PUSHCC(VgpCleanup); */
sewardjde4a1d02002-03-22 01:27:54 +00003112 vg_cleanup(cb);
sewardj671ff542002-05-07 09:25:30 +00003113 /* VGP_POPCC; */
sewardjde4a1d02002-03-22 01:27:54 +00003114 if (VG_(disassemble))
3115 VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
3116 }
3117 }
3118
njn4f9c9342002-04-29 16:03:24 +00003119 //VG_(disassemble) = True;
3120
3121 /* Add cache simulation code. */
3122 if (VG_(clo_cachesim)) {
sewardj671ff542002-05-07 09:25:30 +00003123 /* VGP_PUSHCC(VgpCacheInstrument); */
njn4f9c9342002-04-29 16:03:24 +00003124 cb = VG_(cachesim_instrument)(cb, orig_addr);
sewardj671ff542002-05-07 09:25:30 +00003125 /* VGP_POPCC; */
njn4f9c9342002-04-29 16:03:24 +00003126 if (VG_(disassemble))
3127 VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
3128 }
3129
3130 //VG_(disassemble) = False;
3131
sewardjde4a1d02002-03-22 01:27:54 +00003132 /* Allocate registers. */
sewardj671ff542002-05-07 09:25:30 +00003133 /* VGP_PUSHCC(VgpRegAlloc); */
sewardjde4a1d02002-03-22 01:27:54 +00003134 cb = vg_do_register_allocation ( cb );
sewardj671ff542002-05-07 09:25:30 +00003135 /* VGP_POPCC; */
sewardjde4a1d02002-03-22 01:27:54 +00003136 /* dis=False; */
3137 /*
3138 if (VG_(disassemble))
3139 VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
3140 */
3141
sewardj671ff542002-05-07 09:25:30 +00003142 /* VGP_PUSHCC(VgpFromUcode); */
sewardjde4a1d02002-03-22 01:27:54 +00003143 /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
3144 and so must be VG_(jitfree)'d. */
3145 final_code = VG_(emit_code)(cb, &final_code_size );
sewardj671ff542002-05-07 09:25:30 +00003146 /* VGP_POPCC; */
njn4f9c9342002-04-29 16:03:24 +00003147 VG_(freeCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00003148
3149 if (debugging_translation) {
3150 /* Only done for debugging -- throw away final result. */
3151 VG_(jitfree)(final_code);
3152 } else {
3153 /* Doing it for real -- return values to caller. */
3154 *orig_size = n_disassembled_bytes;
3155 *trans_addr = (Addr)final_code;
3156 *trans_size = final_code_size;
3157 }
3158 VGP_POPCC;
3159}
3160
3161/*--------------------------------------------------------------------*/
3162/*--- end vg_translate.c ---*/
3163/*--------------------------------------------------------------------*/