
/*--------------------------------------------------------------------*/
/*--- The JITter proper: register allocation & code improvement   ---*/
/*---                                               vg_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"


/*------------------------------------------------------------*/
/*--- Renamings of frequently-used global functions.       ---*/
/*------------------------------------------------------------*/

#define uInstr1   VG_(newUInstr1)
#define uInstr2   VG_(newUInstr2)
#define uInstr3   VG_(newUInstr3)
#define dis       VG_(disassemble)
#define nameIReg  VG_(nameOfIntReg)
#define nameISize VG_(nameOfIntSize)
#define uLiteral  VG_(setLiteralField)
#define newTemp   VG_(getNewTemp)
#define newShadow VG_(getNewShadow)


/*------------------------------------------------------------*/
/*--- Memory management for the translator.                ---*/
/*------------------------------------------------------------*/

#define N_JITBLOCKS    4
#define N_JITBLOCK_SZ  5000

static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
static Bool  jitstorage_inuse[N_JITBLOCKS];
static Bool  jitstorage_initdone = False;

static __inline__ void jitstorage_initialise ( void )
{
   Int i;
   if (jitstorage_initdone) return;
   jitstorage_initdone = True;
   for (i = 0; i < N_JITBLOCKS; i++)
      jitstorage_inuse[i] = False;
}

void* VG_(jitmalloc) ( Int nbytes )
{
   Int i;
   jitstorage_initialise();
   if (nbytes > N_JITBLOCK_SZ) {
      /* VG_(printf)("too large: %d\n", nbytes); */
      return VG_(malloc)(VG_AR_PRIVATE, nbytes);
   }
   for (i = 0; i < N_JITBLOCKS; i++) {
      if (!jitstorage_inuse[i]) {
         jitstorage_inuse[i] = True;
         /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
         return & jitstorage[i][0];
      }
   }
   VG_(panic)("out of slots in vg_jitmalloc\n");
   return VG_(malloc)(VG_AR_PRIVATE, nbytes);
}

void VG_(jitfree) ( void* ptr )
{
   Int i;
   jitstorage_initialise();
   for (i = 0; i < N_JITBLOCKS; i++) {
      if (ptr == & jitstorage[i][0]) {
         vg_assert(jitstorage_inuse[i]);
         jitstorage_inuse[i] = False;
         return;
      }
   }
   VG_(free)(VG_AR_PRIVATE, ptr);
}
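
/* A minimal usage sketch (illustrative only; the sizes and the Int
   element type are assumptions of the example, not part of the API).
   Requests of at most N_JITBLOCK_SZ bytes come from the static pool
   above; larger ones fall back to VG_(malloc).  Either way,
   VG_(jitfree) routes the pointer back to wherever it came from. */
#if 0
   Int*  small = VG_(jitmalloc)( 100 * sizeof(Int) );  /* pool slot   */
   void* big   = VG_(jitmalloc)( 2 * N_JITBLOCK_SZ );  /* heap-backed */
   /* ... use the buffers during translation ... */
   VG_(jitfree)(small);   /* slot returned to the pool */
   VG_(jitfree)(big);     /* forwarded to VG_(free)    */
#endif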

/*------------------------------------------------------------*/
/*--- Basics                                               ---*/
/*------------------------------------------------------------*/

static UCodeBlock* allocCodeBlock ( void )
{
   UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
   cb->used = cb->size = cb->nextTemp = 0;
   cb->instrs = NULL;
   return cb;
}


static void freeCodeBlock ( UCodeBlock* cb )
{
   if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
   VG_(free)(VG_AR_PRIVATE, cb);
}


/* Ensure there's enough space in a block to add one uinstr. */
static __inline__
void ensureUInstr ( UCodeBlock* cb )
{
   if (cb->used == cb->size) {
      if (cb->instrs == NULL) {
         vg_assert(cb->size == 0);
         vg_assert(cb->used == 0);
         cb->size = 8;
         cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
      } else {
         Int i;
         UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
                                       2 * sizeof(UInstr) * cb->size);
         for (i = 0; i < cb->used; i++)
            instrs2[i] = cb->instrs[i];
         cb->size *= 2;
         VG_(free)(VG_AR_PRIVATE, cb->instrs);
         cb->instrs = instrs2;
      }
   }

   vg_assert(cb->used < cb->size);
}


__inline__
void VG_(emptyUInstr) ( UInstr* u )
{
   u->val1 = u->val2 = u->val3 = 0;
   u->tag1 = u->tag2 = u->tag3 = NoValue;
   u->flags_r = u->flags_w = FlagsEmpty;
   u->jmpkind = JmpBoring;
   u->smc_check = u->signed_widen = False;
   u->lit32 = 0;
   u->opcode = 0;
   u->size = 0;
   u->cond = 0;
   u->extra4b = 0;
}


/* Add an instruction to a ucode block. */
__inline__
void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2,
                       Tag tag3, UInt val3 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->val3   = val3;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->tag3   = tag3;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
   if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1,
                       Tag tag2, UInt val2 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->val2   = val2;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->tag2   = tag2;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
   if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
                       Tag tag1, UInt val1 )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->val1   = val1;
   ui->opcode = opcode;
   ui->tag1   = tag1;
   ui->size   = sz;
   if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
}


__inline__
void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
{
   UInstr* ui;
   ensureUInstr(cb);
   ui = & cb->instrs[cb->used];
   cb->used++;
   VG_(emptyUInstr)(ui);
   ui->opcode = opcode;
   ui->size   = sz;
}

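
/* An illustrative sketch (not compiled in): how a disassembler might
   use the helpers above to emit ucode for "incl %eax", given some
   in-scope UCodeBlock* cb.  The temp number is whatever newTemp hands
   back; the flag set written by INC (O/S/Z/A/P, but not C) is an
   assumption of this example. */
#if 0
   {
      Int t = newTemp(cb);
      uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t);  /* fetch %eax   */
      uInstr1(cb, INC, 4, TempReg, t);                  /* bump it      */
      VG_(setFlagRW)(&LAST_UINSTR(cb), FlagsEmpty, FlagsOSZAP);
      uInstr2(cb, PUT, 4, TempReg, t, ArchReg, R_EAX);  /* write back   */
   }
#endif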

/* Copy an instruction into the given codeblock. */
static __inline__
void copyUInstr ( UCodeBlock* cb, UInstr* instr )
{
   ensureUInstr(cb);
   cb->instrs[cb->used] = *instr;
   cb->used++;
}


/* Copy auxiliary info from one uinstr to another. */
static __inline__
void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
{
   dst->cond         = src->cond;
   dst->extra4b      = src->extra4b;
   dst->smc_check    = src->smc_check;
   dst->signed_widen = src->signed_widen;
   dst->jmpkind      = src->jmpkind;
   dst->flags_r      = src->flags_r;
   dst->flags_w      = src->flags_w;
}


/* Set the flag R/W sets on a uinstr. */
void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
{
   /* VG_(ppUInstr)(-1,u); */
   vg_assert(fr == (fr & FlagsALL));
   vg_assert(fw == (fw & FlagsALL));
   u->flags_r = fr;
   u->flags_w = fw;
}


/* Set the lit32 field of the most recent uinsn. */
void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
{
   LAST_UINSTR(cb).lit32 = lit32;
}


Bool VG_(anyFlagUse) ( UInstr* u )
{
   return (u->flags_r != FlagsEmpty
           || u->flags_w != FlagsEmpty);
}




/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
   register number.  This effectively defines the order in which real
   registers are allocated.  %ebp is excluded since it is permanently
   reserved for pointing at VG_(baseBlock).  %edi is a general spare
   temp used for Left4 and various misc tag ops.

   Important!  If you change the set of allocatable registers from
   %eax, %ebx, %ecx, %edx, %esi you must change the
   save/restore sequences in vg_helper_smc_check4 to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
   switch (rank) {
#     if 1
      /* Probably the best allocation ordering. */
      case 0: return R_EAX;
      case 1: return R_EBX;
      case 2: return R_ECX;
      case 3: return R_EDX;
      case 4: return R_ESI;
#     else
      /* Contrary; probably the worst.  Helpful for debugging, tho. */
      case 4: return R_EAX;
      case 3: return R_EBX;
      case 2: return R_ECX;
      case 1: return R_EDX;
      case 0: return R_ESI;
#     endif
      default: VG_(panic)("rankToRealRegNo");
   }
}


/*------------------------------------------------------------*/
/*--- Sanity checking uinstrs.                             ---*/
/*------------------------------------------------------------*/

/* This seems as good a place as any to record some important stuff
   about ucode semantics.

   * TempRegs are 32 bits wide.  LOADs of 8/16 bit values into a
     TempReg are defined to zero-extend the loaded value to 32 bits.
     This is needed to make the translation of movzbl et al work
     properly.

   * Similarly, GETs of 8/16 bit ArchRegs are zero-extended.

   * Arithmetic on TempRegs is at the specified size.  For example,
     SUBW t1, t2 has to result in a real 16 bit x86 subtraction
     being emitted -- not a 32 bit one.

   * On some insns we allow the cc bit to be set.  If so, the
     intention is that the simulated machine's %eflags register
     is copied into that of the real machine before the insn,
     and copied back again afterwards.  This means that the
     code generated for that insn must be very careful only to
     update %eflags in the intended way.  This is particularly
     important for the routines referenced by CALL insns.
*/
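
/* For instance (illustrative): because an 8-bit LOAD zero-extends
   into its 32-bit TempReg, "movzbl (%esi),%eax" needs no separate
   widening step -- the byte LOAD followed by a 4-byte PUT already has
   the right semantics.  The signed case (movsbl) does need a WIDEN
   with signed_widen set. */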

/* Meaning of operand kinds is as follows:

   ArchReg  is a register of the simulated CPU, stored in memory,
            in vg_m_state.m_eax .. m_edi.  These values are stored
            using the Intel register encoding.

   RealReg  is a register of the real CPU.  There are VG_MAX_REALREGS
            available for allocation.  As with ArchRegs, these values
            are stored using the Intel register encoding.

   TempReg  is a temporary register used to express the results of
            disassembly.  There is an unlimited supply of them --
            register allocation and spilling eventually assigns them
            to RealRegs.

   SpillNo  is a spill slot number.  The number of required spill
            slots is VG_MAX_PSEUDOS, in general.  Only allowed
            as the ArchReg operand of GET and PUT.

   Lit16    is a signed 16-bit literal value.

   Literal  is a 32-bit literal value.  Each uinstr can only hold
            one of these.

   The disassembled code is expressed purely in terms of ArchReg,
   TempReg and Literal operands.  Eventually, register allocation
   removes all the TempRegs, giving a result using ArchRegs, RealRegs,
   and Literals.  New x86 code can easily be synthesised from this.
   There are carefully designed restrictions on which insns can have
   which operands, intended to make it possible to generate x86 code
   from the result of register allocation on the ucode efficiently and
   without need of any further RealRegs.

   Restrictions on insns (as generated by the disassembler) are as
   follows:

      A=ArchReg   S=SpillNo   T=TempReg   L=Literal   R=RealReg
      N=NoValue

         GETF       T       N       N
         PUTF       T       N       N

         GET        A,S     T       N
         PUT        T       A,S     N
         LOAD       T       T       N
         STORE      T       T       N
         MOV        T,L     T       N
         CMOV       T       T       N
         WIDEN      T       N       N
         JMP        T,L     N       N
         CALLM      L       N       N
         CALLM_S    N       N       N
         CALLM_E    N       N       N
         PUSH,POP   T       N       N
         CLEAR      L       N       N

         AND, OR
                    T       T       N

         ADD, ADC, XOR, SUB, SBB
                    A,L,T   T       N

         SHL, SHR, SAR, ROL, ROR, RCL, RCR
                    L,T     T       N

         NOT, NEG, INC, DEC, CC2VAL, BSWAP
                    T       N       N

         JIFZ       T       L       N

         FPU_R      L       T       N
         FPU_W      L       T       N
         FPU        L       T       N

         LEA1       T       T   (const in a separate field)
         LEA2       T       T       T   (const & shift ditto)

         INCEIP     L       N       N

   and for instrumentation insns:

         LOADV      T       T       N
         STOREV     T,L     T       N
         GETV       A       T       N
         PUTV       T,L     A       N
         GETVF      T       N       N
         PUTVF      T       N       N
         WIDENV     T       N       N
         TESTV      A,T     N       N
         SETV       A,T     N       N
         TAG1       T       N       N
         TAG2       T       T       N

   Before register allocation, S operands should not appear anywhere.
   After register allocation, all T operands should have been
   converted into Rs, and S operands are allowed in GET and PUT --
   denoting spill saves/restores.

   The size field should be 0 for insns for which it is meaningless,
   ie those which do not directly move/operate on data.
*/
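
/* A worked example (illustrative; temp numbers are arbitrary): the
   disassembler might render "addl %eax, %ebx" as

        GETL      %EAX, t0
        GETL      %EBX, t2
        ADDL      t0, t2  (-wOSZACP)
        PUTL      t2, %EBX

   which obeys the table above: GET takes an ArchReg and a TempReg,
   ADD takes (ArchReg|Literal|TempReg) and a TempReg, and PUT takes a
   TempReg and an ArchReg. */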
Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
{
#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
#  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
#  define A1  (u->tag1 == ArchReg)
#  define A2  (u->tag2 == ArchReg)
#  define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
#  define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
#  define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
#  define L1  (u->tag1 == Literal && u->val1 == 0)
#  define L2  (u->tag2 == Literal && u->val2 == 0)
#  define Ls1 (u->tag1 == Lit16)
#  define Ls3 (u->tag3 == Lit16)
#  define N1  (u->tag1 == NoValue)
#  define N2  (u->tag2 == NoValue)
#  define N3  (u->tag3 == NoValue)
#  define SZ4 (u->size == 4)
#  define SZ2 (u->size == 2)
#  define SZ1 (u->size == 1)
#  define SZ0 (u->size == 0)
#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
#  define FLG_WR_MAYBE (u->flags_r == FlagsEmpty && \
                        (u->flags_w == FlagsEmpty || u->flags_w == FlagsZCP))
#  define CC1 (!(CC0))
#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
                      ? (u->size == 4) : True)

   Int n_lits = 0;
   if (u->tag1 == Literal) n_lits++;
   if (u->tag2 == Literal) n_lits++;
   if (u->tag3 == Literal) n_lits++;
   if (n_lits > 1)
      return False;

   switch (u->opcode) {
      case GETF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
      case PUTF:
         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
      case CALLM_S: case CALLM_E:
         return SZ0 && N1 && N2 && N3;
      case INCEIP:
         return SZ0 && CC0 && Ls1 && N2 && N3;
      case LEA1:
         return CC0 && TR1 && TR2 && N3 && SZ4;
      case LEA2:
         return CC0 && TR1 && TR2 && TR3 && SZ4;
      case NOP:
         return SZ0 && CC0 && N1 && N2 && N3;
      case GET:
         return CC0 && AS1 && TR2 && N3;
      case PUT:
         return CC0 && TR1 && AS2 && N3;
      case LOAD: case STORE:
         return CC0 && TR1 && TR2 && N3;
      case MOV:
         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
      case CMOV:
         return CC1 && TR1 && TR2 && N3 && SZ4;
      case JMP:
         return (u->cond==CondAlways ? CC0 : CC1)
                && (TR1 || L1) && N2 && SZ0 && N3;
      case CLEAR:
         return CC0 && Ls1 && N2 && SZ0 && N3;
      case CALLM:
         return SZ0 && Ls1 && N2 && N3;
      case PUSH: case POP:
         return CC0 && TR1 && N2 && N3;
      case AND: case OR:
         return TR1 && TR2 && N3;
      case ADD: case ADC: case XOR: case SUB: case SBB:
         return (A1 || TR1 || L1) && TR2 && N3;
      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
         return (TR1 || L1) && TR2 && N3;
      case NOT: case NEG: case INC: case DEC:
         return TR1 && N2 && N3;
      case BSWAP:
         return TR1 && N2 && N3 && CC0 && SZ4;
      case CC2VAL:
         return CC1 && SZ1 && TR1 && N2 && N3;
      case JIFZ:
         return CC0 && SZ4 && TR1 && L2 && N3;
      case FPU_R: case FPU_W:
         return CC0 && Ls1 && TR2 && N3;
      case FPU:
         return SZ0 && FLG_WR_MAYBE && Ls1 && N2 && N3;
      case LOADV:
         return CC0 && TR1 && TR2 && N3;
      case STOREV:
         return CC0 && (TR1 || L1) && TR2 && N3;
      case GETV:
         return CC0 && A1 && TR2 && N3;
      case PUTV:
         return CC0 && (TR1 || L1) && A2 && N3;
      case GETVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case PUTVF:
         return CC0 && TR1 && N2 && N3 && SZ0;
      case WIDEN:
         return CC0 && TR1 && N2 && N3;
      case TESTV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case SETV:
         return CC0 && (A1 || TR1) && N2 && N3;
      case TAG1:
         return CC0 && TR1 && N2 && Ls3 && SZ0;
      case TAG2:
         return CC0 && TR1 && TR2 && Ls3 && SZ0;
      default:
         VG_(panic)("vg_saneUInstr: unhandled opcode");
   }
#  undef SZ4_IF_TR1
#  undef CC0
#  undef CC1
#  undef SZ4
#  undef SZ2
#  undef SZ1
#  undef SZ0
#  undef TR1
#  undef TR2
#  undef TR3
#  undef A1
#  undef A2
#  undef AS1
#  undef AS2
#  undef AS3
#  undef L1
#  undef Ls1
#  undef L2
#  undef Ls3
#  undef N1
#  undef N2
#  undef N3
#  undef FLG_RD
#  undef FLG_WR
#  undef FLG_WR_MAYBE
}


/* Sanity checks to do with CALLMs in UCodeBlocks. */
Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
{
   Int  callm   = 0;
   Int  callm_s = 0;
   Int  callm_e = 0;
   Int  callm_ptr, calls_ptr;
   Int  i, j, t;
   Bool incall  = False;

   /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */

   for (i = 0; i < cb->used; i++) {
      switch (cb->instrs[i].opcode) {
         case CALLM:
            if (!incall) return False;
            callm++;
            break;
         case CALLM_S:
            if (incall) return False;
            incall = True;
            callm_s++;
            break;
         case CALLM_E:
            if (!incall) return False;
            incall = False;
            callm_e++;
            break;
         case PUSH: case POP: case CLEAR:
            if (!incall) return False;
            break;
         default:
            break;
      }
   }
   if (incall) return False;
   if (callm != callm_s || callm != callm_e) return False;

   /* Check the sections between CALLM_S and CALLM's.  Ensure that no
      PUSH uinsn pushes any TempReg that any other PUSH in the same
      section pushes.  Ie, check that the TempReg args to PUSHes in
      the section are unique.  If not, the instrumenter generates
      incorrect code for CALLM insns. */

   callm_ptr = 0;

  find_next_CALLM:
   /* Search for the next interval, making calls_ptr .. callm_ptr
      bracket it. */
   while (callm_ptr < cb->used
          && cb->instrs[callm_ptr].opcode != CALLM)
      callm_ptr++;
   if (callm_ptr == cb->used)
      return True;
   vg_assert(cb->instrs[callm_ptr].opcode == CALLM);

   calls_ptr = callm_ptr - 1;
   while (cb->instrs[calls_ptr].opcode != CALLM_S)
      calls_ptr--;
   vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
   vg_assert(calls_ptr >= 0);

   /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */

   /* For each PUSH insn in the interval ... */
   for (i = calls_ptr + 1; i < callm_ptr; i++) {
      if (cb->instrs[i].opcode != PUSH) continue;
      t = cb->instrs[i].val1;
      /* Ensure no later PUSH insns up to callm_ptr push the same
         TempReg.  Return False if any such are found. */
      for (j = i+1; j < callm_ptr; j++) {
         if (cb->instrs[j].opcode == PUSH &&
             cb->instrs[j].val1 == t)
            return False;
      }
   }

   /* This interval is clean.  Keep going ... */
   callm_ptr++;
   goto find_next_CALLM;
}
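
/* For reference, a block that passes these checks brackets each CALLM
   like this (a sketch; the helper address and temp numbers are
   hypothetical):

        CALLM_S
        PUSH   t0
        PUSH   t2        <-- must differ from t0
        CALLM  $0x...    <-- some VG_(helper_...) routine
        POP    t4
        CLEAR  $0x8
        CALLM_E
*/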


/*------------------------------------------------------------*/
/*--- Printing uinstrs.                                    ---*/
/*------------------------------------------------------------*/

Char* VG_(nameCondcode) ( Condcode cond )
{
   switch (cond) {
      case CondO:      return "o";
      case CondNO:     return "no";
      case CondB:      return "b";
      case CondNB:     return "nb";
      case CondZ:      return "z";
      case CondNZ:     return "nz";
      case CondBE:     return "be";
      case CondNBE:    return "nbe";
      case CondS:      return "s";
      case ConsNS:     return "ns";
      case CondP:      return "p";
      case CondNP:     return "np";
      case CondL:      return "l";
      case CondNL:     return "nl";
      case CondLE:     return "le";
      case CondNLE:    return "nle";
      case CondAlways: return "MP"; /* hack! */
      default: VG_(panic)("nameCondcode");
   }
}


static void vg_ppFlagSet ( Char* prefix, FlagSet set )
{
   VG_(printf)("%s", prefix);
   if (set & FlagD) VG_(printf)("D");
   if (set & FlagO) VG_(printf)("O");
   if (set & FlagS) VG_(printf)("S");
   if (set & FlagZ) VG_(printf)("Z");
   if (set & FlagA) VG_(printf)("A");
   if (set & FlagC) VG_(printf)("C");
   if (set & FlagP) VG_(printf)("P");
}


static void ppTempReg ( Int tt )
{
   if ((tt & 1) == 0)
      VG_(printf)("t%d", tt);
   else
      VG_(printf)("q%d", tt-1);
}


static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
{
   UInt tag, val;
   switch (operandNo) {
      case 1: tag = u->tag1; val = u->val1; break;
      case 2: tag = u->tag2; val = u->val2; break;
      case 3: tag = u->tag3; val = u->val3; break;
      default: VG_(panic)("ppUOperand(1)");
   }
   if (tag == Literal) val = u->lit32;

   if (parens) VG_(printf)("(");
   switch (tag) {
      case TempReg: ppTempReg(val); break;
      case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
      case Literal: VG_(printf)("$0x%x", val); break;
      case Lit16:   VG_(printf)("$0x%x", val); break;
      case NoValue: VG_(printf)("NoValue"); break;
      case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
      case SpillNo: VG_(printf)("spill%d", val); break;
      default: VG_(panic)("ppUOperand(2)");
   }
   if (parens) VG_(printf)(")");
}


Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
{
   switch (opc) {
      case ADD:   return (upper ? "ADD"   : "add");
      case ADC:   return (upper ? "ADC"   : "adc");
      case AND:   return (upper ? "AND"   : "and");
      case OR:    return (upper ? "OR"    : "or");
      case XOR:   return (upper ? "XOR"   : "xor");
      case SUB:   return (upper ? "SUB"   : "sub");
      case SBB:   return (upper ? "SBB"   : "sbb");
      case SHL:   return (upper ? "SHL"   : "shl");
      case SHR:   return (upper ? "SHR"   : "shr");
      case SAR:   return (upper ? "SAR"   : "sar");
      case ROL:   return (upper ? "ROL"   : "rol");
      case ROR:   return (upper ? "ROR"   : "ror");
      case RCL:   return (upper ? "RCL"   : "rcl");
      case RCR:   return (upper ? "RCR"   : "rcr");
      case NOT:   return (upper ? "NOT"   : "not");
      case NEG:   return (upper ? "NEG"   : "neg");
      case INC:   return (upper ? "INC"   : "inc");
      case DEC:   return (upper ? "DEC"   : "dec");
      case BSWAP: return (upper ? "BSWAP" : "bswap");
      default: break;
   }
   if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
   switch (opc) {
      case GETVF:   return "GETVF";
      case PUTVF:   return "PUTVF";
      case TAG1:    return "TAG1";
      case TAG2:    return "TAG2";
      case CALLM_S: return "CALLM_S";
      case CALLM_E: return "CALLM_E";
      case INCEIP:  return "INCEIP";
      case LEA1:    return "LEA1";
      case LEA2:    return "LEA2";
      case NOP:     return "NOP";
      case GET:     return "GET";
      case PUT:     return "PUT";
      case GETF:    return "GETF";
      case PUTF:    return "PUTF";
      case LOAD:    return "LD" ;
      case STORE:   return "ST" ;
      case MOV:     return "MOV";
      case CMOV:    return "CMOV";
      case WIDEN:   return "WIDEN";
      case JMP:     return "J" ;
      case JIFZ:    return "JIFZ" ;
      case CALLM:   return "CALLM";
      case PUSH:    return "PUSH" ;
      case POP:     return "POP" ;
      case CLEAR:   return "CLEAR";
      case CC2VAL:  return "CC2VAL";
      case FPU_R:   return "FPU_R";
      case FPU_W:   return "FPU_W";
      case FPU:     return "FPU" ;
      case LOADV:   return "LOADV";
      case STOREV:  return "STOREV";
      case GETV:    return "GETV";
      case PUTV:    return "PUTV";
      case TESTV:   return "TESTV";
      case SETV:    return "SETV";
      default: VG_(panic)("nameUOpcode: unhandled case");
   }
}


void VG_(ppUInstr) ( Int instrNo, UInstr* u )
{
   VG_(printf)("\t%4d: %s", instrNo,
               VG_(nameUOpcode)(True, u->opcode));
   if (u->opcode == JMP || u->opcode == CC2VAL)
      VG_(printf)("%s", VG_(nameCondcode(u->cond)));

   switch (u->size) {
      case 0:  VG_(printf)("o"); break;
      case 1:  VG_(printf)("B"); break;
      case 2:  VG_(printf)("W"); break;
      case 4:  VG_(printf)("L"); break;
      case 8:  VG_(printf)("Q"); break;
      default: VG_(printf)("%d", (Int)u->size); break;
   }

   switch (u->opcode) {

      case TAG1:
         VG_(printf)("\t");
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(" )");
         break;

      case TAG2:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
         ppUOperand(u, 1, 4, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(" )");
         break;

      case CALLM_S: case CALLM_E:
         break;

      case INCEIP:
         VG_(printf)("\t$%d", u->val1);
         break;

      case LEA2:
         VG_(printf)("\t%d(" , u->lit32);
         ppUOperand(u, 1, 4, False);
         VG_(printf)(",");
         ppUOperand(u, 2, 4, False);
         VG_(printf)(",%d), ", (Int)u->extra4b);
         ppUOperand(u, 3, 4, False);
         break;

      case LEA1:
         VG_(printf)("\t%d" , u->lit32);
         ppUOperand(u, 1, 4, True);
         VG_(printf)(", ");
         ppUOperand(u, 2, 4, False);
         break;

      case NOP:
         break;

      case FPU_W:
         VG_(printf)("\t0x%x:0x%x, ",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         ppUOperand(u, 2, 4, True);
         break;

      case FPU_R:
         VG_(printf)("\t");
         ppUOperand(u, 2, 4, True);
         VG_(printf)(", 0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case FPU:
         VG_(printf)("\t0x%x:0x%x",
                     (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
         break;

      case STOREV: case LOADV:
      case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
         break;

      case GETF: case PUTF:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JMP: case CC2VAL:
      case PUSH: case POP: case CLEAR: case CALLM:
         if (u->opcode == JMP) {
            switch (u->jmpkind) {
               case JmpCall:      VG_(printf)("-c"); break;
               case JmpRet:       VG_(printf)("-r"); break;
               case JmpSyscall:   VG_(printf)("-sys"); break;
               case JmpClientReq: VG_(printf)("-cli"); break;
               default: break;
            }
         }
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case JIFZ:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case PUTVF: case GETVF:
         VG_(printf)("\t");
         ppUOperand(u, 1, 0, False);
         break;

      case NOT: case NEG: case INC: case DEC: case BSWAP:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->size, False);
         break;

      case GETV: case PUTV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
         VG_(printf)(", ");
         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
         break;

      case WIDEN:
         VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                     u->signed_widen?'s':'z');
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      case TESTV: case SETV:
         VG_(printf)("\t");
         ppUOperand(u, 1, u->size, False);
         break;

      default: VG_(panic)("ppUInstr: unhandled opcode");
   }

   if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
      VG_(printf)("  (");
      if (u->flags_r != FlagsEmpty)
         vg_ppFlagSet("-r", u->flags_r);
      if (u->flags_w != FlagsEmpty)
         vg_ppFlagSet("-w", u->flags_w);
      VG_(printf)(")");
   }
   VG_(printf)("\n");
}


void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
{
   Int i;
   VG_(printf)("\n%s\n", title);
   for (i = 0; i < cb->used; i++)
      if (0 || cb->instrs[i].opcode != NOP)
         VG_(ppUInstr) ( i, &cb->instrs[i] );
   VG_(printf)("\n");
}


/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation               ---*/
/*--- and code improvement.                                ---*/
/*------------------------------------------------------------*/

/* A structure for communicating temp uses, and for indicating
   temp->real register mappings for patchUInstr. */
typedef
   struct {
      Int  realNo;
      Int  tempNo;
      Bool isWrite;
   }
   TempUse;


/* Get the temp uses of a uinstr, parking them in an array supplied by
   the caller, which is assumed to be big enough.  Return the number
   of entries.  Insns which read _and_ write a register wind up
   mentioning it twice.  Entries are placed in the array in program
   order, so that if a reg is read-modified-written, it appears first
   as a read and then as a write.
*/
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{

#  define RD(ono)                               \
      if (mycat(u->tag,ono) == TempReg)         \
         { arr[n].tempNo  = mycat(u->val,ono);  \
           arr[n].isWrite = False; n++; }
#  define WR(ono)                               \
      if (mycat(u->tag,ono) == TempReg)         \
         { arr[n].tempNo  = mycat(u->val,ono);  \
           arr[n].isWrite = True; n++; }

   Int n = 0;
   switch (u->opcode) {
      case LEA1: RD(1); WR(2); break;
      case LEA2: RD(1); RD(2); WR(3); break;

      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
      case FPU_R: case FPU_W: RD(2); break;

      case GETF:  WR(1); break;
      case PUTF:  RD(1); break;

      case GET:   WR(2); break;
      case PUT:   RD(1); break;
      case LOAD:  RD(1); WR(2); break;
      case STORE: RD(1); RD(2); break;
      case MOV:   RD(1); WR(2); break;

      case JMP:   RD(1); break;
      case CLEAR: case CALLM: break;

      case PUSH: RD(1); break;
      case POP:  WR(1); break;

      case TAG2:
      case CMOV:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
         RD(1); RD(2); WR(2); break;

      case SHL: case SHR: case SAR:
      case ROL: case ROR: case RCL: case RCR:
         RD(1); RD(2); WR(2); break;

      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
         RD(1); WR(1); break;

      case WIDEN: RD(1); WR(1); break;

      case CC2VAL: WR(1); break;
      case JIFZ: RD(1); break;

      /* These sizes are only ever consulted when the instrumentation
         code is being added, so the following can return
         manifestly-bogus sizes. */
      case LOADV:  RD(1); WR(2); break;
      case STOREV: RD(1); RD(2); break;
      case GETV:   WR(2); break;
      case PUTV:   RD(1); break;
      case TESTV:  RD(1); break;
      case SETV:   WR(1); break;
      case PUTVF:  RD(1); break;
      case GETVF:  WR(1); break;

      default: VG_(panic)("getTempUsage: unhandled opcode");
   }
   return n;

#  undef RD
#  undef WR
}
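
/* Example (reading off the cases above): for "ADD t0, t2" this
   reports three entries, in program order -- a read of t0, a read of
   t2, then a write of t2 -- since ADD reads both operands and
   overwrites the second. */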


/* Change temp regs in u into real regs, as directed by tmap. */
static __inline__
void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
{
   Int i;
   if (u->tag1 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val1) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(1)");
      u->tag1 = RealReg;
      u->val1 = tmap[i].realNo;
   }
   if (u->tag2 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val2) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(2)");
      u->tag2 = RealReg;
      u->val2 = tmap[i].realNo;
   }
   if (u->tag3 == TempReg) {
      for (i = 0; i < n_tmap; i++)
         if (tmap[i].tempNo == u->val3) break;
      if (i == n_tmap) VG_(panic)("patchUInstr(3)");
      u->tag3 = RealReg;
      u->val3 = tmap[i].realNo;
   }
}


/* Tedious x86-specific hack which compensates for the fact that the
   register numbers for %ah .. %dh do not correspond to those for %eax
   .. %edx.  It maps a (reg size, reg no) pair to the number of the
   containing 32-bit reg. */
static __inline__
Int containingArchRegOf ( Int sz, Int aregno )
{
   switch (sz) {
      case 4: return aregno;
      case 2: return aregno;
      case 1: return aregno >= 4 ? aregno-4 : aregno;
      default: VG_(panic)("containingArchRegOf");
   }
}
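
/* Example: in the Intel byte-register encoding %ah is number 4 while
   its containing 32-bit register %eax is number 0, so
   containingArchRegOf(1, 4) -- size 1, %ah -- returns 0, and
   containingArchRegOf(1, 0) -- %al -- returns 0 unchanged. */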


/* If u reads an ArchReg, return the number of the containing arch
   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
static __inline__
Int maybe_uinstrReadsArchReg ( UInstr* u )
{
   switch (u->opcode) {
      case GET:
      case ADD: case ADC: case AND: case OR:
      case XOR: case SUB: case SBB:
      case SHL: case SHR: case SAR: case ROL:
      case ROR: case RCL: case RCR:
         if (u->tag1 == ArchReg)
            return containingArchRegOf ( u->size, u->val1 );
         else
            return -1;

      case GETF: case PUTF:
      case CALLM_S: case CALLM_E:
      case INCEIP:
      case LEA1:
      case LEA2:
      case NOP:
      case PUT:
      case LOAD:
      case STORE:
      case MOV:
      case CMOV:
      case JMP:
      case CALLM: case CLEAR: case PUSH: case POP:
      case NOT: case NEG: case INC: case DEC: case BSWAP:
      case CC2VAL:
      case JIFZ:
      case FPU: case FPU_R: case FPU_W:
      case WIDEN:
         return -1;

      default:
         VG_(ppUInstr)(0,u);
         VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
   }
}

static __inline__
Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
{
   Int i, k;
   TempUse tempUse[3];
   k = getTempUsage ( u, &tempUse[0] );
   for (i = 0; i < k; i++)
      if (tempUse[i].tempNo == tempreg)
         return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- ucode improvement.                                   ---*/
/*------------------------------------------------------------*/

/* Improve the code in cb by doing
   -- Redundant ArchReg-fetch elimination
   -- Redundant PUT elimination
   -- Redundant cond-code restore/save elimination
   The overall effect of these is to allow target registers to be
   cached in host registers over multiple target insns.
*/
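
/* A sketch of pass 1 at work (temp numbers illustrative).  In

        GETL %EAX, t0
        ADDL t0, t2
        GETL %EAX, t6

   the second GET is redundant provided t0's live range has ended by
   then, so it is turned into a NOP and t6 is renamed to t0 for the
   rest of the block. */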
static void vg_improve ( UCodeBlock* cb )
{
   Int     i, j, k, m, n, ar, tr, told, actual_areg;
   Int     areg_map[8];
   Bool    annul_put[8];
   TempUse tempUse[3];
   UInstr* u;
   Bool    wr;
   Int*    last_live_before;
   FlagSet future_dead_flags;

   if (cb->nextTemp > 0)
      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
   else
      last_live_before = NULL;


   /* PASS 1: redundant GET elimination.  (Actually, more general than
      that -- eliminates redundant fetches of ArchRegs). */

   /* Find the live-range-ends for all temporaries.  Duplicates code
      in the register allocator :-( */

   for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      k = getTempUsage(u, &tempUse[0]);

      /* For each temp usage ... bwds in program order. */
      for (j = k-1; j >= 0; j--) {
         tr = tempUse[j].tempNo;
         wr = tempUse[j].isWrite;
         if (last_live_before[tr] == -1) {
            vg_assert(tr >= 0 && tr < cb->nextTemp);
            last_live_before[tr] = wr ? (i+1) : i;
         }
      }

   }

#  define BIND_ARCH_TO_TEMP(archreg,tempreg)\
   { Int q;                                           \
     /* Invalidate any old binding(s) to tempreg. */  \
     for (q = 0; q < 8; q++)                          \
        if (areg_map[q] == tempreg) areg_map[q] = -1; \
     /* Add the new binding. */                       \
     areg_map[archreg] = (tempreg);                   \
   }

   /* Set up the A-reg map. */
   for (i = 0; i < 8; i++) areg_map[i] = -1;

   /* Scan insns. */
   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      if (u->opcode == GET && u->size == 4) {
         /* GET; see if it can be annulled. */
         vg_assert(u->tag1 == ArchReg);
         vg_assert(u->tag2 == TempReg);
         ar   = u->val1;
         tr   = u->val2;
         told = areg_map[ar];
         if (told != -1 && last_live_before[told] <= i) {
            /* ar already has an old mapping to told, but that runs
               out here.  Annul this GET, rename tr to told for the
               rest of the block, and extend told's live range to that
               of tr. */
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            n = last_live_before[tr] + 1;
            if (n > cb->used) n = cb->used;
            last_live_before[told] = last_live_before[tr];
            last_live_before[tr] = i-1;
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
                  i, tr, told, i+1, n-1);
            for (m = i+1; m < n; m++) {
               if (cb->instrs[m].tag1 == TempReg
                   && cb->instrs[m].val1 == tr)
                  cb->instrs[m].val1 = told;
               if (cb->instrs[m].tag2 == TempReg
                   && cb->instrs[m].val2 == tr)
                  cb->instrs[m].val2 = told;
            }
            BIND_ARCH_TO_TEMP(ar,told);
         }
         else
            BIND_ARCH_TO_TEMP(ar,tr);
      }
      else if (u->opcode == GET && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val1 );
         areg_map[actual_areg] = -1;
      }
      else if (u->opcode == PUT && u->size == 4) {
         /* PUT; re-establish t -> a binding */
         vg_assert(u->tag1 == TempReg);
         vg_assert(u->tag2 == ArchReg);
         BIND_ARCH_TO_TEMP(u->val2, u->val1);
      }
      else if (u->opcode == PUT && u->size != 4) {
         /* Invalidate any mapping for this archreg. */
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         areg_map[actual_areg] = -1;
      } else {

         /* see if insn has an archreg as a read operand; if so try to
            map it. */
         if (u->tag1 == ArchReg && u->size == 4
             && areg_map[u->val1] != -1) {
            switch (u->opcode) {
               case ADD: case SUB: case AND: case OR: case XOR:
               case ADC: case SBB:
               case SHL: case SHR: case SAR: case ROL: case ROR:
               case RCL: case RCR:
                  if (VG_(disassemble))
                     VG_(printf)(
                        "at %d: change ArchReg %S to TempReg t%d\n",
                        i, nameIReg(4,u->val1), areg_map[u->val1]);
                  u->tag1 = TempReg;
                  u->val1 = areg_map[u->val1];
                  /* Remember to extend the live range of the TempReg,
                     if necessary. */
                  if (last_live_before[u->val1] < i)
                     last_live_before[u->val1] = i;
                  break;
               default:
                  break;
            }
         }

         /* boring insn; invalidate any mappings to temps it writes */
         k = getTempUsage(u, &tempUse[0]);

         for (j = 0; j < k; j++) {
            wr = tempUse[j].isWrite;
            if (!wr) continue;
            tr = tempUse[j].tempNo;
            for (m = 0; m < 8; m++)
               if (areg_map[m] == tr) areg_map[m] = -1;
         }
      }

   }

#  undef BIND_ARCH_TO_TEMP

   /* PASS 2: redundant PUT elimination.  If doing instrumentation,
      don't annul (delay) puts of %ESP, since the memory check
      machinery always requires the in-memory value of %ESP to be up
      to date.
   */
   for (j = 0; j < 8; j++)
      annul_put[j] = False;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];
      if (u->opcode == NOP) continue;

      if (u->opcode == PUT && u->size == 4) {
         vg_assert(u->tag2 == ArchReg);
         actual_areg = containingArchRegOf ( 4, u->val2 );
         if (annul_put[actual_areg]) {
            u->opcode = NOP;
            u->tag1 = u->tag2 = NoValue;
            if (VG_(disassemble))
               VG_(printf)("at %d: delete PUT\n", i );
         } else {
            if (!(VG_(clo_instrument) && actual_areg == R_ESP))
               annul_put[actual_areg] = True;
         }
      }
      else if (u->opcode == PUT && u->size != 4) {
         actual_areg = containingArchRegOf ( u->size, u->val2 );
         annul_put[actual_areg] = False;
      }
      else if (u->opcode == JMP || u->opcode == JIFZ
               || u->opcode == CALLM) {
         for (j = 0; j < 8; j++)
            annul_put[j] = False;
      }
      else {
         /* If an instruction reads an ArchReg, the immediately
            preceding PUT cannot be annulled. */
         actual_areg = maybe_uinstrReadsArchReg ( u );
         if (actual_areg != -1)
            annul_put[actual_areg] = False;
      }
   }

   /* PASS 2a: redundant-move elimination.  Given MOV t1, t2 and t1 is
      dead after this point, annul the MOV insn and rename t2 to t1.
      Further modifies the last_live_before map. */

#  if 0
   VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
   for (i = 0; i < cb->nextTemp; i++)
      VG_(printf)("llb[t%d]=%d   ", i, last_live_before[i]);
   VG_(printf)("\n");
#  endif

   for (i = 0; i < cb->used-1; i++) {
      u = &cb->instrs[i];
      if (u->opcode != MOV) continue;
      if (u->tag1 == Literal) continue;
      vg_assert(u->tag1 == TempReg);
      vg_assert(u->tag2 == TempReg);
      if (last_live_before[u->val1] == i) {
         if (VG_(disassemble))
            VG_(printf)(
               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
               i, u->val2, u->val1, i+1, last_live_before[u->val2] );
         for (j = i+1; j <= last_live_before[u->val2]; j++) {
            if (cb->instrs[j].tag1 == TempReg
                && cb->instrs[j].val1 == u->val2)
               cb->instrs[j].val1 = u->val1;
            if (cb->instrs[j].tag2 == TempReg
                && cb->instrs[j].val2 == u->val2)
               cb->instrs[j].val2 = u->val1;
         }
         last_live_before[u->val1] = last_live_before[u->val2];
         last_live_before[u->val2] = i-1;
         u->opcode = NOP;
         u->tag1 = u->tag2 = NoValue;
      }
   }

   /* PASS 3: redundant condition-code restore/save elimination.
      Scan backwards from the end.  future_dead_flags records the set
      of flags which are dead at this point, that is, will be written
      before they are next read.  Earlier uinsns which write flags
      already in future_dead_flags can have their writes annulled.
   */
   future_dead_flags = FlagsEmpty;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* We might never make it to insns beyond this one, so be
         conservative. */
      if (u->opcode == JIFZ || u->opcode == JMP) {
         future_dead_flags = FlagsEmpty;
         continue;
      }

      /* We can annul the flags written by this insn if it writes a
         subset (or eq) of the set of flags known to be dead after
         this insn.  If not, just record the flags also written by
         this insn. */
      if (u->flags_w != FlagsEmpty
          && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
         if (VG_(disassemble)) {
            VG_(printf)("at %d: annul flag write ", i);
            vg_ppFlagSet("", u->flags_w);
            VG_(printf)(" due to later ");
            vg_ppFlagSet("", future_dead_flags);
            VG_(printf)("\n");
         }
         u->flags_w = FlagsEmpty;
      } else {
         future_dead_flags
            = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
      }

      /* If this insn also reads flags, empty out future_dead_flags so
         as to force preceding writes not to be annulled. */
      if (u->flags_r != FlagsEmpty)
         future_dead_flags = FlagsEmpty;
   }

   if (last_live_before)
      VG_(jitfree) ( last_live_before );
}


/*------------------------------------------------------------*/
/*--- The new register allocator.                          ---*/
/*------------------------------------------------------------*/

typedef
   struct {
      /* Becomes live for the first time after this insn ... */
      Int live_after;
      /* Becomes dead for the last time after this insn ... */
      Int dead_before;
      /* The "home" spill slot, if needed.  Never changes. */
      Int spill_no;
      /* Where is it?  VG_NOTHING==in a spill slot; else in reg. */
      Int real_no;
   }
   TempInfo;


/* Take a ucode block and allocate its TempRegs to RealRegs, or put
   them in spill locations, and add spill code, if there are not
   enough real regs.  The usual register allocation deal, in short.

   Important redundancy of representation:

     real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
     to VG_NOTHING if the real reg has no currently assigned TempReg.

     The .real_no field of a TempInfo gives the current RRR for
     this TempReg, or VG_NOTHING if the TempReg is currently
     in memory, in which case it is in the SpillNo denoted by
     spill_no.

   These pieces of information (a fwds-bwds mapping, really) must
   be kept consistent!

   This allocator uses the so-called Second Chance Bin Packing
   algorithm, as described in "Quality and Speed in Linear-scan
   Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
   pp142-151).  It is simple and fast and remarkably good at
   minimising the amount of spill code introduced.
*/
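
/* When every real register is occupied, the loop below frees one by
   spilling: the evicted temp is PUT back to its home slot, and the
   incoming temp, if the insn reads it, is reloaded with a GET -- the
   same opcodes as ArchReg traffic, but with SpillNo operands, which
   the sanity checker permits after register allocation.  A sketch of
   the emitted pair (rank and slot numbers illustrative):

        PUTL   %edx, spill3      (evict current occupant)
        GETL   spill5, %edx      (reload the temp this insn reads)
*/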

static
UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
{
   TempInfo*   temp_info;
   Int         real_to_temp[VG_MAX_REALREGS];
   Bool        is_spill_cand[VG_MAX_REALREGS];
   Int         ss_busy_until_before[VG_MAX_SPILLSLOTS];
   Int         i, j, k, m, r, tno, max_ss_no;
   Bool        wr, defer, isRead, spill_reqd;
   TempUse     tempUse[3];
   UCodeBlock* c2;

   /* Used to denote ... well, "no value" in this fn. */
#  define VG_NOTHING (-2)

   /* Initialise the TempReg info. */
   if (c1->nextTemp > 0)
      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
   else
      temp_info = NULL;

   for (i = 0; i < c1->nextTemp; i++) {
      temp_info[i].live_after  = VG_NOTHING;
      temp_info[i].dead_before = VG_NOTHING;
      temp_info[i].spill_no    = VG_NOTHING;
      /* temp_info[i].real_no is not yet relevant. */
   }

   spill_reqd = False;

   /* Scan fwds to establish live ranges. */

   for (i = 0; i < c1->used; i++) {
      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each temp usage ... fwds in program order */
      for (j = 0; j < k; j++) {
         tno = tempUse[j].tempNo;
         wr  = tempUse[j].isWrite;
         if (wr) {
            /* Writes hold a reg live until after this insn. */
            if (temp_info[tno].live_after == VG_NOTHING)
               temp_info[tno].live_after = i;
            if (temp_info[tno].dead_before < i + 1)
               temp_info[tno].dead_before = i + 1;
         } else {
            /* First use of a tmp should be a write. */
            vg_assert(temp_info[tno].live_after != VG_NOTHING);
            /* Reads only hold it live until before this insn. */
            if (temp_info[tno].dead_before < i)
               temp_info[tno].dead_before = i;
         }
      }
   }

#  if 0
   /* Sanity check on live ranges.  Expensive but correct. */
   for (i = 0; i < c1->nextTemp; i++) {
      vg_assert( (temp_info[i].live_after == VG_NOTHING
                  && temp_info[i].dead_before == VG_NOTHING)
                 || (temp_info[i].live_after != VG_NOTHING
                     && temp_info[i].dead_before != VG_NOTHING) );
   }
#  endif

   /* Do a rank-based allocation of TempRegs to spill slot numbers.
      We put as few as possible values in spill slots, but
      nevertheless need to have an assignment to them just in case. */

   max_ss_no = -1;

   for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
      ss_busy_until_before[i] = 0;

   for (i = 0; i < c1->nextTemp; i++) {

      /* True iff this temp is unused. */
      if (temp_info[i].live_after == VG_NOTHING)
         continue;

      /* Find the lowest-numbered spill slot which is available at the
         start point of this interval, and assign the interval to
         it. */
      for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
         if (ss_busy_until_before[j] <= temp_info[i].live_after)
            break;
      if (j == VG_MAX_SPILLSLOTS) {
         VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
         VG_(panic)("register allocation failed -- out of spill slots");
      }
      ss_busy_until_before[j] = temp_info[i].dead_before;
      temp_info[i].spill_no = j;
      if (j > max_ss_no)
         max_ss_no = j;
   }

   VG_(total_reg_rank) += (max_ss_no+1);

   /* Show live ranges and assigned spill slot nos. */

   if (VG_(disassemble)) {
      VG_(printf)("Live Range Assignments\n");

      for (i = 0; i < c1->nextTemp; i++) {
         if (temp_info[i].live_after == VG_NOTHING)
            continue;
         VG_(printf)(
            "   LR %d is   after %d to before %d   spillno %d\n",
            i,
            temp_info[i].live_after,
            temp_info[i].dead_before,
            temp_info[i].spill_no
         );
      }
   }

   /* Now that we've established a spill slot number for each used
      temporary, we can go ahead and do the core of the "Second-chance
      binpacking" allocation algorithm. */

   /* Resulting code goes here.  We generate it all in a forwards
      pass. */
   c2 = allocCodeBlock();

   /* At the start, no TempRegs are assigned to any real register.
      Correspondingly, all temps claim to be currently resident in
      their spill slots, as computed by the previous two passes. */
   for (i = 0; i < VG_MAX_REALREGS; i++)
      real_to_temp[i] = VG_NOTHING;
   for (i = 0; i < c1->nextTemp; i++)
      temp_info[i].real_no = VG_NOTHING;

   if (VG_(disassemble))
      VG_(printf)("\n");

   /* Process each insn in turn. */
   for (i = 0; i < c1->used; i++) {

      if (c1->instrs[i].opcode == NOP) continue;
      VG_(uinstrs_prealloc)++;

#     if 0
      /* Check map consistency.  Expensive but correct. */
      for (r = 0; r < VG_MAX_REALREGS; r++) {
         if (real_to_temp[r] != VG_NOTHING) {
            tno = real_to_temp[r];
            vg_assert(tno >= 0 && tno < c1->nextTemp);
            vg_assert(temp_info[tno].real_no == r);
         }
      }
      for (tno = 0; tno < c1->nextTemp; tno++) {
         if (temp_info[tno].real_no != VG_NOTHING) {
            r = temp_info[tno].real_no;
            vg_assert(r >= 0 && r < VG_MAX_REALREGS);
            vg_assert(real_to_temp[r] == tno);
         }
      }
#     endif

      if (VG_(disassemble))
         VG_(ppUInstr)(i, &c1->instrs[i]);

      /* First, free up enough real regs for this insn.  This may
         generate spill stores since we may have to evict some TempRegs
         currently in real regs.  Also generates spill loads. */

      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
      vg_assert(k >= 0 && k <= 3);

      /* For each ***different*** temp mentioned in the insn .... */
      for (j = 0; j < k; j++) {

         /* First check if the temp is mentioned again later; if so,
            ignore this mention.  We only want to process each temp
            used by the insn once, even if it is mentioned more than
            once. */
         defer = False;
         tno = tempUse[j].tempNo;
         for (m = j+1; m < k; m++)
            if (tempUse[m].tempNo == tno)
               defer = True;
         if (defer)
            continue;

         /* Now we're trying to find a register for tempUse[j].tempNo.
            First of all, if it already has a register assigned, we
            don't need to do anything more. */
         if (temp_info[tno].real_no != VG_NOTHING)
            continue;

         /* No luck.  The next thing to do is see if there is a
            currently unassigned register available.  If so, bag it. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (real_to_temp[r] == VG_NOTHING)
               break;
         }
         if (r < VG_MAX_REALREGS) {
            real_to_temp[r]        = tno;
            temp_info[tno].real_no = r;
            continue;
         }

         /* Unfortunately, that didn't pan out either.  So we'll have
            to eject some other unfortunate TempReg into a spill slot
            in order to free up a register.  Of course, we need to be
            careful not to eject some other TempReg needed by this
            insn.

            Select r in 0 .. VG_MAX_REALREGS-1 such that
            real_to_temp[r] is not mentioned in
            tempUse[0 .. k-1].tempNo, since it would be just plain
            wrong to eject some other TempReg which we need to use in
            this insn.

            It is here that it is important to make a good choice of
            register to spill. */

         /* First, mark those regs which are not spill candidates. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            is_spill_cand[r] = True;
            for (m = 0; m < k; m++) {
               if (real_to_temp[r] == tempUse[m].tempNo) {
                  is_spill_cand[r] = False;
                  break;
               }
            }
         }

         /* We can choose any r satisfying is_spill_cand[r].  However,
            try to make a good choice.  First, try and find r such
            that the associated TempReg is already dead. */
         for (r = 0; r < VG_MAX_REALREGS; r++) {
            if (is_spill_cand[r] &&
                temp_info[real_to_temp[r]].dead_before <= i)
               goto have_spill_cand;
         }

         /* No spill cand is mapped to a dead TempReg.  Now we really
            _do_ have to generate spill code.  Choose r so that the
            next use of its associated TempReg is as far ahead as
            possible, in the hope that this will minimise the number of
            consequent reloads required.  This is a bit expensive, but
            we don't have to do it very often. */
         {
            Int furthest_r = VG_MAX_REALREGS;
            Int furthest   = 0;
            for (r = 0; r < VG_MAX_REALREGS; r++) {
               if (!is_spill_cand[r]) continue;
               for (m = i+1; m < c1->used; m++)
                  if (uInstrMentionsTempReg(&c1->instrs[m],
                                            real_to_temp[r]))
                     break;
               if (m > furthest) {
                  furthest   = m;
                  furthest_r = r;
               }
            }
            r = furthest_r;
            goto have_spill_cand;
         }

        have_spill_cand:
         if (r == VG_MAX_REALREGS)
            VG_(panic)("new reg alloc: out of registers ?!");

         /* Eject r.  Important refinement: don't bother if the
            associated TempReg is now dead. */
         vg_assert(real_to_temp[r] != VG_NOTHING);
         vg_assert(real_to_temp[r] != tno);
         temp_info[real_to_temp[r]].real_no = VG_NOTHING;
         if (temp_info[real_to_temp[r]].dead_before > i) {
            uInstr2(c2, PUT, 4,
                        RealReg, VG_(rankToRealRegNo)(r),
                        SpillNo, temp_info[real_to_temp[r]].spill_no);
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Decide if tno is read. */
         isRead = False;
         for (m = 0; m < k; m++)
            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
               isRead = True;

         /* If so, generate a spill load. */
         if (isRead) {
            uInstr2(c2, GET, 4,
                        SpillNo, temp_info[tno].spill_no,
                        RealReg, VG_(rankToRealRegNo)(r) );
            VG_(uinstrs_spill)++;
            spill_reqd = True;
            if (VG_(disassemble))
               VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         }

         /* Update the forwards and backwards maps. */
         real_to_temp[r]        = tno;
         temp_info[tno].real_no = r;
      }

      /* By this point, all TempRegs mentioned by the insn have been
         brought into real regs.  We now copy the insn to the output
         and use patchUInstr to convert its TempRegs into RealRegs. */
      for (j = 0; j < k; j++)
         tempUse[j].realNo
            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
      copyUInstr(c2, &c1->instrs[i]);
      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);

      if (VG_(disassemble)) {
         VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
         VG_(printf)("\n");
      }
   }

   if (temp_info != NULL)
      VG_(jitfree)(temp_info);

   freeCodeBlock(c1);

   if (spill_reqd)
      VG_(translations_needing_spill)++;

   return c2;

#  undef VG_NOTHING

}


/*------------------------------------------------------------*/
/*--- New instrumentation machinery.                       ---*/
/*------------------------------------------------------------*/

static
VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveOR4_TQ;
      case 2: return VgT_ImproveOR2_TQ;
      case 1: return VgT_ImproveOR1_TQ;
      default: VG_(panic)("get_VgT_ImproveOR_TQ");
   }
}


static
VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
{
   switch (sz) {
      case 4: return VgT_ImproveAND4_TQ;
      case 2: return VgT_ImproveAND2_TQ;
      case 1: return VgT_ImproveAND1_TQ;
      default: VG_(panic)("get_VgT_ImproveAND_TQ");
   }
}


static
VgTagOp get_VgT_Left ( Int sz )
{
   switch (sz) {
      case 4: return VgT_Left4;
      case 2: return VgT_Left2;
      case 1: return VgT_Left1;
      default: VG_(panic)("get_VgT_Left");
   }
}


static
VgTagOp get_VgT_UifU ( Int sz )
{
   switch (sz) {
      case 4: return VgT_UifU4;
      case 2: return VgT_UifU2;
      case 1: return VgT_UifU1;
      case 0: return VgT_UifU0;
      default: VG_(panic)("get_VgT_UifU");
   }
}


static
VgTagOp get_VgT_DifD ( Int sz )
{
   switch (sz) {
      case 4: return VgT_DifD4;
      case 2: return VgT_DifD2;
      case 1: return VgT_DifD1;
      default: VG_(panic)("get_VgT_DifD");
   }
}


static
VgTagOp get_VgT_PCast ( Int szs, Int szd )
{
   if (szs == 4 && szd == 0) return VgT_PCast40;
   if (szs == 2 && szd == 0) return VgT_PCast20;
   if (szs == 1 && szd == 0) return VgT_PCast10;
   if (szs == 0 && szd == 1) return VgT_PCast01;
   if (szs == 0 && szd == 2) return VgT_PCast02;
   if (szs == 0 && szd == 4) return VgT_PCast04;
   if (szs == 1 && szd == 4) return VgT_PCast14;
   if (szs == 1 && szd == 2) return VgT_PCast12;
   if (szs == 1 && szd == 1) return VgT_PCast11;
   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
   VG_(panic)("get_VgT_PCast");
}


static
VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
{
   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;

   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
   if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;

   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;

   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
   VG_(panic)("get_VgT_Widen");
}
1964
1965/* Pessimally cast the spec'd shadow from one size to another. */
1966static
1967void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
1968{
1969 if (szs == 0 && szd == 0)
1970 return;
1971 uInstr3(cb, TAG1, 0, TempReg, tempreg,
1972 NoValue, 0,
1973 Lit16, get_VgT_PCast(szs,szd));
1974}


/* Create a signed or unsigned widen of the spec'd shadow from one
   size to another.  The only allowed size transitions are 1->2, 1->4
   and 2->4. */
static
void create_Widen ( UCodeBlock* cb, Bool signed_widen,
                    Int szs, Int szd, Int tempreg )
{
   if (szs == szd) return;
   uInstr3(cb, TAG1, 0, TempReg, tempreg,
                        NoValue, 0,
                        Lit16,   get_VgT_Widen(signed_widen,szs,szd));
}


/* Get the condition codes into a new shadow, at the given size. */
static
Int create_GETVF ( UCodeBlock* cb, Int sz )
{
   Int tt = newShadow(cb);
   uInstr1(cb, GETVF, 0, TempReg, tt);
   create_PCast(cb, 0, sz, tt);
   return tt;
}


/* Save the condition codes from the spec'd shadow. */
static
void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
{
   if (sz == 0) {
      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
   } else {
      Int tt = newShadow(cb);
      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
      create_PCast(cb, sz, 0, tt);
      uInstr1(cb, PUTVF, 0, TempReg, tt);
   }
}


/* Do Left on the spec'd shadow. */
static
void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
{
   uInstr3(cb, TAG1, 0,
               TempReg, tempreg,
               NoValue, 0,
               Lit16,   get_VgT_Left(sz));
}


/* Do UifU on ts and td, putting the result in td. */
static
void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
                        Lit16,   get_VgT_UifU(sz));
}


/* Do DifD on ts and td, putting the result in td. */
static
void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
                        Lit16,   get_VgT_DifD(sz));
}


/* Do ImproveAND on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
                        Lit16,   get_VgT_ImproveAND_TQ(sz));
}


/* Do ImproveOR on value tval and tag tqqq, putting the result in
   tqqq. */
static
void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
                        Lit16,   get_VgT_ImproveOR_TQ(sz));
}


/* Get the shadow for an operand described by (tag, val).  Emit code
   to do this and return the identity of the shadow holding the
   result.  The result tag is always copied into a new shadow, so it
   can be modified without trashing the original. */
static
Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
                                     Int sz, Int tag, Int val )
{
   Int sh;
   sh = newShadow(cb);
   if (tag == TempReg) {
      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
      return sh;
   }
   if (tag == Literal) {
      uInstr1(cb, SETV, sz, TempReg, sh);
      return sh;
   }
   if (tag == ArchReg) {
      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
      return sh;
   }
   VG_(panic)("getOperandShadow");
}



/* Create and return an instrumented version of cb_in.  Free cb_in
   before returning. */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
   UCodeBlock* cb;
   Int         i, j;
   UInstr*     u_in;
   Int         qs, qd, qt, qtt;
   cb = allocCodeBlock();
   cb->nextTemp = cb_in->nextTemp;

   for (i = 0; i < cb_in->used; i++) {
      qs = qd = qt = qtt = INVALID_TEMPREG;
      u_in = &cb_in->instrs[i];

      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */

      /* VG_(ppUInstr)(0, u_in); */
      switch (u_in->opcode) {

         case NOP:
            break;

         case INCEIP:
            copyUInstr(cb, u_in);
            break;
         /* Loads and stores.  Test the V bits for the address.  24
            Mar 02: since the address is A-checked anyway, there's not
            really much point in doing the V-check too, unless you
            think that you might use addresses which are undefined
            but still addressable.  Hence the V check is made
            optional (controlled by VG_(clo_check_addrVs)).

            The LOADV/STOREV does an addressability check for the
            address. */

         case LOAD:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            }
            uInstr2(cb, LOADV, u_in->size,
                        TempReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;
         case STORE:
            if (VG_(clo_check_addrVs)) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
            }
            uInstr2(cb, STOREV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        TempReg, u_in->val2);
            copyUInstr(cb, u_in);
            break;

         /* Moving stuff around.  Make the V bits follow accordingly,
            but don't do anything else. */

         case GET:
            uInstr2(cb, GETV, u_in->size,
                        ArchReg, u_in->val1,
                        TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;
         case PUT:
            uInstr2(cb, PUTV, u_in->size,
                        TempReg, SHADOW(u_in->val1),
                        ArchReg, u_in->val2);
            copyUInstr(cb, u_in);
            break;

         case GETF:
            /* This is not the smartest way to do it, but should work. */
            qd = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;
         case PUTF:
            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         case MOV:
            switch (u_in->tag1) {
               case TempReg:
                  uInstr2(cb, MOV, 4,
                              TempReg, SHADOW(u_in->val1),
                              TempReg, SHADOW(u_in->val2));
                  break;
               case Literal:
                  uInstr1(cb, SETV, u_in->size,
                              TempReg, SHADOW(u_in->val2));
                  break;
               default:
                  VG_(panic)("vg_instrument: MOV");
            }
            copyUInstr(cb, u_in);
            break;

         /* Special case of add, where one of the operands is a literal.
            lea1(t) = t + some literal.
            Therefore: lea1#(qa) = left(qa)
         */
         case LEA1:
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
            create_Left(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* Another form of add.
            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
                                and is 0, 1, 2 or 3.
            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
            Note, subtly, that the shift puts zeroes at the bottom of qt,
            meaning Valid, since the corresponding shift of tt puts
            zeroes at the bottom of the shifted value.
         */
         case LEA2: {
            Int shift;
            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
            switch (u_in->extra4b) {
               case 1: shift = 0; break;
               case 2: shift = 1; break;
               case 4: shift = 2; break;
               case 8: shift = 3; break;
               default: VG_(panic)( "vg_instrument(LEA2)" );
            }
            qs = SHADOW(u_in->val1);
            qt = SHADOW(u_in->val2);
            qd = SHADOW(u_in->val3);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
            if (shift > 0) {
               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
               uLiteral(cb, shift);
            }
            create_UifU(cb, 4, qs, qd);
            create_Left(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;
         }

         /* inc#/dec#(qd) = qd `UifU` left(qd) = left(qd) */
         case INC: case DEC:
            qd = SHADOW(u_in->val1);
            create_Left(cb, u_in->size, qd);
            if (u_in->flags_w != FlagsEmpty)
               create_PUTVF(cb, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* This is a HACK (approximation :-) */
         /* rcl#/rcr#(qs,qd)
               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
                 eflags# = q0
                 qd      = pcast-0-sz(q0)
            I.e., cast everything down to a single bit, then back up.
            This assumes that any bad bits infect the whole word and
            the eflags.
         */
         case RCL: case RCR:
            vg_assert(u_in->flags_r != FlagsEmpty);
            /* The following assertion looks like it makes sense, but is
               actually wrong.  Consider this:
                  rcll %eax
                  imull %eax, %eax
               The rcll writes O and C but so does the imull, so the O and C
               write of the rcll is annulled by the prior improvement pass.
               Noticed by Kevin Ryde <user42@zip.com.au>
            */
            /* vg_assert(u_in->flags_w != FlagsEmpty); */
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            /* We can safely modify qs; cast it to 0-size. */
            create_PCast(cb, u_in->size, 0, qs);
            qd = SHADOW(u_in->val2);
            create_PCast(cb, u_in->size, 0, qd);
            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
            create_UifU(cb, 0, qs, qd);
            /* qs is now free; reuse it for the flag definedness. */
            qs = create_GETVF(cb, 0);
            create_UifU(cb, 0, qs, qd);
            create_PUTVF(cb, 0, qd);
            create_PCast(cb, 0, u_in->size, qd);
            copyUInstr(cb, u_in);
            break;

         /* for OP in shl shr sar rol ror
            (qs is shift count#, qd is value to be OP#d)
            OP(ts,td)
            OP#(qs,qd)
               = pcast-1-sz(qs) `UifU` OP(ts,qd)
            So we apply OP to the tag bits too, and then UifU with
            the shift count# to take account of the possibility of it
            being undefined.

            A bit subtle:
               ROL/ROR rearrange the tag bits as per the value bits.
               SHL/SHR shifts zeroes into the value, and corresponding
                  zeroes indicating Definedness into the tag.
               SAR copies the top bit of the value downwards, and therefore
                  SAR also copies the definedness of the top bit too.
            So in all five cases, we just apply the same op to the tag
            bits as is applied to the value bits.  Neat!
         */
         case SHL:
         case SHR: case SAR:
         case ROL: case ROR: {
            Int t_amount = INVALID_TEMPREG;
            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);

            /* Make qs hold shift-count# and make
               t_amount be a TempReg holding the shift count. */
            if (u_in->tag1 == Literal) {
               t_amount = newTemp(cb);
               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
               uLiteral(cb, u_in->lit32);
               qs = SHADOW(t_amount);
               uInstr1(cb, SETV, 1, TempReg, qs);
            } else {
               t_amount = u_in->val1;
               qs       = SHADOW(u_in->val1);
            }

            uInstr2(cb, u_in->opcode,
                        u_in->size,
                        TempReg, t_amount,
                        TempReg, qd);
            qt = newShadow(cb);
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            create_PCast(cb, 1, u_in->size, qt);
            create_UifU(cb, u_in->size, qt, qd);
            copyUInstr(cb, u_in);
            break;
         }

         /* One simple tag operation. */
         case WIDEN:
            vg_assert(u_in->tag1 == TempReg);
            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
                             SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* not#(x) = x (since bitwise independent) */
         case NOT:
            vg_assert(u_in->tag1 == TempReg);
            copyUInstr(cb, u_in);
            break;

         /* neg#(x) = left(x) (derivable from case for SUB) */
         case NEG:
            vg_assert(u_in->tag1 == TempReg);
            create_Left(cb, u_in->size, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* bswap#(x) = bswap(x) */
         case BSWAP:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->size == 4);
            qd = SHADOW(u_in->val1);
            uInstr1(cb, BSWAP, 4, TempReg, qd);
            copyUInstr(cb, u_in);
            break;

         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
         case CC2VAL:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            qt = create_GETVF(cb, u_in->size);
            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* cmov#(qs,qd) = cmov(qs,qd)
            That is, do the cmov of tags using the same flags as for
            the data (obviously).  However, first do a test on the
            validity of the flags.
         */
         case CMOV:
            vg_assert(u_in->size == 4);
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            vg_assert(u_in->flags_r != FlagsEmpty);
            vg_assert(u_in->flags_w == FlagsEmpty);
            qs = SHADOW(u_in->val1);
            qd = SHADOW(u_in->val2);
            qt = create_GETVF(cb, 0);
            uInstr1(cb, TESTV, 0, TempReg, qt);
            /* qt should never be referred to again.  Nevertheless
               ... */
            uInstr1(cb, SETV, 0, TempReg, qt);

            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
            LAST_UINSTR(cb).cond    = u_in->cond;
            LAST_UINSTR(cb).flags_r = u_in->flags_r;

            copyUInstr(cb, u_in);
            break;

         /* add#/sub#(qs,qd)
               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
               = left(qs) `UifU` left(qd)
               = left(qs `UifU` qd)
            adc#/sbb#(qs,qd)
               = left(qs `UifU` qd) `UifU` pcast(eflags#)
            Second arg (dest) is TempReg.
            First arg (src) is Literal or TempReg or ArchReg.
         */
         case ADD: case SUB:
         case ADC: case SBB:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            create_Left(cb, u_in->size, qd);
            if (u_in->opcode == ADC || u_in->opcode == SBB) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, u_in->size);
               create_UifU(cb, u_in->size, qt, qd);
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* xor#(qs,qd) = qs `UifU` qd */
         case XOR:
            qd = SHADOW(u_in->val2);
            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
            create_UifU(cb, u_in->size, qs, qd);
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* and#/or#(qs,qd)
               = (qs `UifU` qd) `DifD` improve(vs,qs)
                                `DifD` improve(vd,qd)
            where improve is the relevant one of
                Improve{AND,OR}_TQ
            Use the following steps, with qt as a temp:
               qt = improve(vd,qd)
               qd = qs `UifU` qd
               qd = qt `DifD` qd
               qt = improve(vs,qs)
               qd = qt `DifD` qd
         */
         case AND: case OR:
            vg_assert(u_in->tag1 == TempReg);
            vg_assert(u_in->tag2 == TempReg);
            qd = SHADOW(u_in->val2);
            qs = SHADOW(u_in->val1);
            qt = newShadow(cb);

            /* qt = improve(vd,qd) */
            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
            /* qd = qs `UifU` qd */
            create_UifU(cb, u_in->size, qs, qd);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* qt = improve(vs,qs) */
            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
            if (u_in->opcode == AND)
               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
            else
               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
            /* qd = qt `DifD` qd */
            create_DifD(cb, u_in->size, qt, qd);
            /* So, finally qd is the result tag. */
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, u_in->size, qd);
            }
            copyUInstr(cb, u_in);
            break;

         /* Machinery to do with supporting CALLM.  Copy the start and
            end markers only to make the result easier to read
            (debug); they generate no code and have no effect.
         */
         case CALLM_S: case CALLM_E:
            copyUInstr(cb, u_in);
            break;

         /* Copy PUSH and POP verbatim.  Arg/result absval
            calculations are done when the associated CALL is
            processed.  CLEAR has no effect on absval calculations but
            needs to be copied.
         */
         case PUSH: case POP: case CLEAR:
            copyUInstr(cb, u_in);
            break;

         /* In short:
               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
            We have to decide on a size to do the computation at,
            although the choice doesn't affect correctness.  We will
            do a pcast to the final size anyway, so the only important
            factor is to choose a size which minimises the total
            number of casts needed.  Valgrind: just use size 0,
            regardless.  It may not be very good for performance
            but does simplify matters, mainly by reducing the number
            of different pessimising casts which have to be implemented.
         */
         case CALLM: {
            UInstr* uu;
            Bool res_used;

            /* Now generate the code.  Get the final result absval
               into qt. */
            qt  = newShadow(cb);
            qtt = newShadow(cb);
            uInstr1(cb, SETV, 0, TempReg, qt);
            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != PUSH) continue;
               /* cast via a temporary */
               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
                                   TempReg, qtt);
               create_PCast(cb, uu->size, 0, qtt);
               create_UifU(cb, 0, qtt, qt);
            }
            /* Remembering also that flags read count as inputs. */
            if (u_in->flags_r != FlagsEmpty) {
               qtt = create_GETVF(cb, 0);
               create_UifU(cb, 0, qtt, qt);
            }

            /* qt now holds the result tag.  If any results from the
               call are used, either by fetching with POP or
               implicitly by writing the flags, we copy the result
               absval to the relevant location.  If not used, the call
               must have been for its side effects, so we test qt here
               and now.  Note that this assumes that all values
               removed by POP continue to be live.  So dead args
               *must* be removed with CLEAR, not by POPping them into
               a dummy tempreg.
            */
            res_used = False;
            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
               uu = & cb_in->instrs[j];
               if (uu->opcode != POP) continue;
               /* Cast via a temp. */
               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
               create_PCast(cb, 0, uu->size, qtt);
               uInstr2(cb, MOV, 4, TempReg, qtt,
                                   TempReg, SHADOW(uu->val1));
               res_used = True;
            }
            if (u_in->flags_w != FlagsEmpty) {
               create_PUTVF(cb, 0, qt);
               res_used = True;
            }
            if (!res_used) {
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            copyUInstr(cb, u_in);
            break;
         }
         /* Whew ... */

         case JMP:
            if (u_in->tag1 == TempReg) {
               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            } else {
               vg_assert(u_in->tag1 == Literal);
            }
            if (u_in->cond != CondAlways) {
               vg_assert(u_in->flags_r != FlagsEmpty);
               qt = create_GETVF(cb, 0);
               uInstr1(cb, TESTV, 0, TempReg, qt);
               /* qt should never be referred to again.  Nevertheless
                  ... */
               uInstr1(cb, SETV, 0, TempReg, qt);
            }
            copyUInstr(cb, u_in);
            break;

         case JIFZ:
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
            copyUInstr(cb, u_in);
            break;

         /* Emit a check on the address used.  For FPU_R, the value
            loaded into the FPU is checked at the time it is read from
            memory (see synth_fpu_mem_check_actions). */
         case FPU_R: case FPU_W:
            vg_assert(u_in->tag2 == TempReg);
            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
            copyUInstr(cb, u_in);
            break;

         /* For FPU insns not referencing memory, just copy thru. */
         case FPU:
            copyUInstr(cb, u_in);
            break;

         default:
            VG_(ppUInstr)(0, u_in);
            VG_(panic)( "vg_instrument: unhandled case");

      } /* end of switch (u_in->opcode) */

   } /* end of for loop */

   freeCodeBlock(cb_in);
   return cb;
}

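/* A compiled-out sketch of driving vg_instrument by hand: build a
   one-insn block which loads 4 bytes through t_addr into t_val, and
   instrument it.  When VG_(clo_check_addrVs) is set, the result is
   TESTV/SETV on t_addr's shadow, then a LOADV, then the original
   LOAD.  The function name is hypothetical and nothing calls it. */
#if 0
static UCodeBlock* example_instrument_load ( void )
{
   UCodeBlock* cb     = allocCodeBlock();
   Int         t_addr = newTemp(cb);
   Int         t_val  = newTemp(cb);
   uInstr2(cb, LOAD, 4, TempReg, t_addr, TempReg, t_val);
   /* vg_instrument frees its argument and returns a fresh block. */
   return vg_instrument(cb);
}
#endif
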
2620/*------------------------------------------------------------*/
2621/*--- Clean up mem check instrumentation. ---*/
2622/*------------------------------------------------------------*/
2623
#define VGC_IS_SHADOW(tempreg) (((tempreg) % 2) == 1)
#define VGC_UNDEF ((UChar)100)
#define VGC_VALUE ((UChar)101)

#define NOP_no_msg(uu)                                            \
   do { (uu)->opcode = NOP; } while (False)

#define NOP_tag1_op(uu)                                           \
   do { (uu)->opcode = NOP;                                       \
        if (VG_(disassemble))                                     \
           VG_(printf)("at %d: delete %s due to defd arg\n",      \
                       i, VG_(nameOfTagOp)((uu)->val3));          \
   } while (False)

#define SETV_tag1_op(uu,newsz)                                    \
   do { (uu)->opcode = SETV;                                      \
        (uu)->size = (newsz);                                     \
        (uu)->tag2 = (uu)->tag3 = NoValue;                        \
        if (VG_(disassemble))                                     \
           VG_(printf)("at %d: convert %s to SETV%d "             \
                       "due to defd arg\n",                       \
                       i, VG_(nameOfTagOp)((uu)->val3), (newsz)); \
   } while (False)


/* Run backwards and delete SETVs on shadow temps for which the next
   action is a write.  Needs an env saying whether or not the next
   action is a write.  The supplied UCodeBlock is destructively
   modified.
*/
static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
{
   Bool*   next_is_write;
   Int     i, j, k, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));

   for (i = 0; i < n_temps; i++) next_is_write[i] = True;

   for (i = cb->used-1; i >= 0; i--) {
      u = &cb->instrs[i];

      /* If we're not checking address V bits, there will be a lot of
         GETVs, TAG1s and TAG2s calculating values which are never
         used.  These first three cases get rid of them. */

      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete GETV\n", i);
      } else

      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
          && next_is_write[u->val1]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG1\n", i);
      } else

      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]
          && !VG_(clo_check_addrVs)) {
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete TAG2\n", i);
      } else

      /* We do the rest of these regardless of whether or not
         addresses are V-checked. */

      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
          && next_is_write[u->val2]) {
         /* This MOV is pointless because the target is dead at this
            point.  Delete it. */
         u->opcode = NOP;
         u->size = 0;
         if (VG_(disassemble))
            VG_(printf)("at %d: delete MOV\n", i);
      } else

      if (u->opcode == SETV) {
         if (u->tag1 == TempReg) {
            vg_assert(VGC_IS_SHADOW(u->val1));
            if (next_is_write[u->val1]) {
               /* This write is pointless, so annul it. */
               u->opcode = NOP;
               u->size = 0;
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete SETV\n", i);
            } else {
               /* This write has a purpose; don't annul it, but do
                  notice that we did it. */
               next_is_write[u->val1] = True;
            }

         }

      } else {
         /* Find out what this insn does to the temps. */
         k = getTempUsage(u, &tempUse[0]);
         vg_assert(k <= 3);
         for (j = k-1; j >= 0; j--) {
            next_is_write[ tempUse[j].tempNo ]
               = tempUse[j].isWrite;
         }
      }

   }

   VG_(jitfree)(next_is_write);
}

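/* A compiled-out miniature of the backwards scan above, on a toy
   record in which each instruction touches exactly one temp; all
   names are hypothetical.  Scanning backwards, a write to a temp
   whose next action (later in the block, so earlier in the scan) is
   itself a write is dead and can be annulled; otherwise the scan
   records whether this reference reads or writes the temp. */
#if 0
typedef struct { Int tempNo; Bool isWrite; Bool deleted; } ToyRef;

static void toy_delete_dead_writes ( ToyRef* refs, Int n_refs,
                                     Bool* next_is_write /* [n_temps] */ )
{
   Int i;
   /* next_is_write[] starts all True: at the block end every temp is
      dead, which reads as "the next action is a write". */
   for (i = n_refs-1; i >= 0; i--) {
      if (refs[i].isWrite && next_is_write[refs[i].tempNo]) {
         refs[i].deleted = True;   /* dead store: annul it */
         continue;                 /* leave the env unchanged */
      }
      next_is_write[refs[i].tempNo] = refs[i].isWrite;
   }
}
#endif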

/* Run forwards, propagating and using the is-completely-defined
   property.  This removes a lot of redundant tag-munging code.
   Unfortunately it requires intimate knowledge of how each uinstr and
   tagop modifies its arguments, which duplicates the knowledge of
   uinstr tempreg usage already embodied in getTempUsage().
   The supplied UCodeBlock* is modified in-place.

   For each value temp, def[] should hold VGC_VALUE.

   For each shadow temp, def[] may hold 4, 2, 1 or 0 iff that shadow
   is definitely known to be fully defined at that size.  In all
   other circumstances a shadow's def[] entry is VGC_UNDEF, meaning
   possibly undefined.  In cases of doubt, VGC_UNDEF is always safe.
*/
static void vg_propagate_definedness ( UCodeBlock* cb )
{
   UChar*  def;
   Int     i, j, k, t, n_temps;
   UInstr* u;
   TempUse tempUse[3];

   n_temps = cb->nextTemp;
   if (n_temps == 0) return;

   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
   for (i = 0; i < n_temps; i++)
      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;

   /* Run forwards, detecting and using the all-defined property. */

   for (i = 0; i < cb->used; i++) {
      u = &cb->instrs[i];
      switch (u->opcode) {

         /* Tag-handling uinstrs. */

         /* Deal with these quickly. */
         case NOP:
         case INCEIP:
            break;

         /* Make a tag defined. */
         case SETV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            def[u->val1] = u->size;
            break;

         /* Check definedness of a tag. */
         case TESTV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               NOP_no_msg(u);
               if (VG_(disassemble))
                  VG_(printf)("at %d: delete TESTV on defd arg\n", i);
            }
            break;

         /* Applies to both values and tags.  Propagate Definedness
            property through copies.  Note that this isn't optional;
            we *have* to do this to keep def[] correct. */
         case MOV:
            vg_assert(u->tag2 == TempReg);
            if (u->tag1 == TempReg) {
               if (VGC_IS_SHADOW(u->val1)) {
                  vg_assert(VGC_IS_SHADOW(u->val2));
                  def[u->val2] = def[u->val1];
               }
            }
            break;

         case PUTV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               u->tag1 = Literal;
               u->val1 = 0;
               /* V bits of 0 mean "defined"; bytes beyond the operand
                  size keep all-1s (undefined) V bits. */
               switch (u->size) {
                  case 4: u->lit32 = 0x00000000; break;
                  case 2: u->lit32 = 0xFFFF0000; break;
                  case 1: u->lit32 = 0xFFFFFF00; break;
                  default: VG_(panic)("vg_cleanup(PUTV)");
               }
               if (VG_(disassemble))
                  VG_(printf)(
                     "at %d: propagate definedness into PUTV\n", i);
            }
            break;

         case STOREV:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] <= 4) {
               vg_assert(def[u->val1] == u->size);
               u->tag1 = Literal;
               u->val1 = 0;
               switch (u->size) {
                  case 4: u->lit32 = 0x00000000; break;
                  case 2: u->lit32 = 0xFFFF0000; break;
                  case 1: u->lit32 = 0xFFFFFF00; break;
                  default: VG_(panic)("vg_cleanup(STOREV)");
               }
               if (VG_(disassemble))
                  VG_(printf)(
                     "at %d: propagate definedness into STOREV\n", i);
            }
            break;

         /* Nothing interesting we can do with this, I think. */
         case PUTVF:
            break;

         /* Tag handling operations. */
         case TAG2:
            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
            vg_assert(u->tag3 == Lit16);
            /* Ultra-paranoid "type" checking. */
            switch (u->val3) {
               case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
               case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
               case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
                  break;
               default:
                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
                  break;
            }
            switch (u->val3) {
               Int sz;
               case VgT_UifU4:
                  sz = 4; goto do_UifU;
               case VgT_UifU2:
                  sz = 2; goto do_UifU;
               case VgT_UifU1:
                  sz = 1; goto do_UifU;
               case VgT_UifU0:
                  sz = 0; goto do_UifU;
               do_UifU:
                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
                  if (def[u->val1] <= 4) {
                     /* UifU.  The first arg is defined, so result is
                        simply second arg.  Delete this operation. */
                     vg_assert(def[u->val1] == sz);
                     NOP_no_msg(u);
                     if (VG_(disassemble))
                        VG_(printf)(
                           "at %d: delete UifU%d due to defd arg1\n",
                           i, sz);
                  }
                  else
                  if (def[u->val2] <= 4) {
                     /* UifU.  The second arg is defined, so result is
                        simply first arg.  Copy to second. */
                     vg_assert(def[u->val2] == sz);
                     u->opcode = MOV;
                     u->size = 4;
                     u->tag3 = NoValue;
                     def[u->val2] = def[u->val1];
                     if (VG_(disassemble))
                        VG_(printf)(
                           "at %d: change UifU%d to MOV due to defd"
                           " arg2\n",
                           i, sz);
                  }
                  break;
               case VgT_ImproveAND4_TQ:
                  sz = 4; goto do_ImproveAND;
               case VgT_ImproveAND1_TQ:
                  sz = 1; goto do_ImproveAND;
               do_ImproveAND:
                  /* Implements Q = T OR Q.  So if Q is entirely defined,
                     ie all 0s, we get MOV T, Q. */
                  if (def[u->val2] <= 4) {
                     vg_assert(def[u->val2] == sz);
                     u->size = 4; /* Regardless of sz */
                     u->opcode = MOV;
                     u->tag3 = NoValue;
                     def[u->val2] = VGC_UNDEF;
                     if (VG_(disassemble))
                        VG_(printf)(
                            "at %d: change ImproveAND%d_TQ to MOV due "
                            "to defd arg2\n",
                            i, sz);
                  }
                  break;
               default:
                  goto unhandled;
            }
            break;

         case TAG1:
            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
            if (def[u->val1] > 4) break;
            /* We now know that the arg to the op is entirely defined.
               If the op changes the size of the arg, we must replace
               it with a SETV at the new size.  If it doesn't change
               the size, we can delete it completely. */
            switch (u->val3) {
               /* Maintain the same size ... */
               case VgT_Left4:
                  vg_assert(def[u->val1] == 4);
                  NOP_tag1_op(u);
                  break;
               case VgT_PCast11:
                  vg_assert(def[u->val1] == 1);
                  NOP_tag1_op(u);
                  break;
               /* Change size ... */
               case VgT_PCast40:
                  vg_assert(def[u->val1] == 4);
                  SETV_tag1_op(u,0);
                  def[u->val1] = 0;
                  break;
               case VgT_PCast14:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,4);
                  def[u->val1] = 4;
                  break;
               case VgT_PCast12:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,2);
                  def[u->val1] = 2;
                  break;
               case VgT_PCast10:
                  vg_assert(def[u->val1] == 1);
                  SETV_tag1_op(u,0);
                  def[u->val1] = 0;
                  break;
               case VgT_PCast02:
                  vg_assert(def[u->val1] == 0);
                  SETV_tag1_op(u,2);
                  def[u->val1] = 2;
                  break;
               default:
                  goto unhandled;
            }
            if (VG_(disassemble))
               VG_(printf)(
                  "at %d: delete TAG1 %s due to defd arg\n",
                  i, VG_(nameOfTagOp)(u->val3));
            break;

         default:
         unhandled:
            /* We don't know how to handle this uinstr.  Be safe, and
               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
            k = getTempUsage(u, &tempUse[0]);
            vg_assert(k <= 3);
            for (j = 0; j < k; j++) {
               t = tempUse[j].tempNo;
               vg_assert(t >= 0 && t < n_temps);
               if (!tempUse[j].isWrite) {
                  /* t is read; ignore it. */
                  if (0 && VGC_IS_SHADOW(t) && def[t] <= 4)
                     VG_(printf)("ignoring def %d at %s %s\n",
                                 def[t],
                                 VG_(nameUOpcode)(True, u->opcode),
                                 (u->opcode == TAG1 || u->opcode == TAG2)
                                    ? VG_(nameOfTagOp)(u->val3)
                                    : (Char*)"");
               } else {
                  /* t is written; better nullify it. */
                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
               }
            }
      }
   }

   VG_(jitfree)(def);
}
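
/* A compiled-out model of the two VgT_UifU rewrites above.  With V
   bits 0 = defined, UifU behaves like bitwise OR, so if the first
   argument is all-defined (all 0s) the result is just the second
   argument (hence the deletion), and vice versa (hence the rewrite
   to MOV).  The model_UifU4 name is hypothetical. */
#if 0
static UInt model_UifU4 ( UInt q1, UInt q2 )
{
   return q1 | q2;   /* undefined if undefined in either operand */
}
#endif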


/* Top level post-instrumentation cleanup function. */
static void vg_cleanup ( UCodeBlock* cb )
{
   vg_propagate_definedness ( cb );
   vg_delete_redundant_SETVs ( cb );
}


/*------------------------------------------------------------*/
/*--- Main entry point for the JITter. ---*/
/*------------------------------------------------------------*/

/* Translate the basic block beginning at orig_addr, placing the
   translation in a vg_malloc'd block, the address and size of which
   are returned in trans_addr and trans_size.  Length of the original
   block is also returned in orig_size.  If the latter three are NULL,
   this call is being done for debugging purposes, in which case (a)
   throw away the translation once it is made, and (b) produce a load
   of debugging output.
*/
void VG_(translate) ( Addr  orig_addr,
                      UInt* orig_size,
                      Addr* trans_addr,
                      UInt* trans_size )
{
   Int         n_disassembled_bytes, final_code_size;
   Bool        debugging_translation;
   UChar*      final_code;
   UCodeBlock* cb;

   VGP_PUSHCC(VgpTranslate);
   debugging_translation
      = orig_size == NULL || trans_addr == NULL || trans_size == NULL;

   dis = True;
   dis = debugging_translation;

   /* Check if we're being asked to jump to a silly address, and if so
      record an error message before potentially crashing the entire
      system. */
   if (VG_(clo_instrument) && !debugging_translation && !dis) {
      Addr bad_addr;
      Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
      if (!ok) {
         VG_(record_jump_error)(bad_addr);
      }
   }

   /* if (VG_(overall_in_count) >= 4800) dis=True; */
   if (VG_(disassemble))
      VG_(printf)("\n");
   if (0 || dis
       || (VG_(overall_in_count) > 0 &&
           (VG_(overall_in_count) % 1000 == 0))) {
      if (0 && (VG_(clo_verbosity) > 1 || dis))
         VG_(message)(Vg_UserMsg,
            "trans# %d, bb# %lu, in %d, out %d, addr 0x%x",
            VG_(overall_in_count),
            VG_(bbs_done),
            VG_(overall_in_osize), VG_(overall_in_tsize),
            orig_addr );
   }
   cb = allocCodeBlock();

   /* Disassemble this basic block into cb. */
   VGP_PUSHCC(VgpToUCode);
   n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
   VGP_POPCC;
   /* dis=True; */
   /* if (0&& VG_(translations_done) < 617) */
   /*    dis=False; */
   /* Try and improve the code a bit. */
   if (VG_(clo_optimise)) {
      VGP_PUSHCC(VgpImprove);
      vg_improve ( cb );
      if (VG_(disassemble))
         VG_(ppUCodeBlock) ( cb, "Improved code:" );
      VGP_POPCC;
   }
   /* dis=False; */
   /* Add instrumentation code. */
   if (VG_(clo_instrument)) {
      VGP_PUSHCC(VgpInstrument);
      cb = vg_instrument(cb);
      VGP_POPCC;
      if (VG_(disassemble))
         VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
      if (VG_(clo_cleanup)) {
         VGP_PUSHCC(VgpCleanup);
         vg_cleanup(cb);
         VGP_POPCC;
         if (VG_(disassemble))
            VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
      }
   }

   /* Allocate registers. */
   VGP_PUSHCC(VgpRegAlloc);
   cb = vg_do_register_allocation ( cb );
   VGP_POPCC;
   /* dis=False; */
   /*
   if (VG_(disassemble))
      VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
   */

   VGP_PUSHCC(VgpFromUcode);
   /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
      and so must be VG_(jitfree)'d. */
   final_code = VG_(emit_code)(cb, &final_code_size );
   VGP_POPCC;
   freeCodeBlock(cb);

   if (debugging_translation) {
      /* Only done for debugging -- throw away final result. */
      VG_(jitfree)(final_code);
   } else {
      /* Doing it for real -- return values to caller. */
      *orig_size = n_disassembled_bytes;
      *trans_addr = (Addr)final_code;
      *trans_size = final_code_size;
   }
   VGP_POPCC;
}
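
/* A compiled-out usage sketch of the debugging mode described in the
   comment above VG_(translate): passing NULL for the three output
   parameters makes it translate, print a load of debugging output,
   and discard the result.  The function name and address parameter
   are hypothetical. */
#if 0
static void example_debug_translate ( Addr some_code_addr )
{
   VG_(translate)( some_code_addr, NULL, NULL, NULL );
}
#endif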

/*--------------------------------------------------------------------*/
/*--- end vg_translate.c ---*/
/*--------------------------------------------------------------------*/