Blame - vg_translate.c - platform/external/valgrind

blob: 0a806944ec3d1dcd8608eb451231d31f84e4dc8f [file] [log] [blame]

sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1
				2	/*--------------------------------------------------------------------*/
				3	/*--- The JITter proper: register allocation & code improvement ---*/
				4	/*--- vg_translate.c ---*/
				5	/*--------------------------------------------------------------------*/
				6
				7	/*
				8	This file is part of Valgrind, an x86 protected-mode emulator
				9	designed for debugging and profiling binaries on x86-Unixes.
				10
				11	Copyright (C) 2000-2002 Julian Seward
				12	jseward@acm.org
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	13
				14	This program is free software; you can redistribute it and/or
				15	modify it under the terms of the GNU General Public License as
				16	published by the Free Software Foundation; either version 2 of the
				17	License, or (at your option) any later version.
				18
				19	This program is distributed in the hope that it will be useful, but
				20	WITHOUT ANY WARRANTY; without even the implied warranty of
				21	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				22	General Public License for more details.
				23
				24	You should have received a copy of the GNU General Public License
				25	along with this program; if not, write to the Free Software
				26	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
				27	02111-1307, USA.
				28
				29	The GNU General Public License is contained in the file LICENSE.
				30	*/
				31
				32	#include "vg_include.h"
				33
				34
				35	/*------------------------------------------------------------*/
				36	/*--- Renamings of frequently-used global functions. ---*/
				37	/*------------------------------------------------------------*/
				38
				39	#define uInstr1 VG_(newUInstr1)
				40	#define uInstr2 VG_(newUInstr2)
				41	#define uInstr3 VG_(newUInstr3)
				42	#define dis VG_(disassemble)
				43	#define nameIReg VG_(nameOfIntReg)
				44	#define nameISize VG_(nameOfIntSize)
				45	#define uLiteral VG_(setLiteralField)
				46	#define newTemp VG_(getNewTemp)
				47	#define newShadow VG_(getNewShadow)
				48
				49
				50	/*------------------------------------------------------------*/
				51	/*--- Memory management for the translator. ---*/
				52	/*------------------------------------------------------------*/
				53
				54	#define N_JITBLOCKS 4
				55	#define N_JITBLOCK_SZ 5000
				56
				57	static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
				58	static Bool jitstorage_inuse[N_JITBLOCKS];
				59	static Bool jitstorage_initdone = False;
				60
				61	static __inline__ void jitstorage_initialise ( void )
				62	{
				63	Int i;
				64	if (jitstorage_initdone) return;
				65	jitstorage_initdone = True;
				66	for (i = 0; i < N_JITBLOCKS; i++)
				67	jitstorage_inuse[i] = False;
				68	}
				69
				70	void* VG_(jitmalloc) ( Int nbytes )
				71	{
				72	Int i;
				73	jitstorage_initialise();
				74	if (nbytes > N_JITBLOCK_SZ) {
				75	/* VG_(printf)("too large: %d\n", nbytes); */
				76	return VG_(malloc)(VG_AR_PRIVATE, nbytes);
				77	}
				78	for (i = 0; i < N_JITBLOCKS; i++) {
				79	if (!jitstorage_inuse[i]) {
				80	jitstorage_inuse[i] = True;
				81	/* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
				82	return & jitstorage[i][0];
				83	}
				84	}
				85	VG_(panic)("out of slots in vg_jitmalloc\n");
				86	return VG_(malloc)(VG_AR_PRIVATE, nbytes);
				87	}
				88
				89	void VG_(jitfree) ( void* ptr )
				90	{
				91	Int i;
				92	jitstorage_initialise();
				93	for (i = 0; i < N_JITBLOCKS; i++) {
				94	if (ptr == & jitstorage[i][0]) {
				95	vg_assert(jitstorage_inuse[i]);
				96	jitstorage_inuse[i] = False;
				97	return;
				98	}
				99	}
				100	VG_(free)(VG_AR_PRIVATE, ptr);
				101	}
				102
				103	/*------------------------------------------------------------*/
				104	/*--- Basics ---*/
				105	/*------------------------------------------------------------*/
				106
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	107	UCodeBlock* VG_(allocCodeBlock) ( void )
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	108	{
				109	UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
				110	cb->used = cb->size = cb->nextTemp = 0;
				111	cb->instrs = NULL;
				112	return cb;
				113	}
				114
				115
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	116	void VG_(freeCodeBlock) ( UCodeBlock* cb )
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	117	{
				118	if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
				119	VG_(free)(VG_AR_PRIVATE, cb);
				120	}
				121
				122
				123	/* Ensure there's enough space in a block to add one uinstr. */
				124	static __inline__
				125	void ensureUInstr ( UCodeBlock* cb )
				126	{
				127	if (cb->used == cb->size) {
				128	if (cb->instrs == NULL) {
				129	vg_assert(cb->size == 0);
				130	vg_assert(cb->used == 0);
				131	cb->size = 8;
				132	cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
				133	} else {
				134	Int i;
				135	UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
				136	2 * sizeof(UInstr) * cb->size);
				137	for (i = 0; i < cb->used; i++)
				138	instrs2[i] = cb->instrs[i];
				139	cb->size *= 2;
				140	VG_(free)(VG_AR_PRIVATE, cb->instrs);
				141	cb->instrs = instrs2;
				142	}
				143	}
				144
				145	vg_assert(cb->used < cb->size);
				146	}
				147
				148
				149	__inline__
				150	void VG_(emptyUInstr) ( UInstr* u )
				151	{
				152	u->val1 = u->val2 = u->val3 = 0;
				153	u->tag1 = u->tag2 = u->tag3 = NoValue;
				154	u->flags_r = u->flags_w = FlagsEmpty;
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	155	u->jmpkind = JmpBoring;
				156	u->smc_check = u->signed_widen = False;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	157	u->lit32 = 0;
				158	u->opcode = 0;
				159	u->size = 0;
				160	u->cond = 0;
				161	u->extra4b = 0;
				162	}
				163
				164
				165	/* Add an instruction to a ucode block, and return the index of the
				166	instruction. */
				167	__inline__
				168	void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
				169	Tag tag1, UInt val1,
				170	Tag tag2, UInt val2,
				171	Tag tag3, UInt val3 )
				172	{
				173	UInstr* ui;
				174	ensureUInstr(cb);
				175	ui = & cb->instrs[cb->used];
				176	cb->used++;
				177	VG_(emptyUInstr)(ui);
				178	ui->val1 = val1;
				179	ui->val2 = val2;
				180	ui->val3 = val3;
				181	ui->opcode = opcode;
				182	ui->tag1 = tag1;
				183	ui->tag2 = tag2;
				184	ui->tag3 = tag3;
				185	ui->size = sz;
				186	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				187	if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
				188	if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
				189	}
				190
				191
				192	__inline__
				193	void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
				194	Tag tag1, UInt val1,
				195	Tag tag2, UInt val2 )
				196	{
				197	UInstr* ui;
				198	ensureUInstr(cb);
				199	ui = & cb->instrs[cb->used];
				200	cb->used++;
				201	VG_(emptyUInstr)(ui);
				202	ui->val1 = val1;
				203	ui->val2 = val2;
				204	ui->opcode = opcode;
				205	ui->tag1 = tag1;
				206	ui->tag2 = tag2;
				207	ui->size = sz;
				208	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				209	if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
				210	}
				211
				212
				213	__inline__
				214	void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
				215	Tag tag1, UInt val1 )
				216	{
				217	UInstr* ui;
				218	ensureUInstr(cb);
				219	ui = & cb->instrs[cb->used];
				220	cb->used++;
				221	VG_(emptyUInstr)(ui);
				222	ui->val1 = val1;
				223	ui->opcode = opcode;
				224	ui->tag1 = tag1;
				225	ui->size = sz;
				226	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				227	}
				228
				229
				230	__inline__
				231	void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
				232	{
				233	UInstr* ui;
				234	ensureUInstr(cb);
				235	ui = & cb->instrs[cb->used];
				236	cb->used++;
				237	VG_(emptyUInstr)(ui);
				238	ui->opcode = opcode;
				239	ui->size = sz;
				240	}
				241
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	242	/* Copy an instruction into the given codeblock. */
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	243	__inline__
				244	void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr )
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	245	{
				246	ensureUInstr(cb);
				247	cb->instrs[cb->used] = *instr;
				248	cb->used++;
				249	}
				250
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	251	/* Copy auxiliary info from one uinstr to another. */
				252	static __inline__
				253	void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
				254	{
				255	dst->cond = src->cond;
				256	dst->extra4b = src->extra4b;
				257	dst->smc_check = src->smc_check;
				258	dst->signed_widen = src->signed_widen;
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	259	dst->jmpkind = src->jmpkind;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	260	dst->flags_r = src->flags_r;
				261	dst->flags_w = src->flags_w;
				262	}
				263
				264
				265	/* Set the flag R/W sets on a uinstr. */
				266	void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
				267	{
				268	/* VG_(ppUInstr)(-1,u); */
				269	vg_assert(fr == (fr & FlagsALL));
				270	vg_assert(fw == (fw & FlagsALL));
				271	u->flags_r = fr;
				272	u->flags_w = fw;
				273	}
				274
				275
				276	/* Set the lit32 field of the most recent uinsn. */
				277	void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
				278	{
				279	LAST_UINSTR(cb).lit32 = lit32;
				280	}
				281
				282
				283	Bool VG_(anyFlagUse) ( UInstr* u )
				284	{
				285	return (u->flags_r != FlagsEmpty
				286	\|\| u->flags_w != FlagsEmpty);
				287	}
				288
				289
				290
				291
				292	/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
				293	register number. This effectively defines the order in which real
				294	registers are allocated. %ebp is excluded since it is permanently
				295	reserved for pointing at VG_(baseBlock). %edi is a general spare
				296	temp used for Left4 and various misc tag ops.
				297
				298	Important! If you change the set of allocatable registers from
				299	%eax, %ebx, %ecx, %edx, %esi you must change the
sewardj	18d7513	2002-05-16 11:06:21 +0000	[diff] [blame]	300	save/restore sequences in various places to match!
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	301	*/
				302	__inline__ Int VG_(rankToRealRegNo) ( Int rank )
				303	{
				304	switch (rank) {
				305	# if 1
				306	/* Probably the best allocation ordering. */
				307	case 0: return R_EAX;
				308	case 1: return R_EBX;
				309	case 2: return R_ECX;
				310	case 3: return R_EDX;
				311	case 4: return R_ESI;
				312	# else
				313	/* Contrary; probably the worst. Helpful for debugging, tho. */
				314	case 4: return R_EAX;
				315	case 3: return R_EBX;
				316	case 2: return R_ECX;
				317	case 1: return R_EDX;
				318	case 0: return R_ESI;
				319	# endif
				320	default: VG_(panic)("rankToRealRegNo");
				321	}
				322	}
				323
				324
				325	/*------------------------------------------------------------*/
				326	/*--- Sanity checking uinstrs. ---*/
				327	/*------------------------------------------------------------*/
				328
				329	/* This seems as good a place as any to record some important stuff
				330	about ucode semantics.
				331
				332	* TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
				333	TempReg are defined to zero-extend the loaded value to 32 bits.
				334	This is needed to make the translation of movzbl et al work
				335	properly.
				336
				337	* Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
				338
				339	* Arithmetic on TempRegs is at the specified size. For example,
				340	SUBW t1, t2 has to result in a real 16 bit x86 subtraction
				341	being emitted -- not a 32 bit one.
				342
				343	* On some insns we allow the cc bit to be set. If so, the
				344	intention is that the simulated machine's %eflags register
				345	is copied into that of the real machine before the insn,
				346	and copied back again afterwards. This means that the
				347	code generated for that insn must be very careful only to
				348	update %eflags in the intended way. This is particularly
				349	important for the routines referenced by CALL insns.
				350	*/
				351
				352	/* Meaning of operand kinds is as follows:
				353
				354	ArchReg is a register of the simulated CPU, stored in memory,
				355	in vg_m_state.m_eax .. m_edi. These values are stored
				356	using the Intel register encoding.
				357
				358	RealReg is a register of the real CPU. There are VG_MAX_REALREGS
				359	available for allocation. As with ArchRegs, these values
				360	are stored using the Intel register encoding.
				361
				362	TempReg is a temporary register used to express the results of
				363	disassembly. There is an unlimited supply of them --
				364	register allocation and spilling eventually assigns them
				365	to RealRegs.
				366
				367	SpillNo is a spill slot number. The number of required spill
				368	slots is VG_MAX_PSEUDOS, in general. Only allowed
				369	as the ArchReg operand of GET and PUT.
				370
				371	Lit16 is a signed 16-bit literal value.
				372
				373	Literal is a 32-bit literal value. Each uinstr can only hold
				374	one of these.
				375
				376	The disassembled code is expressed purely in terms of ArchReg,
				377	TempReg and Literal operands. Eventually, register allocation
				378	removes all the TempRegs, giving a result using ArchRegs, RealRegs,
				379	and Literals. New x86 code can easily be synthesised from this.
				380	There are carefully designed restrictions on which insns can have
				381	which operands, intended to make it possible to generate x86 code
				382	from the result of register allocation on the ucode efficiently and
				383	without need of any further RealRegs.
				384
				385	Restrictions on insns (as generated by the disassembler) are as
				386	follows:
				387
				388	A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
				389	N=NoValue
				390
				391	GETF T N N
				392	PUTF T N N
				393
				394	GET A,S T N
				395	PUT T A,S N
				396	LOAD T T N
				397	STORE T T N
				398	MOV T,L T N
				399	CMOV T T N
				400	WIDEN T N N
				401	JMP T,L N N
				402	CALLM L N N
				403	CALLM_S N N N
				404	CALLM_E N N N
				405	PUSH,POP T N N
				406	CLEAR L N N
				407
				408	AND, OR
				409	T T N
				410
				411	ADD, ADC, XOR, SUB, SBB
				412	A,L,T T N
				413
				414	SHL, SHR, SAR, ROL, ROR, RCL, RCR
				415	L,T T N
				416
				417	NOT, NEG, INC, DEC, CC2VAL, BSWAP
				418	T N N
				419
				420	JIFZ T L N
				421
				422	FPU_R L T N
				423	FPU_W L T N
				424	FPU L T N
				425
				426	LEA1 T T (const in a separate field)
				427	LEA2 T T T (const & shift ditto)
				428
				429	INCEIP L N N
				430
				431	and for instrumentation insns:
				432
				433	LOADV T T N
				434	STOREV T,L T N
				435	GETV A T N
				436	PUTV T,L A N
				437	GETVF T N N
				438	PUTVF T N N
				439	WIDENV T N N
				440	TESTV A,T N N
				441	SETV A,T N N
				442	TAG1 T N N
				443	TAG2 T T N
				444
				445	Before register allocation, S operands should not appear anywhere.
				446	After register allocation, all T operands should have been
				447	converted into Rs, and S operands are allowed in GET and PUT --
				448	denoting spill saves/restores.
				449
				450	The size field should be 0 for insns for which it is meaningless,
				451	ie those which do not directly move/operate on data.
				452	*/
				453	Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
				454	{
				455	# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
				456	# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
				457	# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
				458	# define A1 (u->tag1 == ArchReg)
				459	# define A2 (u->tag2 == ArchReg)
				460	# define AS1 ((u->tag1 == ArchReg) \|\| ((!beforeRA && (u->tag1 == SpillNo))))
				461	# define AS2 ((u->tag2 == ArchReg) \|\| ((!beforeRA && (u->tag2 == SpillNo))))
				462	# define AS3 ((u->tag3 == ArchReg) \|\| ((!beforeRA && (u->tag3 == SpillNo))))
				463	# define L1 (u->tag1 == Literal && u->val1 == 0)
				464	# define L2 (u->tag2 == Literal && u->val2 == 0)
				465	# define Ls1 (u->tag1 == Lit16)
				466	# define Ls3 (u->tag3 == Lit16)
				467	# define N1 (u->tag1 == NoValue)
				468	# define N2 (u->tag2 == NoValue)
				469	# define N3 (u->tag3 == NoValue)
				470	# define SZ4 (u->size == 4)
				471	# define SZ2 (u->size == 2)
				472	# define SZ1 (u->size == 1)
				473	# define SZ0 (u->size == 0)
				474	# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
				475	# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
				476	# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	477	# define FLG_RD_WR_MAYBE \
				478	((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) \
				479	\|\| (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP) \
				480	\|\| (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	481	# define CC1 (!(CC0))
				482	# define SZ4_IF_TR1 ((u->tag1 == TempReg \|\| u->tag1 == RealReg) \
				483	? (u->size == 4) : True)
				484
				485	Int n_lits = 0;
				486	if (u->tag1 == Literal) n_lits++;
				487	if (u->tag2 == Literal) n_lits++;
				488	if (u->tag3 == Literal) n_lits++;
				489	if (n_lits > 1)
				490	return False;
				491
				492	switch (u->opcode) {
				493	case GETF:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	494	return (SZ2 \|\| SZ4) && TR1 && N2 && N3 && FLG_RD;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	495	case PUTF:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	496	return (SZ2 \|\| SZ4) && TR1 && N2 && N3 && FLG_WR;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	497	case CALLM_S: case CALLM_E:
				498	return SZ0 && N1 && N2 && N3;
				499	case INCEIP:
				500	return SZ0 && CC0 && Ls1 && N2 && N3;
				501	case LEA1:
				502	return CC0 && TR1 && TR2 && N3 && SZ4;
				503	case LEA2:
				504	return CC0 && TR1 && TR2 && TR3 && SZ4;
				505	case NOP:
				506	return SZ0 && CC0 && N1 && N2 && N3;
				507	case GET:
				508	return CC0 && AS1 && TR2 && N3;
				509	case PUT:
				510	return CC0 && TR1 && AS2 && N3;
				511	case LOAD: case STORE:
				512	return CC0 && TR1 && TR2 && N3;
				513	case MOV:
				514	return CC0 && (TR1 \|\| L1) && TR2 && N3 && SZ4_IF_TR1;
				515	case CMOV:
				516	return CC1 && TR1 && TR2 && N3 && SZ4;
				517	case JMP:
				518	return (u->cond==CondAlways ? CC0 : CC1)
				519	&& (TR1 \|\| L1) && N2 && SZ0 && N3;
				520	case CLEAR:
				521	return CC0 && Ls1 && N2 && SZ0 && N3;
				522	case CALLM:
				523	return SZ0 && Ls1 && N2 && N3;
				524	case PUSH: case POP:
				525	return CC0 && TR1 && N2 && N3;
				526	case AND: case OR:
				527	return TR1 && TR2 && N3;
				528	case ADD: case ADC: case XOR: case SUB: case SBB:
				529	return (A1 \|\| TR1 \|\| L1) && TR2 && N3;
				530	case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
				531	return (TR1 \|\| L1) && TR2 && N3;
				532	case NOT: case NEG: case INC: case DEC:
				533	return TR1 && N2 && N3;
				534	case BSWAP:
				535	return TR1 && N2 && N3 && CC0 && SZ4;
				536	case CC2VAL:
				537	return CC1 && SZ1 && TR1 && N2 && N3;
				538	case JIFZ:
				539	return CC0 && SZ4 && TR1 && L2 && N3;
				540	case FPU_R: case FPU_W:
				541	return CC0 && Ls1 && TR2 && N3;
				542	case FPU:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	543	return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	544	case LOADV:
				545	return CC0 && TR1 && TR2 && N3;
				546	case STOREV:
				547	return CC0 && (TR1 \|\| L1) && TR2 && N3;
				548	case GETV:
				549	return CC0 && A1 && TR2 && N3;
				550	case PUTV:
				551	return CC0 && (TR1 \|\| L1) && A2 && N3;
				552	case GETVF:
				553	return CC0 && TR1 && N2 && N3 && SZ0;
				554	case PUTVF:
				555	return CC0 && TR1 && N2 && N3 && SZ0;
				556	case WIDEN:
				557	return CC0 && TR1 && N2 && N3;
				558	case TESTV:
				559	return CC0 && (A1 \|\| TR1) && N2 && N3;
				560	case SETV:
				561	return CC0 && (A1 \|\| TR1) && N2 && N3;
				562	case TAG1:
				563	return CC0 && TR1 && N2 && Ls3 && SZ0;
				564	case TAG2:
				565	return CC0 && TR1 && TR2 && Ls3 && SZ0;
				566	default:
				567	VG_(panic)("vg_saneUInstr: unhandled opcode");
				568	}
				569	# undef SZ4_IF_TR1
				570	# undef CC0
				571	# undef CC1
				572	# undef SZ4
				573	# undef SZ2
				574	# undef SZ1
				575	# undef SZ0
				576	# undef TR1
				577	# undef TR2
				578	# undef TR3
				579	# undef A1
				580	# undef A2
				581	# undef AS1
				582	# undef AS2
				583	# undef AS3
				584	# undef L1
				585	# undef Ls1
				586	# undef L2
				587	# undef Ls3
				588	# undef N1
				589	# undef N2
				590	# undef N3
				591	# undef FLG_RD
				592	# undef FLG_WR
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	593	# undef FLG_RD_WR_MAYBE
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	594	}
				595
				596
				597	/* Sanity checks to do with CALLMs in UCodeBlocks. */
				598	Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
				599	{
				600	Int callm = 0;
				601	Int callm_s = 0;
				602	Int callm_e = 0;
				603	Int callm_ptr, calls_ptr;
				604	Int i, j, t;
				605	Bool incall = False;
				606
				607	/* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
				608
				609	for (i = 0; i < cb->used; i++) {
				610	switch (cb->instrs[i].opcode) {
				611	case CALLM:
				612	if (!incall) return False;
				613	callm++;
				614	break;
				615	case CALLM_S:
				616	if (incall) return False;
				617	incall = True;
				618	callm_s++;
				619	break;
				620	case CALLM_E:
				621	if (!incall) return False;
				622	incall = False;
				623	callm_e++;
				624	break;
				625	case PUSH: case POP: case CLEAR:
				626	if (!incall) return False;
				627	break;
				628	default:
				629	break;
				630	}
				631	}
				632	if (incall) return False;
				633	if (callm != callm_s \|\| callm != callm_e) return False;
				634
				635	/* Check the sections between CALLM_S and CALLM's. Ensure that no
				636	PUSH uinsn pushes any TempReg that any other PUSH in the same
				637	section pushes. Ie, check that the TempReg args to PUSHes in
				638	the section are unique. If not, the instrumenter generates
				639	incorrect code for CALLM insns. */
				640
				641	callm_ptr = 0;
				642
				643	find_next_CALLM:
				644	/* Search for the next interval, making calls_ptr .. callm_ptr
				645	bracket it. */
				646	while (callm_ptr < cb->used
				647	&& cb->instrs[callm_ptr].opcode != CALLM)
				648	callm_ptr++;
				649	if (callm_ptr == cb->used)
				650	return True;
				651	vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
				652
				653	calls_ptr = callm_ptr - 1;
				654	while (cb->instrs[calls_ptr].opcode != CALLM_S)
				655	calls_ptr--;
				656	vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
				657	vg_assert(calls_ptr >= 0);
				658
				659	/* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
				660
				661	/* For each PUSH insn in the interval ... */
				662	for (i = calls_ptr + 1; i < callm_ptr; i++) {
				663	if (cb->instrs[i].opcode != PUSH) continue;
				664	t = cb->instrs[i].val1;
				665	/* Ensure no later PUSH insns up to callm_ptr push the same
				666	TempReg. Return False if any such are found. */
				667	for (j = i+1; j < callm_ptr; j++) {
				668	if (cb->instrs[j].opcode == PUSH &&
				669	cb->instrs[j].val1 == t)
				670	return False;
				671	}
				672	}
				673
				674	/* This interval is clean. Keep going ... */
				675	callm_ptr++;
				676	goto find_next_CALLM;
				677	}
				678
				679
				680	/*------------------------------------------------------------*/
				681	/*--- Printing uinstrs. ---*/
				682	/*------------------------------------------------------------*/
				683
				684	Char* VG_(nameCondcode) ( Condcode cond )
				685	{
				686	switch (cond) {
				687	case CondO: return "o";
				688	case CondNO: return "no";
				689	case CondB: return "b";
				690	case CondNB: return "nb";
				691	case CondZ: return "z";
				692	case CondNZ: return "nz";
				693	case CondBE: return "be";
				694	case CondNBE: return "nbe";
				695	case CondS: return "s";
				696	case ConsNS: return "ns";
				697	case CondP: return "p";
				698	case CondNP: return "np";
				699	case CondL: return "l";
				700	case CondNL: return "nl";
				701	case CondLE: return "le";
				702	case CondNLE: return "nle";
				703	case CondAlways: return "MP"; /* hack! */
				704	default: VG_(panic)("nameCondcode");
				705	}
				706	}
				707
				708
				709	static void vg_ppFlagSet ( Char* prefix, FlagSet set )
				710	{
				711	VG_(printf)("%s", prefix);
				712	if (set & FlagD) VG_(printf)("D");
				713	if (set & FlagO) VG_(printf)("O");
				714	if (set & FlagS) VG_(printf)("S");
				715	if (set & FlagZ) VG_(printf)("Z");
				716	if (set & FlagA) VG_(printf)("A");
				717	if (set & FlagC) VG_(printf)("C");
				718	if (set & FlagP) VG_(printf)("P");
				719	}
				720
				721
				722	static void ppTempReg ( Int tt )
				723	{
				724	if ((tt & 1) == 0)
				725	VG_(printf)("t%d", tt);
				726	else
				727	VG_(printf)("q%d", tt-1);
				728	}
				729
				730
				731	static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
				732	{
				733	UInt tag, val;
				734	switch (operandNo) {
				735	case 1: tag = u->tag1; val = u->val1; break;
				736	case 2: tag = u->tag2; val = u->val2; break;
				737	case 3: tag = u->tag3; val = u->val3; break;
				738	default: VG_(panic)("ppUOperand(1)");
				739	}
				740	if (tag == Literal) val = u->lit32;
				741
				742	if (parens) VG_(printf)("(");
				743	switch (tag) {
				744	case TempReg: ppTempReg(val); break;
				745	case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
				746	case Literal: VG_(printf)("$0x%x", val); break;
				747	case Lit16: VG_(printf)("$0x%x", val); break;
				748	case NoValue: VG_(printf)("NoValue"); break;
				749	case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
				750	case SpillNo: VG_(printf)("spill%d", val); break;
				751	default: VG_(panic)("ppUOperand(2)");
				752	}
				753	if (parens) VG_(printf)(")");
				754	}
				755
				756
				757	Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
				758	{
				759	switch (opc) {
				760	case ADD: return (upper ? "ADD" : "add");
				761	case ADC: return (upper ? "ADC" : "adc");
				762	case AND: return (upper ? "AND" : "and");
				763	case OR: return (upper ? "OR" : "or");
				764	case XOR: return (upper ? "XOR" : "xor");
				765	case SUB: return (upper ? "SUB" : "sub");
				766	case SBB: return (upper ? "SBB" : "sbb");
				767	case SHL: return (upper ? "SHL" : "shl");
				768	case SHR: return (upper ? "SHR" : "shr");
				769	case SAR: return (upper ? "SAR" : "sar");
				770	case ROL: return (upper ? "ROL" : "rol");
				771	case ROR: return (upper ? "ROR" : "ror");
				772	case RCL: return (upper ? "RCL" : "rcl");
				773	case RCR: return (upper ? "RCR" : "rcr");
				774	case NOT: return (upper ? "NOT" : "not");
				775	case NEG: return (upper ? "NEG" : "neg");
				776	case INC: return (upper ? "INC" : "inc");
				777	case DEC: return (upper ? "DEC" : "dec");
				778	case BSWAP: return (upper ? "BSWAP" : "bswap");
				779	default: break;
				780	}
				781	if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
				782	switch (opc) {
				783	case GETVF: return "GETVF";
				784	case PUTVF: return "PUTVF";
				785	case TAG1: return "TAG1";
				786	case TAG2: return "TAG2";
				787	case CALLM_S: return "CALLM_S";
				788	case CALLM_E: return "CALLM_E";
				789	case INCEIP: return "INCEIP";
				790	case LEA1: return "LEA1";
				791	case LEA2: return "LEA2";
				792	case NOP: return "NOP";
				793	case GET: return "GET";
				794	case PUT: return "PUT";
				795	case GETF: return "GETF";
				796	case PUTF: return "PUTF";
				797	case LOAD: return "LD" ;
				798	case STORE: return "ST" ;
				799	case MOV: return "MOV";
				800	case CMOV: return "CMOV";
				801	case WIDEN: return "WIDEN";
				802	case JMP: return "J" ;
				803	case JIFZ: return "JIFZ" ;
				804	case CALLM: return "CALLM";
				805	case PUSH: return "PUSH" ;
				806	case POP: return "POP" ;
				807	case CLEAR: return "CLEAR";
				808	case CC2VAL: return "CC2VAL";
				809	case FPU_R: return "FPU_R";
				810	case FPU_W: return "FPU_W";
				811	case FPU: return "FPU" ;
				812	case LOADV: return "LOADV";
				813	case STOREV: return "STOREV";
				814	case GETV: return "GETV";
				815	case PUTV: return "PUTV";
				816	case TESTV: return "TESTV";
				817	case SETV: return "SETV";
				818	default: VG_(panic)("nameUOpcode: unhandled case");
				819	}
				820	}
				821
				822
				823	void VG_(ppUInstr) ( Int instrNo, UInstr* u )
				824	{
				825	VG_(printf)("\t%4d: %s", instrNo,
				826	VG_(nameUOpcode)(True, u->opcode));
				827	if (u->opcode == JMP \|\| u->opcode == CC2VAL)
				828	VG_(printf)("%s", VG_(nameCondcode(u->cond)));
				829
				830	switch (u->size) {
				831	case 0: VG_(printf)("o"); break;
				832	case 1: VG_(printf)("B"); break;
				833	case 2: VG_(printf)("W"); break;
				834	case 4: VG_(printf)("L"); break;
				835	case 8: VG_(printf)("Q"); break;
				836	default: VG_(printf)("%d", (Int)u->size); break;
				837	}
				838
				839	switch (u->opcode) {
				840
				841	case TAG1:
				842	VG_(printf)("\t");
				843	ppUOperand(u, 1, 4, False);
				844	VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
				845	ppUOperand(u, 1, 4, False);
				846	VG_(printf)(" )");
				847	break;
				848
				849	case TAG2:
				850	VG_(printf)("\t");
				851	ppUOperand(u, 2, 4, False);
				852	VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
				853	ppUOperand(u, 1, 4, False);
				854	VG_(printf)(", ");
				855	ppUOperand(u, 2, 4, False);
				856	VG_(printf)(" )");
				857	break;
				858
				859	case CALLM_S: case CALLM_E:
				860	break;
				861
				862	case INCEIP:
				863	VG_(printf)("\t$%d", u->val1);
				864	break;
				865
				866	case LEA2:
				867	VG_(printf)("\t%d(" , u->lit32);
				868	ppUOperand(u, 1, 4, False);
				869	VG_(printf)(",");
				870	ppUOperand(u, 2, 4, False);
				871	VG_(printf)(",%d), ", (Int)u->extra4b);
				872	ppUOperand(u, 3, 4, False);
				873	break;
				874
				875	case LEA1:
				876	VG_(printf)("\t%d" , u->lit32);
				877	ppUOperand(u, 1, 4, True);
				878	VG_(printf)(", ");
				879	ppUOperand(u, 2, 4, False);
				880	break;
				881
				882	case NOP:
				883	break;
				884
				885	case FPU_W:
				886	VG_(printf)("\t0x%x:0x%x, ",
				887	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				888	ppUOperand(u, 2, 4, True);
				889	break;
				890
				891	case FPU_R:
				892	VG_(printf)("\t");
				893	ppUOperand(u, 2, 4, True);
				894	VG_(printf)(", 0x%x:0x%x",
				895	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				896	break;
				897
				898	case FPU:
				899	VG_(printf)("\t0x%x:0x%x",
				900	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				901	break;
				902
				903	case STOREV: case LOADV:
				904	case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
				905	VG_(printf)("\t");
				906	ppUOperand(u, 1, u->size, u->opcode==LOAD \|\| u->opcode==LOADV);
				907	VG_(printf)(", ");
				908	ppUOperand(u, 2, u->size, u->opcode==STORE \|\| u->opcode==STOREV);
				909	break;
				910
				911	case GETF: case PUTF:
				912	VG_(printf)("\t");
				913	ppUOperand(u, 1, u->size, False);
				914	break;
				915
				916	case JMP: case CC2VAL:
				917	case PUSH: case POP: case CLEAR: case CALLM:
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	918	if (u->opcode == JMP) {
				919	switch (u->jmpkind) {
				920	case JmpCall: VG_(printf)("-c"); break;
				921	case JmpRet: VG_(printf)("-r"); break;
				922	case JmpSyscall: VG_(printf)("-sys"); break;
				923	case JmpClientReq: VG_(printf)("-cli"); break;
				924	default: break;
				925	}
				926	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	927	VG_(printf)("\t");
				928	ppUOperand(u, 1, u->size, False);
				929	break;
				930
				931	case JIFZ:
				932	VG_(printf)("\t");
				933	ppUOperand(u, 1, u->size, False);
				934	VG_(printf)(", ");
				935	ppUOperand(u, 2, u->size, False);
				936	break;
				937
				938	case PUTVF: case GETVF:
				939	VG_(printf)("\t");
				940	ppUOperand(u, 1, 0, False);
				941	break;
				942
				943	case NOT: case NEG: case INC: case DEC: case BSWAP:
				944	VG_(printf)("\t");
				945	ppUOperand(u, 1, u->size, False);
				946	break;
				947
				948	case ADD: case ADC: case AND: case OR:
				949	case XOR: case SUB: case SBB:
				950	case SHL: case SHR: case SAR:
				951	case ROL: case ROR: case RCL: case RCR:
				952	VG_(printf)("\t");
				953	ppUOperand(u, 1, u->size, False);
				954	VG_(printf)(", ");
				955	ppUOperand(u, 2, u->size, False);
				956	break;
				957
				958	case GETV: case PUTV:
				959	VG_(printf)("\t");
				960	ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
				961	VG_(printf)(", ");
				962	ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
				963	break;
				964
				965	case WIDEN:
				966	VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
				967	u->signed_widen?'s':'z');
				968	VG_(printf)("\t");
				969	ppUOperand(u, 1, u->size, False);
				970	break;
				971
				972	case TESTV: case SETV:
				973	VG_(printf)("\t");
				974	ppUOperand(u, 1, u->size, False);
				975	break;
				976
				977	default: VG_(panic)("ppUInstr: unhandled opcode");
				978	}
				979
				980	if (u->flags_r != FlagsEmpty \|\| u->flags_w != FlagsEmpty) {
				981	VG_(printf)(" (");
				982	if (u->flags_r != FlagsEmpty)
				983	vg_ppFlagSet("-r", u->flags_r);
				984	if (u->flags_w != FlagsEmpty)
				985	vg_ppFlagSet("-w", u->flags_w);
				986	VG_(printf)(")");
				987	}
				988	VG_(printf)("\n");
				989	}
				990
				991
				992	void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
				993	{
				994	Int i;
				995	VG_(printf)("\n%s\n", title);
				996	for (i = 0; i < cb->used; i++)
				997	if (0 \|\| cb->instrs[i].opcode != NOP)
				998	VG_(ppUInstr) ( i, &cb->instrs[i] );
				999	VG_(printf)("\n");
				1000	}
				1001
				1002
/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation               ---*/
/*--- and code improvement.                                ---*/
/*------------------------------------------------------------*/
				1007
				1008	/* A structure for communicating temp uses, and for indicating
				1009	temp->real register mappings for patchUInstr. */
				1010	typedef
				1011	struct {
				1012	Int realNo;
				1013	Int tempNo;
				1014	Bool isWrite;
				1015	}
				1016	TempUse;
				1017
				1018
				1019	/* Get the temp use of a uinstr, parking them in an array supplied by
				1020	the caller, which is assumed to be big enough. Return the number
				1021	of entries. Insns which read _and_ write a register wind up
				1022	mentioning it twice. Entries are placed in the array in program
				1023	order, so that if a reg is read-modified-written, it appears first
				1024	as a read and then as a write.
				1025	*/
				1026	static __inline__
				1027	Int getTempUsage ( UInstr* u, TempUse* arr )
				1028	{
				1029
				1030	# define RD(ono) \
				1031	if (mycat(u->tag,ono) == TempReg) \
				1032	{ arr[n].tempNo = mycat(u->val,ono); \
				1033	arr[n].isWrite = False; n++; }
				1034	# define WR(ono) \
				1035	if (mycat(u->tag,ono) == TempReg) \
				1036	{ arr[n].tempNo = mycat(u->val,ono); \
				1037	arr[n].isWrite = True; n++; }
				1038
				1039	Int n = 0;
				1040	switch (u->opcode) {
				1041	case LEA1: RD(1); WR(2); break;
				1042	case LEA2: RD(1); RD(2); WR(3); break;
				1043
				1044	case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
				1045	case FPU_R: case FPU_W: RD(2); break;
				1046
				1047	case GETF: WR(1); break;
				1048	case PUTF: RD(1); break;
				1049
				1050	case GET: WR(2); break;
				1051	case PUT: RD(1); break;
				1052	case LOAD: RD(1); WR(2); break;
				1053	case STORE: RD(1); RD(2); break;
				1054	case MOV: RD(1); WR(2); break;
				1055
				1056	case JMP: RD(1); break;
				1057	case CLEAR: case CALLM: break;
				1058
				1059	case PUSH: RD(1); break;
				1060	case POP: WR(1); break;
				1061
				1062	case TAG2:
				1063	case CMOV:
				1064	case ADD: case ADC: case AND: case OR:
				1065	case XOR: case SUB: case SBB:
				1066	RD(1); RD(2); WR(2); break;
				1067
				1068	case SHL: case SHR: case SAR:
				1069	case ROL: case ROR: case RCL: case RCR:
				1070	RD(1); RD(2); WR(2); break;
				1071
				1072	case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
				1073	RD(1); WR(1); break;
				1074
				1075	case WIDEN: RD(1); WR(1); break;
				1076
				1077	case CC2VAL: WR(1); break;
				1078	case JIFZ: RD(1); break;
				1079
				1080	/* These sizes are only ever consulted when the instrumentation
				1081	code is being added, so the following can return
				1082	manifestly-bogus sizes. */
				1083	case LOADV: RD(1); WR(2); break;
				1084	case STOREV: RD(1); RD(2); break;
				1085	case GETV: WR(2); break;
				1086	case PUTV: RD(1); break;
				1087	case TESTV: RD(1); break;
				1088	case SETV: WR(1); break;
				1089	case PUTVF: RD(1); break;
				1090	case GETVF: WR(1); break;
				1091
				1092	default: VG_(panic)("getTempUsage: unhandled opcode");
				1093	}
				1094	return n;
				1095
				1096	# undef RD
				1097	# undef WR
				1098	}
				1099
				1100
				1101	/* Change temp regs in u into real regs, as directed by tmap. */
				1102	static __inline__
				1103	void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
				1104	{
				1105	Int i;
				1106	if (u->tag1 == TempReg) {
				1107	for (i = 0; i < n_tmap; i++)
				1108	if (tmap[i].tempNo == u->val1) break;
				1109	if (i == n_tmap) VG_(panic)("patchUInstr(1)");
				1110	u->tag1 = RealReg;
				1111	u->val1 = tmap[i].realNo;
				1112	}
				1113	if (u->tag2 == TempReg) {
				1114	for (i = 0; i < n_tmap; i++)
				1115	if (tmap[i].tempNo == u->val2) break;
				1116	if (i == n_tmap) VG_(panic)("patchUInstr(2)");
				1117	u->tag2 = RealReg;
				1118	u->val2 = tmap[i].realNo;
				1119	}
				1120	if (u->tag3 == TempReg) {
				1121	for (i = 0; i < n_tmap; i++)
				1122	if (tmap[i].tempNo == u->val3) break;
				1123	if (i == n_tmap) VG_(panic)("patchUInstr(3)");
				1124	u->tag3 = RealReg;
				1125	u->val3 = tmap[i].realNo;
				1126	}
				1127	}
				1128
				1129
				1130	/* Tedious x86-specific hack which compensates for the fact that the
				1131	register numbers for %ah .. %dh do not correspond to those for %eax
				1132	.. %edx. It maps a (reg size, reg no) pair to the number of the
				1133	containing 32-bit reg. */
				1134	static __inline__
				1135	Int containingArchRegOf ( Int sz, Int aregno )
				1136	{
				1137	switch (sz) {
				1138	case 4: return aregno;
				1139	case 2: return aregno;
				1140	case 1: return aregno >= 4 ? aregno-4 : aregno;
				1141	default: VG_(panic)("containingArchRegOf");
				1142	}
				1143	}
				1144
				1145
				1146	/* If u reads an ArchReg, return the number of the containing arch
				1147	reg. Otherwise return -1. Used in redundant-PUT elimination. */
				1148	static __inline__
				1149	Int maybe_uinstrReadsArchReg ( UInstr* u )
				1150	{
				1151	switch (u->opcode) {
				1152	case GET:
				1153	case ADD: case ADC: case AND: case OR:
				1154	case XOR: case SUB: case SBB:
				1155	case SHL: case SHR: case SAR: case ROL:
				1156	case ROR: case RCL: case RCR:
				1157	if (u->tag1 == ArchReg)
				1158	return containingArchRegOf ( u->size, u->val1 );
				1159	else
				1160	return -1;
				1161
				1162	case GETF: case PUTF:
				1163	case CALLM_S: case CALLM_E:
				1164	case INCEIP:
				1165	case LEA1:
				1166	case LEA2:
				1167	case NOP:
				1168	case PUT:
				1169	case LOAD:
				1170	case STORE:
				1171	case MOV:
				1172	case CMOV:
				1173	case JMP:
				1174	case CALLM: case CLEAR: case PUSH: case POP:
				1175	case NOT: case NEG: case INC: case DEC: case BSWAP:
				1176	case CC2VAL:
				1177	case JIFZ:
				1178	case FPU: case FPU_R: case FPU_W:
				1179	case WIDEN:
				1180	return -1;
				1181
				1182	default:
				1183	VG_(ppUInstr)(0,u);
				1184	VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
				1185	}
				1186	}
				1187
				1188	static __inline__
				1189	Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
				1190	{
				1191	Int i, k;
				1192	TempUse tempUse[3];
				1193	k = getTempUsage ( u, &tempUse[0] );
				1194	for (i = 0; i < k; i++)
				1195	if (tempUse[i].tempNo == tempreg)
				1196	return True;
				1197	return False;
				1198	}
				1199
				1200
/*------------------------------------------------------------*/
/*--- ucode improvement.                                   ---*/
/*------------------------------------------------------------*/
				1204
				1205	/* Improve the code in cb by doing
				1206	-- Redundant ArchReg-fetch elimination
				1207	-- Redundant PUT elimination
				1208	-- Redundant cond-code restore/save elimination
				1209	The overall effect of these is to allow target registers to be
				1210	cached in host registers over multiple target insns.
				1211	*/
				1212	static void vg_improve ( UCodeBlock* cb )
				1213	{
				1214	Int i, j, k, m, n, ar, tr, told, actual_areg;
				1215	Int areg_map[8];
				1216	Bool annul_put[8];
				1217	TempUse tempUse[3];
				1218	UInstr* u;
				1219	Bool wr;
				1220	Int* last_live_before;
				1221	FlagSet future_dead_flags;
				1222
				1223	if (cb->nextTemp > 0)
				1224	last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
				1225	else
				1226	last_live_before = NULL;
				1227
				1228
				1229	/* PASS 1: redundant GET elimination. (Actually, more general than
				1230	that -- eliminates redundant fetches of ArchRegs). */
				1231
				1232	/* Find the live-range-ends for all temporaries. Duplicates code
				1233	in the register allocator :-( */
				1234
				1235	for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
				1236
				1237	for (i = cb->used-1; i >= 0; i--) {
				1238	u = &cb->instrs[i];
				1239
				1240	k = getTempUsage(u, &tempUse[0]);
				1241
				1242	/* For each temp usage ... bwds in program order. */
				1243	for (j = k-1; j >= 0; j--) {
				1244	tr = tempUse[j].tempNo;
				1245	wr = tempUse[j].isWrite;
				1246	if (last_live_before[tr] == -1) {
				1247	vg_assert(tr >= 0 && tr < cb->nextTemp);
				1248	last_live_before[tr] = wr ? (i+1) : i;
				1249	}
				1250	}
				1251
				1252	}
				1253
				1254	# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
				1255	{ Int q; \
				1256	/* Invalidate any old binding(s) to tempreg. */ \
				1257	for (q = 0; q < 8; q++) \
				1258	if (areg_map[q] == tempreg) areg_map[q] = -1; \
				1259	/* Add the new binding. */ \
				1260	areg_map[archreg] = (tempreg); \
				1261	}
				1262
				1263	/* Set up the A-reg map. */
				1264	for (i = 0; i < 8; i++) areg_map[i] = -1;
				1265
				1266	/* Scan insns. */
				1267	for (i = 0; i < cb->used; i++) {
				1268	u = &cb->instrs[i];
				1269	if (u->opcode == GET && u->size == 4) {
				1270	/* GET; see if it can be annulled. */
				1271	vg_assert(u->tag1 == ArchReg);
				1272	vg_assert(u->tag2 == TempReg);
				1273	ar = u->val1;
				1274	tr = u->val2;
				1275	told = areg_map[ar];
				1276	if (told != -1 && last_live_before[told] <= i) {
				1277	/* ar already has an old mapping to told, but that runs
				1278	out here. Annul this GET, rename tr to told for the
				1279	rest of the block, and extend told's live range to that
				1280	of tr. */
				1281	u->opcode = NOP;
				1282	u->tag1 = u->tag2 = NoValue;
				1283	n = last_live_before[tr] + 1;
				1284	if (n > cb->used) n = cb->used;
				1285	last_live_before[told] = last_live_before[tr];
				1286	last_live_before[tr] = i-1;
				1287	if (VG_(disassemble))
				1288	VG_(printf)(
				1289	"at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
				1290	i, tr, told,i+1, n-1);
				1291	for (m = i+1; m < n; m++) {
				1292	if (cb->instrs[m].tag1 == TempReg
				1293	&& cb->instrs[m].val1 == tr)
				1294	cb->instrs[m].val1 = told;
				1295	if (cb->instrs[m].tag2 == TempReg
				1296	&& cb->instrs[m].val2 == tr)
				1297	cb->instrs[m].val2 = told;
				1298	}
				1299	BIND_ARCH_TO_TEMP(ar,told);
				1300	}
				1301	else
				1302	BIND_ARCH_TO_TEMP(ar,tr);
				1303	}
				1304	else if (u->opcode == GET && u->size != 4) {
				1305	/* Invalidate any mapping for this archreg. */
				1306	actual_areg = containingArchRegOf ( u->size, u->val1 );
				1307	areg_map[actual_areg] = -1;
				1308	}
				1309	else if (u->opcode == PUT && u->size == 4) {
				1310	/* PUT; re-establish t -> a binding */
				1311	vg_assert(u->tag1 == TempReg);
				1312	vg_assert(u->tag2 == ArchReg);
				1313	BIND_ARCH_TO_TEMP(u->val2, u->val1);
				1314	}
				1315	else if (u->opcode == PUT && u->size != 4) {
				1316	/* Invalidate any mapping for this archreg. */
				1317	actual_areg = containingArchRegOf ( u->size, u->val2 );
				1318	areg_map[actual_areg] = -1;
				1319	} else {
				1320
				1321	/* see if insn has an archreg as a read operand; if so try to
				1322	map it. */
				1323	if (u->tag1 == ArchReg && u->size == 4
				1324	&& areg_map[u->val1] != -1) {
				1325	switch (u->opcode) {
				1326	case ADD: case SUB: case AND: case OR: case XOR:
				1327	case ADC: case SBB:
				1328	case SHL: case SHR: case SAR: case ROL: case ROR:
				1329	case RCL: case RCR:
				1330	if (VG_(disassemble))
				1331	VG_(printf)(
				1332	"at %d: change ArchReg %S to TempReg t%d\n",
				1333	i, nameIReg(4,u->val1), areg_map[u->val1]);
				1334	u->tag1 = TempReg;
				1335	u->val1 = areg_map[u->val1];
				1336	/* Remember to extend the live range of the TempReg,
				1337	if necessary. */
				1338	if (last_live_before[u->val1] < i)
				1339	last_live_before[u->val1] = i;
				1340	break;
				1341	default:
				1342	break;
				1343	}
				1344	}
				1345
				1346	/* boring insn; invalidate any mappings to temps it writes */
				1347	k = getTempUsage(u, &tempUse[0]);
				1348
				1349	for (j = 0; j < k; j++) {
				1350	wr = tempUse[j].isWrite;
				1351	if (!wr) continue;
				1352	tr = tempUse[j].tempNo;
				1353	for (m = 0; m < 8; m++)
				1354	if (areg_map[m] == tr) areg_map[m] = -1;
				1355	}
				1356	}
				1357
				1358	}
				1359
				1360	# undef BIND_ARCH_TO_TEMP
				1361
sewardj	05f1aa1	2002-04-30 00:29:36 +0000	[diff] [blame]	1362	/* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
				1363	%ESP, since the memory check machinery always requires the
				1364	in-memory value of %ESP to be up to date. Although this isn't
				1365	actually required by other analyses (cache simulation), it's
				1366	simplest to be consistent for all end-uses. */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1367	for (j = 0; j < 8; j++)
				1368	annul_put[j] = False;
				1369
				1370	for (i = cb->used-1; i >= 0; i--) {
				1371	u = &cb->instrs[i];
				1372	if (u->opcode == NOP) continue;
				1373
				1374	if (u->opcode == PUT && u->size == 4) {
				1375	vg_assert(u->tag2 == ArchReg);
				1376	actual_areg = containingArchRegOf ( 4, u->val2 );
				1377	if (annul_put[actual_areg]) {
sewardj	05f1aa1	2002-04-30 00:29:36 +0000	[diff] [blame]	1378	vg_assert(actual_areg != R_ESP);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1379	u->opcode = NOP;
				1380	u->tag1 = u->tag2 = NoValue;
				1381	if (VG_(disassemble))
				1382	VG_(printf)("at %d: delete PUT\n", i );
				1383	} else {
sewardj	05f1aa1	2002-04-30 00:29:36 +0000	[diff] [blame]	1384	if (actual_areg != R_ESP)
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1385	annul_put[actual_areg] = True;
				1386	}
				1387	}
				1388	else if (u->opcode == PUT && u->size != 4) {
				1389	actual_areg = containingArchRegOf ( u->size, u->val2 );
				1390	annul_put[actual_areg] = False;
				1391	}
				1392	else if (u->opcode == JMP \|\| u->opcode == JIFZ
				1393	\|\| u->opcode == CALLM) {
				1394	for (j = 0; j < 8; j++)
				1395	annul_put[j] = False;
				1396	}
				1397	else {
				1398	/* If an instruction reads an ArchReg, the immediately
				1399	preceding PUT cannot be annulled. */
				1400	actual_areg = maybe_uinstrReadsArchReg ( u );
				1401	if (actual_areg != -1)
				1402	annul_put[actual_areg] = False;
				1403	}
				1404	}
				1405
				1406	/* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
				1407	dead after this point, annul the MOV insn and rename t2 to t1.
				1408	Further modifies the last_live_before map. */
				1409
				1410	# if 0
				1411	VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
				1412	for (i = 0; i < cb->nextTemp; i++)
				1413	VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
				1414	VG_(printf)("\n");
				1415	# endif
				1416
				1417	for (i = 0; i < cb->used-1; i++) {
				1418	u = &cb->instrs[i];
				1419	if (u->opcode != MOV) continue;
				1420	if (u->tag1 == Literal) continue;
				1421	vg_assert(u->tag1 == TempReg);
				1422	vg_assert(u->tag2 == TempReg);
				1423	if (last_live_before[u->val1] == i) {
				1424	if (VG_(disassemble))
				1425	VG_(printf)(
				1426	"at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
				1427	i, u->val2, u->val1, i+1, last_live_before[u->val2] );
				1428	for (j = i+1; j <= last_live_before[u->val2]; j++) {
				1429	if (cb->instrs[j].tag1 == TempReg
				1430	&& cb->instrs[j].val1 == u->val2)
				1431	cb->instrs[j].val1 = u->val1;
				1432	if (cb->instrs[j].tag2 == TempReg
				1433	&& cb->instrs[j].val2 == u->val2)
				1434	cb->instrs[j].val2 = u->val1;
				1435	}
				1436	last_live_before[u->val1] = last_live_before[u->val2];
				1437	last_live_before[u->val2] = i-1;
				1438	u->opcode = NOP;
				1439	u->tag1 = u->tag2 = NoValue;
				1440	}
				1441	}
				1442
				1443	/* PASS 3: redundant condition-code restore/save elimination.
				1444	Scan backwards from the end. future_dead_flags records the set
				1445	of flags which are dead at this point, that is, will be written
				1446	before they are next read. Earlier uinsns which write flags
				1447	already in future_dead_flags can have their writes annulled.
				1448	*/
				1449	future_dead_flags = FlagsEmpty;
				1450
				1451	for (i = cb->used-1; i >= 0; i--) {
				1452	u = &cb->instrs[i];
				1453
				1454	/* We might never make it to insns beyond this one, so be
				1455	conservative. */
				1456	if (u->opcode == JIFZ \|\| u->opcode == JMP) {
				1457	future_dead_flags = FlagsEmpty;
				1458	continue;
				1459	}
				1460
				1461	/* We can annul the flags written by this insn if it writes a
				1462	subset (or eq) of the set of flags known to be dead after
				1463	this insn. If not, just record the flags also written by
				1464	this insn.*/
				1465	if (u->flags_w != FlagsEmpty
				1466	&& VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
				1467	if (VG_(disassemble)) {
				1468	VG_(printf)("at %d: annul flag write ", i);
				1469	vg_ppFlagSet("", u->flags_w);
				1470	VG_(printf)(" due to later ");
				1471	vg_ppFlagSet("", future_dead_flags);
				1472	VG_(printf)("\n");
				1473	}
				1474	u->flags_w = FlagsEmpty;
				1475	} else {
				1476	future_dead_flags
				1477	= VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
				1478	}
				1479
				1480	/* If this insn also reads flags, empty out future_dead_flags so
				1481	as to force preceding writes not to be annulled. */
				1482	if (u->flags_r != FlagsEmpty)
				1483	future_dead_flags = FlagsEmpty;
				1484	}
				1485
				1486	if (last_live_before)
				1487	VG_(jitfree) ( last_live_before );
				1488	}
				1489
				1490
/*------------------------------------------------------------*/
/*--- The new register allocator.                          ---*/
/*------------------------------------------------------------*/
				1494
				1495	typedef
				1496	struct {
				1497	/* Becomes live for the first time after this insn ... */
				1498	Int live_after;
				1499	/* Becomes dead for the last time after this insn ... */
				1500	Int dead_before;
				1501	/* The "home" spill slot, if needed. Never changes. */
				1502	Int spill_no;
				1503	/* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
				1504	Int real_no;
				1505	}
				1506	TempInfo;
				1507
				1508
				1509	/* Take a ucode block and allocate its TempRegs to RealRegs, or put
				1510	them in spill locations, and add spill code, if there are not
				1511	enough real regs. The usual register allocation deal, in short.
				1512
				1513	Important redundancy of representation:
				1514
				1515	real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
				1516	to VG_NOVALUE if the real reg has no currently assigned TempReg.
				1517
				1518	The .real_no field of a TempInfo gives the current RRR for
				1519	this TempReg, or VG_NOVALUE if the TempReg is currently
				1520	in memory, in which case it is in the SpillNo denoted by
				1521	spillno.
				1522
				1523	These pieces of information (a fwds-bwds mapping, really) must
				1524	be kept consistent!
				1525
				1526	This allocator uses the so-called Second Chance Bin Packing
				1527	algorithm, as described in "Quality and Speed in Linear-scan
				1528	Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
				1529	pp142-151). It is simple and fast and remarkably good at
				1530	minimising the amount of spill code introduced.
				1531	*/
				1532
				1533	static
				1534	UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
				1535	{
				1536	TempInfo* temp_info;
				1537	Int real_to_temp[VG_MAX_REALREGS];
				1538	Bool is_spill_cand[VG_MAX_REALREGS];
				1539	Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
				1540	Int i, j, k, m, r, tno, max_ss_no;
				1541	Bool wr, defer, isRead, spill_reqd;
				1542	TempUse tempUse[3];
				1543	UCodeBlock* c2;
				1544
				1545	/* Used to denote ... well, "no value" in this fn. */
				1546	# define VG_NOTHING (-2)
				1547
				1548	/* Initialise the TempReg info. */
				1549	if (c1->nextTemp > 0)
				1550	temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
				1551	else
				1552	temp_info = NULL;
				1553
				1554	for (i = 0; i < c1->nextTemp; i++) {
				1555	temp_info[i].live_after = VG_NOTHING;
				1556	temp_info[i].dead_before = VG_NOTHING;
				1557	temp_info[i].spill_no = VG_NOTHING;
				1558	/* temp_info[i].real_no is not yet relevant. */
				1559	}
				1560
				1561	spill_reqd = False;
				1562
				1563	/* Scan fwds to establish live ranges. */
				1564
				1565	for (i = 0; i < c1->used; i++) {
				1566	k = getTempUsage(&c1->instrs[i], &tempUse[0]);
				1567	vg_assert(k >= 0 && k <= 3);
				1568
				1569	/* For each temp usage ... fwds in program order */
				1570	for (j = 0; j < k; j++) {
				1571	tno = tempUse[j].tempNo;
				1572	wr = tempUse[j].isWrite;
				1573	if (wr) {
				1574	/* Writes hold a reg live until after this insn. */
				1575	if (temp_info[tno].live_after == VG_NOTHING)
				1576	temp_info[tno].live_after = i;
				1577	if (temp_info[tno].dead_before < i + 1)
				1578	temp_info[tno].dead_before = i + 1;
				1579	} else {
				1580	/* First use of a tmp should be a write. */
				1581	vg_assert(temp_info[tno].live_after != VG_NOTHING);
				1582	/* Reads only hold it live until before this insn. */
				1583	if (temp_info[tno].dead_before < i)
				1584	temp_info[tno].dead_before = i;
				1585	}
				1586	}
				1587	}
				1588
				1589	# if 0
				1590	/* Sanity check on live ranges. Expensive but correct. */
				1591	for (i = 0; i < c1->nextTemp; i++) {
				1592	vg_assert( (temp_info[i].live_after == VG_NOTHING
				1593	&& temp_info[i].dead_before == VG_NOTHING)
				1594	\|\| (temp_info[i].live_after != VG_NOTHING
				1595	&& temp_info[i].dead_before != VG_NOTHING) );
				1596	}
				1597	# endif
				1598
				1599	/* Do a rank-based allocation of TempRegs to spill slot numbers.
				1600	We put as few as possible values in spill slots, but
				1601	nevertheless need to have an assignment to them just in case. */
				1602
				1603	max_ss_no = -1;
				1604
				1605	for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
				1606	ss_busy_until_before[i] = 0;
				1607
				1608	for (i = 0; i < c1->nextTemp; i++) {
				1609
				1610	/* True iff this temp is unused. */
				1611	if (temp_info[i].live_after == VG_NOTHING)
				1612	continue;
				1613
				1614	/* Find the lowest-numbered spill slot which is available at the
				1615	start point of this interval, and assign the interval to
				1616	it. */
				1617	for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
				1618	if (ss_busy_until_before[j] <= temp_info[i].live_after)
				1619	break;
				1620	if (j == VG_MAX_SPILLSLOTS) {
				1621	VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
				1622	VG_(panic)("register allocation failed -- out of spill slots");
				1623	}
				1624	ss_busy_until_before[j] = temp_info[i].dead_before;
				1625	temp_info[i].spill_no = j;
				1626	if (j > max_ss_no)
				1627	max_ss_no = j;
				1628	}
				1629
				1630	VG_(total_reg_rank) += (max_ss_no+1);
				1631
				1632	/* Show live ranges and assigned spill slot nos. */
				1633
				1634	if (VG_(disassemble)) {
				1635	VG_(printf)("Live Range Assignments\n");
				1636
				1637	for (i = 0; i < c1->nextTemp; i++) {
				1638	if (temp_info[i].live_after == VG_NOTHING)
				1639	continue;
				1640	VG_(printf)(
				1641	" LR %d is after %d to before %d spillno %d\n",
				1642	i,
				1643	temp_info[i].live_after,
				1644	temp_info[i].dead_before,
				1645	temp_info[i].spill_no
				1646	);
				1647	}
				1648	}
				1649
				1650	/* Now that we've established a spill slot number for each used
				1651	temporary, we can go ahead and do the core of the "Second-chance
				1652	binpacking" allocation algorithm. */
				1653
				1654	/* Resulting code goes here. We generate it all in a forwards
				1655	pass. */
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	1656	c2 = VG_(allocCodeBlock)();
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1657
				1658	/* At the start, no TempRegs are assigned to any real register.
				1659	Correspondingly, all temps claim to be currently resident in
				1660	their spill slots, as computed by the previous two passes. */
				1661	for (i = 0; i < VG_MAX_REALREGS; i++)
				1662	real_to_temp[i] = VG_NOTHING;
				1663	for (i = 0; i < c1->nextTemp; i++)
				1664	temp_info[i].real_no = VG_NOTHING;
				1665
				1666	if (VG_(disassemble))
				1667	VG_(printf)("\n");
				1668
				1669	/* Process each insn in turn. */
				1670	for (i = 0; i < c1->used; i++) {
				1671
				1672	if (c1->instrs[i].opcode == NOP) continue;
				1673	VG_(uinstrs_prealloc)++;
				1674
				1675	# if 0
				1676	/* Check map consistency. Expensive but correct. */
				1677	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1678	if (real_to_temp[r] != VG_NOTHING) {
				1679	tno = real_to_temp[r];
				1680	vg_assert(tno >= 0 && tno < c1->nextTemp);
				1681	vg_assert(temp_info[tno].real_no == r);
				1682	}
				1683	}
				1684	for (tno = 0; tno < c1->nextTemp; tno++) {
				1685	if (temp_info[tno].real_no != VG_NOTHING) {
				1686	r = temp_info[tno].real_no;
				1687	vg_assert(r >= 0 && r < VG_MAX_REALREGS);
				1688	vg_assert(real_to_temp[r] == tno);
				1689	}
				1690	}
				1691	# endif
				1692
				1693	if (VG_(disassemble))
				1694	VG_(ppUInstr)(i, &c1->instrs[i]);
				1695
				1696	/* First, free up enough real regs for this insn. This may
				1697	generate spill stores since we may have to evict some TempRegs
				1698	currently in real regs. Also generates spill loads. */
				1699
				1700	k = getTempUsage(&c1->instrs[i], &tempUse[0]);
				1701	vg_assert(k >= 0 && k <= 3);
				1702
				1703	/* For each *different* temp mentioned in the insn .... */
				1704	for (j = 0; j < k; j++) {
				1705
				1706	/* First check if the temp is mentioned again later; if so,
				1707	ignore this mention. We only want to process each temp
				1708	used by the insn once, even if it is mentioned more than
				1709	once. */
				1710	defer = False;
				1711	tno = tempUse[j].tempNo;
				1712	for (m = j+1; m < k; m++)
				1713	if (tempUse[m].tempNo == tno)
				1714	defer = True;
				1715	if (defer)
				1716	continue;
				1717
				1718	/* Now we're trying to find a register for tempUse[j].tempNo.
				1719	First of all, if it already has a register assigned, we
				1720	don't need to do anything more. */
				1721	if (temp_info[tno].real_no != VG_NOTHING)
				1722	continue;
				1723
				1724	/* No luck. The next thing to do is see if there is a
				1725	currently unassigned register available. If so, bag it. */
				1726	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1727	if (real_to_temp[r] == VG_NOTHING)
				1728	break;
				1729	}
				1730	if (r < VG_MAX_REALREGS) {
				1731	real_to_temp[r] = tno;
				1732	temp_info[tno].real_no = r;
				1733	continue;
				1734	}
				1735
				1736	/* Unfortunately, that didn't pan out either. So we'll have
				1737	to eject some other unfortunate TempReg into a spill slot
				1738	in order to free up a register. Of course, we need to be
				1739	careful not to eject some other TempReg needed by this
				1740	insn.
				1741
				1742	Select r in 0 .. VG_MAX_REALREGS-1 such that
				1743	real_to_temp[r] is not mentioned in
				1744	tempUse[0 .. k-1].tempNo, since it would be just plain
				1745	wrong to eject some other TempReg which we need to use in
				1746	this insn.
				1747
				1748	It is here that it is important to make a good choice of
				1749	register to spill. */
				1750
				1751	/* First, mark those regs which are not spill candidates. */
				1752	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1753	is_spill_cand[r] = True;
				1754	for (m = 0; m < k; m++) {
				1755	if (real_to_temp[r] == tempUse[m].tempNo) {
				1756	is_spill_cand[r] = False;
				1757	break;
				1758	}
				1759	}
				1760	}
				1761
				1762	/* We can choose any r satisfying is_spill_cand[r]. However,
				1763	try to make a good choice. First, try and find r such
				1764	that the associated TempReg is already dead. */
				1765	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1766	if (is_spill_cand[r] &&
				1767	temp_info[real_to_temp[r]].dead_before <= i)
				1768	goto have_spill_cand;
				1769	}
				1770
				1771	/* No spill cand is mapped to a dead TempReg. Now we really
				1772	_do_ have to generate spill code. Choose r so that the
				1773	next use of its associated TempReg is as far ahead as
				1774	possible, in the hope that this will minimise the number of
				1775	consequent reloads required. This is a bit expensive, but
				1776	we don't have to do it very often. */
				1777	{
				1778	Int furthest_r = VG_MAX_REALREGS;
				1779	Int furthest = 0;
				1780	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1781	if (!is_spill_cand[r]) continue;
				1782	for (m = i+1; m < c1->used; m++)
				1783	if (uInstrMentionsTempReg(&c1->instrs[m],
				1784	real_to_temp[r]))
				1785	break;
				1786	if (m > furthest) {
				1787	furthest = m;
				1788	furthest_r = r;
				1789	}
				1790	}
				1791	r = furthest_r;
				1792	goto have_spill_cand;
				1793	}
				1794
				1795	have_spill_cand:
				1796	if (r == VG_MAX_REALREGS)
				1797	VG_(panic)("new reg alloc: out of registers ?!");
				1798
				1799	/* Eject r. Important refinement: don't bother if the
				1800	associated TempReg is now dead. */
				1801	vg_assert(real_to_temp[r] != VG_NOTHING);
				1802	vg_assert(real_to_temp[r] != tno);
				1803	temp_info[real_to_temp[r]].real_no = VG_NOTHING;
				1804	if (temp_info[real_to_temp[r]].dead_before > i) {
				1805	uInstr2(c2, PUT, 4,
				1806	RealReg, VG_(rankToRealRegNo)(r),
				1807	SpillNo, temp_info[real_to_temp[r]].spill_no);
				1808	VG_(uinstrs_spill)++;
				1809	spill_reqd = True;
				1810	if (VG_(disassemble))
				1811	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1812	}
				1813
				1814	/* Decide if tno is read. */
				1815	isRead = False;
				1816	for (m = 0; m < k; m++)
				1817	if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
				1818	isRead = True;
				1819
				1820	/* If so, generate a spill load. */
				1821	if (isRead) {
				1822	uInstr2(c2, GET, 4,
				1823	SpillNo, temp_info[tno].spill_no,
				1824	RealReg, VG_(rankToRealRegNo)(r) );
				1825	VG_(uinstrs_spill)++;
				1826	spill_reqd = True;
				1827	if (VG_(disassemble))
				1828	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1829	}
				1830
				1831	/* Update the forwards and backwards maps. */
				1832	real_to_temp[r] = tno;
				1833	temp_info[tno].real_no = r;
				1834	}
				1835
				1836	/* By this point, all TempRegs mentioned by the insn have been
				1837	bought into real regs. We now copy the insn to the output
				1838	and use patchUInstr to convert its rTempRegs into
				1839	realregs. */
				1840	for (j = 0; j < k; j++)
				1841	tempUse[j].realNo
				1842	= VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	1843	VG_(copyUInstr)(c2, &c1->instrs[i]);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1844	patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
				1845
				1846	if (VG_(disassemble)) {
				1847	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1848	VG_(printf)("\n");
				1849	}
				1850	}
				1851
				1852	if (temp_info != NULL)
				1853	VG_(jitfree)(temp_info);
				1854
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	1855	VG_(freeCodeBlock)(c1);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1856
				1857	if (spill_reqd)
				1858	VG_(translations_needing_spill)++;
				1859
				1860	return c2;
				1861
				1862	# undef VG_NOTHING
				1863
				1864	}
				1865
				1866
/*------------------------------------------------------------*/
/*--- New instrumentation machinery.                       ---*/
/*------------------------------------------------------------*/
				1870
				1871	static
				1872	VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
				1873	{
				1874	switch (sz) {
				1875	case 4: return VgT_ImproveOR4_TQ;
				1876	case 2: return VgT_ImproveOR2_TQ;
				1877	case 1: return VgT_ImproveOR1_TQ;
				1878	default: VG_(panic)("get_VgT_ImproveOR_TQ");
				1879	}
				1880	}
				1881
				1882
				1883	static
				1884	VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
				1885	{
				1886	switch (sz) {
				1887	case 4: return VgT_ImproveAND4_TQ;
				1888	case 2: return VgT_ImproveAND2_TQ;
				1889	case 1: return VgT_ImproveAND1_TQ;
				1890	default: VG_(panic)("get_VgT_ImproveAND_TQ");
				1891	}
				1892	}
				1893
				1894
				1895	static
				1896	VgTagOp get_VgT_Left ( Int sz )
				1897	{
				1898	switch (sz) {
				1899	case 4: return VgT_Left4;
				1900	case 2: return VgT_Left2;
				1901	case 1: return VgT_Left1;
				1902	default: VG_(panic)("get_VgT_Left");
				1903	}
				1904	}
				1905
				1906
				1907	static
				1908	VgTagOp get_VgT_UifU ( Int sz )
				1909	{
				1910	switch (sz) {
				1911	case 4: return VgT_UifU4;
				1912	case 2: return VgT_UifU2;
				1913	case 1: return VgT_UifU1;
				1914	case 0: return VgT_UifU0;
				1915	default: VG_(panic)("get_VgT_UifU");
				1916	}
				1917	}
				1918
				1919
				1920	static
				1921	VgTagOp get_VgT_DifD ( Int sz )
				1922	{
				1923	switch (sz) {
				1924	case 4: return VgT_DifD4;
				1925	case 2: return VgT_DifD2;
				1926	case 1: return VgT_DifD1;
				1927	default: VG_(panic)("get_VgT_DifD");
				1928	}
				1929	}
				1930
				1931
				1932	static
				1933	VgTagOp get_VgT_PCast ( Int szs, Int szd )
				1934	{
				1935	if (szs == 4 && szd == 0) return VgT_PCast40;
				1936	if (szs == 2 && szd == 0) return VgT_PCast20;
				1937	if (szs == 1 && szd == 0) return VgT_PCast10;
				1938	if (szs == 0 && szd == 1) return VgT_PCast01;
				1939	if (szs == 0 && szd == 2) return VgT_PCast02;
				1940	if (szs == 0 && szd == 4) return VgT_PCast04;
				1941	if (szs == 1 && szd == 4) return VgT_PCast14;
				1942	if (szs == 1 && szd == 2) return VgT_PCast12;
				1943	if (szs == 1 && szd == 1) return VgT_PCast11;
				1944	VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
				1945	VG_(panic)("get_VgT_PCast");
				1946	}
				1947
				1948
				1949	static
				1950	VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
				1951	{
				1952	if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
				1953	if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
				1954
				1955	if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
				1956	if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
				1957
				1958	if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
				1959	if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
				1960
				1961	VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
				1962	VG_(panic)("get_VgT_Widen");
				1963	}
				1964
				1965	/* Pessimally cast the spec'd shadow from one size to another. */
				1966	static
				1967	void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
				1968	{
				1969	if (szs == 0 && szd == 0)
				1970	return;
				1971	uInstr3(cb, TAG1, 0, TempReg, tempreg,
				1972	NoValue, 0,
				1973	Lit16, get_VgT_PCast(szs,szd));
				1974	}
				1975
				1976
				1977	/* Create a signed or unsigned widen of the spec'd shadow from one
				1978	size to another. The only allowed size transitions are 1->2, 1->4
				1979	and 2->4. */
				1980	static
				1981	void create_Widen ( UCodeBlock* cb, Bool signed_widen,
				1982	Int szs, Int szd, Int tempreg )
				1983	{
				1984	if (szs == szd) return;
				1985	uInstr3(cb, TAG1, 0, TempReg, tempreg,
				1986	NoValue, 0,
				1987	Lit16, get_VgT_Widen(signed_widen,szs,szd));
				1988	}
				1989
				1990
				1991	/* Get the condition codes into a new shadow, at the given size. */
				1992	static
				1993	Int create_GETVF ( UCodeBlock* cb, Int sz )
				1994	{
				1995	Int tt = newShadow(cb);
				1996	uInstr1(cb, GETVF, 0, TempReg, tt);
				1997	create_PCast(cb, 0, sz, tt);
				1998	return tt;
				1999	}
				2000
				2001
				2002	/* Save the condition codes from the spec'd shadow. */
				2003	static
				2004	void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
				2005	{
				2006	if (sz == 0) {
				2007	uInstr1(cb, PUTVF, 0, TempReg, tempreg);
				2008	} else {
				2009	Int tt = newShadow(cb);
				2010	uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
				2011	create_PCast(cb, sz, 0, tt);
				2012	uInstr1(cb, PUTVF, 0, TempReg, tt);
				2013	}
				2014	}
				2015
				2016
				2017	/* Do Left on the spec'd shadow. */
				2018	static
				2019	void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
				2020	{
				2021	uInstr3(cb, TAG1, 0,
				2022	TempReg, tempreg,
				2023	NoValue, 0,
				2024	Lit16, get_VgT_Left(sz));
				2025	}
				2026
				2027
				2028	/* Do UifU on ts and td, putting the result in td. */
				2029	static
				2030	void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
				2031	{
				2032	uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
				2033	Lit16, get_VgT_UifU(sz));
				2034	}
				2035
				2036
				2037	/* Do DifD on ts and td, putting the result in td. */
				2038	static
				2039	void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
				2040	{
				2041	uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
				2042	Lit16, get_VgT_DifD(sz));
				2043	}
				2044
				2045
				2046	/* Do HelpAND on value tval and tag tqqq, putting the result in
				2047	tqqq. */
				2048	static
				2049	void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
				2050	{
				2051	uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
				2052	Lit16, get_VgT_ImproveAND_TQ(sz));
				2053	}
				2054
				2055
				2056	/* Do HelpOR on value tval and tag tqqq, putting the result in
				2057	tqqq. */
				2058	static
				2059	void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
				2060	{
				2061	uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
				2062	Lit16, get_VgT_ImproveOR_TQ(sz));
				2063	}
				2064
				2065
				2066	/* Get the shadow for an operand described by (tag, val). Emit code
				2067	to do this and return the identity of the shadow holding the
				2068	result. The result tag is always copied into a new shadow, so it
				2069	can be modified without trashing the original.*/
				2070	static
				2071	Int /* TempReg / getOperandShadow ( UCodeBlock cb,
				2072	Int sz, Int tag, Int val )
				2073	{
				2074	Int sh;
				2075	sh = newShadow(cb);
				2076	if (tag == TempReg) {
				2077	uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
				2078	return sh;
				2079	}
				2080	if (tag == Literal) {
				2081	uInstr1(cb, SETV, sz, TempReg, sh);
				2082	return sh;
				2083	}
				2084	if (tag == ArchReg) {
				2085	uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
				2086	return sh;
				2087	}
				2088	VG_(panic)("getOperandShadow");
				2089	}
				2090
				2091
				2092
				2093	/* Create and return an instrumented version of cb_in. Free cb_in
				2094	before returning. */
				2095	static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
				2096	{
				2097	UCodeBlock* cb;
				2098	Int i, j;
				2099	UInstr* u_in;
				2100	Int qs, qd, qt, qtt;
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2101	cb = VG_(allocCodeBlock)();
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2102	cb->nextTemp = cb_in->nextTemp;
				2103
				2104	for (i = 0; i < cb_in->used; i++) {
				2105	qs = qd = qt = qtt = INVALID_TEMPREG;
				2106	u_in = &cb_in->instrs[i];
				2107
				2108	/* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
				2109
				2110	/* VG_(ppUInstr)(0, u_in); */
				2111	switch (u_in->opcode) {
				2112
				2113	case NOP:
				2114	break;
				2115
				2116	case INCEIP:
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2117	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2118	break;
				2119
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2120	/* Loads and stores. Test the V bits for the address. 24
				2121	Mar 02: since the address is A-checked anyway, there's not
				2122	really much point in doing the V-check too, unless you
				2123	think that you might use addresses which are undefined but
				2124	still addressible. Hence the optionalisation of the V
				2125	check.
				2126
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2127	The LOADV/STOREV does an addressibility check for the
				2128	address. */
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2129
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2130	case LOAD:
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2131	if (VG_(clo_check_addrVs)) {
				2132	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2133	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
				2134	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2135	uInstr2(cb, LOADV, u_in->size,
				2136	TempReg, u_in->val1,
				2137	TempReg, SHADOW(u_in->val2));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2138	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2139	break;
				2140	case STORE:
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2141	if (VG_(clo_check_addrVs)) {
				2142	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
				2143	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
				2144	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2145	uInstr2(cb, STOREV, u_in->size,
				2146	TempReg, SHADOW(u_in->val1),
				2147	TempReg, u_in->val2);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2148	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2149	break;
				2150
				2151	/* Moving stuff around. Make the V bits follow accordingly,
				2152	but don't do anything else. */
				2153
				2154	case GET:
				2155	uInstr2(cb, GETV, u_in->size,
				2156	ArchReg, u_in->val1,
				2157	TempReg, SHADOW(u_in->val2));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2158	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2159	break;
				2160	case PUT:
				2161	uInstr2(cb, PUTV, u_in->size,
				2162	TempReg, SHADOW(u_in->val1),
				2163	ArchReg, u_in->val2);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2164	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2165	break;
				2166
				2167	case GETF:
				2168	/* This is not the smartest way to do it, but should work. */
				2169	qd = create_GETVF(cb, u_in->size);
				2170	uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2171	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2172	break;
				2173	case PUTF:
				2174	create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2175	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2176	break;
				2177
				2178	case MOV:
				2179	switch (u_in->tag1) {
				2180	case TempReg:
				2181	uInstr2(cb, MOV, 4,
				2182	TempReg, SHADOW(u_in->val1),
				2183	TempReg, SHADOW(u_in->val2));
				2184	break;
				2185	case Literal:
				2186	uInstr1(cb, SETV, u_in->size,
				2187	TempReg, SHADOW(u_in->val2));
				2188	break;
				2189	default:
				2190	VG_(panic)("vg_instrument: MOV");
				2191	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2192	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2193	break;
				2194
				2195	/* Special case of add, where one of the operands is a literal.
				2196	lea1(t) = t + some literal.
				2197	Therefore: lea1#(qa) = left(qa)
				2198	*/
				2199	case LEA1:
				2200	vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
				2201	qs = SHADOW(u_in->val1);
				2202	qd = SHADOW(u_in->val2);
				2203	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
				2204	create_Left(cb, u_in->size, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2205	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2206	break;
				2207
				2208	/* Another form of add.
				2209	lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
				2210	and is 0,1,2 or 3.
				2211	lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
				2212	Note, subtly, that the shift puts zeroes at the bottom of qt,
				2213	meaning Valid, since the corresponding shift of tt puts
				2214	zeroes at the bottom of tb.
				2215	*/
				2216	case LEA2: {
				2217	Int shift;
				2218	vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
				2219	switch (u_in->extra4b) {
				2220	case 1: shift = 0; break;
				2221	case 2: shift = 1; break;
				2222	case 4: shift = 2; break;
				2223	case 8: shift = 3; break;
				2224	default: VG_(panic)( "vg_instrument(LEA2)" );
				2225	}
				2226	qs = SHADOW(u_in->val1);
				2227	qt = SHADOW(u_in->val2);
				2228	qd = SHADOW(u_in->val3);
				2229	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
				2230	if (shift > 0) {
				2231	uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
				2232	uLiteral(cb, shift);
				2233	}
				2234	create_UifU(cb, 4, qs, qd);
				2235	create_Left(cb, u_in->size, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2236	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2237	break;
				2238	}
				2239
				2240	/* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
				2241	case INC: case DEC:
				2242	qd = SHADOW(u_in->val1);
				2243	create_Left(cb, u_in->size, qd);
				2244	if (u_in->flags_w != FlagsEmpty)
				2245	create_PUTVF(cb, u_in->size, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2246	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2247	break;
				2248
				2249	/* This is a HACK (approximation :-) */
				2250	/* rcl#/rcr#(qs,qd)
				2251	= let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
				2252	eflags# = q0
				2253	qd =pcast-0-sz(q0)
				2254	Ie, cast everything down to a single bit, then back up.
				2255	This assumes that any bad bits infect the whole word and
				2256	the eflags.
				2257	*/
				2258	case RCL: case RCR:
				2259	vg_assert(u_in->flags_r != FlagsEmpty);
				2260	/* The following assertion looks like it makes sense, but is
				2261	actually wrong. Consider this:
				2262	rcll %eax
				2263	imull %eax, %eax
				2264	The rcll writes O and C but so does the imull, so the O and C
				2265	write of the rcll is annulled by the prior improvement pass.
				2266	Noticed by Kevin Ryde <user42@zip.com.au>
				2267	*/
				2268	/* vg_assert(u_in->flags_w != FlagsEmpty); */
				2269	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2270	/* We can safely modify qs; cast it to 0-size. */
				2271	create_PCast(cb, u_in->size, 0, qs);
				2272	qd = SHADOW(u_in->val2);
				2273	create_PCast(cb, u_in->size, 0, qd);
				2274	/* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
				2275	create_UifU(cb, 0, qs, qd);
				2276	/* qs is now free; reuse it for the flag definedness. */
				2277	qs = create_GETVF(cb, 0);
				2278	create_UifU(cb, 0, qs, qd);
				2279	create_PUTVF(cb, 0, qd);
				2280	create_PCast(cb, 0, u_in->size, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2281	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2282	break;
				2283
				2284	/* for OP in shl shr sar rol ror
				2285	(qs is shift count#, qd is value to be OP#d)
				2286	OP(ts,td)
				2287	OP#(qs,qd)
				2288	= pcast-1-sz(qs) `UifU` OP(ts,qd)
				2289	So we apply OP to the tag bits too, and then UifU with
				2290	the shift count# to take account of the possibility of it
				2291	being undefined.
				2292
				2293	A bit subtle:
				2294	ROL/ROR rearrange the tag bits as per the value bits.
				2295	SHL/SHR shifts zeroes into the value, and corresponding
				2296	zeroes indicating Definedness into the tag.
				2297	SAR copies the top bit of the value downwards, and therefore
				2298	SAR also copies the definedness of the top bit too.
				2299	So in all five cases, we just apply the same op to the tag
				2300	bits as is applied to the value bits. Neat!
				2301	*/
				2302	case SHL:
				2303	case SHR: case SAR:
				2304	case ROL: case ROR: {
				2305	Int t_amount = INVALID_TEMPREG;
				2306	vg_assert(u_in->tag1 == TempReg \|\| u_in->tag1 == Literal);
				2307	vg_assert(u_in->tag2 == TempReg);
				2308	qd = SHADOW(u_in->val2);
				2309
				2310	/* Make qs hold shift-count# and make
				2311	t_amount be a TempReg holding the shift count. */
				2312	if (u_in->tag1 == Literal) {
				2313	t_amount = newTemp(cb);
				2314	uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
				2315	uLiteral(cb, u_in->lit32);
				2316	qs = SHADOW(t_amount);
				2317	uInstr1(cb, SETV, 1, TempReg, qs);
				2318	} else {
				2319	t_amount = u_in->val1;
				2320	qs = SHADOW(u_in->val1);
				2321	}
				2322
				2323	uInstr2(cb, u_in->opcode,
				2324	u_in->size,
				2325	TempReg, t_amount,
				2326	TempReg, qd);
				2327	qt = newShadow(cb);
				2328	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
				2329	create_PCast(cb, 1, u_in->size, qt);
				2330	create_UifU(cb, u_in->size, qt, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2331	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2332	break;
				2333	}
				2334
				2335	/* One simple tag operation. */
				2336	case WIDEN:
				2337	vg_assert(u_in->tag1 == TempReg);
				2338	create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
				2339	SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2340	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2341	break;
				2342
				2343	/* not#(x) = x (since bitwise independent) */
				2344	case NOT:
				2345	vg_assert(u_in->tag1 == TempReg);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2346	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2347	break;
				2348
				2349	/* neg#(x) = left(x) (derivable from case for SUB) */
				2350	case NEG:
				2351	vg_assert(u_in->tag1 == TempReg);
				2352	create_Left(cb, u_in->size, SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2353	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2354	break;
				2355
				2356	/* bswap#(x) = bswap(x) */
				2357	case BSWAP:
				2358	vg_assert(u_in->tag1 == TempReg);
				2359	vg_assert(u_in->size == 4);
				2360	qd = SHADOW(u_in->val1);
				2361	uInstr1(cb, BSWAP, 4, TempReg, qd);
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2362	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2363	break;
				2364
				2365	/* cc2val#(qd) = pcast-0-to-size(eflags#) */
				2366	case CC2VAL:
				2367	vg_assert(u_in->tag1 == TempReg);
				2368	vg_assert(u_in->flags_r != FlagsEmpty);
				2369	qt = create_GETVF(cb, u_in->size);
				2370	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2371	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2372	break;
				2373
				2374	/* cmov#(qs,qd) = cmov(qs,qd)
				2375	That is, do the cmov of tags using the same flags as for
				2376	the data (obviously). However, first do a test on the
				2377	validity of the flags.
				2378	*/
				2379	case CMOV:
				2380	vg_assert(u_in->size == 4);
				2381	vg_assert(u_in->tag1 == TempReg);
				2382	vg_assert(u_in->tag2 == TempReg);
				2383	vg_assert(u_in->flags_r != FlagsEmpty);
				2384	vg_assert(u_in->flags_w == FlagsEmpty);
				2385	qs = SHADOW(u_in->val1);
				2386	qd = SHADOW(u_in->val2);
				2387	qt = create_GETVF(cb, 0);
				2388	uInstr1(cb, TESTV, 0, TempReg, qt);
				2389	/* qt should never be referred to again. Nevertheless
				2390	... */
				2391	uInstr1(cb, SETV, 0, TempReg, qt);
				2392
				2393	uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
				2394	LAST_UINSTR(cb).cond = u_in->cond;
				2395	LAST_UINSTR(cb).flags_r = u_in->flags_r;
				2396
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2397	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2398	break;
				2399
				2400	/* add#/sub#(qs,qd)
				2401	= qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
				2402	= left(qs) `UifU` left(qd)
				2403	= left(qs `UifU` qd)
				2404	adc#/sbb#(qs,qd)
				2405	= left(qs `UifU` qd) `UifU` pcast(eflags#)
				2406	Second arg (dest) is TempReg.
				2407	First arg (src) is Literal or TempReg or ArchReg.
				2408	*/
				2409	case ADD: case SUB:
				2410	case ADC: case SBB:
				2411	qd = SHADOW(u_in->val2);
				2412	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2413	create_UifU(cb, u_in->size, qs, qd);
				2414	create_Left(cb, u_in->size, qd);
				2415	if (u_in->opcode == ADC \|\| u_in->opcode == SBB) {
				2416	vg_assert(u_in->flags_r != FlagsEmpty);
				2417	qt = create_GETVF(cb, u_in->size);
				2418	create_UifU(cb, u_in->size, qt, qd);
				2419	}
				2420	if (u_in->flags_w != FlagsEmpty) {
				2421	create_PUTVF(cb, u_in->size, qd);
				2422	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2423	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2424	break;
				2425
				2426	/* xor#(qs,qd) = qs `UifU` qd */
				2427	case XOR:
				2428	qd = SHADOW(u_in->val2);
				2429	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2430	create_UifU(cb, u_in->size, qs, qd);
				2431	if (u_in->flags_w != FlagsEmpty) {
				2432	create_PUTVF(cb, u_in->size, qd);
				2433	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2434	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2435	break;
				2436
				2437	/* and#/or#(qs,qd)
				2438	= (qs `UifU` qd) `DifD` improve(vs,qs)
				2439	`DifD` improve(vd,qd)
				2440	where improve is the relevant one of
				2441	Improve{AND,OR}_TQ
				2442	Use the following steps, with qt as a temp:
				2443	qt = improve(vd,qd)
				2444	qd = qs `UifU` qd
				2445	qd = qt `DifD` qd
				2446	qt = improve(vs,qs)
				2447	qd = qt `DifD` qd
				2448	*/
				2449	case AND: case OR:
				2450	vg_assert(u_in->tag1 == TempReg);
				2451	vg_assert(u_in->tag2 == TempReg);
				2452	qd = SHADOW(u_in->val2);
				2453	qs = SHADOW(u_in->val1);
				2454	qt = newShadow(cb);
				2455
				2456	/* qt = improve(vd,qd) */
				2457	uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
				2458	if (u_in->opcode == AND)
				2459	create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
				2460	else
				2461	create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
				2462	/* qd = qs `UifU` qd */
				2463	create_UifU(cb, u_in->size, qs, qd);
				2464	/* qd = qt `DifD` qd */
				2465	create_DifD(cb, u_in->size, qt, qd);
				2466	/* qt = improve(vs,qs) */
				2467	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
				2468	if (u_in->opcode == AND)
				2469	create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
				2470	else
				2471	create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
				2472	/* qd = qt `DifD` qd */
				2473	create_DifD(cb, u_in->size, qt, qd);
				2474	/* So, finally qd is the result tag. */
				2475	if (u_in->flags_w != FlagsEmpty) {
				2476	create_PUTVF(cb, u_in->size, qd);
				2477	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2478	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2479	break;
				2480
				2481	/* Machinery to do with supporting CALLM. Copy the start and
				2482	end markers only to make the result easier to read
				2483	(debug); they generate no code and have no effect.
				2484	*/
				2485	case CALLM_S: case CALLM_E:
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2486	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2487	break;
				2488
				2489	/* Copy PUSH and POP verbatim. Arg/result absval
				2490	calculations are done when the associated CALL is
				2491	processed. CLEAR has no effect on absval calculations but
				2492	needs to be copied.
				2493	*/
				2494	case PUSH: case POP: case CLEAR:
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2495	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2496	break;
				2497
				2498	/* In short:
				2499	callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
				2500	We have to decide on a size to do the computation at,
				2501	although the choice doesn't affect correctness. We will
				2502	do a pcast to the final size anyway, so the only important
				2503	factor is to choose a size which minimises the total
				2504	number of casts needed. Valgrind: just use size 0,
				2505	regardless. It may not be very good for performance
				2506	but does simplify matters, mainly by reducing the number
				2507	of different pessimising casts which have to be implemented.
				2508	*/
				2509	case CALLM: {
				2510	UInstr* uu;
				2511	Bool res_used;
				2512
				2513	/* Now generate the code. Get the final result absval
				2514	into qt. */
				2515	qt = newShadow(cb);
				2516	qtt = newShadow(cb);
				2517	uInstr1(cb, SETV, 0, TempReg, qt);
				2518	for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
				2519	uu = & cb_in->instrs[j];
				2520	if (uu->opcode != PUSH) continue;
				2521	/* cast via a temporary */
				2522	uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
				2523	TempReg, qtt);
				2524	create_PCast(cb, uu->size, 0, qtt);
				2525	create_UifU(cb, 0, qtt, qt);
				2526	}
				2527	/* Remembering also that flags read count as inputs. */
				2528	if (u_in->flags_r != FlagsEmpty) {
				2529	qtt = create_GETVF(cb, 0);
				2530	create_UifU(cb, 0, qtt, qt);
				2531	}
				2532
				2533	/* qt now holds the result tag. If any results from the
				2534	call are used, either by fetching with POP or
				2535	implicitly by writing the flags, we copy the result
				2536	absval to the relevant location. If not used, the call
				2537	must have been for its side effects, so we test qt here
				2538	and now. Note that this assumes that all values
				2539	removed by POP continue to be live. So dead args
				2540	must be removed with CLEAR, not by POPping them into
				2541	a dummy tempreg.
				2542	*/
				2543	res_used = False;
				2544	for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
				2545	uu = & cb_in->instrs[j];
				2546	if (uu->opcode != POP) continue;
				2547	/* Cast via a temp. */
				2548	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
				2549	create_PCast(cb, 0, uu->size, qtt);
				2550	uInstr2(cb, MOV, 4, TempReg, qtt,
				2551	TempReg, SHADOW(uu->val1));
				2552	res_used = True;
				2553	}
				2554	if (u_in->flags_w != FlagsEmpty) {
				2555	create_PUTVF(cb, 0, qt);
				2556	res_used = True;
				2557	}
				2558	if (!res_used) {
				2559	uInstr1(cb, TESTV, 0, TempReg, qt);
				2560	/* qt should never be referred to again. Nevertheless
				2561	... */
				2562	uInstr1(cb, SETV, 0, TempReg, qt);
				2563	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2564	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2565	break;
				2566	}
				2567	/* Whew ... */
				2568
				2569	case JMP:
				2570	if (u_in->tag1 == TempReg) {
				2571	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2572	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
				2573	} else {
				2574	vg_assert(u_in->tag1 == Literal);
				2575	}
				2576	if (u_in->cond != CondAlways) {
				2577	vg_assert(u_in->flags_r != FlagsEmpty);
				2578	qt = create_GETVF(cb, 0);
				2579	uInstr1(cb, TESTV, 0, TempReg, qt);
				2580	/* qt should never be referred to again. Nevertheless
				2581	... */
				2582	uInstr1(cb, SETV, 0, TempReg, qt);
				2583	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2584	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2585	break;
				2586
				2587	case JIFZ:
				2588	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2589	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2590	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2591	break;
				2592
				2593	/* Emit a check on the address used. For FPU_R, the value
				2594	loaded into the FPU is checked at the time it is read from
				2595	memory (see synth_fpu_mem_check_actions). */
				2596	case FPU_R: case FPU_W:
				2597	vg_assert(u_in->tag2 == TempReg);
				2598	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
				2599	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2600	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2601	break;
				2602
				2603	/* For FPU insns not referencing memory, just copy thru. */
				2604	case FPU:
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2605	VG_(copyUInstr)(cb, u_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2606	break;
				2607
				2608	default:
				2609	VG_(ppUInstr)(0, u_in);
				2610	VG_(panic)( "vg_instrument: unhandled case");
				2611
				2612	} /* end of switch (u_in->opcode) */
				2613
				2614	} /* end of for loop */
				2615
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	2616	VG_(freeCodeBlock)(cb_in);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2617	return cb;
				2618	}
				2619
/*------------------------------------------------------------*/
/*--- Clean up mem check instrumentation.                  ---*/
/*------------------------------------------------------------*/
				2623
				2624	#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
				2625	#define VGC_UNDEF ((UChar)100)
				2626	#define VGC_VALUE ((UChar)101)
				2627
				2628	#define NOP_no_msg(uu) \
				2629	do { uu->opcode = NOP; } while (False)
				2630
				2631	#define NOP_tag1_op(uu) \
				2632	do { uu->opcode = NOP; \
				2633	if (VG_(disassemble)) \
				2634	VG_(printf)("at %d: delete %s due to defd arg\n", \
				2635	i, VG_(nameOfTagOp(u->val3))); \
				2636	} while (False)
				2637
				2638	#define SETV_tag1_op(uu,newsz) \
				2639	do { uu->opcode = SETV; \
				2640	uu->size = newsz; \
				2641	uu->tag2 = uu->tag3 = NoValue; \
				2642	if (VG_(disassemble)) \
				2643	VG_(printf)("at %d: convert %s to SETV%d " \
				2644	"due to defd arg\n", \
				2645	i, VG_(nameOfTagOp(u->val3)), newsz); \
				2646	} while (False)
				2647
				2648
				2649
				2650	/* Run backwards and delete SETVs on shadow temps for which the next
				2651	action is a write. Needs an env saying whether or not the next
				2652	action is a write. The supplied UCodeBlock is destructively
				2653	modified.
				2654	*/
				2655	static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
				2656	{
				2657	Bool* next_is_write;
				2658	Int i, j, k, n_temps;
				2659	UInstr* u;
				2660	TempUse tempUse[3];
				2661
				2662	n_temps = cb->nextTemp;
				2663	if (n_temps == 0) return;
				2664
				2665	next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
				2666
				2667	for (i = 0; i < n_temps; i++) next_is_write[i] = True;
				2668
				2669	for (i = cb->used-1; i >= 0; i--) {
				2670	u = &cb->instrs[i];
				2671
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2672	/* If we're not checking address V bits, there will be a lot of
				2673	GETVs, TAG1s and TAG2s calculating values which are never
				2674	used. These first three cases get rid of them. */
				2675
				2676	if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
				2677	&& next_is_write[u->val2]
				2678	&& !VG_(clo_check_addrVs)) {
				2679	u->opcode = NOP;
				2680	u->size = 0;
				2681	if (VG_(disassemble))
				2682	VG_(printf)("at %d: delete GETV\n", i);
				2683	} else
				2684
				2685	if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
				2686	&& next_is_write[u->val1]
				2687	&& !VG_(clo_check_addrVs)) {
				2688	u->opcode = NOP;
				2689	u->size = 0;
				2690	if (VG_(disassemble))
				2691	VG_(printf)("at %d: delete TAG1\n", i);
				2692	} else
				2693
				2694	if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
				2695	&& next_is_write[u->val2]
				2696	&& !VG_(clo_check_addrVs)) {
				2697	u->opcode = NOP;
				2698	u->size = 0;
				2699	if (VG_(disassemble))
				2700	VG_(printf)("at %d: delete TAG2\n", i);
				2701	} else
				2702
				2703	/* We do the rest of these regardless of whether or not
				2704	addresses are V-checked. */
				2705
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2706	if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
				2707	&& next_is_write[u->val2]) {
				2708	/* This MOV is pointless because the target is dead at this
				2709	point. Delete it. */
				2710	u->opcode = NOP;
				2711	u->size = 0;
				2712	if (VG_(disassemble))
				2713	VG_(printf)("at %d: delete MOV\n", i);
				2714	} else
				2715
				2716	if (u->opcode == SETV) {
				2717	if (u->tag1 == TempReg) {
				2718	vg_assert(VGC_IS_SHADOW(u->val1));
				2719	if (next_is_write[u->val1]) {
				2720	/* This write is pointless, so annul it. */
				2721	u->opcode = NOP;
				2722	u->size = 0;
				2723	if (VG_(disassemble))
				2724	VG_(printf)("at %d: delete SETV\n", i);
				2725	} else {
				2726	/* This write has a purpose; don't annul it, but do
				2727	notice that we did it. */
				2728	next_is_write[u->val1] = True;
				2729	}
				2730
				2731	}
				2732
				2733	} else {
				2734	/* Find out what this insn does to the temps. */
				2735	k = getTempUsage(u, &tempUse[0]);
				2736	vg_assert(k <= 3);
				2737	for (j = k-1; j >= 0; j--) {
				2738	next_is_write[ tempUse[j].tempNo ]
				2739	= tempUse[j].isWrite;
				2740	}
				2741	}
				2742
				2743	}
				2744
				2745	VG_(jitfree)(next_is_write);
				2746	}
				2747
				2748
				2749	/* Run forwards, propagating and using the is-completely-defined
				2750	property. This removes a lot of redundant tag-munging code.
				2751	Unfortunately it requires intimate knowledge of how each uinstr and
				2752	tagop modifies its arguments. This duplicates knowledge of uinstr
				2753	tempreg uses embodied in getTempUsage(), which is unfortunate.
				2754	The supplied UCodeBlock* is modified in-place.
				2755
				2756	For each value temp, def[] should hold VGC_VALUE.
				2757
				2758	For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
				2759	definitely known to be fully defined at that size. In all other
				2760	circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
				2761	undefined. In cases of doubt, VGC_UNDEF is always safe.
				2762	*/
				2763	static void vg_propagate_definedness ( UCodeBlock* cb )
				2764	{
				2765	UChar* def;
				2766	Int i, j, k, t, n_temps;
				2767	UInstr* u;
				2768	TempUse tempUse[3];
				2769
				2770	n_temps = cb->nextTemp;
				2771	if (n_temps == 0) return;
				2772
				2773	def = VG_(jitmalloc)(n_temps * sizeof(UChar));
				2774	for (i = 0; i < n_temps; i++)
				2775	def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
				2776
				2777	/* Run forwards, detecting and using the all-defined property. */
				2778
				2779	for (i = 0; i < cb->used; i++) {
				2780	u = &cb->instrs[i];
				2781	switch (u->opcode) {
				2782
				2783	/* Tag-handling uinstrs. */
				2784
				2785	/* Deal with these quickly. */
				2786	case NOP:
				2787	case INCEIP:
				2788	break;
				2789
				2790	/* Make a tag defined. */
				2791	case SETV:
				2792	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2793	def[u->val1] = u->size;
				2794	break;
				2795
				2796	/* Check definedness of a tag. */
				2797	case TESTV:
				2798	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2799	if (def[u->val1] <= 4) {
				2800	vg_assert(def[u->val1] == u->size);
				2801	NOP_no_msg(u);
				2802	if (VG_(disassemble))
				2803	VG_(printf)("at %d: delete TESTV on defd arg\n", i);
				2804	}
				2805	break;
				2806
				2807	/* Applies to both values and tags. Propagate Definedness
				2808	property through copies. Note that this isn't optional;
				2809	we have to do this to keep def[] correct. */
				2810	case MOV:
				2811	vg_assert(u->tag2 == TempReg);
				2812	if (u->tag1 == TempReg) {
				2813	if (VGC_IS_SHADOW(u->val1)) {
				2814	vg_assert(VGC_IS_SHADOW(u->val2));
				2815	def[u->val2] = def[u->val1];
				2816	}
				2817	}
				2818	break;
				2819
				2820	case PUTV:
				2821	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2822	if (def[u->val1] <= 4) {
				2823	vg_assert(def[u->val1] == u->size);
				2824	u->tag1 = Literal;
				2825	u->val1 = 0;
				2826	switch (u->size) {
				2827	case 4: u->lit32 = 0x00000000; break;
				2828	case 2: u->lit32 = 0xFFFF0000; break;
				2829	case 1: u->lit32 = 0xFFFFFF00; break;
				2830	default: VG_(panic)("vg_cleanup(PUTV)");
				2831	}
				2832	if (VG_(disassemble))
				2833	VG_(printf)(
				2834	"at %d: propagate definedness into PUTV\n", i);
				2835	}
				2836	break;
				2837
				2838	case STOREV:
				2839	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2840	if (def[u->val1] <= 4) {
				2841	vg_assert(def[u->val1] == u->size);
				2842	u->tag1 = Literal;
				2843	u->val1 = 0;
				2844	switch (u->size) {
				2845	case 4: u->lit32 = 0x00000000; break;
				2846	case 2: u->lit32 = 0xFFFF0000; break;
				2847	case 1: u->lit32 = 0xFFFFFF00; break;
				2848	default: VG_(panic)("vg_cleanup(STOREV)");
				2849	}
				2850	if (VG_(disassemble))
				2851	VG_(printf)(
				2852	"at %d: propagate definedness into STandV\n", i);
				2853	}
				2854	break;
				2855
				2856	/* Nothing interesting we can do with this, I think. */
				2857	case PUTVF:
				2858	break;
				2859
				2860	/* Tag handling operations. */
				2861	case TAG2:
				2862	vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
				2863	vg_assert(u->tag3 == Lit16);
				2864	/* Ultra-paranoid "type" checking. */
				2865	switch (u->val3) {
				2866	case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
				2867	case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
				2868	case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
				2869	vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
				2870	break;
				2871	default:
				2872	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2873	break;
				2874	}
				2875	switch (u->val3) {
				2876	Int sz;
				2877	case VgT_UifU4:
				2878	sz = 4; goto do_UifU;
				2879	case VgT_UifU2:
				2880	sz = 2; goto do_UifU;
				2881	case VgT_UifU1:
				2882	sz = 1; goto do_UifU;
				2883	case VgT_UifU0:
				2884	sz = 0; goto do_UifU;
				2885	do_UifU:
				2886	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2887	vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
				2888	if (def[u->val1] <= 4) {
				2889	/* UifU. The first arg is defined, so result is
				2890	simply second arg. Delete this operation. */
				2891	vg_assert(def[u->val1] == sz);
				2892	NOP_no_msg(u);
				2893	if (VG_(disassemble))
				2894	VG_(printf)(
				2895	"at %d: delete UifU%d due to defd arg1\n",
				2896	i, sz);
				2897	}
				2898	else
				2899	if (def[u->val2] <= 4) {
				2900	/* UifU. The second arg is defined, so result is
				2901	simply first arg. Copy to second. */
				2902	vg_assert(def[u->val2] == sz);
				2903	u->opcode = MOV;
				2904	u->size = 4;
				2905	u->tag3 = NoValue;
				2906	def[u->val2] = def[u->val1];
				2907	if (VG_(disassemble))
				2908	VG_(printf)(
				2909	"at %d: change UifU%d to MOV due to defd"
				2910	" arg2\n",
				2911	i, sz);
				2912	}
				2913	break;
				2914	case VgT_ImproveAND4_TQ:
				2915	sz = 4; goto do_ImproveAND;
				2916	case VgT_ImproveAND1_TQ:
				2917	sz = 1; goto do_ImproveAND;
				2918	do_ImproveAND:
				2919	/* Implements Q = T OR Q. So if Q is entirely defined,
				2920	ie all 0s, we get MOV T, Q. */
				2921	if (def[u->val2] <= 4) {
				2922	vg_assert(def[u->val2] == sz);
				2923	u->size = 4; /* Regardless of sz */
				2924	u->opcode = MOV;
				2925	u->tag3 = NoValue;
				2926	def[u->val2] = VGC_UNDEF;
				2927	if (VG_(disassemble))
				2928	VG_(printf)(
				2929	"at %d: change ImproveAND%d_TQ to MOV due "
				2930	"to defd arg2\n",
				2931	i, sz);
				2932	}
				2933	break;
				2934	default:
				2935	goto unhandled;
				2936	}
				2937	break;
				2938
				2939	case TAG1:
				2940	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2941	if (def[u->val1] > 4) break;
				2942	/* We now know that the arg to the op is entirely defined.
				2943	If the op changes the size of the arg, we must replace
				2944	it with a SETV at the new size. If it doesn't change
				2945	the size, we can delete it completely. */
				2946	switch (u->val3) {
				2947	/* Maintain the same size ... */
				2948	case VgT_Left4:
				2949	vg_assert(def[u->val1] == 4);
				2950	NOP_tag1_op(u);
				2951	break;
				2952	case VgT_PCast11:
				2953	vg_assert(def[u->val1] == 1);
				2954	NOP_tag1_op(u);
				2955	break;
				2956	/* Change size ... */
				2957	case VgT_PCast40:
				2958	vg_assert(def[u->val1] == 4);
				2959	SETV_tag1_op(u,0);
				2960	def[u->val1] = 0;
				2961	break;
				2962	case VgT_PCast14:
				2963	vg_assert(def[u->val1] == 1);
				2964	SETV_tag1_op(u,4);
				2965	def[u->val1] = 4;
				2966	break;
				2967	case VgT_PCast12:
				2968	vg_assert(def[u->val1] == 1);
				2969	SETV_tag1_op(u,2);
				2970	def[u->val1] = 2;
				2971	break;
				2972	case VgT_PCast10:
				2973	vg_assert(def[u->val1] == 1);
				2974	SETV_tag1_op(u,0);
				2975	def[u->val1] = 0;
				2976	break;
				2977	case VgT_PCast02:
				2978	vg_assert(def[u->val1] == 0);
				2979	SETV_tag1_op(u,2);
				2980	def[u->val1] = 2;
				2981	break;
				2982	default:
				2983	goto unhandled;
				2984	}
				2985	if (VG_(disassemble))
				2986	VG_(printf)(
				2987	"at %d: delete TAG1 %s due to defd arg\n",
				2988	i, VG_(nameOfTagOp(u->val3)));
				2989	break;
				2990
				2991	default:
				2992	unhandled:
				2993	/* We don't know how to handle this uinstr. Be safe, and
				2994	set to VGC_VALUE or VGC_UNDEF all temps written by it. */
				2995	k = getTempUsage(u, &tempUse[0]);
				2996	vg_assert(k <= 3);
				2997	for (j = 0; j < k; j++) {
				2998	t = tempUse[j].tempNo;
				2999	vg_assert(t >= 0 && t < n_temps);
				3000	if (!tempUse[j].isWrite) {
				3001	/* t is read; ignore it. */
				3002	if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
				3003	VG_(printf)("ignoring def %d at %s %s\n",
				3004	def[t],
				3005	VG_(nameUOpcode)(True, u->opcode),
				3006	(u->opcode == TAG1 \|\| u->opcode == TAG2)
				3007	? VG_(nameOfTagOp)(u->val3)
				3008	: (Char*)"");
				3009	} else {
				3010	/* t is written; better nullify it. */
				3011	def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
				3012	}
				3013	}
				3014	}
				3015	}
				3016
				3017	VG_(jitfree)(def);
				3018	}
				3019
				3020
				3021	/* Top level post-instrumentation cleanup function. */
				3022	static void vg_cleanup ( UCodeBlock* cb )
				3023	{
				3024	vg_propagate_definedness ( cb );
				3025	vg_delete_redundant_SETVs ( cb );
				3026	}
				3027
				3028
/*------------------------------------------------------------*/
/*--- Main entry point for the JITter.                     ---*/
/*------------------------------------------------------------*/
				3032
				3033	/* Translate the basic block beginning at orig_addr, placing the
				3034	translation in a vg_malloc'd block, the address and size of which
				3035	are returned in trans_addr and trans_size. Length of the original
				3036	block is also returned in orig_size. If the latter three are NULL,
				3037	this call is being done for debugging purposes, in which case (a)
				3038	throw away the translation once it is made, and (b) produce a load
				3039	of debugging output.
				3040	*/
sewardj	1e8cdc9	2002-04-18 11:37:52 +0000	[diff] [blame]	3041	void VG_(translate) ( ThreadState* tst,
				3042	/* Identity of thread needing this block */
				3043	Addr orig_addr,
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3044	UInt* orig_size,
				3045	Addr* trans_addr,
				3046	UInt* trans_size )
				3047	{
				3048	Int n_disassembled_bytes, final_code_size;
				3049	Bool debugging_translation;
				3050	UChar* final_code;
				3051	UCodeBlock* cb;
				3052
				3053	VGP_PUSHCC(VgpTranslate);
				3054	debugging_translation
				3055	= orig_size == NULL \|\| trans_addr == NULL \|\| trans_size == NULL;
				3056
				3057	dis = True;
				3058	dis = debugging_translation;
				3059
				3060	/* Check if we're being asked to jump to a silly address, and if so
				3061	record an error message before potentially crashing the entire
				3062	system. */
				3063	if (VG_(clo_instrument) && !debugging_translation && !dis) {
				3064	Addr bad_addr;
				3065	Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
				3066	if (!ok) {
sewardj	1e8cdc9	2002-04-18 11:37:52 +0000	[diff] [blame]	3067	VG_(record_jump_error)(tst, bad_addr);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3068	}
				3069	}
				3070
				3071	/* if (VG_(overall_in_count) >= 4800) dis=True; */
				3072	if (VG_(disassemble))
				3073	VG_(printf)("\n");
				3074	if (0 \|\| dis
				3075	\|\| (VG_(overall_in_count) > 0 &&
				3076	(VG_(overall_in_count) % 1000 == 0))) {
				3077	if (0&& (VG_(clo_verbosity) > 1 \|\| dis))
				3078	VG_(message)(Vg_UserMsg,
				3079	"trans# %d, bb# %lu, in %d, out %d",
				3080	VG_(overall_in_count),
				3081	VG_(bbs_done),
				3082	VG_(overall_in_osize), VG_(overall_in_tsize),
				3083	orig_addr );
				3084	}
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	3085	cb = VG_(allocCodeBlock)();
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3086
				3087	/* Disassemble this basic block into cb. */
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3088	/* VGP_PUSHCC(VgpToUCode); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3089	n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3090	/* VGP_POPCC; */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3091	/* dis=True; */
				3092	/* if (0&& VG_(translations_done) < 617) */
				3093	/* dis=False; */
				3094	/* Try and improve the code a bit. */
				3095	if (VG_(clo_optimise)) {
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3096	/* VGP_PUSHCC(VgpImprove); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3097	vg_improve ( cb );
				3098	if (VG_(disassemble))
				3099	VG_(ppUCodeBlock) ( cb, "Improved code:" );
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3100	/* VGP_POPCC; */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3101	}
				3102	/* dis=False; */
				3103	/* Add instrumentation code. */
				3104	if (VG_(clo_instrument)) {
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3105	/* VGP_PUSHCC(VgpInstrument); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3106	cb = vg_instrument(cb);
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3107	/* VGP_POPCC; */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3108	if (VG_(disassemble))
				3109	VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
				3110	if (VG_(clo_cleanup)) {
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3111	/* VGP_PUSHCC(VgpCleanup); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3112	vg_cleanup(cb);
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3113	/* VGP_POPCC; */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3114	if (VG_(disassemble))
				3115	VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
				3116	}
				3117	}
				3118
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	3119	//VG_(disassemble) = True;
				3120
				3121	/* Add cache simulation code. */
				3122	if (VG_(clo_cachesim)) {
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3123	/* VGP_PUSHCC(VgpCacheInstrument); */
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	3124	cb = VG_(cachesim_instrument)(cb, orig_addr);
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3125	/* VGP_POPCC; */
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	3126	if (VG_(disassemble))
				3127	VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
				3128	}
				3129
				3130	//VG_(disassemble) = False;
				3131
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3132	/* Allocate registers. */
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3133	/* VGP_PUSHCC(VgpRegAlloc); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3134	cb = vg_do_register_allocation ( cb );
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3135	/* VGP_POPCC; */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3136	/* dis=False; */
				3137	/*
				3138	if (VG_(disassemble))
				3139	VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
				3140	*/
				3141
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3142	/* VGP_PUSHCC(VgpFromUcode); */
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3143	/* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
				3144	and so must be VG_(jitfree)'d. */
				3145	final_code = VG_(emit_code)(cb, &final_code_size );
sewardj	671ff54	2002-05-07 09:25:30 +0000	[diff] [blame]	3146	/* VGP_POPCC; */
njn	4f9c934	2002-04-29 16:03:24 +0000	[diff] [blame]	3147	VG_(freeCodeBlock)(cb);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3148
				3149	if (debugging_translation) {
				3150	/* Only done for debugging -- throw away final result. */
				3151	VG_(jitfree)(final_code);
				3152	} else {
				3153	/* Doing it for real -- return values to caller. */
				3154	*orig_size = n_disassembled_bytes;
				3155	*trans_addr = (Addr)final_code;
				3156	*trans_size = final_code_size;
				3157	}
				3158	VGP_POPCC;
				3159	}
				3160
/*--------------------------------------------------------------------*/
/*--- end                                           vg_translate.c ---*/
/*--------------------------------------------------------------------*/