Blame - coregrind/vg_translate.c - platform/external/valgrind

blob: 73cad5e9bcd6a8414f3eab590c76b1cca196954d [file] [log] [blame]

sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	1
				2	/--------------------------------------------------------------------/
				3	/--- The JITter proper: register allocation & code improvement ---/
				4	/--- vg_translate.c ---/
				5	/--------------------------------------------------------------------/
				6
				7	/*
				8	This file is part of Valgrind, an x86 protected-mode emulator
				9	designed for debugging and profiling binaries on x86-Unixes.
				10
				11	Copyright (C) 2000-2002 Julian Seward
				12	jseward@acm.org
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	13
				14	This program is free software; you can redistribute it and/or
				15	modify it under the terms of the GNU General Public License as
				16	published by the Free Software Foundation; either version 2 of the
				17	License, or (at your option) any later version.
				18
				19	This program is distributed in the hope that it will be useful, but
				20	WITHOUT ANY WARRANTY; without even the implied warranty of
				21	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				22	General Public License for more details.
				23
				24	You should have received a copy of the GNU General Public License
				25	along with this program; if not, write to the Free Software
				26	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
				27	02111-1307, USA.
				28
				29	The GNU General Public License is contained in the file LICENSE.
				30	*/
				31
				32	#include "vg_include.h"
				33
				34
				35	/------------------------------------------------------------/
				36	/--- Renamings of frequently-used global functions. ---/
				37	/------------------------------------------------------------/
				38
				39	#define uInstr1 VG_(newUInstr1)
				40	#define uInstr2 VG_(newUInstr2)
				41	#define uInstr3 VG_(newUInstr3)
				42	#define dis VG_(disassemble)
				43	#define nameIReg VG_(nameOfIntReg)
				44	#define nameISize VG_(nameOfIntSize)
				45	#define uLiteral VG_(setLiteralField)
				46	#define newTemp VG_(getNewTemp)
				47	#define newShadow VG_(getNewShadow)
				48
				49
				50	/------------------------------------------------------------/
				51	/--- Memory management for the translater. ---/
				52	/------------------------------------------------------------/
				53
				54	#define N_JITBLOCKS 4
				55	#define N_JITBLOCK_SZ 5000
				56
				57	static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
				58	static Bool jitstorage_inuse[N_JITBLOCKS];
				59	static Bool jitstorage_initdone = False;
				60
				61	static __inline__ void jitstorage_initialise ( void )
				62	{
				63	Int i;
				64	if (jitstorage_initdone) return;
				65	jitstorage_initdone = True;
				66	for (i = 0; i < N_JITBLOCKS; i++)
				67	jitstorage_inuse[i] = False;
				68	}
				69
				70	void* VG_(jitmalloc) ( Int nbytes )
				71	{
				72	Int i;
				73	jitstorage_initialise();
				74	if (nbytes > N_JITBLOCK_SZ) {
				75	/* VG_(printf)("too large: %d\n", nbytes); */
				76	return VG_(malloc)(VG_AR_PRIVATE, nbytes);
				77	}
				78	for (i = 0; i < N_JITBLOCKS; i++) {
				79	if (!jitstorage_inuse[i]) {
				80	jitstorage_inuse[i] = True;
				81	/* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
				82	return & jitstorage[i][0];
				83	}
				84	}
				85	VG_(panic)("out of slots in vg_jitmalloc\n");
				86	return VG_(malloc)(VG_AR_PRIVATE, nbytes);
				87	}
				88
				89	void VG_(jitfree) ( void* ptr )
				90	{
				91	Int i;
				92	jitstorage_initialise();
				93	for (i = 0; i < N_JITBLOCKS; i++) {
				94	if (ptr == & jitstorage[i][0]) {
				95	vg_assert(jitstorage_inuse[i]);
				96	jitstorage_inuse[i] = False;
				97	return;
				98	}
				99	}
				100	VG_(free)(VG_AR_PRIVATE, ptr);
				101	}
				102
				103	/------------------------------------------------------------/
				104	/--- Basics ---/
				105	/------------------------------------------------------------/
				106
				107	static UCodeBlock* allocCodeBlock ( void )
				108	{
				109	UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
				110	cb->used = cb->size = cb->nextTemp = 0;
				111	cb->instrs = NULL;
				112	return cb;
				113	}
				114
				115
				116	static void freeCodeBlock ( UCodeBlock* cb )
				117	{
				118	if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
				119	VG_(free)(VG_AR_PRIVATE, cb);
				120	}
				121
				122
				123	/* Ensure there's enough space in a block to add one uinstr. */
				124	static __inline__
				125	void ensureUInstr ( UCodeBlock* cb )
				126	{
				127	if (cb->used == cb->size) {
				128	if (cb->instrs == NULL) {
				129	vg_assert(cb->size == 0);
				130	vg_assert(cb->used == 0);
				131	cb->size = 8;
				132	cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
				133	} else {
				134	Int i;
				135	UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
				136	2 * sizeof(UInstr) * cb->size);
				137	for (i = 0; i < cb->used; i++)
				138	instrs2[i] = cb->instrs[i];
				139	cb->size *= 2;
				140	VG_(free)(VG_AR_PRIVATE, cb->instrs);
				141	cb->instrs = instrs2;
				142	}
				143	}
				144
				145	vg_assert(cb->used < cb->size);
				146	}
				147
				148
				149	__inline__
				150	void VG_(emptyUInstr) ( UInstr* u )
				151	{
				152	u->val1 = u->val2 = u->val3 = 0;
				153	u->tag1 = u->tag2 = u->tag3 = NoValue;
				154	u->flags_r = u->flags_w = FlagsEmpty;
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	155	u->jmpkind = JmpBoring;
				156	u->smc_check = u->signed_widen = False;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	157	u->lit32 = 0;
				158	u->opcode = 0;
				159	u->size = 0;
				160	u->cond = 0;
				161	u->extra4b = 0;
				162	}
				163
				164
				165	/* Add an instruction to a ucode block, and return the index of the
				166	instruction. */
				167	__inline__
				168	void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
				169	Tag tag1, UInt val1,
				170	Tag tag2, UInt val2,
				171	Tag tag3, UInt val3 )
				172	{
				173	UInstr* ui;
				174	ensureUInstr(cb);
				175	ui = & cb->instrs[cb->used];
				176	cb->used++;
				177	VG_(emptyUInstr)(ui);
				178	ui->val1 = val1;
				179	ui->val2 = val2;
				180	ui->val3 = val3;
				181	ui->opcode = opcode;
				182	ui->tag1 = tag1;
				183	ui->tag2 = tag2;
				184	ui->tag3 = tag3;
				185	ui->size = sz;
				186	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				187	if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
				188	if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
				189	}
				190
				191
				192	__inline__
				193	void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
				194	Tag tag1, UInt val1,
				195	Tag tag2, UInt val2 )
				196	{
				197	UInstr* ui;
				198	ensureUInstr(cb);
				199	ui = & cb->instrs[cb->used];
				200	cb->used++;
				201	VG_(emptyUInstr)(ui);
				202	ui->val1 = val1;
				203	ui->val2 = val2;
				204	ui->opcode = opcode;
				205	ui->tag1 = tag1;
				206	ui->tag2 = tag2;
				207	ui->size = sz;
				208	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				209	if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
				210	}
				211
				212
				213	__inline__
				214	void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
				215	Tag tag1, UInt val1 )
				216	{
				217	UInstr* ui;
				218	ensureUInstr(cb);
				219	ui = & cb->instrs[cb->used];
				220	cb->used++;
				221	VG_(emptyUInstr)(ui);
				222	ui->val1 = val1;
				223	ui->opcode = opcode;
				224	ui->tag1 = tag1;
				225	ui->size = sz;
				226	if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
				227	}
				228
				229
				230	__inline__
				231	void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
				232	{
				233	UInstr* ui;
				234	ensureUInstr(cb);
				235	ui = & cb->instrs[cb->used];
				236	cb->used++;
				237	VG_(emptyUInstr)(ui);
				238	ui->opcode = opcode;
				239	ui->size = sz;
				240	}
				241
				242
				243	/* Copy an instruction into the given codeblock. */
				244	static __inline__
				245	void copyUInstr ( UCodeBlock* cb, UInstr* instr )
				246	{
				247	ensureUInstr(cb);
				248	cb->instrs[cb->used] = *instr;
				249	cb->used++;
				250	}
				251
				252
				253	/* Copy auxiliary info from one uinstr to another. */
				254	static __inline__
				255	void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
				256	{
				257	dst->cond = src->cond;
				258	dst->extra4b = src->extra4b;
				259	dst->smc_check = src->smc_check;
				260	dst->signed_widen = src->signed_widen;
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	261	dst->jmpkind = src->jmpkind;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	262	dst->flags_r = src->flags_r;
				263	dst->flags_w = src->flags_w;
				264	}
				265
				266
				267	/* Set the flag R/W sets on a uinstr. */
				268	void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
				269	{
				270	/* VG_(ppUInstr)(-1,u); */
				271	vg_assert(fr == (fr & FlagsALL));
				272	vg_assert(fw == (fw & FlagsALL));
				273	u->flags_r = fr;
				274	u->flags_w = fw;
				275	}
				276
				277
				278	/* Set the lit32 field of the most recent uinsn. */
				279	void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
				280	{
				281	LAST_UINSTR(cb).lit32 = lit32;
				282	}
				283
				284
				285	Bool VG_(anyFlagUse) ( UInstr* u )
				286	{
				287	return (u->flags_r != FlagsEmpty
				288	\|\| u->flags_w != FlagsEmpty);
				289	}
				290
				291
				292
				293
				294	/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
				295	register number. This effectively defines the order in which real
				296	registers are allocated. %ebp is excluded since it is permanently
				297	reserved for pointing at VG_(baseBlock). %edi is a general spare
				298	temp used for Left4 and various misc tag ops.
				299
				300	Important! If you change the set of allocatable registers from
				301	%eax, %ebx, %ecx, %edx, %esi you must change the
				302	save/restore sequences in vg_helper_smc_check4 to match!
				303	*/
				304	__inline__ Int VG_(rankToRealRegNo) ( Int rank )
				305	{
				306	switch (rank) {
				307	# if 1
				308	/* Probably the best allocation ordering. */
				309	case 0: return R_EAX;
				310	case 1: return R_EBX;
				311	case 2: return R_ECX;
				312	case 3: return R_EDX;
				313	case 4: return R_ESI;
				314	# else
				315	/* Contrary; probably the worst. Helpful for debugging, tho. */
				316	case 4: return R_EAX;
				317	case 3: return R_EBX;
				318	case 2: return R_ECX;
				319	case 1: return R_EDX;
				320	case 0: return R_ESI;
				321	# endif
				322	default: VG_(panic)("rankToRealRegNo");
				323	}
				324	}
				325
				326
				327	/------------------------------------------------------------/
				328	/--- Sanity checking uinstrs. ---/
				329	/------------------------------------------------------------/
				330
				331	/* This seems as good a place as any to record some important stuff
				332	about ucode semantics.
				333
				334	* TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
				335	TempReg are defined to zero-extend the loaded value to 32 bits.
				336	This is needed to make the translation of movzbl et al work
				337	properly.
				338
				339	* Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
				340
				341	* Arithmetic on TempRegs is at the specified size. For example,
				342	SUBW t1, t2 has to result in a real 16 bit x86 subtraction
				343	being emitted -- not a 32 bit one.
				344
				345	* On some insns we allow the cc bit to be set. If so, the
				346	intention is that the simulated machine's %eflags register
				347	is copied into that of the real machine before the insn,
				348	and copied back again afterwards. This means that the
				349	code generated for that insn must be very careful only to
				350	update %eflags in the intended way. This is particularly
				351	important for the routines referenced by CALL insns.
				352	*/
				353
				354	/* Meaning of operand kinds is as follows:
				355
				356	ArchReg is a register of the simulated CPU, stored in memory,
				357	in vg_m_state.m_eax .. m_edi. These values are stored
				358	using the Intel register encoding.
				359
				360	RealReg is a register of the real CPU. There are VG_MAX_REALREGS
				361	available for allocation. As with ArchRegs, these values
				362	are stored using the Intel register encoding.
				363
				364	TempReg is a temporary register used to express the results of
				365	disassembly. There is an unlimited supply of them --
				366	register allocation and spilling eventually assigns them
				367	to RealRegs.
				368
				369	SpillNo is a spill slot number. The number of required spill
				370	slots is VG_MAX_PSEUDOS, in general. Only allowed
				371	as the ArchReg operand of GET and PUT.
				372
				373	Lit16 is a signed 16-bit literal value.
				374
				375	Literal is a 32-bit literal value. Each uinstr can only hold
				376	one of these.
				377
				378	The disassembled code is expressed purely in terms of ArchReg,
				379	TempReg and Literal operands. Eventually, register allocation
				380	removes all the TempRegs, giving a result using ArchRegs, RealRegs,
				381	and Literals. New x86 code can easily be synthesised from this.
				382	There are carefully designed restrictions on which insns can have
				383	which operands, intended to make it possible to generate x86 code
				384	from the result of register allocation on the ucode efficiently and
				385	without need of any further RealRegs.
				386
				387	Restrictions on insns (as generated by the disassembler) are as
				388	follows:
				389
				390	A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
				391	N=NoValue
				392
				393	GETF T N N
				394	PUTF T N N
				395
				396	GET A,S T N
				397	PUT T A,S N
				398	LOAD T T N
				399	STORE T T N
				400	MOV T,L T N
				401	CMOV T T N
				402	WIDEN T N N
				403	JMP T,L N N
				404	CALLM L N N
				405	CALLM_S N N N
				406	CALLM_E N N N
				407	PUSH,POP T N N
				408	CLEAR L N N
				409
				410	AND, OR
				411	T T N
				412
				413	ADD, ADC, XOR, SUB, SBB
				414	A,L,T T N
				415
				416	SHL, SHR, SAR, ROL, ROR, RCL, RCR
				417	L,T T N
				418
				419	NOT, NEG, INC, DEC, CC2VAL, BSWAP
				420	T N N
				421
				422	JIFZ T L N
				423
				424	FPU_R L T N
				425	FPU_W L T N
				426	FPU L T N
				427
				428	LEA1 T T (const in a seperate field)
				429	LEA2 T T T (const & shift ditto)
				430
				431	INCEIP L N N
				432
				433	and for instrumentation insns:
				434
				435	LOADV T T N
				436	STOREV T,L T N
				437	GETV A T N
				438	PUTV T,L A N
				439	GETVF T N N
				440	PUTVF T N N
				441	WIDENV T N N
				442	TESTV A,T N N
				443	SETV A,T N N
				444	TAG1 T N N
				445	TAG2 T T N
				446
				447	Before register allocation, S operands should not appear anywhere.
				448	After register allocation, all T operands should have been
				449	converted into Rs, and S operands are allowed in GET and PUT --
				450	denoting spill saves/restores.
				451
				452	The size field should be 0 for insns for which it is meaningless,
				453	ie those which do not directly move/operate on data.
				454	*/
				455	Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
				456	{
				457	# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
				458	# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
				459	# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
				460	# define A1 (u->tag1 == ArchReg)
				461	# define A2 (u->tag2 == ArchReg)
				462	# define AS1 ((u->tag1 == ArchReg) \|\| ((!beforeRA && (u->tag1 == SpillNo))))
				463	# define AS2 ((u->tag2 == ArchReg) \|\| ((!beforeRA && (u->tag2 == SpillNo))))
				464	# define AS3 ((u->tag3 == ArchReg) \|\| ((!beforeRA && (u->tag3 == SpillNo))))
				465	# define L1 (u->tag1 == Literal && u->val1 == 0)
				466	# define L2 (u->tag2 == Literal && u->val2 == 0)
				467	# define Ls1 (u->tag1 == Lit16)
				468	# define Ls3 (u->tag3 == Lit16)
				469	# define N1 (u->tag1 == NoValue)
				470	# define N2 (u->tag2 == NoValue)
				471	# define N3 (u->tag3 == NoValue)
				472	# define SZ4 (u->size == 4)
				473	# define SZ2 (u->size == 2)
				474	# define SZ1 (u->size == 1)
				475	# define SZ0 (u->size == 0)
				476	# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
				477	# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
				478	# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	479	# define FLG_RD_WR_MAYBE \
				480	((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) \
				481	\|\| (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP) \
				482	\|\| (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	483	# define CC1 (!(CC0))
				484	# define SZ4_IF_TR1 ((u->tag1 == TempReg \|\| u->tag1 == RealReg) \
				485	? (u->size == 4) : True)
				486
				487	Int n_lits = 0;
				488	if (u->tag1 == Literal) n_lits++;
				489	if (u->tag2 == Literal) n_lits++;
				490	if (u->tag3 == Literal) n_lits++;
				491	if (n_lits > 1)
				492	return False;
				493
				494	switch (u->opcode) {
				495	case GETF:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	496	return (SZ2 \|\| SZ4) && TR1 && N2 && N3 && FLG_RD;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	497	case PUTF:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	498	return (SZ2 \|\| SZ4) && TR1 && N2 && N3 && FLG_WR;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	499	case CALLM_S: case CALLM_E:
				500	return SZ0 && N1 && N2 && N3;
				501	case INCEIP:
				502	return SZ0 && CC0 && Ls1 && N2 && N3;
				503	case LEA1:
				504	return CC0 && TR1 && TR2 && N3 && SZ4;
				505	case LEA2:
				506	return CC0 && TR1 && TR2 && TR3 && SZ4;
				507	case NOP:
				508	return SZ0 && CC0 && N1 && N2 && N3;
				509	case GET:
				510	return CC0 && AS1 && TR2 && N3;
				511	case PUT:
				512	return CC0 && TR1 && AS2 && N3;
				513	case LOAD: case STORE:
				514	return CC0 && TR1 && TR2 && N3;
				515	case MOV:
				516	return CC0 && (TR1 \|\| L1) && TR2 && N3 && SZ4_IF_TR1;
				517	case CMOV:
				518	return CC1 && TR1 && TR2 && N3 && SZ4;
				519	case JMP:
				520	return (u->cond==CondAlways ? CC0 : CC1)
				521	&& (TR1 \|\| L1) && N2 && SZ0 && N3;
				522	case CLEAR:
				523	return CC0 && Ls1 && N2 && SZ0 && N3;
				524	case CALLM:
				525	return SZ0 && Ls1 && N2 && N3;
				526	case PUSH: case POP:
				527	return CC0 && TR1 && N2 && N3;
				528	case AND: case OR:
				529	return TR1 && TR2 && N3;
				530	case ADD: case ADC: case XOR: case SUB: case SBB:
				531	return (A1 \|\| TR1 \|\| L1) && TR2 && N3;
				532	case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
				533	return (TR1 \|\| L1) && TR2 && N3;
				534	case NOT: case NEG: case INC: case DEC:
				535	return TR1 && N2 && N3;
				536	case BSWAP:
				537	return TR1 && N2 && N3 && CC0 && SZ4;
				538	case CC2VAL:
				539	return CC1 && SZ1 && TR1 && N2 && N3;
				540	case JIFZ:
				541	return CC0 && SZ4 && TR1 && L2 && N3;
				542	case FPU_R: case FPU_W:
				543	return CC0 && Ls1 && TR2 && N3;
				544	case FPU:
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	545	return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	546	case LOADV:
				547	return CC0 && TR1 && TR2 && N3;
				548	case STOREV:
				549	return CC0 && (TR1 \|\| L1) && TR2 && N3;
				550	case GETV:
				551	return CC0 && A1 && TR2 && N3;
				552	case PUTV:
				553	return CC0 && (TR1 \|\| L1) && A2 && N3;
				554	case GETVF:
				555	return CC0 && TR1 && N2 && N3 && SZ0;
				556	case PUTVF:
				557	return CC0 && TR1 && N2 && N3 && SZ0;
				558	case WIDEN:
				559	return CC0 && TR1 && N2 && N3;
				560	case TESTV:
				561	return CC0 && (A1 \|\| TR1) && N2 && N3;
				562	case SETV:
				563	return CC0 && (A1 \|\| TR1) && N2 && N3;
				564	case TAG1:
				565	return CC0 && TR1 && N2 && Ls3 && SZ0;
				566	case TAG2:
				567	return CC0 && TR1 && TR2 && Ls3 && SZ0;
				568	default:
				569	VG_(panic)("vg_saneUInstr: unhandled opcode");
				570	}
				571	# undef SZ4_IF_TR1
				572	# undef CC0
				573	# undef CC1
				574	# undef SZ4
				575	# undef SZ2
				576	# undef SZ1
				577	# undef SZ0
				578	# undef TR1
				579	# undef TR2
				580	# undef TR3
				581	# undef A1
				582	# undef A2
				583	# undef AS1
				584	# undef AS2
				585	# undef AS3
				586	# undef L1
				587	# undef Ls1
				588	# undef L2
				589	# undef Ls3
				590	# undef N1
				591	# undef N2
				592	# undef N3
				593	# undef FLG_RD
				594	# undef FLG_WR
sewardj	8d32be7	2002-04-18 02:18:24 +0000	[diff] [blame]	595	# undef FLG_RD_WR_MAYBE
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	596	}
				597
				598
				599	/* Sanity checks to do with CALLMs in UCodeBlocks. */
				600	Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
				601	{
				602	Int callm = 0;
				603	Int callm_s = 0;
				604	Int callm_e = 0;
				605	Int callm_ptr, calls_ptr;
				606	Int i, j, t;
				607	Bool incall = False;
				608
				609	/* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
				610
				611	for (i = 0; i < cb->used; i++) {
				612	switch (cb->instrs[i].opcode) {
				613	case CALLM:
				614	if (!incall) return False;
				615	callm++;
				616	break;
				617	case CALLM_S:
				618	if (incall) return False;
				619	incall = True;
				620	callm_s++;
				621	break;
				622	case CALLM_E:
				623	if (!incall) return False;
				624	incall = False;
				625	callm_e++;
				626	break;
				627	case PUSH: case POP: case CLEAR:
				628	if (!incall) return False;
				629	break;
				630	default:
				631	break;
				632	}
				633	}
				634	if (incall) return False;
				635	if (callm != callm_s \|\| callm != callm_e) return False;
				636
				637	/* Check the sections between CALLM_S and CALLM's. Ensure that no
				638	PUSH uinsn pushes any TempReg that any other PUSH in the same
				639	section pushes. Ie, check that the TempReg args to PUSHes in
				640	the section are unique. If not, the instrumenter generates
				641	incorrect code for CALLM insns. */
				642
				643	callm_ptr = 0;
				644
				645	find_next_CALLM:
				646	/* Search for the next interval, making calls_ptr .. callm_ptr
				647	bracket it. */
				648	while (callm_ptr < cb->used
				649	&& cb->instrs[callm_ptr].opcode != CALLM)
				650	callm_ptr++;
				651	if (callm_ptr == cb->used)
				652	return True;
				653	vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
				654
				655	calls_ptr = callm_ptr - 1;
				656	while (cb->instrs[calls_ptr].opcode != CALLM_S)
				657	calls_ptr--;
				658	vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
				659	vg_assert(calls_ptr >= 0);
				660
				661	/* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
				662
				663	/* For each PUSH insn in the interval ... */
				664	for (i = calls_ptr + 1; i < callm_ptr; i++) {
				665	if (cb->instrs[i].opcode != PUSH) continue;
				666	t = cb->instrs[i].val1;
				667	/* Ensure no later PUSH insns up to callm_ptr push the same
				668	TempReg. Return False if any such are found. */
				669	for (j = i+1; j < callm_ptr; j++) {
				670	if (cb->instrs[j].opcode == PUSH &&
				671	cb->instrs[j].val1 == t)
				672	return False;
				673	}
				674	}
				675
				676	/* This interval is clean. Keep going ... */
				677	callm_ptr++;
				678	goto find_next_CALLM;
				679	}
				680
				681
				682	/------------------------------------------------------------/
				683	/--- Printing uinstrs. ---/
				684	/------------------------------------------------------------/
				685
				686	Char* VG_(nameCondcode) ( Condcode cond )
				687	{
				688	switch (cond) {
				689	case CondO: return "o";
				690	case CondNO: return "no";
				691	case CondB: return "b";
				692	case CondNB: return "nb";
				693	case CondZ: return "z";
				694	case CondNZ: return "nz";
				695	case CondBE: return "be";
				696	case CondNBE: return "nbe";
				697	case CondS: return "s";
				698	case ConsNS: return "ns";
				699	case CondP: return "p";
				700	case CondNP: return "np";
				701	case CondL: return "l";
				702	case CondNL: return "nl";
				703	case CondLE: return "le";
				704	case CondNLE: return "nle";
				705	case CondAlways: return "MP"; /* hack! */
				706	default: VG_(panic)("nameCondcode");
				707	}
				708	}
				709
				710
				711	static void vg_ppFlagSet ( Char* prefix, FlagSet set )
				712	{
				713	VG_(printf)("%s", prefix);
				714	if (set & FlagD) VG_(printf)("D");
				715	if (set & FlagO) VG_(printf)("O");
				716	if (set & FlagS) VG_(printf)("S");
				717	if (set & FlagZ) VG_(printf)("Z");
				718	if (set & FlagA) VG_(printf)("A");
				719	if (set & FlagC) VG_(printf)("C");
				720	if (set & FlagP) VG_(printf)("P");
				721	}
				722
				723
				724	static void ppTempReg ( Int tt )
				725	{
				726	if ((tt & 1) == 0)
				727	VG_(printf)("t%d", tt);
				728	else
				729	VG_(printf)("q%d", tt-1);
				730	}
				731
				732
				733	static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
				734	{
				735	UInt tag, val;
				736	switch (operandNo) {
				737	case 1: tag = u->tag1; val = u->val1; break;
				738	case 2: tag = u->tag2; val = u->val2; break;
				739	case 3: tag = u->tag3; val = u->val3; break;
				740	default: VG_(panic)("ppUOperand(1)");
				741	}
				742	if (tag == Literal) val = u->lit32;
				743
				744	if (parens) VG_(printf)("(");
				745	switch (tag) {
				746	case TempReg: ppTempReg(val); break;
				747	case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
				748	case Literal: VG_(printf)("$0x%x", val); break;
				749	case Lit16: VG_(printf)("$0x%x", val); break;
				750	case NoValue: VG_(printf)("NoValue"); break;
				751	case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
				752	case SpillNo: VG_(printf)("spill%d", val); break;
				753	default: VG_(panic)("ppUOperand(2)");
				754	}
				755	if (parens) VG_(printf)(")");
				756	}
				757
				758
				759	Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
				760	{
				761	switch (opc) {
				762	case ADD: return (upper ? "ADD" : "add");
				763	case ADC: return (upper ? "ADC" : "adc");
				764	case AND: return (upper ? "AND" : "and");
				765	case OR: return (upper ? "OR" : "or");
				766	case XOR: return (upper ? "XOR" : "xor");
				767	case SUB: return (upper ? "SUB" : "sub");
				768	case SBB: return (upper ? "SBB" : "sbb");
				769	case SHL: return (upper ? "SHL" : "shl");
				770	case SHR: return (upper ? "SHR" : "shr");
				771	case SAR: return (upper ? "SAR" : "sar");
				772	case ROL: return (upper ? "ROL" : "rol");
				773	case ROR: return (upper ? "ROR" : "ror");
				774	case RCL: return (upper ? "RCL" : "rcl");
				775	case RCR: return (upper ? "RCR" : "rcr");
				776	case NOT: return (upper ? "NOT" : "not");
				777	case NEG: return (upper ? "NEG" : "neg");
				778	case INC: return (upper ? "INC" : "inc");
				779	case DEC: return (upper ? "DEC" : "dec");
				780	case BSWAP: return (upper ? "BSWAP" : "bswap");
				781	default: break;
				782	}
				783	if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
				784	switch (opc) {
				785	case GETVF: return "GETVF";
				786	case PUTVF: return "PUTVF";
				787	case TAG1: return "TAG1";
				788	case TAG2: return "TAG2";
				789	case CALLM_S: return "CALLM_S";
				790	case CALLM_E: return "CALLM_E";
				791	case INCEIP: return "INCEIP";
				792	case LEA1: return "LEA1";
				793	case LEA2: return "LEA2";
				794	case NOP: return "NOP";
				795	case GET: return "GET";
				796	case PUT: return "PUT";
				797	case GETF: return "GETF";
				798	case PUTF: return "PUTF";
				799	case LOAD: return "LD" ;
				800	case STORE: return "ST" ;
				801	case MOV: return "MOV";
				802	case CMOV: return "CMOV";
				803	case WIDEN: return "WIDEN";
				804	case JMP: return "J" ;
				805	case JIFZ: return "JIFZ" ;
				806	case CALLM: return "CALLM";
				807	case PUSH: return "PUSH" ;
				808	case POP: return "POP" ;
				809	case CLEAR: return "CLEAR";
				810	case CC2VAL: return "CC2VAL";
				811	case FPU_R: return "FPU_R";
				812	case FPU_W: return "FPU_W";
				813	case FPU: return "FPU" ;
				814	case LOADV: return "LOADV";
				815	case STOREV: return "STOREV";
				816	case GETV: return "GETV";
				817	case PUTV: return "PUTV";
				818	case TESTV: return "TESTV";
				819	case SETV: return "SETV";
				820	default: VG_(panic)("nameUOpcode: unhandled case");
				821	}
				822	}
				823
				824
				825	void VG_(ppUInstr) ( Int instrNo, UInstr* u )
				826	{
				827	VG_(printf)("\t%4d: %s", instrNo,
				828	VG_(nameUOpcode)(True, u->opcode));
				829	if (u->opcode == JMP \|\| u->opcode == CC2VAL)
				830	VG_(printf)("%s", VG_(nameCondcode(u->cond)));
				831
				832	switch (u->size) {
				833	case 0: VG_(printf)("o"); break;
				834	case 1: VG_(printf)("B"); break;
				835	case 2: VG_(printf)("W"); break;
				836	case 4: VG_(printf)("L"); break;
				837	case 8: VG_(printf)("Q"); break;
				838	default: VG_(printf)("%d", (Int)u->size); break;
				839	}
				840
				841	switch (u->opcode) {
				842
				843	case TAG1:
				844	VG_(printf)("\t");
				845	ppUOperand(u, 1, 4, False);
				846	VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
				847	ppUOperand(u, 1, 4, False);
				848	VG_(printf)(" )");
				849	break;
				850
				851	case TAG2:
				852	VG_(printf)("\t");
				853	ppUOperand(u, 2, 4, False);
				854	VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
				855	ppUOperand(u, 1, 4, False);
				856	VG_(printf)(", ");
				857	ppUOperand(u, 2, 4, False);
				858	VG_(printf)(" )");
				859	break;
				860
				861	case CALLM_S: case CALLM_E:
				862	break;
				863
				864	case INCEIP:
				865	VG_(printf)("\t$%d", u->val1);
				866	break;
				867
				868	case LEA2:
				869	VG_(printf)("\t%d(" , u->lit32);
				870	ppUOperand(u, 1, 4, False);
				871	VG_(printf)(",");
				872	ppUOperand(u, 2, 4, False);
				873	VG_(printf)(",%d), ", (Int)u->extra4b);
				874	ppUOperand(u, 3, 4, False);
				875	break;
				876
				877	case LEA1:
				878	VG_(printf)("\t%d" , u->lit32);
				879	ppUOperand(u, 1, 4, True);
				880	VG_(printf)(", ");
				881	ppUOperand(u, 2, 4, False);
				882	break;
				883
				884	case NOP:
				885	break;
				886
				887	case FPU_W:
				888	VG_(printf)("\t0x%x:0x%x, ",
				889	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				890	ppUOperand(u, 2, 4, True);
				891	break;
				892
				893	case FPU_R:
				894	VG_(printf)("\t");
				895	ppUOperand(u, 2, 4, True);
				896	VG_(printf)(", 0x%x:0x%x",
				897	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				898	break;
				899
				900	case FPU:
				901	VG_(printf)("\t0x%x:0x%x",
				902	(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
				903	break;
				904
				905	case STOREV: case LOADV:
				906	case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
				907	VG_(printf)("\t");
				908	ppUOperand(u, 1, u->size, u->opcode==LOAD \|\| u->opcode==LOADV);
				909	VG_(printf)(", ");
				910	ppUOperand(u, 2, u->size, u->opcode==STORE \|\| u->opcode==STOREV);
				911	break;
				912
				913	case GETF: case PUTF:
				914	VG_(printf)("\t");
				915	ppUOperand(u, 1, u->size, False);
				916	break;
				917
				918	case JMP: case CC2VAL:
				919	case PUSH: case POP: case CLEAR: case CALLM:
sewardj	2e93c50	2002-04-12 11:12:52 +0000	[diff] [blame]	920	if (u->opcode == JMP) {
				921	switch (u->jmpkind) {
				922	case JmpCall: VG_(printf)("-c"); break;
				923	case JmpRet: VG_(printf)("-r"); break;
				924	case JmpSyscall: VG_(printf)("-sys"); break;
				925	case JmpClientReq: VG_(printf)("-cli"); break;
				926	default: break;
				927	}
				928	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	929	VG_(printf)("\t");
				930	ppUOperand(u, 1, u->size, False);
				931	break;
				932
				933	case JIFZ:
				934	VG_(printf)("\t");
				935	ppUOperand(u, 1, u->size, False);
				936	VG_(printf)(", ");
				937	ppUOperand(u, 2, u->size, False);
				938	break;
				939
				940	case PUTVF: case GETVF:
				941	VG_(printf)("\t");
				942	ppUOperand(u, 1, 0, False);
				943	break;
				944
				945	case NOT: case NEG: case INC: case DEC: case BSWAP:
				946	VG_(printf)("\t");
				947	ppUOperand(u, 1, u->size, False);
				948	break;
				949
				950	case ADD: case ADC: case AND: case OR:
				951	case XOR: case SUB: case SBB:
				952	case SHL: case SHR: case SAR:
				953	case ROL: case ROR: case RCL: case RCR:
				954	VG_(printf)("\t");
				955	ppUOperand(u, 1, u->size, False);
				956	VG_(printf)(", ");
				957	ppUOperand(u, 2, u->size, False);
				958	break;
				959
				960	case GETV: case PUTV:
				961	VG_(printf)("\t");
				962	ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
				963	VG_(printf)(", ");
				964	ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
				965	break;
				966
				967	case WIDEN:
				968	VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
				969	u->signed_widen?'s':'z');
				970	VG_(printf)("\t");
				971	ppUOperand(u, 1, u->size, False);
				972	break;
				973
				974	case TESTV: case SETV:
				975	VG_(printf)("\t");
				976	ppUOperand(u, 1, u->size, False);
				977	break;
				978
				979	default: VG_(panic)("ppUInstr: unhandled opcode");
				980	}
				981
				982	if (u->flags_r != FlagsEmpty \|\| u->flags_w != FlagsEmpty) {
				983	VG_(printf)(" (");
				984	if (u->flags_r != FlagsEmpty)
				985	vg_ppFlagSet("-r", u->flags_r);
				986	if (u->flags_w != FlagsEmpty)
				987	vg_ppFlagSet("-w", u->flags_w);
				988	VG_(printf)(")");
				989	}
				990	VG_(printf)("\n");
				991	}
				992
				993
				994	void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
				995	{
				996	Int i;
				997	VG_(printf)("\n%s\n", title);
				998	for (i = 0; i < cb->used; i++)
				999	if (0 \|\| cb->instrs[i].opcode != NOP)
				1000	VG_(ppUInstr) ( i, &cb->instrs[i] );
				1001	VG_(printf)("\n");
				1002	}
				1003
				1004
				1005	/------------------------------------------------------------/
				1006	/--- uinstr helpers for register allocation ---/
				1007	/--- and code improvement. ---/
				1008	/------------------------------------------------------------/
				1009
				1010	/* A structure for communicating temp uses, and for indicating
				1011	temp->real register mappings for patchUInstr. */
				1012	typedef
				1013	struct {
				1014	Int realNo;
				1015	Int tempNo;
				1016	Bool isWrite;
				1017	}
				1018	TempUse;
				1019
				1020
				1021	/* Get the temp use of a uinstr, parking them in an array supplied by
				1022	the caller, which is assumed to be big enough. Return the number
				1023	of entries. Insns which read _and_ write a register wind up
				1024	mentioning it twice. Entries are placed in the array in program
				1025	order, so that if a reg is read-modified-written, it appears first
				1026	as a read and then as a write.
				1027	*/
				1028	static __inline__
				1029	Int getTempUsage ( UInstr* u, TempUse* arr )
				1030	{
				1031
				1032	# define RD(ono) \
				1033	if (mycat(u->tag,ono) == TempReg) \
				1034	{ arr[n].tempNo = mycat(u->val,ono); \
				1035	arr[n].isWrite = False; n++; }
				1036	# define WR(ono) \
				1037	if (mycat(u->tag,ono) == TempReg) \
				1038	{ arr[n].tempNo = mycat(u->val,ono); \
				1039	arr[n].isWrite = True; n++; }
				1040
				1041	Int n = 0;
				1042	switch (u->opcode) {
				1043	case LEA1: RD(1); WR(2); break;
				1044	case LEA2: RD(1); RD(2); WR(3); break;
				1045
				1046	case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
				1047	case FPU_R: case FPU_W: RD(2); break;
				1048
				1049	case GETF: WR(1); break;
				1050	case PUTF: RD(1); break;
				1051
				1052	case GET: WR(2); break;
				1053	case PUT: RD(1); break;
				1054	case LOAD: RD(1); WR(2); break;
				1055	case STORE: RD(1); RD(2); break;
				1056	case MOV: RD(1); WR(2); break;
				1057
				1058	case JMP: RD(1); break;
				1059	case CLEAR: case CALLM: break;
				1060
				1061	case PUSH: RD(1); break;
				1062	case POP: WR(1); break;
				1063
				1064	case TAG2:
				1065	case CMOV:
				1066	case ADD: case ADC: case AND: case OR:
				1067	case XOR: case SUB: case SBB:
				1068	RD(1); RD(2); WR(2); break;
				1069
				1070	case SHL: case SHR: case SAR:
				1071	case ROL: case ROR: case RCL: case RCR:
				1072	RD(1); RD(2); WR(2); break;
				1073
				1074	case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
				1075	RD(1); WR(1); break;
				1076
				1077	case WIDEN: RD(1); WR(1); break;
				1078
				1079	case CC2VAL: WR(1); break;
				1080	case JIFZ: RD(1); break;
				1081
				1082	/* These sizes are only ever consulted when the instrumentation
				1083	code is being added, so the following can return
				1084	manifestly-bogus sizes. */
				1085	case LOADV: RD(1); WR(2); break;
				1086	case STOREV: RD(1); RD(2); break;
				1087	case GETV: WR(2); break;
				1088	case PUTV: RD(1); break;
				1089	case TESTV: RD(1); break;
				1090	case SETV: WR(1); break;
				1091	case PUTVF: RD(1); break;
				1092	case GETVF: WR(1); break;
				1093
				1094	default: VG_(panic)("getTempUsage: unhandled opcode");
				1095	}
				1096	return n;
				1097
				1098	# undef RD
				1099	# undef WR
				1100	}
				1101
				1102
				1103	/* Change temp regs in u into real regs, as directed by tmap. */
				1104	static __inline__
				1105	void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
				1106	{
				1107	Int i;
				1108	if (u->tag1 == TempReg) {
				1109	for (i = 0; i < n_tmap; i++)
				1110	if (tmap[i].tempNo == u->val1) break;
				1111	if (i == n_tmap) VG_(panic)("patchUInstr(1)");
				1112	u->tag1 = RealReg;
				1113	u->val1 = tmap[i].realNo;
				1114	}
				1115	if (u->tag2 == TempReg) {
				1116	for (i = 0; i < n_tmap; i++)
				1117	if (tmap[i].tempNo == u->val2) break;
				1118	if (i == n_tmap) VG_(panic)("patchUInstr(2)");
				1119	u->tag2 = RealReg;
				1120	u->val2 = tmap[i].realNo;
				1121	}
				1122	if (u->tag3 == TempReg) {
				1123	for (i = 0; i < n_tmap; i++)
				1124	if (tmap[i].tempNo == u->val3) break;
				1125	if (i == n_tmap) VG_(panic)("patchUInstr(3)");
				1126	u->tag3 = RealReg;
				1127	u->val3 = tmap[i].realNo;
				1128	}
				1129	}
				1130
				1131
				1132	/* Tedious x86-specific hack which compensates for the fact that the
				1133	register numbers for %ah .. %dh do not correspond to those for %eax
				1134	.. %edx. It maps a (reg size, reg no) pair to the number of the
				1135	containing 32-bit reg. */
				1136	static __inline__
				1137	Int containingArchRegOf ( Int sz, Int aregno )
				1138	{
				1139	switch (sz) {
				1140	case 4: return aregno;
				1141	case 2: return aregno;
				1142	case 1: return aregno >= 4 ? aregno-4 : aregno;
				1143	default: VG_(panic)("containingArchRegOf");
				1144	}
				1145	}
				1146
				1147
				1148	/* If u reads an ArchReg, return the number of the containing arch
				1149	reg. Otherwise return -1. Used in redundant-PUT elimination. */
				1150	static __inline__
				1151	Int maybe_uinstrReadsArchReg ( UInstr* u )
				1152	{
				1153	switch (u->opcode) {
				1154	case GET:
				1155	case ADD: case ADC: case AND: case OR:
				1156	case XOR: case SUB: case SBB:
				1157	case SHL: case SHR: case SAR: case ROL:
				1158	case ROR: case RCL: case RCR:
				1159	if (u->tag1 == ArchReg)
				1160	return containingArchRegOf ( u->size, u->val1 );
				1161	else
				1162	return -1;
				1163
				1164	case GETF: case PUTF:
				1165	case CALLM_S: case CALLM_E:
				1166	case INCEIP:
				1167	case LEA1:
				1168	case LEA2:
				1169	case NOP:
				1170	case PUT:
				1171	case LOAD:
				1172	case STORE:
				1173	case MOV:
				1174	case CMOV:
				1175	case JMP:
				1176	case CALLM: case CLEAR: case PUSH: case POP:
				1177	case NOT: case NEG: case INC: case DEC: case BSWAP:
				1178	case CC2VAL:
				1179	case JIFZ:
				1180	case FPU: case FPU_R: case FPU_W:
				1181	case WIDEN:
				1182	return -1;
				1183
				1184	default:
				1185	VG_(ppUInstr)(0,u);
				1186	VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
				1187	}
				1188	}
				1189
				1190	static __inline__
				1191	Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
				1192	{
				1193	Int i, k;
				1194	TempUse tempUse[3];
				1195	k = getTempUsage ( u, &tempUse[0] );
				1196	for (i = 0; i < k; i++)
				1197	if (tempUse[i].tempNo == tempreg)
				1198	return True;
				1199	return False;
				1200	}
				1201
				1202
				1203	/------------------------------------------------------------/
				1204	/--- ucode improvement. ---/
				1205	/------------------------------------------------------------/
				1206
				1207	/* Improve the code in cb by doing
				1208	-- Redundant ArchReg-fetch elimination
				1209	-- Redundant PUT elimination
				1210	-- Redundant cond-code restore/save elimination
				1211	The overall effect of these is to allow target registers to be
				1212	cached in host registers over multiple target insns.
				1213	*/
				1214	static void vg_improve ( UCodeBlock* cb )
				1215	{
				1216	Int i, j, k, m, n, ar, tr, told, actual_areg;
				1217	Int areg_map[8];
				1218	Bool annul_put[8];
				1219	TempUse tempUse[3];
				1220	UInstr* u;
				1221	Bool wr;
				1222	Int* last_live_before;
				1223	FlagSet future_dead_flags;
				1224
				1225	if (cb->nextTemp > 0)
				1226	last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
				1227	else
				1228	last_live_before = NULL;
				1229
				1230
				1231	/* PASS 1: redundant GET elimination. (Actually, more general than
				1232	that -- eliminates redundant fetches of ArchRegs). */
				1233
				1234	/* Find the live-range-ends for all temporaries. Duplicates code
				1235	in the register allocator :-( */
				1236
				1237	for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
				1238
				1239	for (i = cb->used-1; i >= 0; i--) {
				1240	u = &cb->instrs[i];
				1241
				1242	k = getTempUsage(u, &tempUse[0]);
				1243
				1244	/* For each temp usage ... bwds in program order. */
				1245	for (j = k-1; j >= 0; j--) {
				1246	tr = tempUse[j].tempNo;
				1247	wr = tempUse[j].isWrite;
				1248	if (last_live_before[tr] == -1) {
				1249	vg_assert(tr >= 0 && tr < cb->nextTemp);
				1250	last_live_before[tr] = wr ? (i+1) : i;
				1251	}
				1252	}
				1253
				1254	}
				1255
				1256	# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
				1257	{ Int q; \
				1258	/* Invalidate any old binding(s) to tempreg. */ \
				1259	for (q = 0; q < 8; q++) \
				1260	if (areg_map[q] == tempreg) areg_map[q] = -1; \
				1261	/* Add the new binding. */ \
				1262	areg_map[archreg] = (tempreg); \
				1263	}
				1264
				1265	/* Set up the A-reg map. */
				1266	for (i = 0; i < 8; i++) areg_map[i] = -1;
				1267
				1268	/* Scan insns. */
				1269	for (i = 0; i < cb->used; i++) {
				1270	u = &cb->instrs[i];
				1271	if (u->opcode == GET && u->size == 4) {
				1272	/* GET; see if it can be annulled. */
				1273	vg_assert(u->tag1 == ArchReg);
				1274	vg_assert(u->tag2 == TempReg);
				1275	ar = u->val1;
				1276	tr = u->val2;
				1277	told = areg_map[ar];
				1278	if (told != -1 && last_live_before[told] <= i) {
				1279	/* ar already has an old mapping to told, but that runs
				1280	out here. Annul this GET, rename tr to told for the
				1281	rest of the block, and extend told's live range to that
				1282	of tr. */
				1283	u->opcode = NOP;
				1284	u->tag1 = u->tag2 = NoValue;
				1285	n = last_live_before[tr] + 1;
				1286	if (n > cb->used) n = cb->used;
				1287	last_live_before[told] = last_live_before[tr];
				1288	last_live_before[tr] = i-1;
				1289	if (VG_(disassemble))
				1290	VG_(printf)(
				1291	"at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
				1292	i, tr, told,i+1, n-1);
				1293	for (m = i+1; m < n; m++) {
				1294	if (cb->instrs[m].tag1 == TempReg
				1295	&& cb->instrs[m].val1 == tr)
				1296	cb->instrs[m].val1 = told;
				1297	if (cb->instrs[m].tag2 == TempReg
				1298	&& cb->instrs[m].val2 == tr)
				1299	cb->instrs[m].val2 = told;
				1300	}
				1301	BIND_ARCH_TO_TEMP(ar,told);
				1302	}
				1303	else
				1304	BIND_ARCH_TO_TEMP(ar,tr);
				1305	}
				1306	else if (u->opcode == GET && u->size != 4) {
				1307	/* Invalidate any mapping for this archreg. */
				1308	actual_areg = containingArchRegOf ( u->size, u->val1 );
				1309	areg_map[actual_areg] = -1;
				1310	}
				1311	else if (u->opcode == PUT && u->size == 4) {
				1312	/* PUT; re-establish t -> a binding */
				1313	vg_assert(u->tag1 == TempReg);
				1314	vg_assert(u->tag2 == ArchReg);
				1315	BIND_ARCH_TO_TEMP(u->val2, u->val1);
				1316	}
				1317	else if (u->opcode == PUT && u->size != 4) {
				1318	/* Invalidate any mapping for this archreg. */
				1319	actual_areg = containingArchRegOf ( u->size, u->val2 );
				1320	areg_map[actual_areg] = -1;
				1321	} else {
				1322
				1323	/* see if insn has an archreg as a read operand; if so try to
				1324	map it. */
				1325	if (u->tag1 == ArchReg && u->size == 4
				1326	&& areg_map[u->val1] != -1) {
				1327	switch (u->opcode) {
				1328	case ADD: case SUB: case AND: case OR: case XOR:
				1329	case ADC: case SBB:
				1330	case SHL: case SHR: case SAR: case ROL: case ROR:
				1331	case RCL: case RCR:
				1332	if (VG_(disassemble))
				1333	VG_(printf)(
				1334	"at %d: change ArchReg %S to TempReg t%d\n",
				1335	i, nameIReg(4,u->val1), areg_map[u->val1]);
				1336	u->tag1 = TempReg;
				1337	u->val1 = areg_map[u->val1];
				1338	/* Remember to extend the live range of the TempReg,
				1339	if necessary. */
				1340	if (last_live_before[u->val1] < i)
				1341	last_live_before[u->val1] = i;
				1342	break;
				1343	default:
				1344	break;
				1345	}
				1346	}
				1347
				1348	/* boring insn; invalidate any mappings to temps it writes */
				1349	k = getTempUsage(u, &tempUse[0]);
				1350
				1351	for (j = 0; j < k; j++) {
				1352	wr = tempUse[j].isWrite;
				1353	if (!wr) continue;
				1354	tr = tempUse[j].tempNo;
				1355	for (m = 0; m < 8; m++)
				1356	if (areg_map[m] == tr) areg_map[m] = -1;
				1357	}
				1358	}
				1359
				1360	}
				1361
				1362	# undef BIND_ARCH_TO_TEMP
				1363
				1364	/* PASS 2: redundant PUT elimination. If doing instrumentation,
				1365	don't annul (delay) puts of %ESP, since the memory check
				1366	machinery always requires the in-memory value of %ESP to be up
				1367	to date.
				1368	*/
				1369	for (j = 0; j < 8; j++)
				1370	annul_put[j] = False;
				1371
				1372	for (i = cb->used-1; i >= 0; i--) {
				1373	u = &cb->instrs[i];
				1374	if (u->opcode == NOP) continue;
				1375
				1376	if (u->opcode == PUT && u->size == 4) {
				1377	vg_assert(u->tag2 == ArchReg);
				1378	actual_areg = containingArchRegOf ( 4, u->val2 );
				1379	if (annul_put[actual_areg]) {
				1380	u->opcode = NOP;
				1381	u->tag1 = u->tag2 = NoValue;
				1382	if (VG_(disassemble))
				1383	VG_(printf)("at %d: delete PUT\n", i );
				1384	} else {
				1385	if (!(VG_(clo_instrument) && actual_areg == R_ESP))
				1386	annul_put[actual_areg] = True;
				1387	}
				1388	}
				1389	else if (u->opcode == PUT && u->size != 4) {
				1390	actual_areg = containingArchRegOf ( u->size, u->val2 );
				1391	annul_put[actual_areg] = False;
				1392	}
				1393	else if (u->opcode == JMP \|\| u->opcode == JIFZ
				1394	\|\| u->opcode == CALLM) {
				1395	for (j = 0; j < 8; j++)
				1396	annul_put[j] = False;
				1397	}
				1398	else {
				1399	/* If an instruction reads an ArchReg, the immediately
				1400	preceding PUT cannot be annulled. */
				1401	actual_areg = maybe_uinstrReadsArchReg ( u );
				1402	if (actual_areg != -1)
				1403	annul_put[actual_areg] = False;
				1404	}
				1405	}
				1406
				1407	/* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
				1408	dead after this point, annul the MOV insn and rename t2 to t1.
				1409	Further modifies the last_live_before map. */
				1410
				1411	# if 0
				1412	VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
				1413	for (i = 0; i < cb->nextTemp; i++)
				1414	VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
				1415	VG_(printf)("\n");
				1416	# endif
				1417
				1418	for (i = 0; i < cb->used-1; i++) {
				1419	u = &cb->instrs[i];
				1420	if (u->opcode != MOV) continue;
				1421	if (u->tag1 == Literal) continue;
				1422	vg_assert(u->tag1 == TempReg);
				1423	vg_assert(u->tag2 == TempReg);
				1424	if (last_live_before[u->val1] == i) {
				1425	if (VG_(disassemble))
				1426	VG_(printf)(
				1427	"at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
				1428	i, u->val2, u->val1, i+1, last_live_before[u->val2] );
				1429	for (j = i+1; j <= last_live_before[u->val2]; j++) {
				1430	if (cb->instrs[j].tag1 == TempReg
				1431	&& cb->instrs[j].val1 == u->val2)
				1432	cb->instrs[j].val1 = u->val1;
				1433	if (cb->instrs[j].tag2 == TempReg
				1434	&& cb->instrs[j].val2 == u->val2)
				1435	cb->instrs[j].val2 = u->val1;
				1436	}
				1437	last_live_before[u->val1] = last_live_before[u->val2];
				1438	last_live_before[u->val2] = i-1;
				1439	u->opcode = NOP;
				1440	u->tag1 = u->tag2 = NoValue;
				1441	}
				1442	}
				1443
				1444	/* PASS 3: redundant condition-code restore/save elimination.
				1445	Scan backwards from the end. future_dead_flags records the set
				1446	of flags which are dead at this point, that is, will be written
				1447	before they are next read. Earlier uinsns which write flags
				1448	already in future_dead_flags can have their writes annulled.
				1449	*/
				1450	future_dead_flags = FlagsEmpty;
				1451
				1452	for (i = cb->used-1; i >= 0; i--) {
				1453	u = &cb->instrs[i];
				1454
				1455	/* We might never make it to insns beyond this one, so be
				1456	conservative. */
				1457	if (u->opcode == JIFZ \|\| u->opcode == JMP) {
				1458	future_dead_flags = FlagsEmpty;
				1459	continue;
				1460	}
				1461
				1462	/* We can annul the flags written by this insn if it writes a
				1463	subset (or eq) of the set of flags known to be dead after
				1464	this insn. If not, just record the flags also written by
				1465	this insn.*/
				1466	if (u->flags_w != FlagsEmpty
				1467	&& VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
				1468	if (VG_(disassemble)) {
				1469	VG_(printf)("at %d: annul flag write ", i);
				1470	vg_ppFlagSet("", u->flags_w);
				1471	VG_(printf)(" due to later ");
				1472	vg_ppFlagSet("", future_dead_flags);
				1473	VG_(printf)("\n");
				1474	}
				1475	u->flags_w = FlagsEmpty;
				1476	} else {
				1477	future_dead_flags
				1478	= VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
				1479	}
				1480
				1481	/* If this insn also reads flags, empty out future_dead_flags so
				1482	as to force preceding writes not to be annulled. */
				1483	if (u->flags_r != FlagsEmpty)
				1484	future_dead_flags = FlagsEmpty;
				1485	}
				1486
				1487	if (last_live_before)
				1488	VG_(jitfree) ( last_live_before );
				1489	}
				1490
				1491
				1492	/------------------------------------------------------------/
				1493	/--- The new register allocator. ---/
				1494	/------------------------------------------------------------/
				1495
				1496	typedef
				1497	struct {
				1498	/* Becomes live for the first time after this insn ... */
				1499	Int live_after;
				1500	/* Becomes dead for the last time after this insn ... */
				1501	Int dead_before;
				1502	/* The "home" spill slot, if needed. Never changes. */
				1503	Int spill_no;
				1504	/* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
				1505	Int real_no;
				1506	}
				1507	TempInfo;
				1508
				1509
				1510	/* Take a ucode block and allocate its TempRegs to RealRegs, or put
				1511	them in spill locations, and add spill code, if there are not
				1512	enough real regs. The usual register allocation deal, in short.
				1513
				1514	Important redundancy of representation:
				1515
				1516	real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
				1517	to VG_NOVALUE if the real reg has no currently assigned TempReg.
				1518
				1519	The .real_no field of a TempInfo gives the current RRR for
				1520	this TempReg, or VG_NOVALUE if the TempReg is currently
				1521	in memory, in which case it is in the SpillNo denoted by
				1522	spillno.
				1523
				1524	These pieces of information (a fwds-bwds mapping, really) must
				1525	be kept consistent!
				1526
				1527	This allocator uses the so-called Second Chance Bin Packing
				1528	algorithm, as described in "Quality and Speed in Linear-scan
				1529	Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
				1530	pp142-151). It is simple and fast and remarkably good at
				1531	minimising the amount of spill code introduced.
				1532	*/
				1533
				1534	static
				1535	UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
				1536	{
				1537	TempInfo* temp_info;
				1538	Int real_to_temp[VG_MAX_REALREGS];
				1539	Bool is_spill_cand[VG_MAX_REALREGS];
				1540	Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
				1541	Int i, j, k, m, r, tno, max_ss_no;
				1542	Bool wr, defer, isRead, spill_reqd;
				1543	TempUse tempUse[3];
				1544	UCodeBlock* c2;
				1545
				1546	/* Used to denote ... well, "no value" in this fn. */
				1547	# define VG_NOTHING (-2)
				1548
				1549	/* Initialise the TempReg info. */
				1550	if (c1->nextTemp > 0)
				1551	temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
				1552	else
				1553	temp_info = NULL;
				1554
				1555	for (i = 0; i < c1->nextTemp; i++) {
				1556	temp_info[i].live_after = VG_NOTHING;
				1557	temp_info[i].dead_before = VG_NOTHING;
				1558	temp_info[i].spill_no = VG_NOTHING;
				1559	/* temp_info[i].real_no is not yet relevant. */
				1560	}
				1561
				1562	spill_reqd = False;
				1563
				1564	/* Scan fwds to establish live ranges. */
				1565
				1566	for (i = 0; i < c1->used; i++) {
				1567	k = getTempUsage(&c1->instrs[i], &tempUse[0]);
				1568	vg_assert(k >= 0 && k <= 3);
				1569
				1570	/* For each temp usage ... fwds in program order */
				1571	for (j = 0; j < k; j++) {
				1572	tno = tempUse[j].tempNo;
				1573	wr = tempUse[j].isWrite;
				1574	if (wr) {
				1575	/* Writes hold a reg live until after this insn. */
				1576	if (temp_info[tno].live_after == VG_NOTHING)
				1577	temp_info[tno].live_after = i;
				1578	if (temp_info[tno].dead_before < i + 1)
				1579	temp_info[tno].dead_before = i + 1;
				1580	} else {
				1581	/* First use of a tmp should be a write. */
				1582	vg_assert(temp_info[tno].live_after != VG_NOTHING);
				1583	/* Reads only hold it live until before this insn. */
				1584	if (temp_info[tno].dead_before < i)
				1585	temp_info[tno].dead_before = i;
				1586	}
				1587	}
				1588	}
				1589
				1590	# if 0
				1591	/* Sanity check on live ranges. Expensive but correct. */
				1592	for (i = 0; i < c1->nextTemp; i++) {
				1593	vg_assert( (temp_info[i].live_after == VG_NOTHING
				1594	&& temp_info[i].dead_before == VG_NOTHING)
				1595	\|\| (temp_info[i].live_after != VG_NOTHING
				1596	&& temp_info[i].dead_before != VG_NOTHING) );
				1597	}
				1598	# endif
				1599
				1600	/* Do a rank-based allocation of TempRegs to spill slot numbers.
				1601	We put as few as possible values in spill slots, but
				1602	nevertheless need to have an assignment to them just in case. */
				1603
				1604	max_ss_no = -1;
				1605
				1606	for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
				1607	ss_busy_until_before[i] = 0;
				1608
				1609	for (i = 0; i < c1->nextTemp; i++) {
				1610
				1611	/* True iff this temp is unused. */
				1612	if (temp_info[i].live_after == VG_NOTHING)
				1613	continue;
				1614
				1615	/* Find the lowest-numbered spill slot which is available at the
				1616	start point of this interval, and assign the interval to
				1617	it. */
				1618	for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
				1619	if (ss_busy_until_before[j] <= temp_info[i].live_after)
				1620	break;
				1621	if (j == VG_MAX_SPILLSLOTS) {
				1622	VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
				1623	VG_(panic)("register allocation failed -- out of spill slots");
				1624	}
				1625	ss_busy_until_before[j] = temp_info[i].dead_before;
				1626	temp_info[i].spill_no = j;
				1627	if (j > max_ss_no)
				1628	max_ss_no = j;
				1629	}
				1630
				1631	VG_(total_reg_rank) += (max_ss_no+1);
				1632
				1633	/* Show live ranges and assigned spill slot nos. */
				1634
				1635	if (VG_(disassemble)) {
				1636	VG_(printf)("Live Range Assignments\n");
				1637
				1638	for (i = 0; i < c1->nextTemp; i++) {
				1639	if (temp_info[i].live_after == VG_NOTHING)
				1640	continue;
				1641	VG_(printf)(
				1642	" LR %d is after %d to before %d spillno %d\n",
				1643	i,
				1644	temp_info[i].live_after,
				1645	temp_info[i].dead_before,
				1646	temp_info[i].spill_no
				1647	);
				1648	}
				1649	}
				1650
				1651	/* Now that we've established a spill slot number for each used
				1652	temporary, we can go ahead and do the core of the "Second-chance
				1653	binpacking" allocation algorithm. */
				1654
				1655	/* Resulting code goes here. We generate it all in a forwards
				1656	pass. */
				1657	c2 = allocCodeBlock();
				1658
				1659	/* At the start, no TempRegs are assigned to any real register.
				1660	Correspondingly, all temps claim to be currently resident in
				1661	their spill slots, as computed by the previous two passes. */
				1662	for (i = 0; i < VG_MAX_REALREGS; i++)
				1663	real_to_temp[i] = VG_NOTHING;
				1664	for (i = 0; i < c1->nextTemp; i++)
				1665	temp_info[i].real_no = VG_NOTHING;
				1666
				1667	if (VG_(disassemble))
				1668	VG_(printf)("\n");
				1669
				1670	/* Process each insn in turn. */
				1671	for (i = 0; i < c1->used; i++) {
				1672
				1673	if (c1->instrs[i].opcode == NOP) continue;
				1674	VG_(uinstrs_prealloc)++;
				1675
				1676	# if 0
				1677	/* Check map consistency. Expensive but correct. */
				1678	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1679	if (real_to_temp[r] != VG_NOTHING) {
				1680	tno = real_to_temp[r];
				1681	vg_assert(tno >= 0 && tno < c1->nextTemp);
				1682	vg_assert(temp_info[tno].real_no == r);
				1683	}
				1684	}
				1685	for (tno = 0; tno < c1->nextTemp; tno++) {
				1686	if (temp_info[tno].real_no != VG_NOTHING) {
				1687	r = temp_info[tno].real_no;
				1688	vg_assert(r >= 0 && r < VG_MAX_REALREGS);
				1689	vg_assert(real_to_temp[r] == tno);
				1690	}
				1691	}
				1692	# endif
				1693
				1694	if (VG_(disassemble))
				1695	VG_(ppUInstr)(i, &c1->instrs[i]);
				1696
				1697	/* First, free up enough real regs for this insn. This may
				1698	generate spill stores since we may have to evict some TempRegs
				1699	currently in real regs. Also generates spill loads. */
				1700
				1701	k = getTempUsage(&c1->instrs[i], &tempUse[0]);
				1702	vg_assert(k >= 0 && k <= 3);
				1703
				1704	/* For each *different* temp mentioned in the insn .... */
				1705	for (j = 0; j < k; j++) {
				1706
				1707	/* First check if the temp is mentioned again later; if so,
				1708	ignore this mention. We only want to process each temp
				1709	used by the insn once, even if it is mentioned more than
				1710	once. */
				1711	defer = False;
				1712	tno = tempUse[j].tempNo;
				1713	for (m = j+1; m < k; m++)
				1714	if (tempUse[m].tempNo == tno)
				1715	defer = True;
				1716	if (defer)
				1717	continue;
				1718
				1719	/* Now we're trying to find a register for tempUse[j].tempNo.
				1720	First of all, if it already has a register assigned, we
				1721	don't need to do anything more. */
				1722	if (temp_info[tno].real_no != VG_NOTHING)
				1723	continue;
				1724
				1725	/* No luck. The next thing to do is see if there is a
				1726	currently unassigned register available. If so, bag it. */
				1727	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1728	if (real_to_temp[r] == VG_NOTHING)
				1729	break;
				1730	}
				1731	if (r < VG_MAX_REALREGS) {
				1732	real_to_temp[r] = tno;
				1733	temp_info[tno].real_no = r;
				1734	continue;
				1735	}
				1736
				1737	/* Unfortunately, that didn't pan out either. So we'll have
				1738	to eject some other unfortunate TempReg into a spill slot
				1739	in order to free up a register. Of course, we need to be
				1740	careful not to eject some other TempReg needed by this
				1741	insn.
				1742
				1743	Select r in 0 .. VG_MAX_REALREGS-1 such that
				1744	real_to_temp[r] is not mentioned in
				1745	tempUse[0 .. k-1].tempNo, since it would be just plain
				1746	wrong to eject some other TempReg which we need to use in
				1747	this insn.
				1748
				1749	It is here that it is important to make a good choice of
				1750	register to spill. */
				1751
				1752	/* First, mark those regs which are not spill candidates. */
				1753	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1754	is_spill_cand[r] = True;
				1755	for (m = 0; m < k; m++) {
				1756	if (real_to_temp[r] == tempUse[m].tempNo) {
				1757	is_spill_cand[r] = False;
				1758	break;
				1759	}
				1760	}
				1761	}
				1762
				1763	/* We can choose any r satisfying is_spill_cand[r]. However,
				1764	try to make a good choice. First, try and find r such
				1765	that the associated TempReg is already dead. */
				1766	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1767	if (is_spill_cand[r] &&
				1768	temp_info[real_to_temp[r]].dead_before <= i)
				1769	goto have_spill_cand;
				1770	}
				1771
				1772	/* No spill cand is mapped to a dead TempReg. Now we really
				1773	_do_ have to generate spill code. Choose r so that the
				1774	next use of its associated TempReg is as far ahead as
				1775	possible, in the hope that this will minimise the number of
				1776	consequent reloads required. This is a bit expensive, but
				1777	we don't have to do it very often. */
				1778	{
				1779	Int furthest_r = VG_MAX_REALREGS;
				1780	Int furthest = 0;
				1781	for (r = 0; r < VG_MAX_REALREGS; r++) {
				1782	if (!is_spill_cand[r]) continue;
				1783	for (m = i+1; m < c1->used; m++)
				1784	if (uInstrMentionsTempReg(&c1->instrs[m],
				1785	real_to_temp[r]))
				1786	break;
				1787	if (m > furthest) {
				1788	furthest = m;
				1789	furthest_r = r;
				1790	}
				1791	}
				1792	r = furthest_r;
				1793	goto have_spill_cand;
				1794	}
				1795
				1796	have_spill_cand:
				1797	if (r == VG_MAX_REALREGS)
				1798	VG_(panic)("new reg alloc: out of registers ?!");
				1799
				1800	/* Eject r. Important refinement: don't bother if the
				1801	associated TempReg is now dead. */
				1802	vg_assert(real_to_temp[r] != VG_NOTHING);
				1803	vg_assert(real_to_temp[r] != tno);
				1804	temp_info[real_to_temp[r]].real_no = VG_NOTHING;
				1805	if (temp_info[real_to_temp[r]].dead_before > i) {
				1806	uInstr2(c2, PUT, 4,
				1807	RealReg, VG_(rankToRealRegNo)(r),
				1808	SpillNo, temp_info[real_to_temp[r]].spill_no);
				1809	VG_(uinstrs_spill)++;
				1810	spill_reqd = True;
				1811	if (VG_(disassemble))
				1812	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1813	}
				1814
				1815	/* Decide if tno is read. */
				1816	isRead = False;
				1817	for (m = 0; m < k; m++)
				1818	if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
				1819	isRead = True;
				1820
				1821	/* If so, generate a spill load. */
				1822	if (isRead) {
				1823	uInstr2(c2, GET, 4,
				1824	SpillNo, temp_info[tno].spill_no,
				1825	RealReg, VG_(rankToRealRegNo)(r) );
				1826	VG_(uinstrs_spill)++;
				1827	spill_reqd = True;
				1828	if (VG_(disassemble))
				1829	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1830	}
				1831
				1832	/* Update the forwards and backwards maps. */
				1833	real_to_temp[r] = tno;
				1834	temp_info[tno].real_no = r;
				1835	}
				1836
				1837	/* By this point, all TempRegs mentioned by the insn have been
				1838	bought into real regs. We now copy the insn to the output
				1839	and use patchUInstr to convert its rTempRegs into
				1840	realregs. */
				1841	for (j = 0; j < k; j++)
				1842	tempUse[j].realNo
				1843	= VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
				1844	copyUInstr(c2, &c1->instrs[i]);
				1845	patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
				1846
				1847	if (VG_(disassemble)) {
				1848	VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
				1849	VG_(printf)("\n");
				1850	}
				1851	}
				1852
				1853	if (temp_info != NULL)
				1854	VG_(jitfree)(temp_info);
				1855
				1856	freeCodeBlock(c1);
				1857
				1858	if (spill_reqd)
				1859	VG_(translations_needing_spill)++;
				1860
				1861	return c2;
				1862
				1863	# undef VG_NOTHING
				1864
				1865	}
				1866
				1867
				1868	/------------------------------------------------------------/
				1869	/--- New instrumentation machinery. ---/
				1870	/------------------------------------------------------------/
				1871
				1872	static
				1873	VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
				1874	{
				1875	switch (sz) {
				1876	case 4: return VgT_ImproveOR4_TQ;
				1877	case 2: return VgT_ImproveOR2_TQ;
				1878	case 1: return VgT_ImproveOR1_TQ;
				1879	default: VG_(panic)("get_VgT_ImproveOR_TQ");
				1880	}
				1881	}
				1882
				1883
				1884	static
				1885	VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
				1886	{
				1887	switch (sz) {
				1888	case 4: return VgT_ImproveAND4_TQ;
				1889	case 2: return VgT_ImproveAND2_TQ;
				1890	case 1: return VgT_ImproveAND1_TQ;
				1891	default: VG_(panic)("get_VgT_ImproveAND_TQ");
				1892	}
				1893	}
				1894
				1895
				1896	static
				1897	VgTagOp get_VgT_Left ( Int sz )
				1898	{
				1899	switch (sz) {
				1900	case 4: return VgT_Left4;
				1901	case 2: return VgT_Left2;
				1902	case 1: return VgT_Left1;
				1903	default: VG_(panic)("get_VgT_Left");
				1904	}
				1905	}
				1906
				1907
				1908	static
				1909	VgTagOp get_VgT_UifU ( Int sz )
				1910	{
				1911	switch (sz) {
				1912	case 4: return VgT_UifU4;
				1913	case 2: return VgT_UifU2;
				1914	case 1: return VgT_UifU1;
				1915	case 0: return VgT_UifU0;
				1916	default: VG_(panic)("get_VgT_UifU");
				1917	}
				1918	}
				1919
				1920
				1921	static
				1922	VgTagOp get_VgT_DifD ( Int sz )
				1923	{
				1924	switch (sz) {
				1925	case 4: return VgT_DifD4;
				1926	case 2: return VgT_DifD2;
				1927	case 1: return VgT_DifD1;
				1928	default: VG_(panic)("get_VgT_DifD");
				1929	}
				1930	}
				1931
				1932
				1933	static
				1934	VgTagOp get_VgT_PCast ( Int szs, Int szd )
				1935	{
				1936	if (szs == 4 && szd == 0) return VgT_PCast40;
				1937	if (szs == 2 && szd == 0) return VgT_PCast20;
				1938	if (szs == 1 && szd == 0) return VgT_PCast10;
				1939	if (szs == 0 && szd == 1) return VgT_PCast01;
				1940	if (szs == 0 && szd == 2) return VgT_PCast02;
				1941	if (szs == 0 && szd == 4) return VgT_PCast04;
				1942	if (szs == 1 && szd == 4) return VgT_PCast14;
				1943	if (szs == 1 && szd == 2) return VgT_PCast12;
				1944	if (szs == 1 && szd == 1) return VgT_PCast11;
				1945	VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
				1946	VG_(panic)("get_VgT_PCast");
				1947	}
				1948
				1949
				1950	static
				1951	VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
				1952	{
				1953	if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
				1954	if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
				1955
				1956	if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
				1957	if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
				1958
				1959	if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
				1960	if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
				1961
				1962	VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
				1963	VG_(panic)("get_VgT_Widen");
				1964	}
				1965
				1966	/* Pessimally cast the spec'd shadow from one size to another. */
				1967	static
				1968	void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
				1969	{
				1970	if (szs == 0 && szd == 0)
				1971	return;
				1972	uInstr3(cb, TAG1, 0, TempReg, tempreg,
				1973	NoValue, 0,
				1974	Lit16, get_VgT_PCast(szs,szd));
				1975	}
				1976
				1977
				1978	/* Create a signed or unsigned widen of the spec'd shadow from one
				1979	size to another. The only allowed size transitions are 1->2, 1->4
				1980	and 2->4. */
				1981	static
				1982	void create_Widen ( UCodeBlock* cb, Bool signed_widen,
				1983	Int szs, Int szd, Int tempreg )
				1984	{
				1985	if (szs == szd) return;
				1986	uInstr3(cb, TAG1, 0, TempReg, tempreg,
				1987	NoValue, 0,
				1988	Lit16, get_VgT_Widen(signed_widen,szs,szd));
				1989	}
				1990
				1991
				1992	/* Get the condition codes into a new shadow, at the given size. */
				1993	static
				1994	Int create_GETVF ( UCodeBlock* cb, Int sz )
				1995	{
				1996	Int tt = newShadow(cb);
				1997	uInstr1(cb, GETVF, 0, TempReg, tt);
				1998	create_PCast(cb, 0, sz, tt);
				1999	return tt;
				2000	}
				2001
				2002
				2003	/* Save the condition codes from the spec'd shadow. */
				2004	static
				2005	void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
				2006	{
				2007	if (sz == 0) {
				2008	uInstr1(cb, PUTVF, 0, TempReg, tempreg);
				2009	} else {
				2010	Int tt = newShadow(cb);
				2011	uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
				2012	create_PCast(cb, sz, 0, tt);
				2013	uInstr1(cb, PUTVF, 0, TempReg, tt);
				2014	}
				2015	}
				2016
				2017
				2018	/* Do Left on the spec'd shadow. */
				2019	static
				2020	void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
				2021	{
				2022	uInstr3(cb, TAG1, 0,
				2023	TempReg, tempreg,
				2024	NoValue, 0,
				2025	Lit16, get_VgT_Left(sz));
				2026	}
				2027
				2028
				2029	/* Do UifU on ts and td, putting the result in td. */
				2030	static
				2031	void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
				2032	{
				2033	uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
				2034	Lit16, get_VgT_UifU(sz));
				2035	}
				2036
				2037
				2038	/* Do DifD on ts and td, putting the result in td. */
				2039	static
				2040	void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
				2041	{
				2042	uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
				2043	Lit16, get_VgT_DifD(sz));
				2044	}
				2045
				2046
				2047	/* Do HelpAND on value tval and tag tqqq, putting the result in
				2048	tqqq. */
				2049	static
				2050	void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
				2051	{
				2052	uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
				2053	Lit16, get_VgT_ImproveAND_TQ(sz));
				2054	}
				2055
				2056
				2057	/* Do HelpOR on value tval and tag tqqq, putting the result in
				2058	tqqq. */
				2059	static
				2060	void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
				2061	{
				2062	uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
				2063	Lit16, get_VgT_ImproveOR_TQ(sz));
				2064	}
				2065
				2066
				2067	/* Get the shadow for an operand described by (tag, val). Emit code
				2068	to do this and return the identity of the shadow holding the
				2069	result. The result tag is always copied into a new shadow, so it
				2070	can be modified without trashing the original.*/
				2071	static
				2072	Int /* TempReg / getOperandShadow ( UCodeBlock cb,
				2073	Int sz, Int tag, Int val )
				2074	{
				2075	Int sh;
				2076	sh = newShadow(cb);
				2077	if (tag == TempReg) {
				2078	uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
				2079	return sh;
				2080	}
				2081	if (tag == Literal) {
				2082	uInstr1(cb, SETV, sz, TempReg, sh);
				2083	return sh;
				2084	}
				2085	if (tag == ArchReg) {
				2086	uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
				2087	return sh;
				2088	}
				2089	VG_(panic)("getOperandShadow");
				2090	}
				2091
				2092
				2093
				2094	/* Create and return an instrumented version of cb_in. Free cb_in
				2095	before returning. */
				2096	static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
				2097	{
				2098	UCodeBlock* cb;
				2099	Int i, j;
				2100	UInstr* u_in;
				2101	Int qs, qd, qt, qtt;
				2102	cb = allocCodeBlock();
				2103	cb->nextTemp = cb_in->nextTemp;
				2104
				2105	for (i = 0; i < cb_in->used; i++) {
				2106	qs = qd = qt = qtt = INVALID_TEMPREG;
				2107	u_in = &cb_in->instrs[i];
				2108
				2109	/* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
				2110
				2111	/* VG_(ppUInstr)(0, u_in); */
				2112	switch (u_in->opcode) {
				2113
				2114	case NOP:
				2115	break;
				2116
				2117	case INCEIP:
				2118	copyUInstr(cb, u_in);
				2119	break;
				2120
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2121	/* Loads and stores. Test the V bits for the address. 24
				2122	Mar 02: since the address is A-checked anyway, there's not
				2123	really much point in doing the V-check too, unless you
				2124	think that you might use addresses which are undefined but
				2125	still addressible. Hence the optionalisation of the V
				2126	check.
				2127
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2128	The LOADV/STOREV does an addressibility check for the
				2129	address. */
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2130
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2131	case LOAD:
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2132	if (VG_(clo_check_addrVs)) {
				2133	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2134	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
				2135	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2136	uInstr2(cb, LOADV, u_in->size,
				2137	TempReg, u_in->val1,
				2138	TempReg, SHADOW(u_in->val2));
				2139	copyUInstr(cb, u_in);
				2140	break;
				2141	case STORE:
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2142	if (VG_(clo_check_addrVs)) {
				2143	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
				2144	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
				2145	}
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2146	uInstr2(cb, STOREV, u_in->size,
				2147	TempReg, SHADOW(u_in->val1),
				2148	TempReg, u_in->val2);
				2149	copyUInstr(cb, u_in);
				2150	break;
				2151
				2152	/* Moving stuff around. Make the V bits follow accordingly,
				2153	but don't do anything else. */
				2154
				2155	case GET:
				2156	uInstr2(cb, GETV, u_in->size,
				2157	ArchReg, u_in->val1,
				2158	TempReg, SHADOW(u_in->val2));
				2159	copyUInstr(cb, u_in);
				2160	break;
				2161	case PUT:
				2162	uInstr2(cb, PUTV, u_in->size,
				2163	TempReg, SHADOW(u_in->val1),
				2164	ArchReg, u_in->val2);
				2165	copyUInstr(cb, u_in);
				2166	break;
				2167
				2168	case GETF:
				2169	/* This is not the smartest way to do it, but should work. */
				2170	qd = create_GETVF(cb, u_in->size);
				2171	uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
				2172	copyUInstr(cb, u_in);
				2173	break;
				2174	case PUTF:
				2175	create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
				2176	copyUInstr(cb, u_in);
				2177	break;
				2178
				2179	case MOV:
				2180	switch (u_in->tag1) {
				2181	case TempReg:
				2182	uInstr2(cb, MOV, 4,
				2183	TempReg, SHADOW(u_in->val1),
				2184	TempReg, SHADOW(u_in->val2));
				2185	break;
				2186	case Literal:
				2187	uInstr1(cb, SETV, u_in->size,
				2188	TempReg, SHADOW(u_in->val2));
				2189	break;
				2190	default:
				2191	VG_(panic)("vg_instrument: MOV");
				2192	}
				2193	copyUInstr(cb, u_in);
				2194	break;
				2195
				2196	/* Special case of add, where one of the operands is a literal.
				2197	lea1(t) = t + some literal.
				2198	Therefore: lea1#(qa) = left(qa)
				2199	*/
				2200	case LEA1:
				2201	vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
				2202	qs = SHADOW(u_in->val1);
				2203	qd = SHADOW(u_in->val2);
				2204	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
				2205	create_Left(cb, u_in->size, qd);
				2206	copyUInstr(cb, u_in);
				2207	break;
				2208
				2209	/* Another form of add.
				2210	lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
				2211	and is 0,1,2 or 3.
				2212	lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
				2213	Note, subtly, that the shift puts zeroes at the bottom of qt,
				2214	meaning Valid, since the corresponding shift of tt puts
				2215	zeroes at the bottom of tb.
				2216	*/
				2217	case LEA2: {
				2218	Int shift;
				2219	vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
				2220	switch (u_in->extra4b) {
				2221	case 1: shift = 0; break;
				2222	case 2: shift = 1; break;
				2223	case 4: shift = 2; break;
				2224	case 8: shift = 3; break;
				2225	default: VG_(panic)( "vg_instrument(LEA2)" );
				2226	}
				2227	qs = SHADOW(u_in->val1);
				2228	qt = SHADOW(u_in->val2);
				2229	qd = SHADOW(u_in->val3);
				2230	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
				2231	if (shift > 0) {
				2232	uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
				2233	uLiteral(cb, shift);
				2234	}
				2235	create_UifU(cb, 4, qs, qd);
				2236	create_Left(cb, u_in->size, qd);
				2237	copyUInstr(cb, u_in);
				2238	break;
				2239	}
				2240
				2241	/* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
				2242	case INC: case DEC:
				2243	qd = SHADOW(u_in->val1);
				2244	create_Left(cb, u_in->size, qd);
				2245	if (u_in->flags_w != FlagsEmpty)
				2246	create_PUTVF(cb, u_in->size, qd);
				2247	copyUInstr(cb, u_in);
				2248	break;
				2249
				2250	/* This is a HACK (approximation :-) */
				2251	/* rcl#/rcr#(qs,qd)
				2252	= let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
				2253	eflags# = q0
				2254	qd =pcast-0-sz(q0)
				2255	Ie, cast everything down to a single bit, then back up.
				2256	This assumes that any bad bits infect the whole word and
				2257	the eflags.
				2258	*/
				2259	case RCL: case RCR:
				2260	vg_assert(u_in->flags_r != FlagsEmpty);
				2261	/* The following assertion looks like it makes sense, but is
				2262	actually wrong. Consider this:
				2263	rcll %eax
				2264	imull %eax, %eax
				2265	The rcll writes O and C but so does the imull, so the O and C
				2266	write of the rcll is annulled by the prior improvement pass.
				2267	Noticed by Kevin Ryde <user42@zip.com.au>
				2268	*/
				2269	/* vg_assert(u_in->flags_w != FlagsEmpty); */
				2270	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2271	/* We can safely modify qs; cast it to 0-size. */
				2272	create_PCast(cb, u_in->size, 0, qs);
				2273	qd = SHADOW(u_in->val2);
				2274	create_PCast(cb, u_in->size, 0, qd);
				2275	/* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
				2276	create_UifU(cb, 0, qs, qd);
				2277	/* qs is now free; reuse it for the flag definedness. */
				2278	qs = create_GETVF(cb, 0);
				2279	create_UifU(cb, 0, qs, qd);
				2280	create_PUTVF(cb, 0, qd);
				2281	create_PCast(cb, 0, u_in->size, qd);
				2282	copyUInstr(cb, u_in);
				2283	break;
				2284
				2285	/* for OP in shl shr sar rol ror
				2286	(qs is shift count#, qd is value to be OP#d)
				2287	OP(ts,td)
				2288	OP#(qs,qd)
				2289	= pcast-1-sz(qs) `UifU` OP(ts,qd)
				2290	So we apply OP to the tag bits too, and then UifU with
				2291	the shift count# to take account of the possibility of it
				2292	being undefined.
				2293
				2294	A bit subtle:
				2295	ROL/ROR rearrange the tag bits as per the value bits.
				2296	SHL/SHR shifts zeroes into the value, and corresponding
				2297	zeroes indicating Definedness into the tag.
				2298	SAR copies the top bit of the value downwards, and therefore
				2299	SAR also copies the definedness of the top bit too.
				2300	So in all five cases, we just apply the same op to the tag
				2301	bits as is applied to the value bits. Neat!
				2302	*/
				2303	case SHL:
				2304	case SHR: case SAR:
				2305	case ROL: case ROR: {
				2306	Int t_amount = INVALID_TEMPREG;
				2307	vg_assert(u_in->tag1 == TempReg \|\| u_in->tag1 == Literal);
				2308	vg_assert(u_in->tag2 == TempReg);
				2309	qd = SHADOW(u_in->val2);
				2310
				2311	/* Make qs hold shift-count# and make
				2312	t_amount be a TempReg holding the shift count. */
				2313	if (u_in->tag1 == Literal) {
				2314	t_amount = newTemp(cb);
				2315	uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
				2316	uLiteral(cb, u_in->lit32);
				2317	qs = SHADOW(t_amount);
				2318	uInstr1(cb, SETV, 1, TempReg, qs);
				2319	} else {
				2320	t_amount = u_in->val1;
				2321	qs = SHADOW(u_in->val1);
				2322	}
				2323
				2324	uInstr2(cb, u_in->opcode,
				2325	u_in->size,
				2326	TempReg, t_amount,
				2327	TempReg, qd);
				2328	qt = newShadow(cb);
				2329	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
				2330	create_PCast(cb, 1, u_in->size, qt);
				2331	create_UifU(cb, u_in->size, qt, qd);
				2332	copyUInstr(cb, u_in);
				2333	break;
				2334	}
				2335
				2336	/* One simple tag operation. */
				2337	case WIDEN:
				2338	vg_assert(u_in->tag1 == TempReg);
				2339	create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
				2340	SHADOW(u_in->val1));
				2341	copyUInstr(cb, u_in);
				2342	break;
				2343
				2344	/* not#(x) = x (since bitwise independent) */
				2345	case NOT:
				2346	vg_assert(u_in->tag1 == TempReg);
				2347	copyUInstr(cb, u_in);
				2348	break;
				2349
				2350	/* neg#(x) = left(x) (derivable from case for SUB) */
				2351	case NEG:
				2352	vg_assert(u_in->tag1 == TempReg);
				2353	create_Left(cb, u_in->size, SHADOW(u_in->val1));
				2354	copyUInstr(cb, u_in);
				2355	break;
				2356
				2357	/* bswap#(x) = bswap(x) */
				2358	case BSWAP:
				2359	vg_assert(u_in->tag1 == TempReg);
				2360	vg_assert(u_in->size == 4);
				2361	qd = SHADOW(u_in->val1);
				2362	uInstr1(cb, BSWAP, 4, TempReg, qd);
				2363	copyUInstr(cb, u_in);
				2364	break;
				2365
				2366	/* cc2val#(qd) = pcast-0-to-size(eflags#) */
				2367	case CC2VAL:
				2368	vg_assert(u_in->tag1 == TempReg);
				2369	vg_assert(u_in->flags_r != FlagsEmpty);
				2370	qt = create_GETVF(cb, u_in->size);
				2371	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
				2372	copyUInstr(cb, u_in);
				2373	break;
				2374
				2375	/* cmov#(qs,qd) = cmov(qs,qd)
				2376	That is, do the cmov of tags using the same flags as for
				2377	the data (obviously). However, first do a test on the
				2378	validity of the flags.
				2379	*/
				2380	case CMOV:
				2381	vg_assert(u_in->size == 4);
				2382	vg_assert(u_in->tag1 == TempReg);
				2383	vg_assert(u_in->tag2 == TempReg);
				2384	vg_assert(u_in->flags_r != FlagsEmpty);
				2385	vg_assert(u_in->flags_w == FlagsEmpty);
				2386	qs = SHADOW(u_in->val1);
				2387	qd = SHADOW(u_in->val2);
				2388	qt = create_GETVF(cb, 0);
				2389	uInstr1(cb, TESTV, 0, TempReg, qt);
				2390	/* qt should never be referred to again. Nevertheless
				2391	... */
				2392	uInstr1(cb, SETV, 0, TempReg, qt);
				2393
				2394	uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
				2395	LAST_UINSTR(cb).cond = u_in->cond;
				2396	LAST_UINSTR(cb).flags_r = u_in->flags_r;
				2397
				2398	copyUInstr(cb, u_in);
				2399	break;
				2400
				2401	/* add#/sub#(qs,qd)
				2402	= qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
				2403	= left(qs) `UifU` left(qd)
				2404	= left(qs `UifU` qd)
				2405	adc#/sbb#(qs,qd)
				2406	= left(qs `UifU` qd) `UifU` pcast(eflags#)
				2407	Second arg (dest) is TempReg.
				2408	First arg (src) is Literal or TempReg or ArchReg.
				2409	*/
				2410	case ADD: case SUB:
				2411	case ADC: case SBB:
				2412	qd = SHADOW(u_in->val2);
				2413	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2414	create_UifU(cb, u_in->size, qs, qd);
				2415	create_Left(cb, u_in->size, qd);
				2416	if (u_in->opcode == ADC \|\| u_in->opcode == SBB) {
				2417	vg_assert(u_in->flags_r != FlagsEmpty);
				2418	qt = create_GETVF(cb, u_in->size);
				2419	create_UifU(cb, u_in->size, qt, qd);
				2420	}
				2421	if (u_in->flags_w != FlagsEmpty) {
				2422	create_PUTVF(cb, u_in->size, qd);
				2423	}
				2424	copyUInstr(cb, u_in);
				2425	break;
				2426
				2427	/* xor#(qs,qd) = qs `UifU` qd */
				2428	case XOR:
				2429	qd = SHADOW(u_in->val2);
				2430	qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
				2431	create_UifU(cb, u_in->size, qs, qd);
				2432	if (u_in->flags_w != FlagsEmpty) {
				2433	create_PUTVF(cb, u_in->size, qd);
				2434	}
				2435	copyUInstr(cb, u_in);
				2436	break;
				2437
				2438	/* and#/or#(qs,qd)
				2439	= (qs `UifU` qd) `DifD` improve(vs,qs)
				2440	`DifD` improve(vd,qd)
				2441	where improve is the relevant one of
				2442	Improve{AND,OR}_TQ
				2443	Use the following steps, with qt as a temp:
				2444	qt = improve(vd,qd)
				2445	qd = qs `UifU` qd
				2446	qd = qt `DifD` qd
				2447	qt = improve(vs,qs)
				2448	qd = qt `DifD` qd
				2449	*/
				2450	case AND: case OR:
				2451	vg_assert(u_in->tag1 == TempReg);
				2452	vg_assert(u_in->tag2 == TempReg);
				2453	qd = SHADOW(u_in->val2);
				2454	qs = SHADOW(u_in->val1);
				2455	qt = newShadow(cb);
				2456
				2457	/* qt = improve(vd,qd) */
				2458	uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
				2459	if (u_in->opcode == AND)
				2460	create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
				2461	else
				2462	create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
				2463	/* qd = qs `UifU` qd */
				2464	create_UifU(cb, u_in->size, qs, qd);
				2465	/* qd = qt `DifD` qd */
				2466	create_DifD(cb, u_in->size, qt, qd);
				2467	/* qt = improve(vs,qs) */
				2468	uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
				2469	if (u_in->opcode == AND)
				2470	create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
				2471	else
				2472	create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
				2473	/* qd = qt `DifD` qd */
				2474	create_DifD(cb, u_in->size, qt, qd);
				2475	/* So, finally qd is the result tag. */
				2476	if (u_in->flags_w != FlagsEmpty) {
				2477	create_PUTVF(cb, u_in->size, qd);
				2478	}
				2479	copyUInstr(cb, u_in);
				2480	break;
				2481
				2482	/* Machinery to do with supporting CALLM. Copy the start and
				2483	end markers only to make the result easier to read
				2484	(debug); they generate no code and have no effect.
				2485	*/
				2486	case CALLM_S: case CALLM_E:
				2487	copyUInstr(cb, u_in);
				2488	break;
				2489
				2490	/* Copy PUSH and POP verbatim. Arg/result absval
				2491	calculations are done when the associated CALL is
				2492	processed. CLEAR has no effect on absval calculations but
				2493	needs to be copied.
				2494	*/
				2495	case PUSH: case POP: case CLEAR:
				2496	copyUInstr(cb, u_in);
				2497	break;
				2498
				2499	/* In short:
				2500	callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
				2501	We have to decide on a size to do the computation at,
				2502	although the choice doesn't affect correctness. We will
				2503	do a pcast to the final size anyway, so the only important
				2504	factor is to choose a size which minimises the total
				2505	number of casts needed. Valgrind: just use size 0,
				2506	regardless. It may not be very good for performance
				2507	but does simplify matters, mainly by reducing the number
				2508	of different pessimising casts which have to be implemented.
				2509	*/
				2510	case CALLM: {
				2511	UInstr* uu;
				2512	Bool res_used;
				2513
				2514	/* Now generate the code. Get the final result absval
				2515	into qt. */
				2516	qt = newShadow(cb);
				2517	qtt = newShadow(cb);
				2518	uInstr1(cb, SETV, 0, TempReg, qt);
				2519	for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
				2520	uu = & cb_in->instrs[j];
				2521	if (uu->opcode != PUSH) continue;
				2522	/* cast via a temporary */
				2523	uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
				2524	TempReg, qtt);
				2525	create_PCast(cb, uu->size, 0, qtt);
				2526	create_UifU(cb, 0, qtt, qt);
				2527	}
				2528	/* Remembering also that flags read count as inputs. */
				2529	if (u_in->flags_r != FlagsEmpty) {
				2530	qtt = create_GETVF(cb, 0);
				2531	create_UifU(cb, 0, qtt, qt);
				2532	}
				2533
				2534	/* qt now holds the result tag. If any results from the
				2535	call are used, either by fetching with POP or
				2536	implicitly by writing the flags, we copy the result
				2537	absval to the relevant location. If not used, the call
				2538	must have been for its side effects, so we test qt here
				2539	and now. Note that this assumes that all values
				2540	removed by POP continue to be live. So dead args
				2541	must be removed with CLEAR, not by POPping them into
				2542	a dummy tempreg.
				2543	*/
				2544	res_used = False;
				2545	for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
				2546	uu = & cb_in->instrs[j];
				2547	if (uu->opcode != POP) continue;
				2548	/* Cast via a temp. */
				2549	uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
				2550	create_PCast(cb, 0, uu->size, qtt);
				2551	uInstr2(cb, MOV, 4, TempReg, qtt,
				2552	TempReg, SHADOW(uu->val1));
				2553	res_used = True;
				2554	}
				2555	if (u_in->flags_w != FlagsEmpty) {
				2556	create_PUTVF(cb, 0, qt);
				2557	res_used = True;
				2558	}
				2559	if (!res_used) {
				2560	uInstr1(cb, TESTV, 0, TempReg, qt);
				2561	/* qt should never be referred to again. Nevertheless
				2562	... */
				2563	uInstr1(cb, SETV, 0, TempReg, qt);
				2564	}
				2565	copyUInstr(cb, u_in);
				2566	break;
				2567	}
				2568	/* Whew ... */
				2569
				2570	case JMP:
				2571	if (u_in->tag1 == TempReg) {
				2572	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2573	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
				2574	} else {
				2575	vg_assert(u_in->tag1 == Literal);
				2576	}
				2577	if (u_in->cond != CondAlways) {
				2578	vg_assert(u_in->flags_r != FlagsEmpty);
				2579	qt = create_GETVF(cb, 0);
				2580	uInstr1(cb, TESTV, 0, TempReg, qt);
				2581	/* qt should never be referred to again. Nevertheless
				2582	... */
				2583	uInstr1(cb, SETV, 0, TempReg, qt);
				2584	}
				2585	copyUInstr(cb, u_in);
				2586	break;
				2587
				2588	case JIFZ:
				2589	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
				2590	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
				2591	copyUInstr(cb, u_in);
				2592	break;
				2593
				2594	/* Emit a check on the address used. For FPU_R, the value
				2595	loaded into the FPU is checked at the time it is read from
				2596	memory (see synth_fpu_mem_check_actions). */
				2597	case FPU_R: case FPU_W:
				2598	vg_assert(u_in->tag2 == TempReg);
				2599	uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
				2600	uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
				2601	copyUInstr(cb, u_in);
				2602	break;
				2603
				2604	/* For FPU insns not referencing memory, just copy thru. */
				2605	case FPU:
				2606	copyUInstr(cb, u_in);
				2607	break;
				2608
				2609	default:
				2610	VG_(ppUInstr)(0, u_in);
				2611	VG_(panic)( "vg_instrument: unhandled case");
				2612
				2613	} /* end of switch (u_in->opcode) */
				2614
				2615	} /* end of for loop */
				2616
				2617	freeCodeBlock(cb_in);
				2618	return cb;
				2619	}
				2620
				2621	/------------------------------------------------------------/
				2622	/--- Clean up mem check instrumentation. ---/
				2623	/------------------------------------------------------------/
				2624
				2625	#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
				2626	#define VGC_UNDEF ((UChar)100)
				2627	#define VGC_VALUE ((UChar)101)
				2628
				2629	#define NOP_no_msg(uu) \
				2630	do { uu->opcode = NOP; } while (False)
				2631
				2632	#define NOP_tag1_op(uu) \
				2633	do { uu->opcode = NOP; \
				2634	if (VG_(disassemble)) \
				2635	VG_(printf)("at %d: delete %s due to defd arg\n", \
				2636	i, VG_(nameOfTagOp(u->val3))); \
				2637	} while (False)
				2638
				2639	#define SETV_tag1_op(uu,newsz) \
				2640	do { uu->opcode = SETV; \
				2641	uu->size = newsz; \
				2642	uu->tag2 = uu->tag3 = NoValue; \
				2643	if (VG_(disassemble)) \
				2644	VG_(printf)("at %d: convert %s to SETV%d " \
				2645	"due to defd arg\n", \
				2646	i, VG_(nameOfTagOp(u->val3)), newsz); \
				2647	} while (False)
				2648
				2649
				2650
				2651	/* Run backwards and delete SETVs on shadow temps for which the next
				2652	action is a write. Needs an env saying whether or not the next
				2653	action is a write. The supplied UCodeBlock is destructively
				2654	modified.
				2655	*/
				2656	static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
				2657	{
				2658	Bool* next_is_write;
				2659	Int i, j, k, n_temps;
				2660	UInstr* u;
				2661	TempUse tempUse[3];
				2662
				2663	n_temps = cb->nextTemp;
				2664	if (n_temps == 0) return;
				2665
				2666	next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
				2667
				2668	for (i = 0; i < n_temps; i++) next_is_write[i] = True;
				2669
				2670	for (i = cb->used-1; i >= 0; i--) {
				2671	u = &cb->instrs[i];
				2672
sewardj	97ced73	2002-03-25 00:07:36 +0000	[diff] [blame]	2673	/* If we're not checking address V bits, there will be a lot of
				2674	GETVs, TAG1s and TAG2s calculating values which are never
				2675	used. These first three cases get rid of them. */
				2676
				2677	if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
				2678	&& next_is_write[u->val2]
				2679	&& !VG_(clo_check_addrVs)) {
				2680	u->opcode = NOP;
				2681	u->size = 0;
				2682	if (VG_(disassemble))
				2683	VG_(printf)("at %d: delete GETV\n", i);
				2684	} else
				2685
				2686	if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
				2687	&& next_is_write[u->val1]
				2688	&& !VG_(clo_check_addrVs)) {
				2689	u->opcode = NOP;
				2690	u->size = 0;
				2691	if (VG_(disassemble))
				2692	VG_(printf)("at %d: delete TAG1\n", i);
				2693	} else
				2694
				2695	if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
				2696	&& next_is_write[u->val2]
				2697	&& !VG_(clo_check_addrVs)) {
				2698	u->opcode = NOP;
				2699	u->size = 0;
				2700	if (VG_(disassemble))
				2701	VG_(printf)("at %d: delete TAG2\n", i);
				2702	} else
				2703
				2704	/* We do the rest of these regardless of whether or not
				2705	addresses are V-checked. */
				2706
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	2707	if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
				2708	&& next_is_write[u->val2]) {
				2709	/* This MOV is pointless because the target is dead at this
				2710	point. Delete it. */
				2711	u->opcode = NOP;
				2712	u->size = 0;
				2713	if (VG_(disassemble))
				2714	VG_(printf)("at %d: delete MOV\n", i);
				2715	} else
				2716
				2717	if (u->opcode == SETV) {
				2718	if (u->tag1 == TempReg) {
				2719	vg_assert(VGC_IS_SHADOW(u->val1));
				2720	if (next_is_write[u->val1]) {
				2721	/* This write is pointless, so annul it. */
				2722	u->opcode = NOP;
				2723	u->size = 0;
				2724	if (VG_(disassemble))
				2725	VG_(printf)("at %d: delete SETV\n", i);
				2726	} else {
				2727	/* This write has a purpose; don't annul it, but do
				2728	notice that we did it. */
				2729	next_is_write[u->val1] = True;
				2730	}
				2731
				2732	}
				2733
				2734	} else {
				2735	/* Find out what this insn does to the temps. */
				2736	k = getTempUsage(u, &tempUse[0]);
				2737	vg_assert(k <= 3);
				2738	for (j = k-1; j >= 0; j--) {
				2739	next_is_write[ tempUse[j].tempNo ]
				2740	= tempUse[j].isWrite;
				2741	}
				2742	}
				2743
				2744	}
				2745
				2746	VG_(jitfree)(next_is_write);
				2747	}
				2748
				2749
				2750	/* Run forwards, propagating and using the is-completely-defined
				2751	property. This removes a lot of redundant tag-munging code.
				2752	Unfortunately it requires intimate knowledge of how each uinstr and
				2753	tagop modifies its arguments. This duplicates knowledge of uinstr
				2754	tempreg uses embodied in getTempUsage(), which is unfortunate.
				2755	The supplied UCodeBlock* is modified in-place.
				2756
				2757	For each value temp, def[] should hold VGC_VALUE.
				2758
				2759	For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
				2760	definitely known to be fully defined at that size. In all other
				2761	circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
				2762	undefined. In cases of doubt, VGC_UNDEF is always safe.
				2763	*/
				2764	static void vg_propagate_definedness ( UCodeBlock* cb )
				2765	{
				2766	UChar* def;
				2767	Int i, j, k, t, n_temps;
				2768	UInstr* u;
				2769	TempUse tempUse[3];
				2770
				2771	n_temps = cb->nextTemp;
				2772	if (n_temps == 0) return;
				2773
				2774	def = VG_(jitmalloc)(n_temps * sizeof(UChar));
				2775	for (i = 0; i < n_temps; i++)
				2776	def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
				2777
				2778	/* Run forwards, detecting and using the all-defined property. */
				2779
				2780	for (i = 0; i < cb->used; i++) {
				2781	u = &cb->instrs[i];
				2782	switch (u->opcode) {
				2783
				2784	/* Tag-handling uinstrs. */
				2785
				2786	/* Deal with these quickly. */
				2787	case NOP:
				2788	case INCEIP:
				2789	break;
				2790
				2791	/* Make a tag defined. */
				2792	case SETV:
				2793	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2794	def[u->val1] = u->size;
				2795	break;
				2796
				2797	/* Check definedness of a tag. */
				2798	case TESTV:
				2799	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2800	if (def[u->val1] <= 4) {
				2801	vg_assert(def[u->val1] == u->size);
				2802	NOP_no_msg(u);
				2803	if (VG_(disassemble))
				2804	VG_(printf)("at %d: delete TESTV on defd arg\n", i);
				2805	}
				2806	break;
				2807
				2808	/* Applies to both values and tags. Propagate Definedness
				2809	property through copies. Note that this isn't optional;
				2810	we have to do this to keep def[] correct. */
				2811	case MOV:
				2812	vg_assert(u->tag2 == TempReg);
				2813	if (u->tag1 == TempReg) {
				2814	if (VGC_IS_SHADOW(u->val1)) {
				2815	vg_assert(VGC_IS_SHADOW(u->val2));
				2816	def[u->val2] = def[u->val1];
				2817	}
				2818	}
				2819	break;
				2820
				2821	case PUTV:
				2822	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2823	if (def[u->val1] <= 4) {
				2824	vg_assert(def[u->val1] == u->size);
				2825	u->tag1 = Literal;
				2826	u->val1 = 0;
				2827	switch (u->size) {
				2828	case 4: u->lit32 = 0x00000000; break;
				2829	case 2: u->lit32 = 0xFFFF0000; break;
				2830	case 1: u->lit32 = 0xFFFFFF00; break;
				2831	default: VG_(panic)("vg_cleanup(PUTV)");
				2832	}
				2833	if (VG_(disassemble))
				2834	VG_(printf)(
				2835	"at %d: propagate definedness into PUTV\n", i);
				2836	}
				2837	break;
				2838
				2839	case STOREV:
				2840	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2841	if (def[u->val1] <= 4) {
				2842	vg_assert(def[u->val1] == u->size);
				2843	u->tag1 = Literal;
				2844	u->val1 = 0;
				2845	switch (u->size) {
				2846	case 4: u->lit32 = 0x00000000; break;
				2847	case 2: u->lit32 = 0xFFFF0000; break;
				2848	case 1: u->lit32 = 0xFFFFFF00; break;
				2849	default: VG_(panic)("vg_cleanup(STOREV)");
				2850	}
				2851	if (VG_(disassemble))
				2852	VG_(printf)(
				2853	"at %d: propagate definedness into STandV\n", i);
				2854	}
				2855	break;
				2856
				2857	/* Nothing interesting we can do with this, I think. */
				2858	case PUTVF:
				2859	break;
				2860
				2861	/* Tag handling operations. */
				2862	case TAG2:
				2863	vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
				2864	vg_assert(u->tag3 == Lit16);
				2865	/* Ultra-paranoid "type" checking. */
				2866	switch (u->val3) {
				2867	case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
				2868	case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
				2869	case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
				2870	vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
				2871	break;
				2872	default:
				2873	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2874	break;
				2875	}
				2876	switch (u->val3) {
				2877	Int sz;
				2878	case VgT_UifU4:
				2879	sz = 4; goto do_UifU;
				2880	case VgT_UifU2:
				2881	sz = 2; goto do_UifU;
				2882	case VgT_UifU1:
				2883	sz = 1; goto do_UifU;
				2884	case VgT_UifU0:
				2885	sz = 0; goto do_UifU;
				2886	do_UifU:
				2887	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2888	vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
				2889	if (def[u->val1] <= 4) {
				2890	/* UifU. The first arg is defined, so result is
				2891	simply second arg. Delete this operation. */
				2892	vg_assert(def[u->val1] == sz);
				2893	NOP_no_msg(u);
				2894	if (VG_(disassemble))
				2895	VG_(printf)(
				2896	"at %d: delete UifU%d due to defd arg1\n",
				2897	i, sz);
				2898	}
				2899	else
				2900	if (def[u->val2] <= 4) {
				2901	/* UifU. The second arg is defined, so result is
				2902	simply first arg. Copy to second. */
				2903	vg_assert(def[u->val2] == sz);
				2904	u->opcode = MOV;
				2905	u->size = 4;
				2906	u->tag3 = NoValue;
				2907	def[u->val2] = def[u->val1];
				2908	if (VG_(disassemble))
				2909	VG_(printf)(
				2910	"at %d: change UifU%d to MOV due to defd"
				2911	" arg2\n",
				2912	i, sz);
				2913	}
				2914	break;
				2915	case VgT_ImproveAND4_TQ:
				2916	sz = 4; goto do_ImproveAND;
				2917	case VgT_ImproveAND1_TQ:
				2918	sz = 1; goto do_ImproveAND;
				2919	do_ImproveAND:
				2920	/* Implements Q = T OR Q. So if Q is entirely defined,
				2921	ie all 0s, we get MOV T, Q. */
				2922	if (def[u->val2] <= 4) {
				2923	vg_assert(def[u->val2] == sz);
				2924	u->size = 4; /* Regardless of sz */
				2925	u->opcode = MOV;
				2926	u->tag3 = NoValue;
				2927	def[u->val2] = VGC_UNDEF;
				2928	if (VG_(disassemble))
				2929	VG_(printf)(
				2930	"at %d: change ImproveAND%d_TQ to MOV due "
				2931	"to defd arg2\n",
				2932	i, sz);
				2933	}
				2934	break;
				2935	default:
				2936	goto unhandled;
				2937	}
				2938	break;
				2939
				2940	case TAG1:
				2941	vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
				2942	if (def[u->val1] > 4) break;
				2943	/* We now know that the arg to the op is entirely defined.
				2944	If the op changes the size of the arg, we must replace
				2945	it with a SETV at the new size. If it doesn't change
				2946	the size, we can delete it completely. */
				2947	switch (u->val3) {
				2948	/* Maintain the same size ... */
				2949	case VgT_Left4:
				2950	vg_assert(def[u->val1] == 4);
				2951	NOP_tag1_op(u);
				2952	break;
				2953	case VgT_PCast11:
				2954	vg_assert(def[u->val1] == 1);
				2955	NOP_tag1_op(u);
				2956	break;
				2957	/* Change size ... */
				2958	case VgT_PCast40:
				2959	vg_assert(def[u->val1] == 4);
				2960	SETV_tag1_op(u,0);
				2961	def[u->val1] = 0;
				2962	break;
				2963	case VgT_PCast14:
				2964	vg_assert(def[u->val1] == 1);
				2965	SETV_tag1_op(u,4);
				2966	def[u->val1] = 4;
				2967	break;
				2968	case VgT_PCast12:
				2969	vg_assert(def[u->val1] == 1);
				2970	SETV_tag1_op(u,2);
				2971	def[u->val1] = 2;
				2972	break;
				2973	case VgT_PCast10:
				2974	vg_assert(def[u->val1] == 1);
				2975	SETV_tag1_op(u,0);
				2976	def[u->val1] = 0;
				2977	break;
				2978	case VgT_PCast02:
				2979	vg_assert(def[u->val1] == 0);
				2980	SETV_tag1_op(u,2);
				2981	def[u->val1] = 2;
				2982	break;
				2983	default:
				2984	goto unhandled;
				2985	}
				2986	if (VG_(disassemble))
				2987	VG_(printf)(
				2988	"at %d: delete TAG1 %s due to defd arg\n",
				2989	i, VG_(nameOfTagOp(u->val3)));
				2990	break;
				2991
				2992	default:
				2993	unhandled:
				2994	/* We don't know how to handle this uinstr. Be safe, and
				2995	set to VGC_VALUE or VGC_UNDEF all temps written by it. */
				2996	k = getTempUsage(u, &tempUse[0]);
				2997	vg_assert(k <= 3);
				2998	for (j = 0; j < k; j++) {
				2999	t = tempUse[j].tempNo;
				3000	vg_assert(t >= 0 && t < n_temps);
				3001	if (!tempUse[j].isWrite) {
				3002	/* t is read; ignore it. */
				3003	if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
				3004	VG_(printf)("ignoring def %d at %s %s\n",
				3005	def[t],
				3006	VG_(nameUOpcode)(True, u->opcode),
				3007	(u->opcode == TAG1 \|\| u->opcode == TAG2)
				3008	? VG_(nameOfTagOp)(u->val3)
				3009	: (Char*)"");
				3010	} else {
				3011	/* t is written; better nullify it. */
				3012	def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
				3013	}
				3014	}
				3015	}
				3016	}
				3017
				3018	VG_(jitfree)(def);
				3019	}
				3020
				3021
				3022	/* Top level post-instrumentation cleanup function. */
				3023	static void vg_cleanup ( UCodeBlock* cb )
				3024	{
				3025	vg_propagate_definedness ( cb );
				3026	vg_delete_redundant_SETVs ( cb );
				3027	}
				3028
				3029
				3030	/------------------------------------------------------------/
				3031	/--- Main entry point for the JITter. ---/
				3032	/------------------------------------------------------------/
				3033
				3034	/* Translate the basic block beginning at orig_addr, placing the
				3035	translation in a vg_malloc'd block, the address and size of which
				3036	are returned in trans_addr and trans_size. Length of the original
				3037	block is also returned in orig_size. If the latter three are NULL,
				3038	this call is being done for debugging purposes, in which case (a)
				3039	throw away the translation once it is made, and (b) produce a load
				3040	of debugging output.
				3041	*/
sewardj	1e8cdc9	2002-04-18 11:37:52 +0000	[diff] [blame]	3042	void VG_(translate) ( ThreadState* tst,
				3043	/* Identity of thread needing this block */
				3044	Addr orig_addr,
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3045	UInt* orig_size,
				3046	Addr* trans_addr,
				3047	UInt* trans_size )
				3048	{
				3049	Int n_disassembled_bytes, final_code_size;
				3050	Bool debugging_translation;
				3051	UChar* final_code;
				3052	UCodeBlock* cb;
				3053
				3054	VGP_PUSHCC(VgpTranslate);
				3055	debugging_translation
				3056	= orig_size == NULL \|\| trans_addr == NULL \|\| trans_size == NULL;
				3057
				3058	dis = True;
				3059	dis = debugging_translation;
				3060
				3061	/* Check if we're being asked to jump to a silly address, and if so
				3062	record an error message before potentially crashing the entire
				3063	system. */
				3064	if (VG_(clo_instrument) && !debugging_translation && !dis) {
				3065	Addr bad_addr;
				3066	Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
				3067	if (!ok) {
sewardj	1e8cdc9	2002-04-18 11:37:52 +0000	[diff] [blame]	3068	VG_(record_jump_error)(tst, bad_addr);
sewardj	de4a1d0	2002-03-22 01:27:54 +0000	[diff] [blame]	3069	}
				3070	}
				3071
				3072	/* if (VG_(overall_in_count) >= 4800) dis=True; */
				3073	if (VG_(disassemble))
				3074	VG_(printf)("\n");
				3075	if (0 \|\| dis
				3076	\|\| (VG_(overall_in_count) > 0 &&
				3077	(VG_(overall_in_count) % 1000 == 0))) {
				3078	if (0&& (VG_(clo_verbosity) > 1 \|\| dis))
				3079	VG_(message)(Vg_UserMsg,
				3080	"trans# %d, bb# %lu, in %d, out %d",
				3081	VG_(overall_in_count),
				3082	VG_(bbs_done),
				3083	VG_(overall_in_osize), VG_(overall_in_tsize),
				3084	orig_addr );
				3085	}
				3086	cb = allocCodeBlock();
				3087
				3088	/* Disassemble this basic block into cb. */
				3089	VGP_PUSHCC(VgpToUCode);
				3090	n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
				3091	VGP_POPCC;
				3092	/* dis=True; */
				3093	/* if (0&& VG_(translations_done) < 617) */
				3094	/* dis=False; */
				3095	/* Try and improve the code a bit. */
				3096	if (VG_(clo_optimise)) {
				3097	VGP_PUSHCC(VgpImprove);
				3098	vg_improve ( cb );
				3099	if (VG_(disassemble))
				3100	VG_(ppUCodeBlock) ( cb, "Improved code:" );
				3101	VGP_POPCC;
				3102	}
				3103	/* dis=False; */
				3104	/* Add instrumentation code. */
				3105	if (VG_(clo_instrument)) {
				3106	VGP_PUSHCC(VgpInstrument);
				3107	cb = vg_instrument(cb);
				3108	VGP_POPCC;
				3109	if (VG_(disassemble))
				3110	VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
				3111	if (VG_(clo_cleanup)) {
				3112	VGP_PUSHCC(VgpCleanup);
				3113	vg_cleanup(cb);
				3114	VGP_POPCC;
				3115	if (VG_(disassemble))
				3116	VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
				3117	}
				3118	}
				3119
				3120	/* Allocate registers. */
				3121	VGP_PUSHCC(VgpRegAlloc);
				3122	cb = vg_do_register_allocation ( cb );
				3123	VGP_POPCC;
				3124	/* dis=False; */
				3125	/*
				3126	if (VG_(disassemble))
				3127	VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
				3128	*/
				3129
				3130	VGP_PUSHCC(VgpFromUcode);
				3131	/* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
				3132	and so must be VG_(jitfree)'d. */
				3133	final_code = VG_(emit_code)(cb, &final_code_size );
				3134	VGP_POPCC;
				3135	freeCodeBlock(cb);
				3136
				3137	if (debugging_translation) {
				3138	/* Only done for debugging -- throw away final result. */
				3139	VG_(jitfree)(final_code);
				3140	} else {
				3141	/* Doing it for real -- return values to caller. */
				3142	*orig_size = n_disassembled_bytes;
				3143	*trans_addr = (Addr)final_code;
				3144	*trans_size = final_code_size;
				3145	}
				3146	VGP_POPCC;
				3147	}
				3148
				3149	/--------------------------------------------------------------------/
				3150	/--- end vg_translate.c ---/
				3151	/--------------------------------------------------------------------/