
/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.         vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */
37
sewardj77e466c2002-04-14 02:29:29 +000038/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000039
sewardj77e466c2002-04-14 02:29:29 +000040Note! This pthreads implementation is so poor as to not be
41suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000042
sewardj77e466c2002-04-14 02:29:29 +000043- Currently, when a signal is run, just the ThreadStatus.status fields
44 are saved in the signal frame, along with the CPU state. Question:
45 should I also save and restore:
46 ThreadStatus.joiner
47 ThreadStatus.waited_on_mid
48 ThreadStatus.awaken_at
49 ThreadStatus.retval
50 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000051
sewardj77e466c2002-04-14 02:29:29 +000052- Signals interrupting read/write and nanosleep: SA_RESTART settings.
53 Read/write correctly return with EINTR when SA_RESTART isn't
54 specified and they are interrupted by a signal. nanosleep just
55 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000056
sewardj8937c812002-04-12 20:12:20 +000057- when a thread is done mark its stack as noaccess
58
sewardje462e202002-04-13 04:09:07 +000059- 0xDEADBEEF syscall errors ... fix.
sewardj8937c812002-04-12 20:12:20 +000060
sewardje462e202002-04-13 04:09:07 +000061*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use. */
      Int      fd;
      /* The syscall number the fd is used in. */
      Int      syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool     ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];



typedef
   struct {
      /* Is this slot in use, or free? */
      Bool in_use;
      /* If in_use, is this mutex held by some thread, or not? */
      Bool held;
      /* if held==True, owner indicates who by. */
      ThreadId owner;
   }
   VgMutex;

static VgMutex vg_mutexes[VG_N_MUTEXES];

/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void pp_sched_status ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("tid %d:  ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX(%d)\n",
                                           vg_threads[i].waited_on_mid); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default: VG_(printf)("???\n"); break;
      }
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0;  i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


/* Find an unused VgMutex record. */
static
MutexId vg_alloc_VgMutex ( void )
{
   Int i;
   for (i = 0; i < VG_N_MUTEXES; i++) {
      if (!vg_mutexes[i].in_use)
         return i;
   }
   VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
   VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
   VG_(panic)("VG_N_MUTEXES is too low");
   /*NOTREACHED*/
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
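
/* Rough control-flow sketch of the setjmp/longjmp protocol above:
   run_thread_for_a_while() does __builtin_setjmp on
   VG_(scheduler_jmpbuf) before entering VG_(run_innerloop)().  If the
   client faults, vg_oursignalhandler() (see the declaration of
   VG_(scheduler_jmpbuf) above) is presumed to longjmp back here,
   making setjmp return nonzero, whereupon we hand
   VG_TRC_UNRESUMABLE_SIGNAL back to the scheduler. */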


/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  startup_esp, VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_MUTEXES; i++)
      vg_mutexes[i].in_use = False;

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   VG_(save_thread_state) ( tid_main );
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}


/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled in this
   way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)             \
      { vg_threads[tid].m_edx = (vvv);   \
        return True;                     \
      }

   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( arg[1], arg[2] )
         );
      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         /* Cannot happen: only read and write get this far. */
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}
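
/* Worked example of the probe-and-wait protocol above (fd numbers
   hypothetical): thread 2 does read() on blocking fd 7.
   sched_do_syscall() temporarily marks fd 7 nonblocking and issues
   the read.  If the kernel says -EWOULDBLOCK, %EAX is restored,
   (tid 2, fd 7, __NR_read) goes into vg_waiting_fds[], and thread 2
   becomes VgTs_WaitFD.  Later, poll_for_ready_fds() sees fd 7 go
   ready, and complete_blocked_syscalls() re-issues the read -- which
   now completes without blocking -- and makes thread 2 Runnable
   again. */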


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time (second)
            arg of nanosleep, since it's used up all its time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
       VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec  = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}
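
/* Design note: 20 ms (20 * 1000 * 1000 ns) is the idle-loop polling
   granularity -- when no thread is runnable but some are blocked on
   I/O or sleeping, the scheduler naps this long between successive
   poll_for_ready_fds() passes.  Presumably a trade-off: a smaller
   value would wake blocked threads sooner but burn more CPU in the
   idle loop. */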


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shut down Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to Phase 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            pp_sched_status();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait or sleeping state.
            Delay for a while, and go round again, in the hope that
            eventually a thread becomes runnable. */
         nanosleep_for_a_while();
         /* pp_sched_status(); */
         /* VG_(printf)(".\n"); */
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quantum as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
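      /* Concrete instance of the above: with VG_(dispatch_ctr) set to
         N+1, as done just below, the innerloop runs at most N bbs
         before returning VG_TRC_INNER_COUNTERZERO. */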
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               VG_(create_translation_for)( vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go)   -= (ULong)done_this_time;
      VG_(bbs_done)    += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/

static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
      return value.  So: simple: put PTHREAD_CANCELED into %EAX
      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call to pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].m_esp
         = new_stack + new_stk_szb
                     - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
   }
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   /* if (VG_(clo_instrument)) */
   /*    ***** CHECK *thread is writable */
   *thread = (pthread_t)tid;

   /* return zero */
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;
   vg_threads[tid].m_edx  = 0; /* success */
}


/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
   is a struct with at least 5 words:
      typedef struct
      {
        int __m_reserved;         -- Reserved for future use
        int __m_count;            -- Depth of recursive locking
        _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
        int __m_kind;             -- Mutex kind: fast, recursive or errcheck
        struct _pthread_fastlock __m_lock;  -- Underlying fast lock
      } pthread_mutex_t;
   Ours is just a single word, an index into vg_mutexes[].
   For now I'll park it in the __m_reserved field.

   Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
   a zero __m_count field (see /usr/include/pthread.h).  So I'll
   use zero to mean non-inited, and 1 to mean inited.

   How convenient.
*/
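
/* Illustration of the encoding above (slot number hypothetical):
   after initialise_mutex() allocates, say, vg_mutexes[3] for a client
   mutex m, we have m.__m_count == 1 ("inited") and
   m.__m_reserved == 3.  A statically initialised
   PTHREAD_MUTEX_INITIALIZER still has __m_count == 0, and so gets
   initialised lazily by do_pthread_mutex_lock() on its first lock. */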
static
void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId  mid;
   Char     msg_buf[100];
   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
      whatever reason. */
   mid = vg_alloc_VgMutex();
   vg_mutexes[mid].in_use = True;
   vg_mutexes[mid].held   = False;
   vg_mutexes[mid].owner  = VG_INVALID_THREADID; /* irrelevant */
   mutex->__m_reserved = mid;
   mutex->__m_count = 1; /* initialised */
   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
                            mutex, mid );
      print_pthread_event(tid, msg_buf);
   }
}

/* Allocate a new MutexId and write it into *mutex.  Ideally take
   notice of the attributes in *mutexattr.  */
static
void do_pthread_mutex_init ( ThreadId tid,
                             pthread_mutex_t *mutex,
                             const pthread_mutexattr_t *mutexattr)
{
   Char msg_buf[100];
   /* Paranoia ... */
   vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));

   initialise_mutex(tid, mutex);

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
                            mutex, mutex->__m_reserved );
      print_pthread_event(tid, msg_buf);
   }

   /*
   RETURN VALUE
       pthread_mutex_init always returns 0.  The other mutex functions
       return 0 on success and a non-zero error code on error.
   */
   /* THIS THREAD returns with 0. */
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId  mid;
   Char     msg_buf[100];

   /* *mutex contains the MutexId, or one of the magic values
      PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
      now.  See comment(s) above re use of __m_count to indicate
      initialisation status.
   */

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (mutex->__m_count == 0) {
      initialise_mutex(tid, mutex);
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assert initialised. */
   vg_assert(mutex->__m_count == 1);

   /* Assume tid valid. */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (vg_mutexes[mid].held) {
      if (vg_mutexes[mid].owner == tid) {
         vg_threads[tid].m_edx = EDEADLK;
         return;
      }
      /* Someone else has it; we have to wait. */
      vg_threads[tid].status = VgTs_WaitMX;
      vg_threads[tid].waited_on_mid = mid;
      /* No assignment to %EDX, since we're blocking. */
      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p): BLOCK",
                               mid, mutex );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* We get it! */
      vg_mutexes[mid].held  = True;
      vg_mutexes[mid].owner = tid;
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }
}


static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   MutexId  mid;
   Int      i;
   Char     msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if we don't currently hold the mutex. */
   if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].waited_on_mid == mid)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      vg_mutexes[mid].held = False;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      vg_mutexes[mid].owner = i;
      vg_threads[i].status = VgTs_Runnable;
      vg_threads[i].m_edx = 0; /* pth_lock() success */

      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d: RESUME",
                               mid );
         print_pthread_event(tid, msg_buf);
      }
   }

   /* In either case, our (tid's) pth_unlock() returns with 0
      (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}


static void do_pthread_mutex_destroy ( ThreadId tid,
                                       pthread_mutex_t *mutex )
{
   MutexId  mid;
   Char     msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if the mutex is currently held. */
   if (vg_mutexes[mid].held) {
      vg_threads[tid].m_edx = EBUSY;
      return;
   }

   mutex->__m_count = 0; /* uninitialised */
   vg_mutexes[mid].in_use = False;
   vg_threads[tid].m_edx = 0;
}

/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}

/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      /* Sigh ... this probably will cause huge numbers of major
         (expensive) scheduling events, for no real reason.
         Perhaps should be classified as a trivial-request. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         vg_threads[tid].m_edx = tid;
         break;

      case VG_USERREQ__PTHREAD_MUTEX_INIT:
         do_pthread_mutex_init( tid,
                                (pthread_mutex_t *)(arg[1]),
                                (pthread_mutexattr_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
         do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("handle_private_client_pthread_request: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/