
/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

Note!  This pthreads implementation is so poor as to not be
suitable for use by anyone at all!

- Currently, when a signal is run, just the ThreadStatus.status fields
  are saved in the signal frame, along with the CPU state.  Question:
  should I also save and restore:
     ThreadStatus.joiner
     ThreadStatus.waited_on_mid
     ThreadStatus.awaken_at
     ThreadStatus.retval
  Currently unsure, and so am not doing so.

- Signals interrupting read/write and nanosleep: SA_RESTART settings.
  Read/write correctly return with EINTR when SA_RESTART isn't
  specified and they are interrupted by a signal.  nanosleep just
  pretends signals don't exist -- should be fixed.

- Read/write syscall starts: don't crap out when the initial
  nonblocking read/write returns an error.

- Get rid of restrictions re use of sigaltstack; they are no longer
  needed.
*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in
         use. */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];



typedef
   struct {
      /* Is this slot in use, or free? */
      Bool in_use;
      /* If in_use, is this mutex held by some thread, or not? */
      Bool held;
      /* If held == True, owner indicates who by. */
      ThreadId owner;
   }
   VgMutex;

static VgMutex vg_mutexes[VG_N_MUTEXES];

/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void pp_sched_status ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("tid %d:  ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX\n"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default:              VG_(printf)("???\n"); break;
      }
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}

/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


/* Find an unused VgMutex record. */
static
MutexId vg_alloc_VgMutex ( void )
{
   Int i;
   for (i = 0; i < VG_N_MUTEXES; i++) {
      if (!vg_mutexes[i].in_use)
         return i;
   }
   VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
   VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
   VG_(panic)("VG_N_MUTEXES is too low");
   /*NOTREACHED*/
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}

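
/* The setjmp/longjmp recovery idiom used above, in miniature.  This is
   an illustrative plain-libc sketch, not code from this file; the
   names jb, on_fault and run_client_code are made up for the example:

      #include <setjmp.h>
      #include <signal.h>

      static jmp_buf jb;

      static void on_fault ( int sig )
      {
         longjmp(jb, sig);     // unwind back to the dispatch site
      }

      ...
      signal(SIGSEGV, on_fault);
      if (setjmp(jb) == 0) {
         run_client_code();    // may fault at any instruction
      } else {
         // the handler longjmp'd us back here with the signal number;
         // report the block as having hit an unresumable signal
      }
*/
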
/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock)
   beforehand.
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_MUTEXES; i++)
      vg_mutexes[i].in_use = False;

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   VG_(save_thread_state) ( tid_main );
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}



/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled that
   way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)    \
      { tst->m_edx = (vvv);     \
        return True;            \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );
      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}

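
/* Illustrative sketch of the client's side of this protocol (an
   assumption for clarity: the real client macros live in valgrind.h,
   and the exact trap sequence is defined there, not here).  A request
   is just a small vector of words whose first element is the request
   code:

      UInt arg[2];
      arg[0] = VG_USERREQ__MALLOC;   // request code, decoded above
      arg[1] = nbytes;               // request-specific argument
      // the client puts &arg[0] in %EAX and traps to the scheduler;
      // afterwards it reads the result (the new pointer) out of %EDX.

   That is exactly the layout decoded by maybe_do_trivial_clientreq()
   and do_nontrivial_clientreq(): arg[0] selects the operation,
   arg[1..] supply parameters, and the reply goes back in the
   requesting thread's %EDX. */
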
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7); /*NOTREACHED -- only read/write get here*/
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


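
/* The probe-then-park trick above, in miniature.  An illustrative
   plain-libc sketch, not code from this file; buf and n stand in for
   the client's real syscall arguments:

      int flags = fcntl(fd, F_GETFL, 0);
      fcntl(fd, F_SETFL, flags | O_NONBLOCK);   // force nonblocking
      ssize_t r = read(fd, buf, n);             // lodges the request
      fcntl(fd, F_SETFL, flags);                // restore old mode
      if (r == -1 && errno == EWOULDBLOCK) {
         // kernel would have blocked: park the thread (VgTs_WaitFD)
         // and let select() tell us later when fd becomes ready
      } else {
         // completed immediately: r is the final result; deliver now
      }
*/
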
/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time
            (second) arg of nanosleep, since it's used up all its
            time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
        VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec  = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            pp_sched_status();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait or sleeping state.
            Delay for a while, and go round again, in the hope that
            eventually a thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }

      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);
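
      /* Worked example of the counter arithmetic (illustrative; the
         quantum value here is an assumption, not necessarily the real
         VG_SCHEDULING_QUANTUM).  Suppose the quantum is 50000, so
         dispatch_ctr starts at 50001 and at most 50000 bbs can run.
         If the innerloop later stops with dispatch_ctr == 49991, then
         done_this_time, computed in Phase 3 below, is
         50001 - 49991 - 1 = 9 bbs actually completed. */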

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               VG_(create_translation_for)( vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done)  += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


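/* Illustrative sketch of how a caller might dispatch on the result
   (the call site is assumed to live elsewhere, e.g. vg_main.c -- an
   assumption, not something this file shows):

      VgSchedReturnCode src = VG_(scheduler)();
      switch (src) {
         case VgSrc_Deadlock: ...  no runnable threads, none can wake
         case VgSrc_Shutdown: ...  client asked Valgrind to exit
         case VgSrc_BbsDone:  ...  requested number of bbs completed
      }
*/

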
/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to its
      root function with PTHREAD_CANCELED as the return value.  So:
      simple: put PTHREAD_CANCELED into %EAX and
      &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx  = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].m_esp
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
   }
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   /* return zero */
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;
   vg_threads[tid].m_edx  = 0; /* success */
}


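/* Sketch of the child's initial stack as built above (an illustrative
   diagram, not code; addresses grow downwards):

      new_stack + new_stk_szb:
         [ VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, marked noaccess ]
      m_esp+4:  arg                          -- start_routine's argument
      m_esp:    VG_(pthreadreturn_bogusRA)   -- magic return address

   with m_eip = start_routine.  So when start_routine returns, it
   "returns" into VG_(pthreadreturn_bogusRA), which -- presumably, by
   issuing the VG_USERREQ__PTHREAD_RETURNS client request handled in
   do_nontrivial_clientreq() below -- hands the return value back to
   handle_pthread_return(). */

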
/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
   is a struct with at least 5 words:
      typedef struct
      {
        int   __m_reserved;  -- Reserved for future use
        int   __m_count;     -- Depth of recursive locking
        _pthread_descr __m_owner;  -- Owner thread (if recursive or errcheck)
        int   __m_kind;      -- Mutex kind: fast, recursive or errcheck
        struct _pthread_fastlock __m_lock;  -- Underlying fast lock
      } pthread_mutex_t;
   Ours is just a single word, an index into vg_mutexes[].
   For now I'll park it in the __m_reserved field.

   Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
   a zero __m_count field (see /usr/include/pthread.h).  So I'll
   use zero to mean non-inited, and 1 to mean inited.

   How convenient.
*/

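
/* Concretely, an illustrative sketch (not code from this file).  A
   client declaring

      pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;

   gets a struct whose __m_count is 0.  The first lock request on mx
   therefore triggers initialise_mutex() below, which allocates a
   vg_mutexes[] slot, parks its index in mx.__m_reserved, and sets
   mx.__m_count to 1 ("inited").  Every later lock/unlock/destroy
   request sees __m_count == 1 and just reads the MutexId back out of
   __m_reserved. */
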
static
void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];
   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
      whatever reason. */
   mid = vg_alloc_VgMutex();
   vg_mutexes[mid].in_use = True;
   vg_mutexes[mid].held   = False;
   vg_mutexes[mid].owner  = VG_INVALID_THREADID; /* irrelevant */
   mutex->__m_reserved = mid;
   mutex->__m_count    = 1; /* initialised */
   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
                            mutex, mid );
      print_pthread_event(tid, msg_buf);
   }
}

/* Allocate a new MutexId and write it into *mutex.  Ideally take
   notice of the attributes in *mutexattr.  */
static
void do_pthread_mutex_init ( ThreadId tid,
                             pthread_mutex_t *mutex,
                             const pthread_mutexattr_t *mutexattr)
{
   Char msg_buf[100];
   /* Paranoia ... */
   vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));

   initialise_mutex(tid, mutex);

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
                            mutex, mutex->__m_reserved );
      print_pthread_event(tid, msg_buf);
   }

   /*
   RETURN VALUE
      pthread_mutex_init always returns 0.  The other mutex functions
      return 0 on success and a non-zero error code on error.
   */
   /* THIS THREAD returns with 0. */
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   /* *mutex contains the MutexId, or one of the magic values
      PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
      now.  See comment(s) above re use of __m_count to indicate
      initialisation status.
   */

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (mutex->__m_count == 0) {
      initialise_mutex(tid, mutex);
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assert initialised. */
   vg_assert(mutex->__m_count == 1);

   /* Assume tid valid. */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (vg_mutexes[mid].held) {
      if (vg_mutexes[mid].owner == tid) {
         vg_threads[tid].m_edx = EDEADLK;
         return;
      }
      /* Someone else has it; we have to wait. */
      vg_threads[tid].status = VgTs_WaitMX;
      vg_threads[tid].waited_on_mid = mid;
      /* No assignment to %EDX, since we're blocking. */
      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p): BLOCK",
                               mid, mutex );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* We get it! */
      vg_mutexes[mid].held  = True;
      vg_mutexes[mid].owner = tid;
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }
}


static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   MutexId mid;
   Int     i;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if we don't currently hold the mutex. */
   if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].waited_on_mid == mid)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      vg_mutexes[mid].held = False;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      vg_mutexes[mid].owner = i;
      vg_threads[i].status = VgTs_Runnable;
      vg_threads[i].m_edx = 0; /* pth_lock() success */

      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d: RESUME",
                               mid );
         print_pthread_event(tid, msg_buf);
      }
   }

   /* In either case, our (tid's) pth_unlock() returns with 0
      (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}


static void do_pthread_mutex_destroy ( ThreadId tid,
                                       pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if the mutex is currently held. */
   if (vg_mutexes[mid].held) {
      vg_threads[tid].m_edx = EBUSY;
      return;
   }

   mutex->__m_count = 0; /* uninitialised */
   vg_mutexes[mid].in_use = False;
   vg_threads[tid].m_edx = 0;
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}


/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      /* Sigh ... this probably will cause huge numbers of major
         (expensive) scheduling events, for no real reason.
         Perhaps should be classified as a trivial-request. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         vg_threads[tid].m_edx = tid;
         break;

      case VG_USERREQ__PTHREAD_MUTEX_INIT:
         do_pthread_mutex_init( tid,
                                (pthread_mutex_t *)(arg[1]),
                                (pthread_mutexattr_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
         do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("do_nontrivial_clientreq: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}


1802
1803/*--------------------------------------------------------------------*/
1804/*--- end vg_scheduler.c ---*/
1805/*--------------------------------------------------------------------*/