   1
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
12 Julian_Seward@muraroa.demon.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33#include "vg_constants.h"
34
35#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
36 VG_USERREQ__DO_LEAK_CHECK */
37
  38/* BORKAGE/ISSUES as of 14 Apr 02
  39
  40Note! This pthreads implementation is so poor as to not be
41suitable for use by anyone at all!
  42
  43- Currently, when a signal is delivered, just the ThreadStatus.status fields
44 are saved in the signal frame, along with the CPU state. Question:
45 should I also save and restore:
46 ThreadStatus.joiner
47 ThreadStatus.waited_on_mid
48 ThreadStatus.awaken_at
49 ThreadStatus.retval
50 Currently unsure, and so am not doing so.
  51
  52- Signals interrupting read/write and nanosleep: SA_RESTART settings.
53 Read/write correctly return with EINTR when SA_RESTART isn't
54 specified and they are interrupted by a signal. nanosleep just
55 pretends signals don't exist -- should be fixed.
  56
  57- Read/write syscall starts: don't crap out when the initial
58 nonblocking read/write returns an error.
  59
  60- 0xDEADBEEF syscall errors ... fix.
  61
  62*/
  63
64
65/* ---------------------------------------------------------------------
66 Types and globals for the scheduler.
67 ------------------------------------------------------------------ */
68
69/* type ThreadId is defined in vg_include.h. */
70
71/* struct ThreadState is defined in vg_include.h. */
72
73/* Private globals. A statically allocated array of threads. */
74static ThreadState vg_threads[VG_N_THREADS];
75
76
77/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
78jmp_buf VG_(scheduler_jmpbuf);
79/* ... and if so, here's the signal which caused it to do so. */
80Int VG_(longjmpd_on_signal);
81
82
83/* Machinery to keep track of which threads are waiting on which
84 fds. */
85typedef
86 struct {
87 /* The thread which made the request. */
88 ThreadId tid;
89
90 /* The next two fields describe the request. */
91 /* File descriptor waited for. -1 means this slot is not in use */
92 Int fd;
93 /* The syscall number the fd is used in. */
94 Int syscall_no;
95
96 /* False => still waiting for select to tell us the fd is ready
97 to go. True => the fd is ready, but the results have not yet
98 been delivered back to the calling thread. Once the latter
99 happens, this entire record is marked as no longer in use, by
100 making the fd field be -1. */
101 Bool ready;
102 }
103 VgWaitedOnFd;
104
105static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
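/* Slot lifecycle, in outline: a slot is claimed by add_waiting_fd()
   when a read/write would block; poll_for_ready_fds() sets .ready once
   select() reports the fd usable; complete_blocked_syscalls() then
   re-issues the syscall, delivers the result, and frees the slot by
   setting .fd back to -1. */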
106
107
108
109typedef
110 struct {
111 /* Is this slot in use, or free? */
112 Bool in_use;
113 /* If in_use, is this mutex held by some thread, or not? */
114 Bool held;
115 /* if held==True, owner indicates who by. */
116 ThreadId owner;
117 }
118 VgMutex;
119
120static VgMutex vg_mutexes[VG_N_MUTEXES];
121
122/* Forwards */
123static void do_nontrivial_clientreq ( ThreadId tid );
124
125
126/* ---------------------------------------------------------------------
127 Helper functions for the scheduler.
128 ------------------------------------------------------------------ */
129
130static
131void pp_sched_status ( void )
132{
133 Int i;
134 VG_(printf)("\nsched status:\n");
135 for (i = 0; i < VG_N_THREADS; i++) {
136 if (vg_threads[i].status == VgTs_Empty) continue;
137 VG_(printf)("tid %d: ", i);
138 switch (vg_threads[i].status) {
139 case VgTs_Runnable: VG_(printf)("Runnable\n"); break;
140 case VgTs_WaitFD: VG_(printf)("WaitFD\n"); break;
141 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
142 vg_threads[i].joiner); break;
143 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
 144         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
 145         default: VG_(printf)("???"); break;
146 }
147 }
148 VG_(printf)("\n");
149}
150
151static
152void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
153{
154 Int i;
155
156 vg_assert(fd != -1); /* avoid total chaos */
157
158 for (i = 0; i < VG_N_WAITING_FDS; i++)
159 if (vg_waiting_fds[i].fd == -1)
160 break;
161
162 if (i == VG_N_WAITING_FDS)
163 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
164 /*
165 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
166 tid, fd, i);
167 */
168 vg_waiting_fds[i].fd = fd;
169 vg_waiting_fds[i].tid = tid;
170 vg_waiting_fds[i].ready = False;
171 vg_waiting_fds[i].syscall_no = syscall_no;
172}
173
174
175
176static
177void print_sched_event ( ThreadId tid, Char* what )
178{
 179   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
180}
181
182
183static
184void print_pthread_event ( ThreadId tid, Char* what )
185{
186 VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
 187}
188
189
190static
191Char* name_of_sched_event ( UInt event )
192{
193 switch (event) {
 194      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
195 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
196 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
197 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
198 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
199 default: return "??UNKNOWN??";
200 }
201}
202
203
204/* Create a translation of the client basic block beginning at
205 orig_addr, and add it to the translation cache & translation table.
206 This probably doesn't really belong here, but, hey ...
207*/
208void VG_(create_translation_for) ( Addr orig_addr )
209{
210 Addr trans_addr;
211 TTEntry tte;
212 Int orig_size, trans_size;
213 /* Ensure there is space to hold a translation. */
214 VG_(maybe_do_lru_pass)();
215 VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
216 /* Copy data at trans_addr into the translation cache.
217 Returned pointer is to the code, not to the 4-byte
218 header. */
219 /* Since the .orig_size and .trans_size fields are
220 UShort, be paranoid. */
221 vg_assert(orig_size > 0 && orig_size < 65536);
222 vg_assert(trans_size > 0 && trans_size < 65536);
223 tte.orig_size = orig_size;
224 tte.orig_addr = orig_addr;
225 tte.trans_size = trans_size;
226 tte.trans_addr = VG_(copy_to_transcache)
227 ( trans_addr, trans_size );
228 tte.mru_epoch = VG_(current_epoch);
229 /* Free the intermediary -- was allocated by VG_(emit_code). */
230 VG_(jitfree)( (void*)trans_addr );
231 /* Add to trans tab and set back pointer. */
232 VG_(add_to_trans_tab) ( &tte );
233 /* Update stats. */
234 VG_(this_epoch_in_count) ++;
235 VG_(this_epoch_in_osize) += orig_size;
236 VG_(this_epoch_in_tsize) += trans_size;
237 VG_(overall_in_count) ++;
238 VG_(overall_in_osize) += orig_size;
239 VG_(overall_in_tsize) += trans_size;
240 /* Record translated area for SMC detection. */
241 VG_(smc_mark_original) ( orig_addr, orig_size );
242}
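/* Within this file the only caller of the above is the
   VG_TRC_INNER_FASTMISS case in VG_(scheduler): when VG_(search_transtab)
   misses, the translation is made here and the lookup retried. */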
243
244
245/* Allocate a completely empty ThreadState record. */
246static
247ThreadId vg_alloc_ThreadState ( void )
248{
249 Int i;
250 for (i = 0; i < VG_N_THREADS; i++) {
251 if (vg_threads[i].status == VgTs_Empty)
252 return i;
253 }
254 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
255 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
256 VG_(panic)("VG_N_THREADS is too low");
257 /*NOTREACHED*/
258}
259
260
261ThreadState* VG_(get_thread_state) ( ThreadId tid )
262{
263 vg_assert(tid >= 0 && tid < VG_N_THREADS);
264 vg_assert(vg_threads[tid].status != VgTs_Empty);
265 return & vg_threads[tid];
266}
267
268
269/* Find an unused VgMutex record. */
270static
271MutexId vg_alloc_VgMutex ( void )
272{
273 Int i;
274 for (i = 0; i < VG_N_MUTEXES; i++) {
275 if (!vg_mutexes[i].in_use)
276 return i;
277 }
278 VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
279 VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
280 VG_(panic)("VG_N_MUTEXES is too low");
281 /*NOTREACHED*/
282}
283
284
285/* Copy the saved state of a thread into VG_(baseBlock), ready for it
286 to be run. */
287__inline__
288void VG_(load_thread_state) ( ThreadId tid )
289{
290 Int i;
291 VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
292 VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
293 VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
294 VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
295 VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
296 VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
297 VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
298 VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
299 VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
300 VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;
301
302 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
303 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];
304
305 VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
306 VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
307 VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
308 VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
309 VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
310 VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
311 VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
312 VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
313 VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
314}
315
316
317/* Copy the state of a thread from VG_(baseBlock), presumably after it
318 has been descheduled. For sanity-check purposes, fill the vacated
319 VG_(baseBlock) with garbage so as to make the system more likely to
320 fail quickly if we erroneously continue to poke around inside
321 VG_(baseBlock) without first doing a load_thread_state().
322*/
323__inline__
324void VG_(save_thread_state) ( ThreadId tid )
325{
326 Int i;
327 const UInt junk = 0xDEADBEEF;
328
329 vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
330 vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
331 vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
332 vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
333 vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
334 vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
335 vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
336 vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
337 vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
338 vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
339
340 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
341 vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
342
343 vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
344 vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
345 vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
346 vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
347 vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
348 vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
349 vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
350 vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
351 vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
352
353 /* Fill it up with junk. */
354 VG_(baseBlock)[VGOFF_(m_eax)] = junk;
355 VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
356 VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
357 VG_(baseBlock)[VGOFF_(m_edx)] = junk;
358 VG_(baseBlock)[VGOFF_(m_esi)] = junk;
359 VG_(baseBlock)[VGOFF_(m_edi)] = junk;
360 VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
361 VG_(baseBlock)[VGOFF_(m_esp)] = junk;
362 VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
363 VG_(baseBlock)[VGOFF_(m_eip)] = junk;
364
365 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
366 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
367}
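/* Besides bracketing run_thread_for_a_while(), this load/save pair also
   doubles as a copying mechanism: do_pthread_create() below loads the
   parent's state into VG_(baseBlock) and immediately saves it into the
   child's slot. */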
368
369
370/* Run the thread tid for a while, and return a VG_TRC_* value to the
371 scheduler indicating what happened. */
372static
373UInt run_thread_for_a_while ( ThreadId tid )
374{
375 UInt trc = 0;
376 vg_assert(tid >= 0 && tid < VG_N_THREADS);
377 vg_assert(vg_threads[tid].status != VgTs_Empty);
378 vg_assert(VG_(bbs_to_go) > 0);
379
380 VG_(load_thread_state) ( tid );
381 if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
382 /* try this ... */
383 trc = VG_(run_innerloop)();
384 /* We get here if the client didn't take a fault. */
385 } else {
386 /* We get here if the client took a fault, which caused our
387 signal handler to longjmp. */
388 vg_assert(trc == 0);
389 trc = VG_TRC_UNRESUMABLE_SIGNAL;
390 }
391 VG_(save_thread_state) ( tid );
392 return trc;
393}
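/* Fault-recovery protocol, roughly: vg_oursignalhandler() longjmp()s
   back to the __builtin_setjmp above when the client takes a signal we
   can't resume from, leaving the signal number in
   VG_(longjmpd_on_signal); the resulting VG_TRC_UNRESUMABLE_SIGNAL sends
   us back to Phase 1 of the scheduler, where the signal is delivered. */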
394
395
396/* Increment the LRU epoch counter. */
397static
398void increment_epoch ( void )
399{
400 VG_(current_epoch)++;
401 if (VG_(clo_verbosity) > 2) {
402 UInt tt_used, tc_used;
403 VG_(get_tt_tc_used) ( &tt_used, &tc_used );
404 VG_(message)(Vg_UserMsg,
405 "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
406 VG_(bbs_done),
407 VG_(this_epoch_in_count),
408 VG_(this_epoch_in_osize),
409 VG_(this_epoch_in_tsize),
410 VG_(this_epoch_out_count),
411 VG_(this_epoch_out_osize),
412 VG_(this_epoch_out_tsize),
413 tt_used, tc_used
414 );
415 }
416 VG_(this_epoch_in_count) = 0;
417 VG_(this_epoch_in_osize) = 0;
418 VG_(this_epoch_in_tsize) = 0;
419 VG_(this_epoch_out_count) = 0;
420 VG_(this_epoch_out_osize) = 0;
421 VG_(this_epoch_out_tsize) = 0;
422}
423
424
425/* Initialise the scheduler. Create a single "main" thread ready to
426 run, with special ThreadId of zero. This is called at startup; the
 427   caller takes care to park the client's state in
428 VG_(baseBlock).
429*/
430void VG_(scheduler_init) ( void )
431{
432 Int i;
433 Addr startup_esp;
434 ThreadId tid_main;
435
436 startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
437 if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
438 VG_(printf)("%esp at startup = %p is not near %p; aborting\n",
439 startup_esp, VG_STARTUP_STACK_MASK);
440 VG_(panic)("unexpected %esp at startup");
441 }
442
443 for (i = 0; i < VG_N_THREADS; i++) {
444 vg_threads[i].stack_size = 0;
445 vg_threads[i].stack_base = (Addr)NULL;
446 }
447
448 for (i = 0; i < VG_N_WAITING_FDS; i++)
449 vg_waiting_fds[i].fd = -1; /* not in use */
450
451 for (i = 0; i < VG_N_MUTEXES; i++)
452 vg_mutexes[i].in_use = False;
453
454 /* Assert this is thread zero, which has certain magic
455 properties. */
456 tid_main = vg_alloc_ThreadState();
457 vg_assert(tid_main == 0);
458
459 vg_threads[tid_main].status = VgTs_Runnable;
460 vg_threads[tid_main].joiner = VG_INVALID_THREADID;
461 vg_threads[tid_main].retval = NULL; /* not important */
462
463 /* Copy VG_(baseBlock) state to tid_main's slot. */
464 VG_(save_thread_state) ( tid_main );
465}
466
467
468/* What if fd isn't a valid fd? */
469static
470void set_fd_nonblocking ( Int fd )
471{
472 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
473 vg_assert(!VG_(is_kerror)(res));
474 res |= VKI_O_NONBLOCK;
475 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
476 vg_assert(!VG_(is_kerror)(res));
477}
478
479static
480void set_fd_blocking ( Int fd )
481{
482 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
483 vg_assert(!VG_(is_kerror)(res));
484 res &= ~VKI_O_NONBLOCK;
485 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
486 vg_assert(!VG_(is_kerror)(res));
487}
488
489static
490Bool fd_is_blockful ( Int fd )
491{
492 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
493 vg_assert(!VG_(is_kerror)(res));
494 return (res & VKI_O_NONBLOCK) ? False : True;
495}
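/* The three helpers above just query/flip VKI_O_NONBLOCK with
   VG_(fcntl)(F_GETFL / F_SETFL).  sched_do_syscall() below uses them
   along these lines (a sketch of the sequence it performs):

      Bool orig_fd_blockness = fd_is_blockful(fd);
      set_fd_nonblocking(fd);
      ...lodge the request with the kernel...
      if (orig_fd_blockness) set_fd_blocking(fd);
      else set_fd_nonblocking(fd);
*/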
496
497
498
499/* Do a purely thread-local request for tid, and put the result in its
500 %EDX, without changing its scheduling state in any way, nor that of
 501   any other threads; if we handled it here, return True.
502
503 If the request is non-trivial, return False; a more capable but
504 slower mechanism will deal with it.
505*/
506static
507Bool maybe_do_trivial_clientreq ( ThreadId tid )
508{
509# define SIMPLE_RETURN(vvv) \
510 { vg_threads[tid].m_edx = (vvv); \
511 return True; \
512 }
513
514 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
515 UInt req_no = arg[0];
516 switch (req_no) {
517 case VG_USERREQ__MALLOC:
518 SIMPLE_RETURN(
519 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc )
520 );
521 case VG_USERREQ__BUILTIN_NEW:
522 SIMPLE_RETURN(
523 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew )
524 );
525 case VG_USERREQ__BUILTIN_VEC_NEW:
526 SIMPLE_RETURN(
527 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec )
528 );
529 case VG_USERREQ__FREE:
530 VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc );
531 SIMPLE_RETURN(0); /* irrelevant */
532 case VG_USERREQ__BUILTIN_DELETE:
533 VG_(client_free) ( (void*)arg[1], Vg_AllocNew );
534 SIMPLE_RETURN(0); /* irrelevant */
535 case VG_USERREQ__BUILTIN_VEC_DELETE:
536 VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec );
537 SIMPLE_RETURN(0); /* irrelevant */
538 case VG_USERREQ__CALLOC:
539 SIMPLE_RETURN(
540 (UInt)VG_(client_calloc) ( arg[1], arg[2] )
541 );
542 case VG_USERREQ__REALLOC:
543 SIMPLE_RETURN(
544 (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] )
545 );
546 case VG_USERREQ__MEMALIGN:
547 SIMPLE_RETURN(
548 (UInt)VG_(client_memalign) ( arg[1], arg[2] )
549 );
550 default:
551 /* Too hard; wimp out. */
552 return False;
553 }
554# undef SIMPLE_RETURN
555}
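/* Shape of a client request, as assumed throughout this file: the
   client leaves %EAX pointing at a small array of words whose first
   element is the request code and whose remaining elements are the
   arguments, and it gets the result back in %EDX.  Illustrative only
   (the real client-side stubs live elsewhere):

      UInt arg[4];
      arg[0] = VG_USERREQ__MALLOC;   -- request code
      arg[1] = 100;                  -- number of bytes wanted
      then %EAX <- (UInt)&arg[0], trap to Valgrind, result in %EDX.
*/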
556
557
558static
559void sched_do_syscall ( ThreadId tid )
560{
561 UInt saved_eax;
562 UInt res, syscall_no;
563 UInt fd;
564 Bool might_block, assumed_nonblocking;
565 Bool orig_fd_blockness;
566 Char msg_buf[100];
567
568 vg_assert(tid >= 0 && tid < VG_N_THREADS);
569 vg_assert(vg_threads[tid].status == VgTs_Runnable);
570
571 syscall_no = vg_threads[tid].m_eax; /* syscall number */
572
573 if (syscall_no == __NR_nanosleep) {
574 ULong t_now, t_awaken;
575 struct vki_timespec* req;
576 req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
577 t_now = VG_(read_microsecond_timer)();
578 t_awaken
579 = t_now
580 + (ULong)1000000ULL * (ULong)(req->tv_sec)
581 + (ULong)( (UInt)(req->tv_nsec) / 1000 );
582 vg_threads[tid].status = VgTs_Sleeping;
583 vg_threads[tid].awaken_at = t_awaken;
 584      if (VG_(clo_trace_sched)) {
 585         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
586 t_now, t_awaken-t_now);
587 print_sched_event(tid, msg_buf);
588 }
589 /* Force the scheduler to run something else for a while. */
590 return;
591 }
592
593 switch (syscall_no) {
594 case __NR_read:
595 case __NR_write:
596 assumed_nonblocking
597 = False;
598 might_block
599 = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
600 break;
601 default:
602 might_block = False;
603 assumed_nonblocking = True;
604 }
605
606 if (assumed_nonblocking) {
607 /* We think it's non-blocking. Just do it in the normal way. */
608 VG_(perform_assumed_nonblocking_syscall)(tid);
609 /* The thread is still runnable. */
610 return;
611 }
612
613 /* It might block. Take evasive action. */
614 switch (syscall_no) {
615 case __NR_read:
616 case __NR_write:
617 fd = vg_threads[tid].m_ebx; break;
618 default:
619 vg_assert(3+3 == 7);
620 }
621
622 /* Set the fd to nonblocking, and do the syscall, which will return
623 immediately, in order to lodge a request with the Linux kernel.
624 We later poll for I/O completion using select(). */
625
626 orig_fd_blockness = fd_is_blockful(fd);
627 set_fd_nonblocking(fd);
628 vg_assert(!fd_is_blockful(fd));
629 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
630
631 /* This trashes the thread's %eax; we have to preserve it. */
632 saved_eax = vg_threads[tid].m_eax;
633 KERNEL_DO_SYSCALL(tid,res);
634
635 /* Restore original blockfulness of the fd. */
636 if (orig_fd_blockness)
637 set_fd_blocking(fd);
638 else
639 set_fd_nonblocking(fd);
640
641 if (res != -VKI_EWOULDBLOCK) {
642 /* It didn't block; it went through immediately. So finish off
643 in the normal way. Don't restore %EAX, since that now
644 (correctly) holds the result of the call. */
645 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
646 /* We're still runnable. */
647 vg_assert(vg_threads[tid].status == VgTs_Runnable);
648
649 } else {
650
651 /* It would have blocked. First, restore %EAX to what it was
652 before our speculative call. */
653 vg_threads[tid].m_eax = saved_eax;
654 /* Put this fd in a table of fds on which we are waiting for
655 completion. The arguments for select() later are constructed
656 from this table. */
657 add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
658 /* Deschedule thread until an I/O completion happens. */
659 vg_threads[tid].status = VgTs_WaitFD;
 660      if (VG_(clo_trace_sched)) {
 661         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
662 print_sched_event(tid, msg_buf);
663 }
664
665 }
666}
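/* Worked example of the above for a read() on a blocking fd:
   1. fd_is_blockful() is True, so the fd is forced non-blocking and the
      read is issued immediately via KERNEL_DO_SYSCALL.
   2. If that returns -VKI_EWOULDBLOCK, %EAX is restored to the syscall
      number, (tid, fd, __NR_read) is recorded in vg_waiting_fds[], and
      the thread is parked in VgTs_WaitFD.
   3. Later, poll_for_ready_fds() notices the fd is ready and
      complete_blocked_syscalls() re-issues the read and marks the
      thread Runnable again. */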
667
668
669/* Find out which of the fds in vg_waiting_fds are now ready to go, by
670 making enquiries with select(), and mark them as ready. We have to
 671   wait for the requesting threads to fall into the WaitFD state
672 before we can actually finally deliver the results, so this
673 procedure doesn't do that; complete_blocked_syscalls() does it.
674
675 It might seem odd that a thread which has done a blocking syscall
676 is not in WaitFD state; the way this can happen is if it initially
677 becomes WaitFD, but then a signal is delivered to it, so it becomes
678 Runnable for a while. In this case we have to wait for the
679 sighandler to return, whereupon the WaitFD state is resumed, and
680 only at that point can the I/O result be delivered to it. However,
681 this point may be long after the fd is actually ready.
682
683 So, poll_for_ready_fds() merely detects fds which are ready.
684 complete_blocked_syscalls() does the second half of the trick,
685 possibly much later: it delivers the results from ready fds to
686 threads in WaitFD state.
687*/
688void poll_for_ready_fds ( void )
689{
690 vki_ksigset_t saved_procmask;
691 vki_fd_set readfds;
692 vki_fd_set writefds;
693 vki_fd_set exceptfds;
694 struct vki_timeval timeout;
695 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
696 ThreadId tid;
697 Bool rd_ok, wr_ok, ex_ok;
698 Char msg_buf[100];
699
 700   struct vki_timespec* rem;
701 ULong t_now;
702
 703   /* Awaken any sleeping threads whose sleep has expired. */
 704   t_now = VG_(read_microsecond_timer)();
 705   for (tid = 0; tid < VG_N_THREADS; tid++) {
706 if (vg_threads[tid].status != VgTs_Sleeping)
707 continue;
708 if (t_now >= vg_threads[tid].awaken_at) {
709 /* Resume this thread. Set to zero the remaining-time (second)
710 arg of nanosleep, since it's used up all its time. */
711 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
712 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
713 if (rem != NULL) {
714 rem->tv_sec = 0;
715 rem->tv_nsec = 0;
716 }
717 /* Make the syscall return 0 (success). */
718 vg_threads[tid].m_eax = 0;
719 /* Reschedule this thread. */
720 vg_threads[tid].status = VgTs_Runnable;
 721         if (VG_(clo_trace_sched)) {
 722            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
723 t_now);
724 print_sched_event(tid, msg_buf);
725 }
726 }
727 }
 728
 729   /* And look for threads waiting on file descriptors which are now
730 ready for I/O.*/
 731   timeout.tv_sec = 0;
732 timeout.tv_usec = 0;
733
734 VKI_FD_ZERO(&readfds);
735 VKI_FD_ZERO(&writefds);
736 VKI_FD_ZERO(&exceptfds);
737 fd_max = -1;
738 for (i = 0; i < VG_N_WAITING_FDS; i++) {
739 if (vg_waiting_fds[i].fd == -1 /* not in use */)
740 continue;
741 if (vg_waiting_fds[i].ready /* already ready? */)
742 continue;
743 fd = vg_waiting_fds[i].fd;
744 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
 745      vg_assert(fd >= 0);
 746      if (fd > fd_max)
747 fd_max = fd;
748 tid = vg_waiting_fds[i].tid;
749 vg_assert(tid >= 0 && tid < VG_N_THREADS);
750 syscall_no = vg_waiting_fds[i].syscall_no;
751 switch (syscall_no) {
752 case __NR_read:
753 VKI_FD_SET(fd, &readfds); break;
754 case __NR_write:
755 VKI_FD_SET(fd, &writefds); break;
756 default:
757 VG_(panic)("poll_for_ready_fds: unexpected syscall");
758 /*NOTREACHED*/
759 break;
760 }
761 }
762
 763   /* Short cut: if no fds are waiting, give up now. */
764 if (fd_max == -1)
765 return;
766
 767   /* BLOCK ALL SIGNALS. We don't want the complication of select()
768 getting interrupted. */
769 VG_(block_all_host_signals)( &saved_procmask );
770
771 n_ready = VG_(select)
772 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
773 if (VG_(is_kerror)(n_ready)) {
774 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
775 VG_(panic)("poll_for_ready_fds: select failed?!");
776 /*NOTREACHED*/
777 }
778
779 /* UNBLOCK ALL SIGNALS */
780 VG_(restore_host_signals)( &saved_procmask );
781
782 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
783
784 if (n_ready == 0)
785 return;
786
787 /* Inspect all the fds we know about, and handle any completions that
788 have happened. */
789 /*
790 VG_(printf)("\n\n");
791 for (fd = 0; fd < 100; fd++)
792 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
793 VG_(printf)("X"); } else { VG_(printf)("."); };
794 VG_(printf)("\n\nfd_max = %d\n", fd_max);
795 */
796
797 for (fd = 0; fd <= fd_max; fd++) {
798 rd_ok = VKI_FD_ISSET(fd, &readfds);
799 wr_ok = VKI_FD_ISSET(fd, &writefds);
800 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
801
802 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
803 if (n_ok == 0)
804 continue;
805 if (n_ok > 1) {
806 VG_(printf)("offending fd = %d\n", fd);
807 VG_(panic)("poll_for_ready_fds: multiple events on fd");
808 }
809
810 /* An I/O event completed for fd. Find the thread which
811 requested this. */
812 for (i = 0; i < VG_N_WAITING_FDS; i++) {
813 if (vg_waiting_fds[i].fd == -1 /* not in use */)
814 continue;
815 if (vg_waiting_fds[i].fd == fd)
816 break;
817 }
818
819 /* And a bit more paranoia ... */
820 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
821
822 /* Mark the fd as ready. */
823 vg_assert(! vg_waiting_fds[i].ready);
824 vg_waiting_fds[i].ready = True;
825 }
826}
827
828
 829/* See comment attached to poll_for_ready_fds() for explanation. */
830void complete_blocked_syscalls ( void )
831{
832 Int fd, i, res, syscall_no;
833 ThreadId tid;
834 Char msg_buf[100];
835
836 /* Inspect all the outstanding fds we know about. */
837
838 for (i = 0; i < VG_N_WAITING_FDS; i++) {
839 if (vg_waiting_fds[i].fd == -1 /* not in use */)
840 continue;
841 if (! vg_waiting_fds[i].ready)
842 continue;
843
844 fd = vg_waiting_fds[i].fd;
845 tid = vg_waiting_fds[i].tid;
846 vg_assert(tid >= 0 && tid < VG_N_THREADS);
847
848 /* The thread actually has to be waiting for the I/O event it
849 requested before we can deliver the result! */
850 if (vg_threads[tid].status != VgTs_WaitFD)
851 continue;
852
853 /* Ok, actually do it! We can safely use %EAX as the syscall
854 number, because the speculative call made by
855 sched_do_syscall() doesn't change %EAX in the case where the
856 call would have blocked. */
857
858 syscall_no = vg_waiting_fds[i].syscall_no;
859 vg_assert(syscall_no == vg_threads[tid].m_eax);
860 KERNEL_DO_SYSCALL(tid,res);
861 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
862
863 /* Reschedule. */
864 vg_threads[tid].status = VgTs_Runnable;
865 /* Mark slot as no longer in use. */
866 vg_waiting_fds[i].fd = -1;
867 /* pp_sched_status(); */
 868      if (VG_(clo_trace_sched)) {
 869         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
870 print_sched_event(tid, msg_buf);
871 }
872 }
873}
874
875
876static
877void nanosleep_for_a_while ( void )
878{
879 Int res;
880 struct vki_timespec req;
881 struct vki_timespec rem;
882 req.tv_sec = 0;
883 req.tv_nsec = 20 * 1000 * 1000;
884 res = VG_(nanosleep)( &req, &rem );
885 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
886 vg_assert(res == 0);
887}
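/* 20 milliseconds per poll; this is the idle wait the scheduler falls
   back on when nothing is Runnable but at least one thread is blocked
   in fd-wait or sleep. */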
888
889
890/* ---------------------------------------------------------------------
891 The scheduler proper.
892 ------------------------------------------------------------------ */
893
894/* Run user-space threads until either
895 * Deadlock occurs
896 * One thread asks to shutdown Valgrind
897 * The specified number of basic blocks has gone by.
898*/
899VgSchedReturnCode VG_(scheduler) ( void )
900{
901 ThreadId tid, tid_next;
902 UInt trc;
903 UInt dispatch_ctr_SAVED;
 904   Int request_code, done_this_time, n_in_fdwait_or_sleep;
 905   Char msg_buf[100];
906 Addr trans_addr;
907
908 /* For the LRU structures, records when the epoch began. */
909 ULong lru_epoch_started_at = 0;
910
911 /* Start with the root thread. tid in general indicates the
912 currently runnable/just-finished-running thread. */
913 tid = 0;
914
915 /* This is the top level scheduler loop. It falls into three
916 phases. */
917 while (True) {
918
919 /* ======================= Phase 1 of 3 =======================
920 Handle I/O completions and signals. This may change the
921 status of various threads. Then select a new thread to run,
922 or declare deadlock, or sleep if there are no runnable
923 threads but some are blocked on I/O. */
924
925 /* Age the LRU structures if an epoch has been completed. */
926 if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
927 lru_epoch_started_at = VG_(bbs_done);
928 increment_epoch();
929 }
930
931 /* Was a debug-stop requested? */
932 if (VG_(bbs_to_go) == 0)
933 goto debug_stop;
934
935 /* Do the following loop until a runnable thread is found, or
936 deadlock is detected. */
937 while (True) {
938
939 /* For stats purposes only. */
940 VG_(num_scheduling_events_MAJOR) ++;
941
942 /* See if any I/O operations which we were waiting for have
943 completed, and, if so, make runnable the relevant waiting
944 threads. */
945 poll_for_ready_fds();
946 complete_blocked_syscalls();
947
948 /* See if there are any signals which need to be delivered. If
949 so, choose thread(s) to deliver them to, and build signal
950 delivery frames on those thread(s) stacks. */
951 VG_(deliver_signals)( 0 /*HACK*/ );
952 VG_(do_sanity_checks)(0 /*HACK*/, False);
953
954 /* Try and find a thread (tid) to run. */
955 tid_next = tid;
 956         n_in_fdwait_or_sleep = 0;
 957         while (True) {
958 tid_next++;
959 if (tid_next >= VG_N_THREADS) tid_next = 0;
 960            if (vg_threads[tid_next].status == VgTs_WaitFD
961 || vg_threads[tid_next].status == VgTs_Sleeping)
962 n_in_fdwait_or_sleep ++;
 963            if (vg_threads[tid_next].status == VgTs_Runnable)
964 break; /* We can run this one. */
965 if (tid_next == tid)
966 break; /* been all the way round */
967 }
968 tid = tid_next;
969
970 if (vg_threads[tid].status == VgTs_Runnable) {
971 /* Found a suitable candidate. Fall out of this loop, so
972 we can advance to stage 2 of the scheduler: actually
973 running the thread. */
974 break;
975 }
976
977 /* We didn't find a runnable thread. Now what? */
 978         if (n_in_fdwait_or_sleep == 0) {
979 /* No runnable threads and no prospect of any appearing
980 even if we wait for an arbitrary length of time. In
981 short, we have a deadlock. */
 982            pp_sched_status();
983 return VgSrc_Deadlock;
984 }
985
986 /* At least one thread is in a fd-wait state. Delay for a
987 while, and go round again, in the hope that eventually a
988 thread becomes runnable. */
989 nanosleep_for_a_while();
990 // pp_sched_status();
991 // VG_(printf)(".\n");
992 }
993
994
995 /* ======================= Phase 2 of 3 =======================
996 Wahey! We've finally decided that thread tid is runnable, so
997 we now do that. Run it for as much of a quanta as possible.
998 Trivial requests are handled and the thread continues. The
999 aim is not to do too many of Phase 1 since it is expensive. */
1000
1001 if (0)
1002 VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);
1003
1004 /* Figure out how many bbs to ask vg_run_innerloop to do. Note
1005 that it decrements the counter before testing it for zero, so
1006 that if VG_(dispatch_ctr) is set to N you get at most N-1
1007 iterations. Also this means that VG_(dispatch_ctr) must
1008 exceed zero before entering the innerloop. Also also, the
1009 decrement is done before the bb is actually run, so you
1010 always get at least one decrement even if nothing happens.
1011 */
1012 if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
1013 VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
1014 else
1015 VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
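      /* Example: with at least VG_SCHEDULING_QUANTUM bbs still to go,
         VG_(dispatch_ctr) becomes VG_SCHEDULING_QUANTUM + 1, which by
         the rule above allows at most VG_SCHEDULING_QUANTUM basic
         blocks before the dispatcher returns VG_TRC_INNER_COUNTERZERO. */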
1016
1017 /* ... and remember what we asked for. */
1018 dispatch_ctr_SAVED = VG_(dispatch_ctr);
1019
1020 /* Actually run thread tid. */
1021 while (True) {
1022
1023 /* For stats purposes only. */
1024 VG_(num_scheduling_events_MINOR) ++;
1025
1026 if (0)
1027 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
1028 tid, VG_(dispatch_ctr) - 1 );
1029
1030 trc = run_thread_for_a_while ( tid );
1031
1032 /* Deal quickly with trivial scheduling events, and resume the
1033 thread. */
1034
1035 if (trc == VG_TRC_INNER_FASTMISS) {
1036 vg_assert(VG_(dispatch_ctr) > 0);
1037
1038 /* Trivial event. Miss in the fast-cache. Do a full
1039 lookup for it. */
1040 trans_addr
1041 = VG_(search_transtab) ( vg_threads[tid].m_eip );
1042 if (trans_addr == (Addr)0) {
1043 /* Not found; we need to request a translation. */
1044 VG_(create_translation_for)( vg_threads[tid].m_eip );
1045 trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
1046 if (trans_addr == (Addr)0)
1047 VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
1048 }
1049 continue; /* with this thread */
1050 }
1051
1052 if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
1053 Bool is_triv = maybe_do_trivial_clientreq(tid);
1054 if (is_triv) {
1055 /* NOTE: a trivial request is something like a call to
1056 malloc() or free(). It DOES NOT change the
1057 Runnability of this thread nor the status of any
1058 other thread; it is purely thread-local. */
1059 continue; /* with this thread */
1060 }
1061 }
1062
1063 /* It's a non-trivial event. Give up running this thread and
1064 handle things the expensive way. */
1065 break;
1066 }
1067
1068 /* ======================= Phase 3 of 3 =======================
1069 Handle non-trivial thread requests, mostly pthread stuff. */
1070
1071 /* Ok, we've fallen out of the dispatcher for a
1072 non-completely-trivial reason. First, update basic-block
1073 counters. */
1074
1075 done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
1076 vg_assert(done_this_time >= 0);
1077 VG_(bbs_to_go) -= (ULong)done_this_time;
1078 VG_(bbs_done) += (ULong)done_this_time;
1079
1080 if (0 && trc != VG_TRC_INNER_FASTMISS)
1081 VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
1082 tid, done_this_time, (Int)trc );
1083
1084 if (0 && trc != VG_TRC_INNER_FASTMISS)
1085 VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
1086 tid, VG_(bbs_done),
1087 name_of_sched_event(trc) );
1088
1089 /* Examine the thread's return code to figure out why it
1090 stopped, and handle requests. */
1091
1092 switch (trc) {
1093
1094 case VG_TRC_INNER_FASTMISS:
1095 VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
1096 /*NOTREACHED*/
1097 break;
1098
1099 case VG_TRC_INNER_COUNTERZERO:
1100 /* Timeslice is out. Let a new thread be scheduled,
1101 simply by doing nothing, causing us to arrive back at
1102 Phase 1. */
1103 if (VG_(bbs_to_go) == 0) {
1104 goto debug_stop;
1105 }
1106 vg_assert(VG_(dispatch_ctr) == 0);
1107 break;
1108
1109 case VG_TRC_UNRESUMABLE_SIGNAL:
1110 /* It got a SIGSEGV/SIGBUS, which we need to deliver right
1111 away. Again, do nothing, so we wind up back at Phase
1112 1, whereupon the signal will be "delivered". */
1113 break;
1114
1115      case VG_TRC_EBP_JMP_SYSCALL:
1116 /* Do a syscall for the vthread tid. This could cause it
1117 to become non-runnable. */
1118 sched_do_syscall(tid);
1119 break;
1120
1121 case VG_TRC_EBP_JMP_CLIENTREQ:
1122 /* Do a client request for the vthread tid. Note that
1123 some requests will have been handled by
1124 maybe_do_trivial_clientreq(), so we don't expect to see
1125 those here.
1126 */
1127         /* The thread's %EAX points at an arg block, the first
1128 word of which is the request code. */
1129 request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
1130         if (0) {
1131            VG_(sprintf)(msg_buf, "request 0x%x", request_code );
1132            print_sched_event(tid, msg_buf);
1133 }
1134 /* Do a non-trivial client request for thread tid. tid's
1135 %EAX points to a short vector of argument words, the
1136 first of which is the request code. The result of the
1137 request is put in tid's %EDX. Alternatively, perhaps
1138 the request causes tid to become non-runnable and/or
1139 other blocked threads become runnable. In general we
1140 can and often do mess with the state of arbitrary
1141 threads at this point. */
1142         if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
1143 return VgSrc_Shutdown;
1144 } else {
1145 do_nontrivial_clientreq(tid);
1146 }
1147         break;
1148
1149 default:
1150 VG_(printf)("\ntrc = %d\n", trc);
1151 VG_(panic)("VG_(scheduler), phase 3: "
1152 "unexpected thread return code");
1153 /* NOTREACHED */
1154 break;
1155
1156 } /* switch (trc) */
1157
1158 /* That completes Phase 3 of 3. Return now to the top of the
1159 main scheduler loop, to Phase 1 of 3. */
1160
1161 } /* top-level scheduler loop */
1162
1163
1164 /* NOTREACHED */
1165 VG_(panic)("scheduler: post-main-loop ?!");
1166 /* NOTREACHED */
1167
1168 debug_stop:
1169 /* If we exited because of a debug stop, print the translation
1170 of the last block executed -- by translating it again, and
1171 throwing away the result. */
1172 VG_(printf)(
1173 "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
1174 VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
1175 VG_(printf)("\n");
1176 VG_(printf)(
1177 "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
1178
1179 return VgSrc_BbsDone;
1180}
1181
1182
1183/* ---------------------------------------------------------------------
1184 The pthread implementation.
1185 ------------------------------------------------------------------ */
1186
1187#include <pthread.h>
1188#include <errno.h>
1189
1190#if !defined(PTHREAD_STACK_MIN)
1191# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1192#endif
1193
1194/* /usr/include/bits/pthreadtypes.h:
1195 typedef unsigned long int pthread_t;
1196*/
1197
1198
1199static
1200void do_pthread_cancel ( ThreadId tid_canceller,
1201 pthread_t tid_cancellee )
1202{
1203 Char msg_buf[100];
1204   /* We want to make it appear that this thread has returned to
1205      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
1206      return value.  So: simple: put PTHREAD_CANCELED into %EAX
1207      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
1208   if (VG_(clo_trace_sched)) {
1209      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1210 print_sched_event(tid_cancellee, msg_buf);
1211 }
1212 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
1213   vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1214   vg_threads[tid_cancellee].status = VgTs_Runnable;
1215}
1216
1217
1218
1219/* Thread tid is exiting, by returning from the function it was
1220   created with. Or possibly due to pthread_exit or cancellation.
1221 The main complication here is to resume any thread waiting to join
1222 with this one. */
1223static
1224void handle_pthread_return ( ThreadId tid, void* retval )
1225{
1226 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1227 UInt* jnr_args;
1228 void** jnr_thread_return;
1229 Char msg_buf[100];
1230
1231 /* Mark it as not in use. Leave the stack in place so the next
1232 user of this slot doesn't reallocate it. */
1233 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1234 vg_assert(vg_threads[tid].status != VgTs_Empty);
1235
1236   vg_threads[tid].retval = retval;
1237
1238 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1239 /* No one has yet done a join on me */
1240 vg_threads[tid].status = VgTs_WaitJoiner;
1241      if (VG_(clo_trace_sched)) {
1242         VG_(sprintf)(msg_buf,
1243 "root fn returns, waiting for a call pthread_join(%d)",
1244 tid);
1245 print_sched_event(tid, msg_buf);
1246 }
1247 } else {
1248      /* Someone is waiting; make their join call return with success,
1249 putting my exit code in the place specified by the caller's
1250 thread_return param. This is all very horrible, since we
1251 need to consult the joiner's arg block -- pointed to by its
1252 %EAX -- in order to extract the 2nd param of its pthread_join
1253 call. TODO: free properly the slot (also below).
1254 */
1255 jnr = vg_threads[tid].joiner;
1256 vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
1257 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1258 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1259 jnr_thread_return = (void**)(jnr_args[2]);
1260 if (jnr_thread_return != NULL)
1261 *jnr_thread_return = vg_threads[tid].retval;
1262 vg_threads[jnr].m_edx = 0; /* success */
1263 vg_threads[jnr].status = VgTs_Runnable;
1264 vg_threads[tid].status = VgTs_Empty; /* bye! */
1265      if (VG_(clo_instrument) && tid != 0)
1266 VGM_(make_noaccess)( vg_threads[tid].stack_base,
1267 vg_threads[tid].stack_size );
1268      if (VG_(clo_trace_sched)) {
1269         VG_(sprintf)(msg_buf,
1270 "root fn returns, to find a waiting pthread_join(%d)", tid);
1271 print_sched_event(tid, msg_buf);
1272 VG_(sprintf)(msg_buf,
1273 "my pthread_join(%d) returned; resuming", tid);
1274 print_sched_event(jnr, msg_buf);
1275 }
1276 }
1277
1278 /* Return value is irrelevant; this thread will not get
1279 rescheduled. */
1280}
1281
1282
1283static
1284void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
1285{
1286 Char msg_buf[100];
1287
1288 /* jee, the joinee, is the thread specified as an arg in thread
1289 tid's call to pthread_join. So tid is the join-er. */
1290 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1291 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1292
1293 if (jee == tid) {
1294 vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
1295 vg_threads[tid].status = VgTs_Runnable;
1296 return;
1297 }
1298
1299 if (jee < 0
1300 || jee >= VG_N_THREADS
1301 || vg_threads[jee].status == VgTs_Empty) {
1302 /* Invalid thread to join to. */
1303 vg_threads[tid].m_edx = EINVAL;
1304 vg_threads[tid].status = VgTs_Runnable;
1305 return;
1306 }
1307
1308 if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
1309 /* Someone already did join on this thread */
1310 vg_threads[tid].m_edx = EINVAL;
1311 vg_threads[tid].status = VgTs_Runnable;
1312 return;
1313 }
1314
1315 /* if (vg_threads[jee].detached) ... */
1316
1317 /* Perhaps the joinee has already finished? If so return
1318 immediately with its return code, and free up the slot. TODO:
1319 free it properly (also above). */
1320 if (vg_threads[jee].status == VgTs_WaitJoiner) {
1321 vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
1322 vg_threads[tid].m_edx = 0; /* success */
1323 if (thread_return != NULL)
1324 *thread_return = vg_threads[jee].retval;
1325 vg_threads[tid].status = VgTs_Runnable;
1326 vg_threads[jee].status = VgTs_Empty; /* bye! */
1327      if (VG_(clo_instrument) && jee != 0)
1328 VGM_(make_noaccess)( vg_threads[jee].stack_base,
1329 vg_threads[jee].stack_size );
1330      if (VG_(clo_trace_sched)) {
1331         VG_(sprintf)(msg_buf,
1332 "someone called pthread_join() on me; bye!");
1333 print_sched_event(jee, msg_buf);
1334 VG_(sprintf)(msg_buf,
1335 "my pthread_join(%d) returned immediately",
1336 jee );
1337 print_sched_event(tid, msg_buf);
1338 }
1339 return;
1340 }
1341
1342 /* Ok, so we'll have to wait on jee. */
1343 vg_threads[jee].joiner = tid;
1344 vg_threads[tid].status = VgTs_WaitJoinee;
1345   if (VG_(clo_trace_sched)) {
1346      VG_(sprintf)(msg_buf,
1347 "blocking on call of pthread_join(%d)", jee );
1348 print_sched_event(tid, msg_buf);
1349 }
1350 /* So tid's join call does not return just now. */
1351}
1352
1353
1354static
1355void do_pthread_create ( ThreadId parent_tid,
1356 pthread_t* thread,
1357 pthread_attr_t* attr,
1358 void* (*start_routine)(void *),
1359 void* arg )
1360{
1361 Addr new_stack;
1362 UInt new_stk_szb;
1363 ThreadId tid;
1364 Char msg_buf[100];
1365
1366 /* Paranoia ... */
1367 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1368
1369 vg_assert(vg_threads[parent_tid].status != VgTs_Empty);
1370
1371 tid = vg_alloc_ThreadState();
1372
1373 /* If we've created the main thread's tid, we're in deep trouble :) */
1374 vg_assert(tid != 0);
1375
1376 /* Copy the parent's CPU state into the child's, in a roundabout
1377 way (via baseBlock). */
1378 VG_(load_thread_state)(parent_tid);
1379 VG_(save_thread_state)(tid);
1380
1381 /* Consider allocating the child a stack, if the one it already has
1382 is inadequate. */
1383 new_stk_szb = PTHREAD_STACK_MIN;
1384
1385 if (new_stk_szb > vg_threads[tid].stack_size) {
1386 /* Again, for good measure :) We definitely don't want to be
1387 allocating a stack for the main thread. */
1388 vg_assert(tid != 0);
1389 /* for now, we don't handle the case of anything other than
1390 assigning it for the first time. */
1391 vg_assert(vg_threads[tid].stack_size == 0);
1392 vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
1393 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
1394 vg_threads[tid].stack_base = new_stack;
1395 vg_threads[tid].stack_size = new_stk_szb;
1396 vg_threads[tid].m_esp
1397 = new_stack + new_stk_szb
1398 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1399 }
1400 if (VG_(clo_instrument))
1401 VGM_(make_noaccess)( vg_threads[tid].m_esp,
1402 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1403
1404 /* push arg */
1405 vg_threads[tid].m_esp -= 4;
1406 * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;
1407
1408 /* push (magical) return address */
1409 vg_threads[tid].m_esp -= 4;
1410   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
1411
1412 if (VG_(clo_instrument))
1413 VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );
1414
1415 /* this is where we start */
1416 vg_threads[tid].m_eip = (UInt)start_routine;
1417
1418   if (VG_(clo_trace_sched)) {
1419      VG_(sprintf)(msg_buf,
1420 "new thread, created by %d", parent_tid );
1421 print_sched_event(tid, msg_buf);
1422 }
1423
1424 /* store the thread id in *thread. */
1425 // if (VG_(clo_instrument))
1426 // ***** CHECK *thread is writable
1427 *thread = (pthread_t)tid;
1428
1429 /* return zero */
1430 vg_threads[tid].joiner = VG_INVALID_THREADID;
1431 vg_threads[tid].status = VgTs_Runnable;
1432 vg_threads[tid].m_edx = 0; /* success */
1433}
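/* The child's initial stack, as built above (addresses decreasing
   downwards):

      new_stack + new_stk_szb
         redzone (VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, no-access)
         arg                            pushed first
         VG_(pthreadreturn_bogusRA)     %ESP points here

   with %EIP set to start_routine, so that when start_routine eventually
   returns it "returns" to the bogus RA -- which is presumably what
   funnels the thread into handle_pthread_return() via
   VG_USERREQ__PTHREAD_RETURNS. */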
1434
1435
1436/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
1437 is a struct with at least 5 words:
1438 typedef struct
1439 {
1440 int __m_reserved; -- Reserved for future use
1441 int __m_count; -- Depth of recursive locking
1442 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1443 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1444 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1445 } pthread_mutex_t;
1446 Ours is just a single word, an index into vg_mutexes[].
1447 For now I'll park it in the __m_reserved field.
1448
1449 Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
1450 a zero __m_count field (see /usr/include/pthread.h). So I'll
1451 use zero to mean non-inited, and 1 to mean inited.
1452
1453 How convenient.
1454*/
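/* So, schematically, a mutex this file considers initialised looks like
      mutex->__m_count    == 1                 (the 'inited' flag)
      mutex->__m_reserved == a valid MutexId   (index into vg_mutexes[])
   and anything with __m_count == 0, e.g. PTHREAD_MUTEX_INITIALIZER,
   gets initialise_mutex() run on it at its first lock. */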
1455
1456static
1457void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
1458{
1459   MutexId mid;
1460 Char msg_buf[100];
1461   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
1462 whatever reason. */
1463   mid = vg_alloc_VgMutex();
1464 vg_mutexes[mid].in_use = True;
1465 vg_mutexes[mid].held = False;
1466 vg_mutexes[mid].owner = VG_INVALID_THREADID; /* irrelevant */
1467 mutex->__m_reserved = mid;
1468 mutex->__m_count = 1; /* initialised */
1469   if (VG_(clo_trace_pthread)) {
1470 VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
1471 mutex, mid );
1472 print_pthread_event(tid, msg_buf);
1473 }
1474}
1475
1476/* Allocate a new MutexId and write it into *mutex. Ideally take
1477 notice of the attributes in *mutexattr. */
1478static
1479void do_pthread_mutex_init ( ThreadId tid,
1480 pthread_mutex_t *mutex,
1481 const pthread_mutexattr_t *mutexattr)
1482{
1483   Char msg_buf[100];
1484   /* Paranoia ... */
sewardje663cb92002-04-12 10:26:32 +00001485 vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));
1486
1487   initialise_mutex(tid, mutex);
1488
1489 if (VG_(clo_trace_pthread)) {
1490 VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
1491 mutex, mutex->__m_reserved );
1492 print_pthread_event(tid, msg_buf);
1493 }
1494
1495   /*
1496 RETURN VALUE
1497 pthread_mutex_init always returns 0. The other mutex functions
1498 return 0 on success and a non-zero error code on error.
1499 */
1500 /* THIS THREAD returns with 0. */
1501 vg_threads[tid].m_edx = 0;
1502}
1503
1504
1505static
1506void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1507{
1508 MutexId mid;
1509 Char msg_buf[100];
1510
1511   /* *mutex contains the MutexId, or one of the magic values
1512 PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
1513 now. See comment(s) above re use of __m_count to indicated
1514 initialisation status.
1515 */
1516
1517 /* POSIX doesn't mandate this, but for sanity ... */
1518 if (mutex == NULL) {
1519 vg_threads[tid].m_edx = EINVAL;
1520 return;
1521 }
1522
1523 if (mutex->__m_count == 0) {
1524      initialise_mutex(tid, mutex);
1525   }
1526
1527 mid = mutex->__m_reserved;
1528 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1529 vg_threads[tid].m_edx = EINVAL;
1530 return;
1531 }
1532
1533   if (VG_(clo_trace_pthread)) {
1534 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d (%p)",
1535 mid, mutex );
1536 print_pthread_event(tid, msg_buf);
1537 }
1538
1539   /* Assert initialised. */
1540 vg_assert(mutex->__m_count == 1);
1541
1542 /* Assume tid valid. */
1543 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1544
1545 if (vg_mutexes[mid].held) {
1546 if (vg_mutexes[mid].owner == tid) {
1547 vg_threads[tid].m_edx = EDEADLK;
1548 return;
1549 }
1550 /* Someone else has it; we have to wait. */
1551 vg_threads[tid].status = VgTs_WaitMX;
1552 vg_threads[tid].waited_on_mid = mid;
1553 /* No assignment to %EDX, since we're blocking. */
1554      if (VG_(clo_trace_pthread)) {
1555 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d (%p): BLOCK",
1556 mid, mutex );
1557 print_pthread_event(tid, msg_buf);
1558      }
1559 } else {
1560 /* We get it! */
1561 vg_mutexes[mid].held = True;
1562 vg_mutexes[mid].owner = tid;
1563 /* return 0 (success). */
1564 vg_threads[tid].m_edx = 0;
1565 }
1566}
1567
1568
1569static
1570void do_pthread_mutex_unlock ( ThreadId tid,
1571 pthread_mutex_t *mutex )
1572{
1573 MutexId mid;
1574 Int i;
1575 Char msg_buf[100];
1576
1577   if (mutex == NULL
1578 || mutex->__m_count != 1) {
1579 vg_threads[tid].m_edx = EINVAL;
1580 return;
1581 }
1582
1583 mid = mutex->__m_reserved;
1584 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1585 vg_threads[tid].m_edx = EINVAL;
1586 return;
1587 }
1588
1589   if (VG_(clo_trace_pthread)) {
1590 VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
1591 mid, mutex );
1592 print_pthread_event(tid, msg_buf);
1593 }
1594
1595   /* Assume tid valid */
1596 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1597
1598 /* Barf if we don't currently hold the mutex. */
1599 if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
1600 vg_threads[tid].m_edx = EPERM;
1601 return;
1602 }
1603
1604 /* Find some arbitrary thread waiting on this mutex, and make it
1605 runnable. If none are waiting, mark the mutex as not held. */
1606 for (i = 0; i < VG_N_THREADS; i++) {
1607 if (vg_threads[i].status == VgTs_Empty)
1608 continue;
1609 if (vg_threads[i].status == VgTs_WaitMX
1610 && vg_threads[i].waited_on_mid == mid)
1611 break;
1612 }
1613
1614 vg_assert(i <= VG_N_THREADS);
1615 if (i == VG_N_THREADS) {
1616 /* Nobody else is waiting on it. */
1617 vg_mutexes[mid].held = False;
1618 } else {
1619 /* Notionally transfer the hold to thread i, whose
1620 pthread_mutex_lock() call now returns with 0 (success). */
1621 vg_mutexes[mid].owner = i;
1622 vg_threads[i].status = VgTs_Runnable;
1623 vg_threads[i].m_edx = 0; /* pth_lock() success */
1624
1625 if (VG_(clo_trace_pthread)) {
1626 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d: RESUME",
1627 mid );
1628 print_pthread_event(tid, msg_buf);
1629      }
1630 }
1631
1632 /* In either case, our (tid's) pth_unlock() returns with 0
1633 (success). */
1634 vg_threads[tid].m_edx = 0; /* Success. */
1635}
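/* Note the hand-off scheme above: unlock does not simply mark the mutex
   free and wake everyone.  If any thread is in VgTs_WaitMX on this
   mutex, ownership is transferred directly to one arbitrarily chosen
   waiter, whose blocked pthread_mutex_lock() then returns 0. */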
1636
1637
1638static void do_pthread_mutex_destroy ( ThreadId tid,
1639 pthread_mutex_t *mutex )
1640{
1641   MutexId mid;
1642 Char msg_buf[100];
1643
1644 if (mutex == NULL
1645 || mutex->__m_count != 1) {
1646 vg_threads[tid].m_edx = EINVAL;
1647 return;
1648 }
1649
1650 mid = mutex->__m_reserved;
1651 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1652 vg_threads[tid].m_edx = EINVAL;
1653 return;
1654 }
1655
1656   if (VG_(clo_trace_pthread)) {
1657 VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
1658 mid, mutex );
1659 print_pthread_event(tid, msg_buf);
1660 }
1661
1662   /* Assume tid valid */
1663 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1664
1665 /* Barf if the mutex is currently held. */
1666 if (vg_mutexes[mid].held) {
1667 vg_threads[tid].m_edx = EBUSY;
1668 return;
1669 }
1670
1671 mutex->__m_count = 0; /* uninitialised */
1672 vg_mutexes[mid].in_use = False;
1673 vg_threads[tid].m_edx = 0;
1674}
1675
1676
1677/* vthread tid is returning from a signal handler; modify its
1678 stack/regs accordingly. */
1679static
1680void handle_signal_return ( ThreadId tid )
1681{
1682 Char msg_buf[100];
1683 Bool restart_blocked_syscalls = VG_(signal_returns)(tid);
1684
1685 if (restart_blocked_syscalls)
1686 /* Easy; we don't have to do anything. */
1687 return;
1688
1689 if (vg_threads[tid].status == VgTs_WaitFD) {
1690 vg_assert(vg_threads[tid].m_eax == __NR_read
1691 || vg_threads[tid].m_eax == __NR_write);
1692 /* read() or write() interrupted. Force a return with EINTR. */
1693 vg_threads[tid].m_eax = -VKI_EINTR;
1694 vg_threads[tid].status = VgTs_Runnable;
1695 if (VG_(clo_trace_sched)) {
1696 VG_(sprintf)(msg_buf,
1697 "read() / write() interrupted by signal; return EINTR" );
1698 print_sched_event(tid, msg_buf);
1699 }
1700 return;
1701 }
1702
1703   if (vg_threads[tid].status == VgTs_Sleeping) {
1704 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
1705 /* We interrupted a nanosleep(). The right thing to do is to
1706 write the unused time to nanosleep's second param and return
1707 EINTR, but I'm too lazy for that. */
1708 return;
1709 }
1710
1711 /* All other cases? Just return. */
1712}
1713
1714
1715/* ---------------------------------------------------------------------
1716 Handle non-trivial client requests.
1717 ------------------------------------------------------------------ */
1718
1719static
1720void do_nontrivial_clientreq ( ThreadId tid )
1721{
1722 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
1723 UInt req_no = arg[0];
1724 switch (req_no) {
1725
1726 case VG_USERREQ__PTHREAD_CREATE:
1727 do_pthread_create( tid,
1728 (pthread_t*)arg[1],
1729 (pthread_attr_t*)arg[2],
1730 (void*(*)(void*))arg[3],
1731 (void*)arg[4] );
1732 break;
1733
1734      case VG_USERREQ__PTHREAD_RETURNS:
1735 handle_pthread_return( tid, (void*)arg[1] );
1736         break;
1737
1738 case VG_USERREQ__PTHREAD_JOIN:
1739 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
1740 break;
1741
1742 /* Sigh ... this probably will cause huge numbers of major
1743 (expensive) scheduling events, for no real reason.
1744 Perhaps should be classified as a trivial-request. */
1745 case VG_USERREQ__PTHREAD_GET_THREADID:
1746 vg_threads[tid].m_edx = tid;
1747 break;
1748
1749 case VG_USERREQ__PTHREAD_MUTEX_INIT:
1750 do_pthread_mutex_init( tid,
1751 (pthread_mutex_t *)(arg[1]),
1752 (pthread_mutexattr_t *)(arg[2]) );
1753 break;
1754
1755 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
1756 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
1757 break;
1758
1759 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
1760 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
1761 break;
1762
1763 case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
1764 do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
1765 break;
1766
1767 case VG_USERREQ__PTHREAD_CANCEL:
1768 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
1769 break;
1770
1771 case VG_USERREQ__MAKE_NOACCESS:
1772 case VG_USERREQ__MAKE_WRITABLE:
1773 case VG_USERREQ__MAKE_READABLE:
1774 case VG_USERREQ__DISCARD:
1775 case VG_USERREQ__CHECK_WRITABLE:
1776 case VG_USERREQ__CHECK_READABLE:
1777 case VG_USERREQ__MAKE_NOACCESS_STACK:
1778 case VG_USERREQ__RUNNING_ON_VALGRIND:
1779 case VG_USERREQ__DO_LEAK_CHECK:
1780 vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
1781 break;
1782
1783      case VG_USERREQ__SIGNAL_RETURNS:
1784 handle_signal_return(tid);
1785 break;
1786
1787      default:
1788 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
1789 VG_(panic)("handle_private_client_pthread_request: "
1790 "unknown request");
1791 /*NOTREACHED*/
1792 break;
1793 }
1794}
1795
1796
1797/*--------------------------------------------------------------------*/
1798/*--- end vg_scheduler.c ---*/
1799/*--------------------------------------------------------------------*/