/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

   Note!  This pthreads implementation is so poor as to not be
   suitable for use by anyone at all!

   - Currently, when a signal is run, just the ThreadStatus.status
     fields are saved in the signal frame, along with the CPU state.
     Question: should I also save and restore:
        ThreadStatus.joiner
        ThreadStatus.waited_on_mid
        ThreadStatus.awaken_at
        ThreadStatus.retval
     Currently unsure, and so am not doing so.

   - Signals interrupting read/write and nanosleep: SA_RESTART settings.
     Read/write correctly return with EINTR when SA_RESTART isn't
     specified and they are interrupted by a signal.  nanosleep just
     pretends signals don't exist -- should be fixed.

   - Read/write syscall starts: don't crap out when the initial
     nonblocking read/write returns an error.

   - Get rid of restrictions re use of sigaltstack; they are no longer
     needed.
*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use. */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


typedef
   struct {
      /* Is this slot in use, or free? */
      Bool in_use;
      /* If in_use, is this mutex held by some thread, or not? */
      Bool held;
      /* If held == True, owner indicates who by. */
      ThreadId owner;
   }
   VgMutex;

static VgMutex vg_mutexes[VG_N_MUTEXES];

/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void pp_sched_status ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("tid %d:  ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default:              VG_(printf)("???\n"); break;
      }
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


/* Find an unused VgMutex record. */
static
MutexId vg_alloc_VgMutex ( void )
{
   Int i;
   for (i = 0; i < VG_N_MUTEXES; i++) {
      if (!vg_mutexes[i].in_use)
         return i;
   }
   VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
   VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
   VG_(panic)("VG_N_MUTEXES is too low");
   /*NOTREACHED*/
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
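
/* For illustration only: this fault-catching scheme assumes the host
   SIGSEGV/SIGBUS handler (which lives in the signals machinery, not in
   this file) finishes with something along these lines, so that control
   re-emerges in the else-arm of run_thread_for_a_while above:

      // hypothetical sketch, not the actual handler
      void catch_unresumable_signal ( int sigNo )
      {
         VG_(longjmpd_on_signal) = sigNo;
         __builtin_longjmp ( VG_(scheduler_jmpbuf), 1 );
      }
*/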

/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_MUTEXES; i++)
      vg_mutexes[i].in_use = False;

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   VG_(save_thread_state) ( tid_main );
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}


/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled that way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)       \
      { tst->m_edx = (vvv);        \
        return True;               \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
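
/* For illustration: a client-side wrapper reaching the code above puts
   the address of a small argument vector in its %EAX, with the request
   code in word 0.  A minimal sketch (hypothetical; the real wrappers
   live in the client-side library, not in this file):

      UInt argblock[4];
      argblock[0] = VG_USERREQ__MALLOC;   // request code, tested above
      argblock[1] = 100;                  // request arg: size in bytes
      // ... trap to Valgrind with %EAX == (UInt)&argblock[0];
      // the result comes back in the thread's %EDX, via SIMPLE_RETURN.
*/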


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
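      /* Worked example (illustrative): a request of { tv_sec = 1,
         tv_nsec = 500000000 } gives t_awaken = t_now + 1000000
         + 500000 microseconds, i.e. 1.5 seconds from now. */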
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select(). */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table. */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state before
   we can actually finally deliver the results, so this procedure
   doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time (second)
            arg of nanosleep, since it's used up all its time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
        VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
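
/* For illustration, the life cycle of one blocking read() under the
   machinery above, assuming no signal interrupts the thread (a sketch
   of the state transitions, not additional code):

      sched_do_syscall:           fd set non-blocking; the speculative
                                  call returns -VKI_EWOULDBLOCK; thread
                                  -> VgTs_WaitFD; slot added to
                                  vg_waiting_fds[]
      poll_for_ready_fds:         select() reports the fd readable;
                                  slot marked ready = True
      complete_blocked_syscalls:  syscall re-issued, now completing;
                                  thread -> VgTs_Runnable; slot freed
                                  by setting its fd back to -1
*/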


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec  = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            pp_sched_status();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait or sleeping state.
            Delay for a while, and go round again, in the hope that
            eventually a thread becomes runnable. */
         nanosleep_for_a_while();
         /* pp_sched_status(); */
         /* VG_(printf)(".\n"); */
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive. */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
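      /* Worked example: when plenty of bbs remain, VG_(dispatch_ctr)
         becomes VG_SCHEDULING_QUANTUM + 1, and since the innerloop
         decrements before testing, the thread runs for at most
         VG_SCHEDULING_QUANTUM basic blocks before yielding. */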

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               VG_(create_translation_for)( vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done)  += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the return
      value.  So: simple: put PTHREAD_CANCELED into %EAX and
      &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].m_esp
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
   }
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   /* return zero */
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;
   vg_threads[tid].m_edx  = 0; /* success */
}
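
/* For illustration, the child's initial stack as built above (a
   sketch; the stack grows downwards):

      stack_base + stack_size
         | redzone: VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, no-access
         | arg                          <- pushed first
         | &VG_(pthreadreturn_bogusRA)  <- fake return address
      %ESP points here ----^

   so when start_routine returns, it "returns" to the bogus return
   address, whose stub hands the return value back to the scheduler
   via the VG_USERREQ__PTHREAD_RETURNS client request. */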


/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
   is a struct with at least 5 words:
      typedef struct
      {
        int __m_reserved;         -- Reserved for future use
        int __m_count;            -- Depth of recursive locking
        _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
        int __m_kind;             -- Mutex kind: fast, recursive or errcheck
        struct _pthread_fastlock __m_lock; -- Underlying fast lock
      } pthread_mutex_t;
   Ours is just a single word, an index into vg_mutexes[].
   For now I'll park it in the __m_reserved field.

   Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
   a zero __m_count field (see /usr/include/pthread.h).  So I'll
   use zero to mean non-inited, and 1 to mean inited.

   How convenient.
*/
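
/* Illustrative sketch of that encoding (hypothetical helpers; the real
   code below just reads the fields directly):

      static Bool mutex_is_inited ( pthread_mutex_t* mutex )
      { return mutex->__m_count == 1; }

      static MutexId mutex_mid ( pthread_mutex_t* mutex )
      { return (MutexId)mutex->__m_reserved; }
*/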

static
void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];
   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
      whatever reason. */
   mid = vg_alloc_VgMutex();
   vg_mutexes[mid].in_use = True;
   vg_mutexes[mid].held   = False;
   vg_mutexes[mid].owner  = VG_INVALID_THREADID; /* irrelevant */
   mutex->__m_reserved = mid;
   mutex->__m_count    = 1; /* initialised */
   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
                            mutex, mid );
      print_pthread_event(tid, msg_buf);
   }
}

/* Allocate a new MutexId and write it into *mutex.  Ideally take
   notice of the attributes in *mutexattr. */
static
void do_pthread_mutex_init ( ThreadId tid,
                             pthread_mutex_t *mutex,
                             const pthread_mutexattr_t *mutexattr)
{
   Char msg_buf[100];
   /* Paranoia ... */
   vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));

   initialise_mutex(tid, mutex);

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
                            mutex, mutex->__m_reserved );
      print_pthread_event(tid, msg_buf);
   }

   /*
   RETURN VALUE
      pthread_mutex_init always returns 0.  The other mutex functions
      return 0 on success and a non-zero error code on error.
   */
   /* THIS THREAD returns with 0. */
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   /* *mutex contains the MutexId, or one of the magic values
      PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
      now.  See comment(s) above re use of __m_count to indicate
      initialisation status.
   */

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (mutex->__m_count == 0) {
      initialise_mutex(tid, mutex);
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assert initialised. */
   vg_assert(mutex->__m_count == 1);

   /* Assume tid valid. */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (vg_mutexes[mid].held) {
      if (vg_mutexes[mid].owner == tid) {
         vg_threads[tid].m_edx = EDEADLK;
         return;
      }
      /* Someone else has it; we have to wait. */
      vg_threads[tid].status = VgTs_WaitMX;
      vg_threads[tid].waited_on_mid = mid;
      /* No assignment to %EDX, since we're blocking. */
      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p): BLOCK",
                               mid, mutex );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* We get it! */
      vg_mutexes[mid].held  = True;
      vg_mutexes[mid].owner = tid;
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }
}

static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   MutexId mid;
   Int     i;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if we don't currently hold the mutex. */
   if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].waited_on_mid == mid)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      vg_mutexes[mid].held = False;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      vg_mutexes[mid].owner = i;
      vg_threads[i].status = VgTs_Runnable;
      vg_threads[i].m_edx = 0; /* pth_lock() success */

      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d: RESUME",
                               mid );
         print_pthread_event(tid, msg_buf);
      }
   }

   /* In either case, our (tid's) pth_unlock() returns with 0
      (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}

static void do_pthread_mutex_destroy ( ThreadId tid,
                                       pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if the mutex is currently held. */
   if (vg_mutexes[mid].held) {
      vg_threads[tid].m_edx = EBUSY;
      return;
   }

   mutex->__m_count = 0; /* uninitialised */
   vg_mutexes[mid].in_use = False;
   vg_threads[tid].m_edx = 0;
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* This must test for Sleeping, not WaitFD; the WaitFD case has
      already returned above. */
   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}

/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_INIT:
         do_pthread_mutex_init( tid,
                                (pthread_mutex_t *)(arg[1]),
                                (pthread_mutexattr_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
         do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("handle_private_client_pthread_request: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/