blob: da8143cbee13229c0c967af1d7659427e2769edb [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
33
34#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
35 VG_USERREQ__DO_LEAK_CHECK */
36
sewardj77e466c2002-04-14 02:29:29 +000037/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000038
sewardj77e466c2002-04-14 02:29:29 +000039Note! This pthreads implementation is so poor as to not be
40suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000041
sewardj77e466c2002-04-14 02:29:29 +000042- Currently, when a signal is run, just the ThreadStatus.status fields
43 are saved in the signal frame, along with the CPU state. Question:
44 should I also save and restore:
45 ThreadStatus.joiner
46 ThreadStatus.waited_on_mid
47 ThreadStatus.awaken_at
48 ThreadStatus.retval
49 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000050
sewardj77e466c2002-04-14 02:29:29 +000051- Signals interrupting read/write and nanosleep: SA_RESTART settings.
52 Read/write correctly return with EINTR when SA_RESTART isn't
53 specified and they are interrupted by a signal. nanosleep just
54 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000055
sewardj75fe1892002-04-14 02:46:33 +000056- Read/write syscall starts: don't crap out when the initial
57 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000058
sewardj9a199dc2002-04-14 13:01:38 +000059- Get rid of restrictions re use of sigaltstack; they are no longer
60 needed.
61
sewardj6072c362002-04-19 14:40:57 +000062- Fix signals properly, so that each thread has its own blocking mask.
63 Currently this isn't done, and (worse?) signals are delivered to
64 Thread 1 (the root thread) regardless.
65
66 So, what's the deal with signals and mutexes? If a thread is
67 blocked on a mutex, or for a condition variable for that matter, can
68 signals still be delivered to it? This has serious consequences --
69 deadlocks, etc.
70
sewardje462e202002-04-13 04:09:07 +000071*/
sewardje663cb92002-04-12 10:26:32 +000072
73
74/* ---------------------------------------------------------------------
75 Types and globals for the scheduler.
76 ------------------------------------------------------------------ */
77
78/* type ThreadId is defined in vg_include.h. */
79
80/* struct ThreadState is defined in vg_include.h. */
81
/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock), or
   VG_INVALID_THREADID if no thread's state is currently loaded
   there. */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

/* Table of outstanding blocked-fd requests; slots with fd == -1 are
   free.  Filled by add_waiting_fd(), scanned by poll_for_ready_fds(). */
static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );

static void scheduler_sanity ( void );
sewardje663cb92002-04-12 10:26:32 +0000127
128/* ---------------------------------------------------------------------
129 Helper functions for the scheduler.
130 ------------------------------------------------------------------ */
131
sewardj604ec3c2002-04-18 22:38:41 +0000132static __inline__
133Bool is_valid_tid ( ThreadId tid )
134{
135 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000136 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000137 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000138 return True;
139}
140
141
sewardj1e8cdc92002-04-18 11:37:52 +0000142/* For constructing error messages only: try and identify a thread
143 whose stack this address currently falls within, or return
144 VG_INVALID_THREADID if it doesn't. A small complication is dealing
145 with any currently VG_(baseBlock)-resident thread.
146*/
147ThreadId VG_(identify_stack_addr)( Addr a )
148{
149 ThreadId tid, tid_to_skip;
150
151 tid_to_skip = VG_INVALID_THREADID;
152
153 /* First check to see if there's a currently-loaded thread in
154 VG_(baseBlock). */
155 if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
156 tid = vg_tid_currently_in_baseBlock;
157 if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
158 && a <= vg_threads[tid].stack_highest_word)
159 return tid;
160 else
161 tid_to_skip = tid;
162 }
163
sewardj6072c362002-04-19 14:40:57 +0000164 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj1e8cdc92002-04-18 11:37:52 +0000165 if (vg_threads[tid].status == VgTs_Empty) continue;
166 if (tid == tid_to_skip) continue;
167 if (vg_threads[tid].m_esp <= a
168 && a <= vg_threads[tid].stack_highest_word)
169 return tid;
170 }
171 return VG_INVALID_THREADID;
172}
173
174
sewardj15a43e12002-04-17 19:35:12 +0000175/* Print the scheduler status. */
176void VG_(pp_sched_status) ( void )
sewardje663cb92002-04-12 10:26:32 +0000177{
178 Int i;
179 VG_(printf)("\nsched status:\n");
sewardj6072c362002-04-19 14:40:57 +0000180 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000181 if (vg_threads[i].status == VgTs_Empty) continue;
sewardj15a43e12002-04-17 19:35:12 +0000182 VG_(printf)("\nThread %d: status = ", i);
sewardje663cb92002-04-12 10:26:32 +0000183 switch (vg_threads[i].status) {
sewardj6072c362002-04-19 14:40:57 +0000184 case VgTs_Runnable: VG_(printf)("Runnable"); break;
185 case VgTs_WaitFD: VG_(printf)("WaitFD"); break;
186 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
sewardje663cb92002-04-12 10:26:32 +0000187 vg_threads[i].joiner); break;
sewardj6072c362002-04-19 14:40:57 +0000188 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
189 case VgTs_Sleeping: VG_(printf)("Sleeping"); break;
190 case VgTs_WaitMX: VG_(printf)("WaitMX"); break;
sewardj3b5d8862002-04-20 13:53:23 +0000191 case VgTs_WaitCV: VG_(printf)("WaitCV"); break;
sewardje663cb92002-04-12 10:26:32 +0000192 default: VG_(printf)("???"); break;
193 }
sewardj3b5d8862002-04-20 13:53:23 +0000194 VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
195 vg_threads[i].associated_mx,
196 vg_threads[i].associated_cv );
sewardj15a43e12002-04-17 19:35:12 +0000197 VG_(pp_ExeContext)(
198 VG_(get_ExeContext)( False, vg_threads[i].m_eip,
199 vg_threads[i].m_ebp ));
sewardje663cb92002-04-12 10:26:32 +0000200 }
201 VG_(printf)("\n");
202}
203
204static
205void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
206{
207 Int i;
208
209 vg_assert(fd != -1); /* avoid total chaos */
210
211 for (i = 0; i < VG_N_WAITING_FDS; i++)
212 if (vg_waiting_fds[i].fd == -1)
213 break;
214
215 if (i == VG_N_WAITING_FDS)
216 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
217 /*
218 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
219 tid, fd, i);
220 */
221 vg_waiting_fds[i].fd = fd;
222 vg_waiting_fds[i].tid = tid;
223 vg_waiting_fds[i].ready = False;
224 vg_waiting_fds[i].syscall_no = syscall_no;
225}
226
227
228
/* Emit a scheduler-level trace line for thread tid to the debug log. */
static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}
234
235
/* Emit a pthread-level trace line for thread tid to the debug log. */
static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}
241
242
243static
244Char* name_of_sched_event ( UInt event )
245{
246 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000247 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
248 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
249 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
250 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
251 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
252 default: return "??UNKNOWN??";
253 }
254}
255
256
/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
   tid identifies the thread whose state VG_(translate) should use. */
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats: counts/sizes for this LRU epoch and overall. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC (self-modifying code) detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}
298
299
300/* Allocate a completely empty ThreadState record. */
301static
302ThreadId vg_alloc_ThreadState ( void )
303{
304 Int i;
sewardj6072c362002-04-19 14:40:57 +0000305 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000306 if (vg_threads[i].status == VgTs_Empty)
307 return i;
308 }
309 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
310 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
311 VG_(panic)("VG_N_THREADS is too low");
312 /*NOTREACHED*/
313}
314
315
/* Return a pointer to tid's ThreadState record.  tid must be a valid
   thread id whose slot is in use. */
ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}
322
323
/* ThreadState of the thread currently resident in VG_(baseBlock).
   Only callable while some thread is actually resident there. */
ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}
329
330
/* ThreadId of the thread currently resident in VG_(baseBlock).
   Only callable while some thread is actually resident there. */
ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}
336
337
/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run.  Requires that no other thread is currently resident
   (caller must have done a save_thread_state first). */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   /* FPU state, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   /* Shadow (validity-tracking) registers. */
   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   /* Mark tid as the resident thread. */
   vg_tid_currently_in_baseBlock = tid;
}
372
373
/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   /* FPU state, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   /* Shadow (validity-tracking) registers. */
   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   /* No thread is resident any more. */
   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}
429
430
/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened.  The thread must be Runnable.
   Note: if the client takes a fault, vg_oursignalhandler longjmps
   back through VG_(scheduler_jmpbuf), which is why run_innerloop's
   return is guarded by setjmp here. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
455
456
/* Increment the LRU epoch counter; at high verbosity, report the
   translation statistics of the epoch just ended; then reset the
   per-epoch counters for the new epoch. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   /* Start the new epoch with all per-epoch counters at zero. */
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}
484
485
/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care that the client's state is parked in
   VG_(baseBlock) beforehand.
*/
void VG_(scheduler_init) ( void )
{
   Int i;
   Addr startup_esp;
   ThreadId tid_main;

   /* Sanity-check the startup stack pointer against the expected
      startup-stack region. */
   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   /* Mark every slot empty -- including slot 0, which is never
      allocated but should still be in a sane state. */
   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      vg_threads[i].status = VgTs_Empty;
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   /* The first allocation must yield ThreadId 1, the main thread,
      which has certain magic properties (slot 0 is never used). */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].associated_mx = NULL;
   vg_threads[tid_main].associated_cv = NULL;
   vg_threads[tid_main].retval = NULL; /* not important */
   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4 ??? */;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* So now ... no thread is resident in VG_(baseBlock). */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}
534
535
536/* What if fd isn't a valid fd? */
537static
538void set_fd_nonblocking ( Int fd )
539{
540 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
541 vg_assert(!VG_(is_kerror)(res));
542 res |= VKI_O_NONBLOCK;
543 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
544 vg_assert(!VG_(is_kerror)(res));
545}
546
547static
548void set_fd_blocking ( Int fd )
549{
550 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
551 vg_assert(!VG_(is_kerror)(res));
552 res &= ~VKI_O_NONBLOCK;
553 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
554 vg_assert(!VG_(is_kerror)(res));
555}
556
557static
558Bool fd_is_blockful ( Int fd )
559{
560 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
561 vg_assert(!VG_(is_kerror)(res));
562 return (res & VKI_O_NONBLOCK) ? False : True;
563}
564
565
566
/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if so.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.

   The request block is pointed to by the thread's %EAX: arg[0] is the
   request number, arg[1..] its arguments.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)             \
   { tst->m_edx = (vvv);                 \
     return True;                        \
   }

   ThreadState* tst = &vg_threads[tid];
   UInt* arg = (UInt*)(tst->m_eax);
   UInt req_no = arg[0];

   switch (req_no) {
      /* Allocation family: result (pointer) goes in %EDX. */
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      /* Deallocation family: no meaningful result. */
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
635
636
sewardj6072c362002-04-19 14:40:57 +0000637/* vthread tid is returning from a signal handler; modify its
638 stack/regs accordingly. */
639static
640void handle_signal_return ( ThreadId tid )
641{
642 Char msg_buf[100];
643 Bool restart_blocked_syscalls;
644
645 vg_assert(is_valid_tid(tid));
646
647 restart_blocked_syscalls = VG_(signal_returns)(tid);
648
649 if (restart_blocked_syscalls)
650 /* Easy; we don't have to do anything. */
651 return;
652
653 if (vg_threads[tid].status == VgTs_WaitFD) {
654 vg_assert(vg_threads[tid].m_eax == __NR_read
655 || vg_threads[tid].m_eax == __NR_write);
656 /* read() or write() interrupted. Force a return with EINTR. */
657 vg_threads[tid].m_eax = -VKI_EINTR;
658 vg_threads[tid].status = VgTs_Runnable;
659 if (VG_(clo_trace_sched)) {
660 VG_(sprintf)(msg_buf,
661 "read() / write() interrupted by signal; return EINTR" );
662 print_sched_event(tid, msg_buf);
663 }
664 return;
665 }
666
667 if (vg_threads[tid].status == VgTs_WaitFD) {
668 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
669 /* We interrupted a nanosleep(). The right thing to do is to
670 write the unused time to nanosleep's second param and return
671 EINTR, but I'm too lazy for that. */
672 return;
673 }
674
675 /* All other cases? Just return. */
676}
677
678
/* Schedule-aware syscall dispatch for thread tid (which must be
   Runnable).  Three cases:
     - nanosleep: deschedule the thread (VgTs_Sleeping) until
       awaken_at; poll_for_ready_fds() wakes it up later.
     - read/write on a blocking fd: lodge the request with the kernel
       non-blockingly; if it would block, park the thread in
       VgTs_WaitFD and record the fd in vg_waiting_fds.
     - everything else: assumed non-blocking, performed immediately.
*/
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      /* Absolute wakeup time, in microseconds. */
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   /* Classify: only read/write on a blockful fd can block. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         /* Unreachable: only read/write get here (see classification
            above); always-false assert. */
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion. The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}
788
789
790/* Find out which of the fds in vg_waiting_fds are now ready to go, by
791 making enquiries with select(), and mark them as ready. We have to
   wait for the requesting threads to fall into the WaitFD state
793 before we can actually finally deliver the results, so this
794 procedure doesn't do that; complete_blocked_syscalls() does it.
795
796 It might seem odd that a thread which has done a blocking syscall
797 is not in WaitFD state; the way this can happen is if it initially
798 becomes WaitFD, but then a signal is delivered to it, so it becomes
799 Runnable for a while. In this case we have to wait for the
800 sighandler to return, whereupon the WaitFD state is resumed, and
801 only at that point can the I/O result be delivered to it. However,
802 this point may be long after the fd is actually ready.
803
804 So, poll_for_ready_fds() merely detects fds which are ready.
805 complete_blocked_syscalls() does the second half of the trick,
806 possibly much later: it delivers the results from ready fds to
807 threads in WaitFD state.
808*/
sewardj9a199dc2002-04-14 13:01:38 +0000809static
sewardje663cb92002-04-12 10:26:32 +0000810void poll_for_ready_fds ( void )
811{
812 vki_ksigset_t saved_procmask;
813 vki_fd_set readfds;
814 vki_fd_set writefds;
815 vki_fd_set exceptfds;
816 struct vki_timeval timeout;
817 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
818 ThreadId tid;
819 Bool rd_ok, wr_ok, ex_ok;
820 Char msg_buf[100];
821
sewardje462e202002-04-13 04:09:07 +0000822 struct vki_timespec* rem;
823 ULong t_now;
824
sewardje663cb92002-04-12 10:26:32 +0000825 /* Awaken any sleeping threads whose sleep has expired. */
sewardj6072c362002-04-19 14:40:57 +0000826 for (tid = 1; tid < VG_N_THREADS; tid++)
827 if (vg_threads[tid].status == VgTs_Sleeping)
828 break;
829
830 /* Avoid pointless calls to VG_(read_microsecond_timer). */
831 if (tid < VG_N_THREADS) {
832 t_now = VG_(read_microsecond_timer)();
833 for (tid = 1; tid < VG_N_THREADS; tid++) {
834 if (vg_threads[tid].status != VgTs_Sleeping)
835 continue;
836 if (t_now >= vg_threads[tid].awaken_at) {
837 /* Resume this thread. Set to zero the remaining-time
838 (second) arg of nanosleep, since it's used up all its
839 time. */
840 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
841 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
842 if (rem != NULL) {
843 rem->tv_sec = 0;
844 rem->tv_nsec = 0;
845 }
846 /* Make the syscall return 0 (success). */
847 vg_threads[tid].m_eax = 0;
848 /* Reschedule this thread. */
849 vg_threads[tid].status = VgTs_Runnable;
850 if (VG_(clo_trace_sched)) {
851 VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
852 t_now);
853 print_sched_event(tid, msg_buf);
854 }
sewardje663cb92002-04-12 10:26:32 +0000855 }
856 }
857 }
sewardje663cb92002-04-12 10:26:32 +0000858
sewardje462e202002-04-13 04:09:07 +0000859 /* And look for threads waiting on file descriptors which are now
860 ready for I/O.*/
sewardje663cb92002-04-12 10:26:32 +0000861 timeout.tv_sec = 0;
862 timeout.tv_usec = 0;
863
864 VKI_FD_ZERO(&readfds);
865 VKI_FD_ZERO(&writefds);
866 VKI_FD_ZERO(&exceptfds);
867 fd_max = -1;
868 for (i = 0; i < VG_N_WAITING_FDS; i++) {
869 if (vg_waiting_fds[i].fd == -1 /* not in use */)
870 continue;
871 if (vg_waiting_fds[i].ready /* already ready? */)
872 continue;
873 fd = vg_waiting_fds[i].fd;
874 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
sewardje462e202002-04-13 04:09:07 +0000875 vg_assert(fd >= 0);
sewardje663cb92002-04-12 10:26:32 +0000876 if (fd > fd_max)
877 fd_max = fd;
878 tid = vg_waiting_fds[i].tid;
sewardj6072c362002-04-19 14:40:57 +0000879 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000880 syscall_no = vg_waiting_fds[i].syscall_no;
881 switch (syscall_no) {
882 case __NR_read:
883 VKI_FD_SET(fd, &readfds); break;
884 case __NR_write:
885 VKI_FD_SET(fd, &writefds); break;
886 default:
887 VG_(panic)("poll_for_ready_fds: unexpected syscall");
888 /*NOTREACHED*/
889 break;
890 }
891 }
892
sewardje462e202002-04-13 04:09:07 +0000893 /* Short cut: if no fds are waiting, give up now. */
894 if (fd_max == -1)
895 return;
896
sewardje663cb92002-04-12 10:26:32 +0000897 /* BLOCK ALL SIGNALS. We don't want the complication of select()
898 getting interrupted. */
899 VG_(block_all_host_signals)( &saved_procmask );
900
901 n_ready = VG_(select)
902 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
903 if (VG_(is_kerror)(n_ready)) {
904 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
905 VG_(panic)("poll_for_ready_fds: select failed?!");
906 /*NOTREACHED*/
907 }
908
909 /* UNBLOCK ALL SIGNALS */
910 VG_(restore_host_signals)( &saved_procmask );
911
912 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
913
914 if (n_ready == 0)
915 return;
916
917 /* Inspect all the fds we know about, and handle any completions that
918 have happened. */
919 /*
920 VG_(printf)("\n\n");
921 for (fd = 0; fd < 100; fd++)
922 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
923 VG_(printf)("X"); } else { VG_(printf)("."); };
924 VG_(printf)("\n\nfd_max = %d\n", fd_max);
925 */
926
927 for (fd = 0; fd <= fd_max; fd++) {
928 rd_ok = VKI_FD_ISSET(fd, &readfds);
929 wr_ok = VKI_FD_ISSET(fd, &writefds);
930 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
931
932 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
933 if (n_ok == 0)
934 continue;
935 if (n_ok > 1) {
936 VG_(printf)("offending fd = %d\n", fd);
937 VG_(panic)("poll_for_ready_fds: multiple events on fd");
938 }
939
940 /* An I/O event completed for fd. Find the thread which
941 requested this. */
942 for (i = 0; i < VG_N_WAITING_FDS; i++) {
943 if (vg_waiting_fds[i].fd == -1 /* not in use */)
944 continue;
945 if (vg_waiting_fds[i].fd == fd)
946 break;
947 }
948
949 /* And a bit more paranoia ... */
950 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
951
952 /* Mark the fd as ready. */
953 vg_assert(! vg_waiting_fds[i].ready);
954 vg_waiting_fds[i].ready = True;
955 }
956}
957
958
 959/* See comment attached to poll_for_ready_fds() for explanation. */
/* Second half of the deferred-I/O mechanism: for every waiting-fd
   slot that poll_for_ready_fds() marked ready, AND whose owner thread
   has returned to the VgTs_WaitFD state, actually perform the
   (now non-blocking) syscall, deliver the result, and make the thread
   runnable again. */
static
void complete_blocked_syscalls ( void )
{
   Int        fd, i, res, syscall_no;
   ThreadId   tid;
   Char       msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result!  (It may be
         temporarily Runnable while running a signal handler; in that
         case the slot stays ready and we retry on a later pass.) */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
1005
1006
1007static
1008void nanosleep_for_a_while ( void )
1009{
1010 Int res;
1011 struct vki_timespec req;
1012 struct vki_timespec rem;
1013 req.tv_sec = 0;
1014 req.tv_nsec = 20 * 1000 * 1000;
1015 res = VG_(nanosleep)( &req, &rem );
1016 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
1017 vg_assert(res == 0);
1018}
1019
1020
1021/* ---------------------------------------------------------------------
1022 The scheduler proper.
1023 ------------------------------------------------------------------ */
1024
1025/* Run user-space threads until either
1026 * Deadlock occurs
1027 * One thread asks to shutdown Valgrind
1028 * The specified number of basic blocks has gone by.
1029*/
/* The top-level scheduler.  Round-robins over the thread table,
   running each runnable thread for up to a quantum of basic blocks,
   handling I/O completion, signal delivery, syscalls and client
   (pthread) requests as threads fall out of the dispatcher.
   Returns the reason the whole run ended. */
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;                 /* thread return code from dispatcher */
   UInt     dispatch_ctr_SAVED;  /* quantum requested, for bb accounting */
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
      scheduler_sanity();

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour. */
         VG_(deliver_signals)( 1 /*HACK*/ );
         VG_(do_sanity_checks)( 1 /*HACK*/, False );

         /* Try and find a thread (tid) to run, round-robin, starting
            just after the thread that last ran. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason. First, update basic-block
         counters.  The extra -1 accounts for the dispatcher's
         pre-decrement of VG_(dispatch_ctr) (see comment above). */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done) += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            /* Handled inline in Phase 2; seeing it here is a bug. */
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}
1329
1330
1331/* ---------------------------------------------------------------------
1332 The pthread implementation.
1333 ------------------------------------------------------------------ */
1334
1335#include <pthread.h>
1336#include <errno.h>
1337
1338#if !defined(PTHREAD_STACK_MIN)
1339# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1340#endif
1341
1342/* /usr/include/bits/pthreadtypes.h:
1343 typedef unsigned long int pthread_t;
1344*/
1345
sewardje663cb92002-04-12 10:26:32 +00001346
sewardj604ec3c2002-04-18 22:38:41 +00001347/* -----------------------------------------------------------
1348 Thread CREATION, JOINAGE and CANCELLATION.
1349 -------------------------------------------------------- */
1350
sewardje663cb92002-04-12 10:26:32 +00001351static
1352void do_pthread_cancel ( ThreadId tid_canceller,
1353 pthread_t tid_cancellee )
1354{
1355 Char msg_buf[100];
1356 /* We want make is appear that this thread has returned to
1357 do_pthread_create_bogusRA with PTHREAD_CANCELED as the
1358 return value. So: simple: put PTHREAD_CANCELED into %EAX
1359 and &do_pthread_create_bogusRA into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001360 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001361 VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1362 print_sched_event(tid_cancellee, msg_buf);
1363 }
1364 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001365 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001366 vg_threads[tid_cancellee].status = VgTs_Runnable;
1367}
1368
1369
sewardj3b5d8862002-04-20 13:53:23 +00001370static
1371void do_pthread_exit ( ThreadId tid, void* retval )
1372{
1373 Char msg_buf[100];
1374 /* We want make is appear that this thread has returned to
1375 do_pthread_create_bogusRA with retval as the
1376 return value. So: simple: put retval into %EAX
1377 and &do_pthread_create_bogusRA into %EIP and keep going! */
1378 if (VG_(clo_trace_sched)) {
1379 VG_(sprintf)(msg_buf, "exiting with %p", retval);
1380 print_sched_event(tid, msg_buf);
1381 }
1382 vg_threads[tid].m_eax = (UInt)retval;
1383 vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1384 vg_threads[tid].status = VgTs_Runnable;
1385}
1386
sewardje663cb92002-04-12 10:26:32 +00001387
1388/* Thread tid is exiting, by returning from the function it was
sewardjbc5b99f2002-04-13 00:08:51 +00001389 created with. Or possibly due to pthread_exit or cancellation.
1390 The main complication here is to resume any thread waiting to join
1391 with this one. */
/* Thread tid has finished (root-fn return, pthread_exit, or
   cancellation) with result retval.  If a joiner is already waiting,
   hand the result over and free this slot; otherwise park the thread
   in VgTs_WaitJoiner until someone calls pthread_join on it. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt* jnr_args;
   void** jnr_thread_return;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   /* Park the exit code where a later pthread_join can find it. */
   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(is_valid_tid(jnr));
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      /* args[2] is pthread_join's void** thread_return parameter. */
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      /* Poison the dead thread's stack so late accesses are reported.
         The stack itself is kept for reuse by the next occupant of
         this slot. */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}
1450
1451
/* Implement pthread_join: thread tid joins on thread jee, with the
   joinee's exit code to be stored through thread_return (if non-NULL).
   Error results (EDEADLK/EINVAL) and success (0) are returned to the
   caller via tid's %EDX. */
static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Joining with oneself would deadlock. */
   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* NOTE(review): if ThreadId is unsigned the `jee < 0` test is
      vacuous -- confirm ThreadId's declaration. */
   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      /* Poison the joinee's now-dead stack so stray accesses get
         reported. */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee.  handle_pthread_return() will
      resume us when jee finishes. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
1521
1522
/* Implement pthread_create on behalf of parent_tid.  Allocates a
   thread slot, clones the parent's CPU state into it, carves out a
   stack (with a no-access redzone), pushes arg and the magic return
   address, and points %EIP at start_routine.  The new thread id is
   stored through *thread and 0 (success) returned in the parent's
   %EDX.  NOTE(review): the attr parameter is currently ignored --
   custom stack sizes/detach state are not honoured. */
static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... pthread_t values are stuffed into UInt-sized
      registers below, so the sizes must agree. */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(is_valid_tid(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate.  (A reused slot may still own the previous
      occupant's stack.) */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
   }

   /* Point %ESP just below the redzone at the top of the stack. */
   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address, so that returning from
      start_routine lands in the thread-exit machinery. */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   /* The two words just pushed (arg + return address) are valid. */
   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   //   if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   vg_threads[tid].associated_mx = NULL;
   vg_threads[tid].associated_cv = NULL;
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}
1613
1614
sewardj604ec3c2002-04-18 22:38:41 +00001615/* -----------------------------------------------------------
1616 MUTEXes
1617 -------------------------------------------------------- */
1618
sewardj604ec3c2002-04-18 22:38:41 +00001619/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00001620 typedef struct
1621 {
1622 int __m_reserved; -- Reserved for future use
1623 int __m_count; -- Depth of recursive locking
1624 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1625 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1626 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1627 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001628
sewardj6072c362002-04-19 14:40:57 +00001629 #define PTHREAD_MUTEX_INITIALIZER \
1630 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1631 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1632 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1633 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1634 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1635 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1636 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001637
sewardj6072c362002-04-19 14:40:57 +00001638 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001639
sewardj6072c362002-04-19 14:40:57 +00001640 __m_kind never changes and indicates whether or not it is recursive.
1641
1642 __m_count indicates the lock count; if 0, the mutex is not owned by
1643 anybody.
1644
1645 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1646 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1647 statically initialised mutexes correctly appear
1648 to belong to nobody.
1649
   In summary, a not-in-use mutex is distinguished by having __m_owner
1651 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1652 conditions holds, the other should too.
1653
1654 There is no linked list of threads waiting for this mutex. Instead
1655 a thread in WaitMX state points at the mutex with its waited_on_mx
1656 field. This makes _unlock() inefficient, but simple to implement the
   right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001658
sewardj604ec3c2002-04-18 22:38:41 +00001659 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001660 deals with that for us.
1661*/
sewardje663cb92002-04-12 10:26:32 +00001662
sewardj3b5d8862002-04-20 13:53:23 +00001663/* Helper fns ... */
1664static
1665void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1666 Char* caller )
1667{
1668 Int i;
1669 Char msg_buf[100];
1670
1671 /* Find some arbitrary thread waiting on this mutex, and make it
1672 runnable. If none are waiting, mark the mutex as not held. */
1673 for (i = 1; i < VG_N_THREADS; i++) {
1674 if (vg_threads[i].status == VgTs_Empty)
1675 continue;
1676 if (vg_threads[i].status == VgTs_WaitMX
1677 && vg_threads[i].associated_mx == mutex)
1678 break;
1679 }
1680
1681 vg_assert(i <= VG_N_THREADS);
1682 if (i == VG_N_THREADS) {
1683 /* Nobody else is waiting on it. */
1684 mutex->__m_count = 0;
1685 mutex->__m_owner = VG_INVALID_THREADID;
1686 } else {
1687 /* Notionally transfer the hold to thread i, whose
1688 pthread_mutex_lock() call now returns with 0 (success). */
1689 /* The .count is already == 1. */
1690 vg_assert(vg_threads[i].associated_mx == mutex);
1691 mutex->__m_owner = (_pthread_descr)i;
1692 vg_threads[i].status = VgTs_Runnable;
1693 vg_threads[i].associated_mx = NULL;
1694 vg_threads[i].m_edx = 0; /* pth_lock() success */
1695
1696 if (VG_(clo_trace_pthread_level) >= 1) {
1697 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1698 caller, mutex );
1699 print_pthread_event(i, msg_buf);
1700 }
1701 }
1702}
1703
sewardje663cb92002-04-12 10:26:32 +00001704
1705static
1706void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1707{
sewardj604ec3c2002-04-18 22:38:41 +00001708 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001709
sewardj604ec3c2002-04-18 22:38:41 +00001710 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001711 VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p ...", mutex );
sewardj604ec3c2002-04-18 22:38:41 +00001712 print_pthread_event(tid, msg_buf);
1713 }
1714
1715 /* Paranoia ... */
1716 vg_assert(is_valid_tid(tid)
1717 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001718
1719 /* POSIX doesn't mandate this, but for sanity ... */
1720 if (mutex == NULL) {
1721 vg_threads[tid].m_edx = EINVAL;
1722 return;
1723 }
1724
sewardj604ec3c2002-04-18 22:38:41 +00001725 /* More paranoia ... */
1726 switch (mutex->__m_kind) {
1727 case PTHREAD_MUTEX_TIMED_NP:
1728 case PTHREAD_MUTEX_RECURSIVE_NP:
1729 case PTHREAD_MUTEX_ERRORCHECK_NP:
1730 case PTHREAD_MUTEX_ADAPTIVE_NP:
1731 if (mutex->__m_count >= 0) break;
1732 /* else fall thru */
1733 default:
1734 vg_threads[tid].m_edx = EINVAL;
1735 return;
sewardje663cb92002-04-12 10:26:32 +00001736 }
1737
sewardj604ec3c2002-04-18 22:38:41 +00001738 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001739
sewardj604ec3c2002-04-18 22:38:41 +00001740 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001741
1742 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001743 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001744 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001745 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001746 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001747 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001748 vg_threads[tid].m_edx = 0;
sewardj3b5d8862002-04-20 13:53:23 +00001749 VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
sewardj604ec3c2002-04-18 22:38:41 +00001750 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001751 return;
1752 } else {
1753 vg_threads[tid].m_edx = EDEADLK;
1754 return;
1755 }
1756 } else {
sewardj6072c362002-04-19 14:40:57 +00001757 /* Someone else has it; we have to wait. Mark ourselves
1758 thusly. */
sewardj3b5d8862002-04-20 13:53:23 +00001759 vg_threads[tid].status = VgTs_WaitMX;
1760 vg_threads[tid].associated_mx = mutex;
sewardjf8f819e2002-04-17 23:21:37 +00001761 /* No assignment to %EDX, since we're blocking. */
1762 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj3b5d8862002-04-20 13:53:23 +00001763 VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p: BLOCK",
sewardj604ec3c2002-04-18 22:38:41 +00001764 mutex );
sewardjf8f819e2002-04-17 23:21:37 +00001765 print_pthread_event(tid, msg_buf);
1766 }
sewardje663cb92002-04-12 10:26:32 +00001767 return;
1768 }
sewardjf8f819e2002-04-17 23:21:37 +00001769
sewardje663cb92002-04-12 10:26:32 +00001770 } else {
sewardj6072c362002-04-19 14:40:57 +00001771 /* Nobody owns it. Sanity check ... */
1772 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001773 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001774 mutex->__m_count = 1;
1775 mutex->__m_owner = (_pthread_descr)tid;
sewardj3b5d8862002-04-20 13:53:23 +00001776 vg_assert(vg_threads[tid].associated_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001777 /* return 0 (success). */
1778 vg_threads[tid].m_edx = 0;
1779 }
sewardjf8f819e2002-04-17 23:21:37 +00001780
sewardje663cb92002-04-12 10:26:32 +00001781}
1782
1783
1784static
1785void do_pthread_mutex_unlock ( ThreadId tid,
1786 pthread_mutex_t *mutex )
1787{
sewardj3b5d8862002-04-20 13:53:23 +00001788 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001789
sewardj45b4b372002-04-16 22:50:32 +00001790 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001791 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00001792 print_pthread_event(tid, msg_buf);
1793 }
1794
sewardj604ec3c2002-04-18 22:38:41 +00001795 /* Paranoia ... */
1796 vg_assert(is_valid_tid(tid)
1797 && vg_threads[tid].status == VgTs_Runnable);
1798
1799 if (mutex == NULL) {
1800 vg_threads[tid].m_edx = EINVAL;
1801 return;
1802 }
1803
1804 /* More paranoia ... */
1805 switch (mutex->__m_kind) {
1806 case PTHREAD_MUTEX_TIMED_NP:
1807 case PTHREAD_MUTEX_RECURSIVE_NP:
1808 case PTHREAD_MUTEX_ERRORCHECK_NP:
1809 case PTHREAD_MUTEX_ADAPTIVE_NP:
1810 if (mutex->__m_count >= 0) break;
1811 /* else fall thru */
1812 default:
1813 vg_threads[tid].m_edx = EINVAL;
1814 return;
1815 }
sewardje663cb92002-04-12 10:26:32 +00001816
1817 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00001818 if (mutex->__m_count == 0 /* nobody holds it */
1819 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00001820 vg_threads[tid].m_edx = EPERM;
1821 return;
1822 }
1823
sewardjf8f819e2002-04-17 23:21:37 +00001824 /* If it's a multiply-locked recursive mutex, just decrement the
1825 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00001826 if (mutex->__m_count > 1) {
1827 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
1828 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00001829 vg_threads[tid].m_edx = 0; /* success */
1830 return;
1831 }
1832
sewardj604ec3c2002-04-18 22:38:41 +00001833 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00001834 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001835 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00001836 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00001837
sewardj3b5d8862002-04-20 13:53:23 +00001838 /* Release at max one thread waiting on this mutex. */
1839 release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
sewardje663cb92002-04-12 10:26:32 +00001840
sewardj3b5d8862002-04-20 13:53:23 +00001841 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardje663cb92002-04-12 10:26:32 +00001842 vg_threads[tid].m_edx = 0; /* Success. */
1843}
1844
1845
sewardj6072c362002-04-19 14:40:57 +00001846/* -----------------------------------------------------------
1847 CONDITION VARIABLES
1848 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00001849
sewardj6072c362002-04-19 14:40:57 +00001850/* The relevant native types are as follows:
1851 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00001852
sewardj6072c362002-04-19 14:40:57 +00001853 -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER
1854 typedef struct
1855 {
1856 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
1857 _pthread_descr __c_waiting; -- Threads waiting on this condition
1858 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00001859
sewardj6072c362002-04-19 14:40:57 +00001860 -- Attribute for conditionally variables.
1861 typedef struct
1862 {
1863 int __dummy;
1864 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00001865
sewardj6072c362002-04-19 14:40:57 +00001866 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00001867
sewardj3b5d8862002-04-20 13:53:23 +00001868 We don't use any fields of pthread_cond_t for anything at all.
1869 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00001870
1871 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00001872 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00001873
sewardj77e466c2002-04-14 02:29:29 +00001874
sewardj3b5d8862002-04-20 13:53:23 +00001875static
1876void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
1877 Int n_to_release,
1878 Char* caller )
1879{
1880 Int i;
1881 Char msg_buf[100];
1882 pthread_mutex_t* mx;
1883
1884 while (True) {
1885 if (n_to_release == 0)
1886 return;
1887
1888 /* Find a thread waiting on this CV. */
1889 for (i = 1; i < VG_N_THREADS; i++) {
1890 if (vg_threads[i].status == VgTs_Empty)
1891 continue;
1892 if (vg_threads[i].status == VgTs_WaitCV
1893 && vg_threads[i].associated_cv == cond)
1894 break;
1895 }
1896 vg_assert(i <= VG_N_THREADS);
1897
1898 if (i == VG_N_THREADS) {
1899 /* Nobody else is waiting on it. */
1900 return;
1901 }
1902
1903 mx = vg_threads[i].associated_mx;
1904 vg_assert(mx != NULL);
1905
1906 if (mx->__m_owner == VG_INVALID_THREADID) {
1907 /* Currently unheld; hand it out to thread i. */
1908 vg_assert(mx->__m_count == 0);
1909 vg_threads[i].status = VgTs_Runnable;
1910 vg_threads[i].associated_cv = NULL;
1911 vg_threads[i].associated_mx = NULL;
1912 mx->__m_owner = (_pthread_descr)i;
1913 mx->__m_count = 1;
1914 vg_threads[i].m_edx = 0; /* pthread_cond_wait returns success */
1915
1916 if (VG_(clo_trace_pthread_level) >= 1) {
1917 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
1918 caller, cond, mx );
1919 print_pthread_event(i, msg_buf);
1920 }
1921
1922 } else {
1923 /* Currently held. Make thread i be blocked on it. */
1924 vg_threads[i].status = VgTs_WaitMX;
1925 vg_threads[i].associated_cv = NULL;
1926 vg_threads[i].associated_mx = mx;
1927
1928 if (VG_(clo_trace_pthread_level) >= 1) {
1929 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
1930 caller, cond, mx );
1931 print_pthread_event(i, msg_buf);
1932 }
1933
1934 }
1935
1936 n_to_release--;
1937 }
1938}
1939
1940
/* Implement pthread_cond_wait() on behalf of thread 'tid': validate
   'mutex' and 'cond', verify that 'tid' holds the mutex, then
   atomically (from the client's viewpoint) move 'tid' onto the CV
   and release the mutex.  Error results (EINVAL) go in %EDX; on the
   blocking path no result is assigned -- it is written when the
   thread is woken by release_N_threads_waiting_on_cond(). */
static
void do_pthread_cond_wait ( ThreadId tid,
                            pthread_cond_t *cond,
                            pthread_mutex_t *mutex )
{
   Char msg_buf[100];

   /* pre: mutex should be a valid mutex and owned by tid. */
   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p ...",
                            cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (mutex == NULL || cond == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* More paranoia: reject unknown mutex kinds and negative
      (corrupted) lock counts. */
   switch (mutex->__m_kind) {
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         vg_threads[tid].m_edx = EINVAL;
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* Queue ourselves on the condition.  NOTE: this must happen
      BEFORE releasing the mutex below -- the release scans for
      VgTs_WaitMX threads, so by being in VgTs_WaitCV we cannot be
      (wrongly) chosen to receive the mutex we are giving up. */
   vg_threads[tid].status = VgTs_WaitCV;
   vg_threads[tid].associated_cv = cond;
   vg_threads[tid].associated_mx = mutex;

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf,
                   "pthread_cond_wait cv %p, mx %p: BLOCK",
                   cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Release the mutex, waking at most one VgTs_WaitMX waiter. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
}
1999
2000
2001static
2002void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2003 Bool broadcast,
2004 pthread_cond_t *cond )
2005{
2006 Char msg_buf[100];
2007 Char* caller
2008 = broadcast ? "pthread_cond_broadcast"
2009 : "pthread_cond_signal ";
2010
2011 if (VG_(clo_trace_pthread_level) >= 2) {
2012 VG_(sprintf)(msg_buf, "%s cv %p ...",
2013 caller, cond );
2014 print_pthread_event(tid, msg_buf);
2015 }
2016
2017 /* Paranoia ... */
2018 vg_assert(is_valid_tid(tid)
2019 && vg_threads[tid].status == VgTs_Runnable);
2020
2021 if (cond == NULL) {
2022 vg_threads[tid].m_edx = EINVAL;
2023 return;
2024 }
2025
2026 release_N_threads_waiting_on_cond (
2027 cond,
2028 broadcast ? VG_N_THREADS : 1,
2029 caller
2030 );
2031
2032 vg_threads[tid].m_edx = 0; /* success */
2033}
2034
sewardj77e466c2002-04-14 02:29:29 +00002035
sewardje663cb92002-04-12 10:26:32 +00002036/* ---------------------------------------------------------------------
2037 Handle non-trivial client requests.
2038 ------------------------------------------------------------------ */
2039
2040static
2041void do_nontrivial_clientreq ( ThreadId tid )
2042{
2043 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
2044 UInt req_no = arg[0];
2045 switch (req_no) {
2046
2047 case VG_USERREQ__PTHREAD_CREATE:
2048 do_pthread_create( tid,
2049 (pthread_t*)arg[1],
2050 (pthread_attr_t*)arg[2],
2051 (void*(*)(void*))arg[3],
2052 (void*)arg[4] );
2053 break;
2054
sewardjbc5b99f2002-04-13 00:08:51 +00002055 case VG_USERREQ__PTHREAD_RETURNS:
2056 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00002057 break;
2058
2059 case VG_USERREQ__PTHREAD_JOIN:
2060 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
2061 break;
2062
sewardje663cb92002-04-12 10:26:32 +00002063 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
2064 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
2065 break;
2066
2067 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
2068 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
2069 break;
2070
sewardje663cb92002-04-12 10:26:32 +00002071 case VG_USERREQ__PTHREAD_CANCEL:
2072 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
2073 break;
2074
sewardj3b5d8862002-04-20 13:53:23 +00002075 case VG_USERREQ__PTHREAD_EXIT:
2076 do_pthread_exit( tid, (void*)(arg[1]) );
2077 break;
2078
2079 case VG_USERREQ__PTHREAD_COND_WAIT:
2080 do_pthread_cond_wait( tid,
2081 (pthread_cond_t *)(arg[1]),
2082 (pthread_mutex_t *)(arg[2]) );
2083 break;
2084
2085 case VG_USERREQ__PTHREAD_COND_SIGNAL:
2086 do_pthread_cond_signal_or_broadcast(
2087 tid,
2088 False, /* signal, not broadcast */
2089 (pthread_cond_t *)(arg[1]) );
2090 break;
2091
2092 case VG_USERREQ__PTHREAD_COND_BROADCAST:
2093 do_pthread_cond_signal_or_broadcast(
2094 tid,
2095 True, /* broadcast, not signal */
2096 (pthread_cond_t *)(arg[1]) );
2097 break;
2098
sewardje663cb92002-04-12 10:26:32 +00002099 case VG_USERREQ__MAKE_NOACCESS:
2100 case VG_USERREQ__MAKE_WRITABLE:
2101 case VG_USERREQ__MAKE_READABLE:
2102 case VG_USERREQ__DISCARD:
2103 case VG_USERREQ__CHECK_WRITABLE:
2104 case VG_USERREQ__CHECK_READABLE:
2105 case VG_USERREQ__MAKE_NOACCESS_STACK:
2106 case VG_USERREQ__RUNNING_ON_VALGRIND:
2107 case VG_USERREQ__DO_LEAK_CHECK:
sewardj8c824512002-04-14 04:16:48 +00002108 vg_threads[tid].m_edx
2109 = VG_(handle_client_request) ( &vg_threads[tid], arg );
sewardje663cb92002-04-12 10:26:32 +00002110 break;
2111
sewardj77e466c2002-04-14 02:29:29 +00002112 case VG_USERREQ__SIGNAL_RETURNS:
2113 handle_signal_return(tid);
2114 break;
sewardj54cacf02002-04-12 23:24:59 +00002115
sewardje663cb92002-04-12 10:26:32 +00002116 default:
2117 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
2118 VG_(panic)("handle_private_client_pthread_request: "
2119 "unknown request");
2120 /*NOTREACHED*/
2121 break;
2122 }
2123}
2124
2125
sewardj6072c362002-04-19 14:40:57 +00002126/* ---------------------------------------------------------------------
2127 Sanity checking.
2128 ------------------------------------------------------------------ */
2129
2130/* Internal consistency checks on the sched/pthread structures. */
2131static
2132void scheduler_sanity ( void )
2133{
sewardj3b5d8862002-04-20 13:53:23 +00002134 pthread_mutex_t* mx;
2135 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002136 Int i;
2137 /* VG_(printf)("scheduler_sanity\n"); */
2138 for (i = 1; i < VG_N_THREADS; i++) {
sewardj3b5d8862002-04-20 13:53:23 +00002139 mx = vg_threads[i].associated_mx;
2140 cv = vg_threads[i].associated_cv;
sewardj6072c362002-04-19 14:40:57 +00002141 if (vg_threads[i].status == VgTs_WaitMX) {
sewardj3b5d8862002-04-20 13:53:23 +00002142 vg_assert(cv == NULL);
2143 vg_assert(mx != NULL);
2144 vg_assert(mx->__m_count > 0);
2145 vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
2146 vg_assert(i != (ThreadId)mx->__m_owner);
2147 /* otherwise thread i would be deadlocked. */
2148 } else
2149 if (vg_threads[i].status == VgTs_WaitCV) {
2150 vg_assert(cv != NULL);
2151 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002152 } else {
sewardj3b5d8862002-04-20 13:53:23 +00002153 vg_assert(cv == NULL);
2154 vg_assert(mx == NULL);
sewardj6072c362002-04-19 14:40:57 +00002155 }
2156 }
2157}
2158
2159
sewardje663cb92002-04-12 10:26:32 +00002160/*--------------------------------------------------------------------*/
2161/*--- end vg_scheduler.c ---*/
2162/*--------------------------------------------------------------------*/