blob: 194c231236736f7c3ed09205683653032c2e666b [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
33
34#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
35 VG_USERREQ__DO_LEAK_CHECK */
36
sewardj77e466c2002-04-14 02:29:29 +000037/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000038
sewardj77e466c2002-04-14 02:29:29 +000039Note! This pthreads implementation is so poor as to not be
40suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000041
sewardj77e466c2002-04-14 02:29:29 +000042- Currently, when a signal is run, just the ThreadStatus.status fields
43 are saved in the signal frame, along with the CPU state. Question:
44 should I also save and restore:
45 ThreadStatus.joiner
46 ThreadStatus.waited_on_mid
47 ThreadStatus.awaken_at
48 ThreadStatus.retval
49 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000050
sewardj77e466c2002-04-14 02:29:29 +000051- Signals interrupting read/write and nanosleep: SA_RESTART settings.
52 Read/write correctly return with EINTR when SA_RESTART isn't
53 specified and they are interrupted by a signal. nanosleep just
54 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000055
sewardj75fe1892002-04-14 02:46:33 +000056- Read/write syscall starts: don't crap out when the initial
57 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000058
sewardj9a199dc2002-04-14 13:01:38 +000059- Get rid of restrictions re use of sigaltstack; they are no longer
60 needed.
61
sewardj6072c362002-04-19 14:40:57 +000062- Fix signals properly, so that each thread has its own blocking mask.
63 Currently this isn't done, and (worse?) signals are delivered to
64 Thread 1 (the root thread) regardless.
65
66 So, what's the deal with signals and mutexes? If a thread is
67 blocked on a mutex, or for a condition variable for that matter, can
68 signals still be delivered to it? This has serious consequences --
69 deadlocks, etc.
70
sewardje462e202002-04-13 04:09:07 +000071*/
sewardje663cb92002-04-12 10:26:32 +000072
73
74/* ---------------------------------------------------------------------
75 Types and globals for the scheduler.
76 ------------------------------------------------------------------ */
77
78/* type ThreadId is defined in vg_include.h. */
79
80/* struct ThreadState is defined in vg_include.h. */
81
sewardj6072c362002-04-19 14:40:57 +000082/* Private globals. A statically allocated array of threads. NOTE:
83 [0] is never used, to simplify the simulation of initialisers for
84 LinuxThreads. */
sewardje663cb92002-04-12 10:26:32 +000085static ThreadState vg_threads[VG_N_THREADS];
86
sewardj1e8cdc92002-04-18 11:37:52 +000087/* The tid of the thread currently in VG_(baseBlock). */
88static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
89
sewardje663cb92002-04-12 10:26:32 +000090
91/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
92jmp_buf VG_(scheduler_jmpbuf);
93/* ... and if so, here's the signal which caused it to do so. */
94Int VG_(longjmpd_on_signal);
95
96
97/* Machinery to keep track of which threads are waiting on which
98 fds. */
99typedef
100 struct {
101 /* The thread which made the request. */
102 ThreadId tid;
103
104 /* The next two fields describe the request. */
105 /* File descriptor waited for. -1 means this slot is not in use */
106 Int fd;
107 /* The syscall number the fd is used in. */
108 Int syscall_no;
109
110 /* False => still waiting for select to tell us the fd is ready
111 to go. True => the fd is ready, but the results have not yet
112 been delivered back to the calling thread. Once the latter
113 happens, this entire record is marked as no longer in use, by
114 making the fd field be -1. */
115 Bool ready;
116 }
117 VgWaitedOnFd;
118
119static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
120
121
sewardje663cb92002-04-12 10:26:32 +0000122/* Forwards */
123static void do_nontrivial_clientreq ( ThreadId tid );
124
sewardj6072c362002-04-19 14:40:57 +0000125static void scheduler_sanity ( void );
126
sewardje663cb92002-04-12 10:26:32 +0000127
128/* ---------------------------------------------------------------------
129 Helper functions for the scheduler.
130 ------------------------------------------------------------------ */
131
sewardj604ec3c2002-04-18 22:38:41 +0000132static __inline__
133Bool is_valid_tid ( ThreadId tid )
134{
135 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000136 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000137 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000138 return True;
139}
140
141
sewardj1e8cdc92002-04-18 11:37:52 +0000142/* For constructing error messages only: try and identify a thread
143 whose stack this address currently falls within, or return
144 VG_INVALID_THREADID if it doesn't. A small complication is dealing
145 with any currently VG_(baseBlock)-resident thread.
146*/
147ThreadId VG_(identify_stack_addr)( Addr a )
148{
149 ThreadId tid, tid_to_skip;
150
151 tid_to_skip = VG_INVALID_THREADID;
152
153 /* First check to see if there's a currently-loaded thread in
154 VG_(baseBlock). */
155 if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
156 tid = vg_tid_currently_in_baseBlock;
157 if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
158 && a <= vg_threads[tid].stack_highest_word)
159 return tid;
160 else
161 tid_to_skip = tid;
162 }
163
sewardj6072c362002-04-19 14:40:57 +0000164 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj1e8cdc92002-04-18 11:37:52 +0000165 if (vg_threads[tid].status == VgTs_Empty) continue;
166 if (tid == tid_to_skip) continue;
167 if (vg_threads[tid].m_esp <= a
168 && a <= vg_threads[tid].stack_highest_word)
169 return tid;
170 }
171 return VG_INVALID_THREADID;
172}
173
174
/* Print the scheduler status.  For each allocated thread (slot 0 is
   never used) prints its status, the mutex it waits on (if any), and
   a backtrace from its saved %eip/%ebp.  Debug aid only. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         default: VG_(printf)("???"); break;  /* unknown status value */
      }
      VG_(printf)(", waited_on_mx = %p\n", vg_threads[i].waited_on_mx );
      /* Stack trace from the thread's saved registers (it is not the
         one currently in VG_(baseBlock), or its regs may be stale --
         NOTE(review): caller beware; confirm acceptable for debug). */
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, vg_threads[i].m_eip,
                              vg_threads[i].m_ebp ));
   }
   VG_(printf)("\n");
}
200
201static
202void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
203{
204 Int i;
205
206 vg_assert(fd != -1); /* avoid total chaos */
207
208 for (i = 0; i < VG_N_WAITING_FDS; i++)
209 if (vg_waiting_fds[i].fd == -1)
210 break;
211
212 if (i == VG_N_WAITING_FDS)
213 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
214 /*
215 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
216 tid, fd, i);
217 */
218 vg_waiting_fds[i].fd = fd;
219 vg_waiting_fds[i].tid = tid;
220 vg_waiting_fds[i].ready = False;
221 vg_waiting_fds[i].syscall_no = syscall_no;
222}
223
224
225
/* Emit a one-line scheduler trace message tagged with tid. */
static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}
231
232
/* Emit a one-line pthread-emulation trace message tagged with tid. */
static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}
238
239
240static
241Char* name_of_sched_event ( UInt event )
242{
243 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000244 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
245 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
246 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
247 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
248 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
249 default: return "??UNKNOWN??";
250 }
251}
252
253
254/* Create a translation of the client basic block beginning at
255 orig_addr, and add it to the translation cache & translation table.
256 This probably doesn't really belong here, but, hey ...
257*/
/* Create a translation of the client basic block beginning at
   orig_addr on behalf of thread tid, and add it to the translation
   cache & translation table.  The call order below (LRU pass,
   translate, copy, free, register) is load-bearing; do not reorder. */
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}
295
296
297/* Allocate a completely empty ThreadState record. */
298static
299ThreadId vg_alloc_ThreadState ( void )
300{
301 Int i;
sewardj6072c362002-04-19 14:40:57 +0000302 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000303 if (vg_threads[i].status == VgTs_Empty)
304 return i;
305 }
306 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
307 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
308 VG_(panic)("VG_N_THREADS is too low");
309 /*NOTREACHED*/
310}
311
312
313ThreadState* VG_(get_thread_state) ( ThreadId tid )
314{
sewardj6072c362002-04-19 14:40:57 +0000315 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000316 vg_assert(vg_threads[tid].status != VgTs_Empty);
317 return & vg_threads[tid];
318}
319
320
/* Return the ThreadState of whichever thread is currently resident
   in VG_(baseBlock); it is an error to call this when none is. */
ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}
326
327
/* Return the tid of the thread currently resident in VG_(baseBlock);
   it is an error to call this when none is. */
ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}
333
334
/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run.  Requires that VG_(baseBlock) is currently vacant;
   records tid as the new occupant. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   /* Must not clobber another thread's resident state. */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   /* FPU image, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   /* Shadow registers. */
   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   /* tid now owns VG_(baseBlock). */
   vg_tid_currently_in_baseBlock = tid;
}
369
370
/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   /* Some thread must actually be resident to be saved. */
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   /* FPU image, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   /* Shadow registers. */
   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   /* VG_(baseBlock) is now vacant. */
   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}
426
427
/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened.  The thread must be Runnable.
   If the client takes an unresumable fault, vg_oursignalhandler()
   longjmps back to the setjmp below and we report
   VG_TRC_UNRESUMABLE_SIGNAL instead of the innerloop's result. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   /* NOTE(review): trc is a non-volatile automatic modified between
      setjmp and longjmp; per the C standard its value in the else-arm
      is formally indeterminate.  The assert below relies on it
      surviving -- confirm this is safe with the compilers used. */
   UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
452
453
/* Increment the LRU epoch counter, optionally report the per-epoch
   translation statistics (verbosity > 2), and reset those per-epoch
   counters for the next epoch. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   /* Reset per-epoch counters; the VG_(overall_*) totals persist. */
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}
481
482
483/* Initialise the scheduler. Create a single "main" thread ready to
sewardj6072c362002-04-19 14:40:57 +0000484 run, with special ThreadId of one. This is called at startup; the
sewardje663cb92002-04-12 10:26:32 +0000485 caller takes care to park the client's state is parked in
486 VG_(baseBlock).
487*/
488void VG_(scheduler_init) ( void )
489{
490 Int i;
491 Addr startup_esp;
492 ThreadId tid_main;
493
494 startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
495 if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
sewardj9a199dc2002-04-14 13:01:38 +0000496 VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
497 (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
sewardje663cb92002-04-12 10:26:32 +0000498 VG_(panic)("unexpected %esp at startup");
499 }
500
sewardj6072c362002-04-19 14:40:57 +0000501 for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
502 vg_threads[i].status = VgTs_Empty;
sewardje663cb92002-04-12 10:26:32 +0000503 vg_threads[i].stack_size = 0;
504 vg_threads[i].stack_base = (Addr)NULL;
sewardj1e8cdc92002-04-18 11:37:52 +0000505 vg_threads[i].tid = i;
sewardje663cb92002-04-12 10:26:32 +0000506 }
507
508 for (i = 0; i < VG_N_WAITING_FDS; i++)
509 vg_waiting_fds[i].fd = -1; /* not in use */
510
sewardje663cb92002-04-12 10:26:32 +0000511 /* Assert this is thread zero, which has certain magic
512 properties. */
513 tid_main = vg_alloc_ThreadState();
sewardj6072c362002-04-19 14:40:57 +0000514 vg_assert(tid_main == 1);
sewardje663cb92002-04-12 10:26:32 +0000515
sewardj6072c362002-04-19 14:40:57 +0000516 vg_threads[tid_main].status = VgTs_Runnable;
517 vg_threads[tid_main].joiner = VG_INVALID_THREADID;
518 vg_threads[tid_main].waited_on_mx = NULL;
519 vg_threads[tid_main].retval = NULL; /* not important */
sewardj1e8cdc92002-04-18 11:37:52 +0000520 vg_threads[tid_main].stack_highest_word
521 = vg_threads[tid_main].m_esp /* -4 ??? */;
sewardje663cb92002-04-12 10:26:32 +0000522
523 /* Copy VG_(baseBlock) state to tid_main's slot. */
sewardj1e8cdc92002-04-18 11:37:52 +0000524 vg_tid_currently_in_baseBlock = tid_main;
sewardje663cb92002-04-12 10:26:32 +0000525 VG_(save_thread_state) ( tid_main );
sewardj1e8cdc92002-04-18 11:37:52 +0000526
527 /* So now ... */
528 vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
sewardje663cb92002-04-12 10:26:32 +0000529}
530
531
532/* What if fd isn't a valid fd? */
533static
534void set_fd_nonblocking ( Int fd )
535{
536 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
537 vg_assert(!VG_(is_kerror)(res));
538 res |= VKI_O_NONBLOCK;
539 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
540 vg_assert(!VG_(is_kerror)(res));
541}
542
543static
544void set_fd_blocking ( Int fd )
545{
546 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
547 vg_assert(!VG_(is_kerror)(res));
548 res &= ~VKI_O_NONBLOCK;
549 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
550 vg_assert(!VG_(is_kerror)(res));
551}
552
553static
554Bool fd_is_blockful ( Int fd )
555{
556 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
557 vg_assert(!VG_(is_kerror)(res));
558 return (res & VKI_O_NONBLOCK) ? False : True;
559}
560
561
562
/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if so.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
   /* Store vvv as the request's result in the client's %EDX and
      report the request handled. */
#  define SIMPLE_RETURN(vvv)                      \
   { tst->m_edx = (vvv);                          \
     return True;                                 \
   }

   ThreadState* tst    = &vg_threads[tid];
   /* By convention the client parks a pointer to the request-argument
      block in its %EAX; arg[0] is the request code. */
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
631
632
sewardj6072c362002-04-19 14:40:57 +0000633/* vthread tid is returning from a signal handler; modify its
634 stack/regs accordingly. */
635static
636void handle_signal_return ( ThreadId tid )
637{
638 Char msg_buf[100];
639 Bool restart_blocked_syscalls;
640
641 vg_assert(is_valid_tid(tid));
642
643 restart_blocked_syscalls = VG_(signal_returns)(tid);
644
645 if (restart_blocked_syscalls)
646 /* Easy; we don't have to do anything. */
647 return;
648
649 if (vg_threads[tid].status == VgTs_WaitFD) {
650 vg_assert(vg_threads[tid].m_eax == __NR_read
651 || vg_threads[tid].m_eax == __NR_write);
652 /* read() or write() interrupted. Force a return with EINTR. */
653 vg_threads[tid].m_eax = -VKI_EINTR;
654 vg_threads[tid].status = VgTs_Runnable;
655 if (VG_(clo_trace_sched)) {
656 VG_(sprintf)(msg_buf,
657 "read() / write() interrupted by signal; return EINTR" );
658 print_sched_event(tid, msg_buf);
659 }
660 return;
661 }
662
663 if (vg_threads[tid].status == VgTs_WaitFD) {
664 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
665 /* We interrupted a nanosleep(). The right thing to do is to
666 write the unused time to nanosleep's second param and return
667 EINTR, but I'm too lazy for that. */
668 return;
669 }
670
671 /* All other cases? Just return. */
672}
673
674
/* tid has hit a syscall.  Three cases, all visible below:
   - nanosleep: compute the wakeup time and deschedule (VgTs_Sleeping);
   - read/write on a blocking fd: lodge the request non-blockingly
     with the kernel and, if it would block, deschedule (VgTs_WaitFD);
   - everything else: assume non-blocking and run it immediately. */
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      /* Wakeup time in microseconds = now + requested interval. */
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   /* Classify: only read/write on a blocking fd can block us. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         /* Deliberately-false assert: cannot happen given the
            classification above. */
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion. The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}
784
785
786/* Find out which of the fds in vg_waiting_fds are now ready to go, by
787 making enquiries with select(), and mark them as ready. We have to
788 wait for the requesting threads to fall into the the WaitFD state
789 before we can actually finally deliver the results, so this
790 procedure doesn't do that; complete_blocked_syscalls() does it.
791
792 It might seem odd that a thread which has done a blocking syscall
793 is not in WaitFD state; the way this can happen is if it initially
794 becomes WaitFD, but then a signal is delivered to it, so it becomes
795 Runnable for a while. In this case we have to wait for the
796 sighandler to return, whereupon the WaitFD state is resumed, and
797 only at that point can the I/O result be delivered to it. However,
798 this point may be long after the fd is actually ready.
799
800 So, poll_for_ready_fds() merely detects fds which are ready.
801 complete_blocked_syscalls() does the second half of the trick,
802 possibly much later: it delivers the results from ready fds to
803 threads in WaitFD state.
804*/
sewardj9a199dc2002-04-14 13:01:38 +0000805static
sewardje663cb92002-04-12 10:26:32 +0000806void poll_for_ready_fds ( void )
807{
808 vki_ksigset_t saved_procmask;
809 vki_fd_set readfds;
810 vki_fd_set writefds;
811 vki_fd_set exceptfds;
812 struct vki_timeval timeout;
813 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
814 ThreadId tid;
815 Bool rd_ok, wr_ok, ex_ok;
816 Char msg_buf[100];
817
sewardje462e202002-04-13 04:09:07 +0000818 struct vki_timespec* rem;
819 ULong t_now;
820
sewardje663cb92002-04-12 10:26:32 +0000821 /* Awaken any sleeping threads whose sleep has expired. */
sewardj6072c362002-04-19 14:40:57 +0000822 for (tid = 1; tid < VG_N_THREADS; tid++)
823 if (vg_threads[tid].status == VgTs_Sleeping)
824 break;
825
826 /* Avoid pointless calls to VG_(read_microsecond_timer). */
827 if (tid < VG_N_THREADS) {
828 t_now = VG_(read_microsecond_timer)();
829 for (tid = 1; tid < VG_N_THREADS; tid++) {
830 if (vg_threads[tid].status != VgTs_Sleeping)
831 continue;
832 if (t_now >= vg_threads[tid].awaken_at) {
833 /* Resume this thread. Set to zero the remaining-time
834 (second) arg of nanosleep, since it's used up all its
835 time. */
836 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
837 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
838 if (rem != NULL) {
839 rem->tv_sec = 0;
840 rem->tv_nsec = 0;
841 }
842 /* Make the syscall return 0 (success). */
843 vg_threads[tid].m_eax = 0;
844 /* Reschedule this thread. */
845 vg_threads[tid].status = VgTs_Runnable;
846 if (VG_(clo_trace_sched)) {
847 VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
848 t_now);
849 print_sched_event(tid, msg_buf);
850 }
sewardje663cb92002-04-12 10:26:32 +0000851 }
852 }
853 }
sewardje663cb92002-04-12 10:26:32 +0000854
sewardje462e202002-04-13 04:09:07 +0000855 /* And look for threads waiting on file descriptors which are now
856 ready for I/O.*/
sewardje663cb92002-04-12 10:26:32 +0000857 timeout.tv_sec = 0;
858 timeout.tv_usec = 0;
859
860 VKI_FD_ZERO(&readfds);
861 VKI_FD_ZERO(&writefds);
862 VKI_FD_ZERO(&exceptfds);
863 fd_max = -1;
864 for (i = 0; i < VG_N_WAITING_FDS; i++) {
865 if (vg_waiting_fds[i].fd == -1 /* not in use */)
866 continue;
867 if (vg_waiting_fds[i].ready /* already ready? */)
868 continue;
869 fd = vg_waiting_fds[i].fd;
870 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
sewardje462e202002-04-13 04:09:07 +0000871 vg_assert(fd >= 0);
sewardje663cb92002-04-12 10:26:32 +0000872 if (fd > fd_max)
873 fd_max = fd;
874 tid = vg_waiting_fds[i].tid;
sewardj6072c362002-04-19 14:40:57 +0000875 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000876 syscall_no = vg_waiting_fds[i].syscall_no;
877 switch (syscall_no) {
878 case __NR_read:
879 VKI_FD_SET(fd, &readfds); break;
880 case __NR_write:
881 VKI_FD_SET(fd, &writefds); break;
882 default:
883 VG_(panic)("poll_for_ready_fds: unexpected syscall");
884 /*NOTREACHED*/
885 break;
886 }
887 }
888
sewardje462e202002-04-13 04:09:07 +0000889 /* Short cut: if no fds are waiting, give up now. */
890 if (fd_max == -1)
891 return;
892
sewardje663cb92002-04-12 10:26:32 +0000893 /* BLOCK ALL SIGNALS. We don't want the complication of select()
894 getting interrupted. */
895 VG_(block_all_host_signals)( &saved_procmask );
896
897 n_ready = VG_(select)
898 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
899 if (VG_(is_kerror)(n_ready)) {
900 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
901 VG_(panic)("poll_for_ready_fds: select failed?!");
902 /*NOTREACHED*/
903 }
904
905 /* UNBLOCK ALL SIGNALS */
906 VG_(restore_host_signals)( &saved_procmask );
907
908 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
909
910 if (n_ready == 0)
911 return;
912
913 /* Inspect all the fds we know about, and handle any completions that
914 have happened. */
915 /*
916 VG_(printf)("\n\n");
917 for (fd = 0; fd < 100; fd++)
918 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
919 VG_(printf)("X"); } else { VG_(printf)("."); };
920 VG_(printf)("\n\nfd_max = %d\n", fd_max);
921 */
922
923 for (fd = 0; fd <= fd_max; fd++) {
924 rd_ok = VKI_FD_ISSET(fd, &readfds);
925 wr_ok = VKI_FD_ISSET(fd, &writefds);
926 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
927
928 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
929 if (n_ok == 0)
930 continue;
931 if (n_ok > 1) {
932 VG_(printf)("offending fd = %d\n", fd);
933 VG_(panic)("poll_for_ready_fds: multiple events on fd");
934 }
935
936 /* An I/O event completed for fd. Find the thread which
937 requested this. */
938 for (i = 0; i < VG_N_WAITING_FDS; i++) {
939 if (vg_waiting_fds[i].fd == -1 /* not in use */)
940 continue;
941 if (vg_waiting_fds[i].fd == fd)
942 break;
943 }
944
945 /* And a bit more paranoia ... */
946 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
947
948 /* Mark the fd as ready. */
949 vg_assert(! vg_waiting_fds[i].ready);
950 vg_waiting_fds[i].ready = True;
951 }
952}
953
954
/* See comment attached to poll_for_ready_fds() for explanation.
   Second half of blocked-syscall handling: for every fd slot that
   poll_for_ready_fds() marked ready, and whose owner thread is back
   in WaitFD state, actually (re)run the syscall -- which will now
   not block -- deliver its result, and make the thread runnable. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result!  (It may be
         temporarily Runnable because a signal handler is running.) */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      /* Run the post-syscall memory-state update for this call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
1001
1002
1003static
1004void nanosleep_for_a_while ( void )
1005{
1006 Int res;
1007 struct vki_timespec req;
1008 struct vki_timespec rem;
1009 req.tv_sec = 0;
1010 req.tv_nsec = 20 * 1000 * 1000;
1011 res = VG_(nanosleep)( &req, &rem );
1012 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
1013 vg_assert(res == 0);
1014}
1015
1016
1017/* ---------------------------------------------------------------------
1018 The scheduler proper.
1019 ------------------------------------------------------------------ */
1020
1021/* Run user-space threads until either
1022 * Deadlock occurs
1023 * One thread asks to shutdown Valgrind
1024 * The specified number of basic blocks has gone by.
1025*/
/* The top-level scheduler.  Run user-space threads until either
     * Deadlock occurs
     * One thread asks to shutdown Valgrind
     * The specified number of basic blocks has gone by.
   Returns a VgSchedReturnCode saying which of these happened.

   Structure: an infinite loop of three phases --
     Phase 1: service I/O completions and signals, then pick a
              runnable thread (or detect deadlock / idle-wait);
     Phase 2: dispatch that thread for up to a quantum of basic
              blocks, absorbing trivial events inline;
     Phase 3: handle the non-trivial event that ended the quantum
              (syscall, client request, signal, timeslice end). */
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;                  /* thread return code from dispatch */
   UInt     dispatch_ctr_SAVED;   /* quantum size we asked for */
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
      scheduler_sanity();

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour. */
         VG_(deliver_signals)( 1 /*HACK*/ );
         VG_(do_sanity_checks)( 1 /*HACK*/, False );

         /* Try and find a thread (tid) to run: round-robin scan of
            the thread table starting just after the last-run tid,
            also counting how many threads are blocked in fd-wait or
            sleep (needed for deadlock detection below). */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason. First, update basic-block
         counters.  The extra -1 accounts for the unconditional
         decrement the dispatcher does on entry (see comment above). */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go)   -= (ULong)done_this_time;
      VG_(bbs_done)    += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d:   completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d:  %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            /* Fast-cache misses were fully handled in Phase 2, so
               seeing one here means the inner loop above is broken. */
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

 debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}
1325
1326
1327/* ---------------------------------------------------------------------
1328 The pthread implementation.
1329 ------------------------------------------------------------------ */
1330
1331#include <pthread.h>
1332#include <errno.h>
1333
1334#if !defined(PTHREAD_STACK_MIN)
1335# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1336#endif
1337
1338/* /usr/include/bits/pthreadtypes.h:
1339 typedef unsigned long int pthread_t;
1340*/
1341
sewardje663cb92002-04-12 10:26:32 +00001342
sewardj604ec3c2002-04-18 22:38:41 +00001343/* -----------------------------------------------------------
1344 Thread CREATION, JOINAGE and CANCELLATION.
1345 -------------------------------------------------------- */
1346
sewardje663cb92002-04-12 10:26:32 +00001347static
1348void do_pthread_cancel ( ThreadId tid_canceller,
1349 pthread_t tid_cancellee )
1350{
1351 Char msg_buf[100];
1352 /* We want make is appear that this thread has returned to
1353 do_pthread_create_bogusRA with PTHREAD_CANCELED as the
1354 return value. So: simple: put PTHREAD_CANCELED into %EAX
1355 and &do_pthread_create_bogusRA into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001356 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001357 VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1358 print_sched_event(tid_cancellee, msg_buf);
1359 }
1360 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001361 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001362 vg_threads[tid_cancellee].status = VgTs_Runnable;
1363}
1364
1365
1366
1367/* Thread tid is exiting, by returning from the function it was
sewardjbc5b99f2002-04-13 00:08:51 +00001368 created with. Or possibly due to pthread_exit or cancellation.
1369 The main complication here is to resume any thread waiting to join
1370 with this one. */
sewardje663cb92002-04-12 10:26:32 +00001371static
sewardjbc5b99f2002-04-13 00:08:51 +00001372void handle_pthread_return ( ThreadId tid, void* retval )
sewardje663cb92002-04-12 10:26:32 +00001373{
1374 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1375 UInt* jnr_args;
1376 void** jnr_thread_return;
1377 Char msg_buf[100];
1378
1379 /* Mark it as not in use. Leave the stack in place so the next
1380 user of this slot doesn't reallocate it. */
sewardj6072c362002-04-19 14:40:57 +00001381 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001382 vg_assert(vg_threads[tid].status != VgTs_Empty);
1383
sewardjbc5b99f2002-04-13 00:08:51 +00001384 vg_threads[tid].retval = retval;
sewardje663cb92002-04-12 10:26:32 +00001385
1386 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1387 /* No one has yet done a join on me */
1388 vg_threads[tid].status = VgTs_WaitJoiner;
sewardj8937c812002-04-12 20:12:20 +00001389 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001390 VG_(sprintf)(msg_buf,
1391 "root fn returns, waiting for a call pthread_join(%d)",
1392 tid);
1393 print_sched_event(tid, msg_buf);
1394 }
1395 } else {
1396 /* Some is waiting; make their join call return with success,
1397 putting my exit code in the place specified by the caller's
1398 thread_return param. This is all very horrible, since we
1399 need to consult the joiner's arg block -- pointed to by its
1400 %EAX -- in order to extract the 2nd param of its pthread_join
1401 call. TODO: free properly the slot (also below).
1402 */
1403 jnr = vg_threads[tid].joiner;
sewardj6072c362002-04-19 14:40:57 +00001404 vg_assert(is_valid_tid(jnr));
sewardje663cb92002-04-12 10:26:32 +00001405 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1406 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1407 jnr_thread_return = (void**)(jnr_args[2]);
1408 if (jnr_thread_return != NULL)
1409 *jnr_thread_return = vg_threads[tid].retval;
1410 vg_threads[jnr].m_edx = 0; /* success */
1411 vg_threads[jnr].status = VgTs_Runnable;
1412 vg_threads[tid].status = VgTs_Empty; /* bye! */
sewardj75fe1892002-04-14 02:46:33 +00001413 if (VG_(clo_instrument) && tid != 0)
1414 VGM_(make_noaccess)( vg_threads[tid].stack_base,
1415 vg_threads[tid].stack_size );
sewardj8937c812002-04-12 20:12:20 +00001416 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001417 VG_(sprintf)(msg_buf,
1418 "root fn returns, to find a waiting pthread_join(%d)", tid);
1419 print_sched_event(tid, msg_buf);
1420 VG_(sprintf)(msg_buf,
1421 "my pthread_join(%d) returned; resuming", tid);
1422 print_sched_event(jnr, msg_buf);
1423 }
1424 }
1425
1426 /* Return value is irrelevant; this thread will not get
1427 rescheduled. */
1428}
1429
1430
/* Implement pthread_join: thread tid joins on thread jee, with the
   joinee's exit value to be stored through thread_return (if non-NULL).
   Error results (EDEADLK/EINVAL) and success (0) are delivered via
   tid's %EDX.  Either returns immediately (error, or jee already
   finished) or parks tid in WaitJoinee state. */
static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Self-join would deadlock. */
   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* NOTE(review): if ThreadId is an unsigned type the jee < 0 test
      is always false (and may draw a compiler warning) -- confirm
      ThreadId's signedness in vg_include.h. */
   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      /* Poison the dead joinee's stack so stray accesses are caught. */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee.  handle_pthread_return() will
      wake us up when jee eventually exits. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
1500
1501
/* Implement pthread_create on behalf of parent_tid: allocate a thread
   slot, clone the parent's CPU state into it, give it a stack if it
   lacks one, push arg and the magic return address on that stack,
   point %EIP at start_routine, and make it Runnable.  The new tid is
   stored into *thread; the parent's %EDX gets 0 (success).
   NOTE(review): the attr parameter is currently ignored -- confirm
   that's intentional at this stage of development. */
static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... pthread_t values are stored as UInt-sized tids. */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(is_valid_tid(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
                     - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */;
   }

   /* Initial %ESP: top of stack, below the red zone. */
   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   /* Poison the red zone so stack overruns are detected. */
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address, so that when start_routine
      returns, control lands in the bogus-RA handler and the exit is
      turned into a handle_pthread_return(). */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   /* The two words just pushed (arg + return address) are valid. */
   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   //   if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   vg_threads[tid].waited_on_mx = NULL;
   vg_threads[tid].joiner       = VG_INVALID_THREADID;
   vg_threads[tid].status       = VgTs_Runnable;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}
1591
1592
sewardj604ec3c2002-04-18 22:38:41 +00001593/* -----------------------------------------------------------
1594 MUTEXes
1595 -------------------------------------------------------- */
1596
sewardj604ec3c2002-04-18 22:38:41 +00001597/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00001598 typedef struct
1599 {
1600 int __m_reserved; -- Reserved for future use
1601 int __m_count; -- Depth of recursive locking
1602 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1603 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1604 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1605 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001606
sewardj6072c362002-04-19 14:40:57 +00001607 #define PTHREAD_MUTEX_INITIALIZER \
1608 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1609 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1610 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1611 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1612 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1613 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1614 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001615
sewardj6072c362002-04-19 14:40:57 +00001616 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001617
sewardj6072c362002-04-19 14:40:57 +00001618 __m_kind never changes and indicates whether or not it is recursive.
1619
1620 __m_count indicates the lock count; if 0, the mutex is not owned by
1621 anybody.
1622
1623 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1624 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1625 statically initialised mutexes correctly appear
1626 to belong to nobody.
1627
   In summary, a not-in-use mutex is distinguished by having __m_owner
1629 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1630 conditions holds, the other should too.
1631
1632 There is no linked list of threads waiting for this mutex. Instead
1633 a thread in WaitMX state points at the mutex with its waited_on_mx
1634 field. This makes _unlock() inefficient, but simple to implement the
   right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001636
sewardj604ec3c2002-04-18 22:38:41 +00001637 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001638 deals with that for us.
1639*/
sewardje663cb92002-04-12 10:26:32 +00001640
1641
1642static
1643void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1644{
sewardj604ec3c2002-04-18 22:38:41 +00001645 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001646
sewardj604ec3c2002-04-18 22:38:41 +00001647 if (VG_(clo_trace_pthread_level) >= 2) {
1648 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p", mutex );
1649 print_pthread_event(tid, msg_buf);
1650 }
1651
1652 /* Paranoia ... */
1653 vg_assert(is_valid_tid(tid)
1654 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001655
1656 /* POSIX doesn't mandate this, but for sanity ... */
1657 if (mutex == NULL) {
1658 vg_threads[tid].m_edx = EINVAL;
1659 return;
1660 }
1661
sewardj604ec3c2002-04-18 22:38:41 +00001662 /* More paranoia ... */
1663 switch (mutex->__m_kind) {
1664 case PTHREAD_MUTEX_TIMED_NP:
1665 case PTHREAD_MUTEX_RECURSIVE_NP:
1666 case PTHREAD_MUTEX_ERRORCHECK_NP:
1667 case PTHREAD_MUTEX_ADAPTIVE_NP:
1668 if (mutex->__m_count >= 0) break;
1669 /* else fall thru */
1670 default:
1671 vg_threads[tid].m_edx = EINVAL;
1672 return;
sewardje663cb92002-04-12 10:26:32 +00001673 }
1674
sewardj604ec3c2002-04-18 22:38:41 +00001675 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001676
sewardj604ec3c2002-04-18 22:38:41 +00001677 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001678
1679 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001680 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001681 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001682 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001683 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001684 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001685 vg_threads[tid].m_edx = 0;
sewardj604ec3c2002-04-18 22:38:41 +00001686 VG_(printf)("!!!!!! tid %d, mutex %p -> locked %d\n",
1687 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001688 return;
1689 } else {
1690 vg_threads[tid].m_edx = EDEADLK;
1691 return;
1692 }
1693 } else {
sewardj6072c362002-04-19 14:40:57 +00001694 /* Someone else has it; we have to wait. Mark ourselves
1695 thusly. */
1696 vg_threads[tid].status = VgTs_WaitMX;
1697 vg_threads[tid].waited_on_mx = mutex;
sewardjf8f819e2002-04-17 23:21:37 +00001698 /* No assignment to %EDX, since we're blocking. */
1699 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj604ec3c2002-04-18 22:38:41 +00001700 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p: BLOCK",
1701 mutex );
sewardjf8f819e2002-04-17 23:21:37 +00001702 print_pthread_event(tid, msg_buf);
1703 }
sewardje663cb92002-04-12 10:26:32 +00001704 return;
1705 }
sewardjf8f819e2002-04-17 23:21:37 +00001706
sewardje663cb92002-04-12 10:26:32 +00001707 } else {
sewardj6072c362002-04-19 14:40:57 +00001708 /* Nobody owns it. Sanity check ... */
1709 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001710 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001711 mutex->__m_count = 1;
1712 mutex->__m_owner = (_pthread_descr)tid;
sewardj6072c362002-04-19 14:40:57 +00001713 vg_assert(vg_threads[tid].waited_on_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001714 /* return 0 (success). */
1715 vg_threads[tid].m_edx = 0;
1716 }
sewardjf8f819e2002-04-17 23:21:37 +00001717
sewardje663cb92002-04-12 10:26:32 +00001718}
1719
1720
1721static
1722void do_pthread_mutex_unlock ( ThreadId tid,
1723 pthread_mutex_t *mutex )
1724{
sewardje663cb92002-04-12 10:26:32 +00001725 Int i;
1726 Char msg_buf[100];
1727
sewardj45b4b372002-04-16 22:50:32 +00001728 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj604ec3c2002-04-18 22:38:41 +00001729 VG_(sprintf)(msg_buf, "pthread_mutex_unlock %p", mutex );
sewardj8937c812002-04-12 20:12:20 +00001730 print_pthread_event(tid, msg_buf);
1731 }
1732
sewardj604ec3c2002-04-18 22:38:41 +00001733 /* Paranoia ... */
1734 vg_assert(is_valid_tid(tid)
1735 && vg_threads[tid].status == VgTs_Runnable);
1736
1737 if (mutex == NULL) {
1738 vg_threads[tid].m_edx = EINVAL;
1739 return;
1740 }
1741
1742 /* More paranoia ... */
1743 switch (mutex->__m_kind) {
1744 case PTHREAD_MUTEX_TIMED_NP:
1745 case PTHREAD_MUTEX_RECURSIVE_NP:
1746 case PTHREAD_MUTEX_ERRORCHECK_NP:
1747 case PTHREAD_MUTEX_ADAPTIVE_NP:
1748 if (mutex->__m_count >= 0) break;
1749 /* else fall thru */
1750 default:
1751 vg_threads[tid].m_edx = EINVAL;
1752 return;
1753 }
sewardje663cb92002-04-12 10:26:32 +00001754
1755 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00001756 if (mutex->__m_count == 0 /* nobody holds it */
1757 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00001758 vg_threads[tid].m_edx = EPERM;
1759 return;
1760 }
1761
sewardjf8f819e2002-04-17 23:21:37 +00001762 /* If it's a multiply-locked recursive mutex, just decrement the
1763 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00001764 if (mutex->__m_count > 1) {
1765 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
1766 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00001767 vg_threads[tid].m_edx = 0; /* success */
1768 return;
1769 }
1770
sewardj604ec3c2002-04-18 22:38:41 +00001771 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00001772 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001773 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00001774 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00001775
sewardj6072c362002-04-19 14:40:57 +00001776 /* Find some arbitrary thread waiting on this mutex, and make it
1777 runnable. If none are waiting, mark the mutex as not held. */
1778 for (i = 1; i < VG_N_THREADS; i++) {
1779 if (vg_threads[i].status == VgTs_Empty)
1780 continue;
1781 if (vg_threads[i].status == VgTs_WaitMX
1782 && vg_threads[i].waited_on_mx == mutex)
1783 break;
1784 }
sewardje663cb92002-04-12 10:26:32 +00001785
sewardj6072c362002-04-19 14:40:57 +00001786 vg_assert(i <= VG_N_THREADS);
1787 if (i == VG_N_THREADS) {
sewardje663cb92002-04-12 10:26:32 +00001788 /* Nobody else is waiting on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001789 mutex->__m_count = 0;
sewardj6072c362002-04-19 14:40:57 +00001790 mutex->__m_owner = VG_INVALID_THREADID;
sewardje663cb92002-04-12 10:26:32 +00001791 } else {
1792 /* Notionally transfer the hold to thread i, whose
1793 pthread_mutex_lock() call now returns with 0 (success). */
sewardjf8f819e2002-04-17 23:21:37 +00001794 /* The .count is already == 1. */
sewardj6072c362002-04-19 14:40:57 +00001795 vg_assert(vg_threads[i].waited_on_mx == mutex);
sewardj604ec3c2002-04-18 22:38:41 +00001796 mutex->__m_owner = (_pthread_descr)i;
sewardj6072c362002-04-19 14:40:57 +00001797 vg_threads[i].status = VgTs_Runnable;
1798 vg_threads[i].waited_on_mx = NULL;
sewardje663cb92002-04-12 10:26:32 +00001799 vg_threads[i].m_edx = 0; /* pth_lock() success */
sewardj8937c812002-04-12 20:12:20 +00001800
sewardj45b4b372002-04-16 22:50:32 +00001801 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj604ec3c2002-04-18 22:38:41 +00001802 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p: RESUME",
1803 mutex );
1804 print_pthread_event(i, msg_buf);
sewardje663cb92002-04-12 10:26:32 +00001805 }
1806 }
1807
1808 /* In either case, our (tid's) pth_unlock() returns with 0
1809 (success). */
1810 vg_threads[tid].m_edx = 0; /* Success. */
1811}
1812
1813
sewardj6072c362002-04-19 14:40:57 +00001814/* -----------------------------------------------------------
1815 CONDITION VARIABLES
1816 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00001817
sewardj6072c362002-04-19 14:40:57 +00001818/* The relevant native types are as follows:
1819 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00001820
   -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER)
1822 typedef struct
1823 {
1824 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
1825 _pthread_descr __c_waiting; -- Threads waiting on this condition
1826 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00001827
   -- Attribute for condition variables.
1829 typedef struct
1830 {
1831 int __dummy;
1832 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00001833
sewardj6072c362002-04-19 14:40:57 +00001834 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00001835
sewardj6072c362002-04-19 14:40:57 +00001836 We'll just use the __c_waiting field to point to the head of the
1837 list of threads waiting on this condition. Note how the static
1838 initialiser has __c_waiting == 0 == VG_INVALID_THREADID.
1839
1840 Linux pthreads supports no attributes on condition variables, so we
1841 don't need to think too hard there.
1842*/
1843
sewardj77e466c2002-04-14 02:29:29 +00001844
1845
sewardje663cb92002-04-12 10:26:32 +00001846/* ---------------------------------------------------------------------
1847 Handle non-trivial client requests.
1848 ------------------------------------------------------------------ */
1849
1850static
1851void do_nontrivial_clientreq ( ThreadId tid )
1852{
1853 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
1854 UInt req_no = arg[0];
1855 switch (req_no) {
1856
1857 case VG_USERREQ__PTHREAD_CREATE:
1858 do_pthread_create( tid,
1859 (pthread_t*)arg[1],
1860 (pthread_attr_t*)arg[2],
1861 (void*(*)(void*))arg[3],
1862 (void*)arg[4] );
1863 break;
1864
sewardjbc5b99f2002-04-13 00:08:51 +00001865 case VG_USERREQ__PTHREAD_RETURNS:
1866 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00001867 break;
1868
1869 case VG_USERREQ__PTHREAD_JOIN:
1870 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
1871 break;
1872
sewardje663cb92002-04-12 10:26:32 +00001873 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
1874 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
1875 break;
1876
1877 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
1878 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
1879 break;
1880
sewardje663cb92002-04-12 10:26:32 +00001881 case VG_USERREQ__PTHREAD_CANCEL:
1882 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
1883 break;
1884
1885 case VG_USERREQ__MAKE_NOACCESS:
1886 case VG_USERREQ__MAKE_WRITABLE:
1887 case VG_USERREQ__MAKE_READABLE:
1888 case VG_USERREQ__DISCARD:
1889 case VG_USERREQ__CHECK_WRITABLE:
1890 case VG_USERREQ__CHECK_READABLE:
1891 case VG_USERREQ__MAKE_NOACCESS_STACK:
1892 case VG_USERREQ__RUNNING_ON_VALGRIND:
1893 case VG_USERREQ__DO_LEAK_CHECK:
sewardj8c824512002-04-14 04:16:48 +00001894 vg_threads[tid].m_edx
1895 = VG_(handle_client_request) ( &vg_threads[tid], arg );
sewardje663cb92002-04-12 10:26:32 +00001896 break;
1897
sewardj77e466c2002-04-14 02:29:29 +00001898 case VG_USERREQ__SIGNAL_RETURNS:
1899 handle_signal_return(tid);
1900 break;
sewardj54cacf02002-04-12 23:24:59 +00001901
sewardje663cb92002-04-12 10:26:32 +00001902 default:
1903 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
1904 VG_(panic)("handle_private_client_pthread_request: "
1905 "unknown request");
1906 /*NOTREACHED*/
1907 break;
1908 }
1909}
1910
1911
sewardj6072c362002-04-19 14:40:57 +00001912/* ---------------------------------------------------------------------
1913 Sanity checking.
1914 ------------------------------------------------------------------ */
1915
1916/* Internal consistency checks on the sched/pthread structures. */
1917static
1918void scheduler_sanity ( void )
1919{
1920 pthread_mutex_t* mutex;
1921 Int i;
1922 /* VG_(printf)("scheduler_sanity\n"); */
1923 for (i = 1; i < VG_N_THREADS; i++) {
1924 if (vg_threads[i].status == VgTs_WaitMX) {
1925 mutex = vg_threads[i].waited_on_mx;
1926 vg_assert(mutex != NULL);
1927 vg_assert(mutex->__m_count > 0);
1928 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
1929 } else {
1930 vg_assert(vg_threads[i].waited_on_mx == NULL);
1931 }
1932 }
1933}
1934
1935
sewardje663cb92002-04-12 10:26:32 +00001936/*--------------------------------------------------------------------*/
1937/*--- end vg_scheduler.c ---*/
1938/*--------------------------------------------------------------------*/