
/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org
      Julian_Seward@muraroa.demon.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

Note!  This pthreads implementation is so poor as to not be
suitable for use by anyone at all!

- Currently, when a signal is run, just the ThreadStatus.status fields
  are saved in the signal frame, along with the CPU state.  Question:
  should I also save and restore:
     ThreadStatus.joiner
     ThreadStatus.waited_on_mid
     ThreadStatus.awaken_at
     ThreadStatus.retval
  Currently unsure, and so am not doing so.

- Signals interrupting read/write and nanosleep: SA_RESTART settings.
  Read/write correctly return with EINTR when SA_RESTART isn't
  specified and they are interrupted by a signal.  nanosleep just
  pretends signals don't exist -- should be fixed.

- Read/write syscall starts: don't crap out when the initial
  nonblocking read/write returns an error.

- Get rid of restrictions re use of sigaltstack; they are no longer
  needed.
*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in
         use. */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];



typedef
   struct {
      /* Is this slot in use, or free? */
      Bool in_use;
      /* If in_use, is this mutex held by some thread, or not? */
      Bool held;
      /* If held == True, owner indicates who by. */
      ThreadId owner;
   }
   VgMutex;

static VgMutex vg_mutexes[VG_N_MUTEXES];

/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void pp_sched_status ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("tid %d:  ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX\n"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default:              VG_(printf)("???\n"); break;
      }
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}

/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


/* Find an unused VgMutex record. */
static
MutexId vg_alloc_VgMutex ( void )
{
   Int i;
   for (i = 0; i < VG_N_MUTEXES; i++) {
      if (!vg_mutexes[i].in_use)
         return i;
   }
   VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
   VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
   VG_(panic)("VG_N_MUTEXES is too low");
   /*NOTREACHED*/
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}

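
/* The setjmp/longjmp recovery idiom used above, in miniature.  This is
   an illustrative plain-libc sketch, not code from this file; the
   names jb, on_fault and run_client_code are made up for the example:

      #include <setjmp.h>
      #include <signal.h>

      static jmp_buf jb;

      static void on_fault ( int sig )
      {
         longjmp(jb, sig);     // unwind back to the dispatch site
      }

      ...
      signal(SIGSEGV, on_fault);
      if (setjmp(jb) == 0) {
         run_client_code();    // may fault at any instruction
      } else {
         // the handler longjmp'd us back here with the signal number;
         // report the block as having hit an unresumable signal
      }
*/
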
/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock)
   beforehand.
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_MUTEXES; i++)
      vg_mutexes[i].in_use = False;

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   VG_(save_thread_state) ( tid_main );
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}



/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled that
   way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)    \
      { tst->m_edx = (vvv);     \
        return True;            \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );
      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}

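
/* Illustrative sketch of the client's side of this protocol (an
   assumption for clarity: the real client macros live in valgrind.h,
   and the exact trap sequence is defined there, not here).  A request
   is just a small vector of words whose first element is the request
   code:

      UInt arg[2];
      arg[0] = VG_USERREQ__MALLOC;   // request code, decoded above
      arg[1] = nbytes;               // request-specific argument
      // the client puts &arg[0] in %EAX and traps to the scheduler;
      // afterwards it reads the result (the new pointer) out of %EDX.

   That is exactly the layout decoded by maybe_do_trivial_clientreq()
   and do_nontrivial_clientreq(): arg[0] selects the operation,
   arg[1..] supply parameters, and the reply goes back in the
   requesting thread's %EDX. */
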
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7); /*NOTREACHED -- only read/write get here*/
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


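
/* The probe-then-park trick above, in miniature.  An illustrative
   plain-libc sketch, not code from this file; buf and n stand in for
   the client's real syscall arguments:

      int flags = fcntl(fd, F_GETFL, 0);
      fcntl(fd, F_SETFL, flags | O_NONBLOCK);   // force nonblocking
      ssize_t r = read(fd, buf, n);             // lodges the request
      fcntl(fd, F_SETFL, flags);                // restore old mode
      if (r == -1 && errno == EWOULDBLOCK) {
         // kernel would have blocked: park the thread (VgTs_WaitFD)
         // and let select() tell us later when fd becomes ready
      } else {
         // completed immediately: r is the final result; deliver now
      }
*/
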
/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time
            (second) arg of nanosleep, since it's used up all its
            time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
        VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec  = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            pp_sched_status();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait or sleeping state.
            Delay for a while, and go round again, in the hope that
            eventually a thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }

      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);
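
      /* Worked example of the counter arithmetic (illustrative; the
         quantum value here is an assumption, not necessarily the real
         VG_SCHEDULING_QUANTUM).  Suppose the quantum is 50000, so
         dispatch_ctr starts at 50001 and at most 50000 bbs can run.
         If the innerloop later stops with dispatch_ctr == 49991, then
         done_this_time, computed in Phase 3 below, is
         50001 - 49991 - 1 = 9 bbs actually completed. */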

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               VG_(create_translation_for)( vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done)  += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


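/* Illustrative sketch of how a caller might dispatch on the result
   (the call site is assumed to live elsewhere, e.g. vg_main.c -- an
   assumption, not something this file shows):

      VgSchedReturnCode src = VG_(scheduler)();
      switch (src) {
         case VgSrc_Deadlock: ...  no runnable threads, none can wake
         case VgSrc_Shutdown: ...  client asked Valgrind to exit
         case VgSrc_BbsDone:  ...  requested number of bbs completed
      }
*/

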
/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to its
      root function with PTHREAD_CANCELED as the return value.  So:
      simple: put PTHREAD_CANCELED into %EAX and
      &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx  = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].m_esp
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
   }
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   /* return zero */
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;
   vg_threads[tid].m_edx  = 0; /* success */
}


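/* Sketch of the child's initial stack as built above (an illustrative
   diagram, not code; addresses grow downwards):

      new_stack + new_stk_szb:
         [ VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, marked noaccess ]
      m_esp+4:  arg                          -- start_routine's argument
      m_esp:    VG_(pthreadreturn_bogusRA)   -- magic return address

   with m_eip = start_routine.  So when start_routine returns, it
   "returns" into VG_(pthreadreturn_bogusRA), which -- presumably, by
   issuing the VG_USERREQ__PTHREAD_RETURNS client request handled in
   do_nontrivial_clientreq() below -- hands the return value back to
   handle_pthread_return(). */

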
/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
   is a struct with at least 5 words:
      typedef struct
      {
        int   __m_reserved;  -- Reserved for future use
        int   __m_count;     -- Depth of recursive locking
        _pthread_descr __m_owner;  -- Owner thread (if recursive or errcheck)
        int   __m_kind;      -- Mutex kind: fast, recursive or errcheck
        struct _pthread_fastlock __m_lock;  -- Underlying fast lock
      } pthread_mutex_t;
   Ours is just a single word, an index into vg_mutexes[].
   For now I'll park it in the __m_reserved field.

   Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
   a zero __m_count field (see /usr/include/pthread.h).  So I'll
   use zero to mean non-inited, and 1 to mean inited.

   How convenient.
*/

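
/* Concretely, an illustrative sketch (not code from this file).  A
   client declaring

      pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;

   gets a struct whose __m_count is 0.  The first lock request on mx
   therefore triggers initialise_mutex() below, which allocates a
   vg_mutexes[] slot, parks its index in mx.__m_reserved, and sets
   mx.__m_count to 1 ("inited").  Every later lock/unlock/destroy
   request sees __m_count == 1 and just reads the MutexId back out of
   __m_reserved. */
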
static
void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];
   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
      whatever reason. */
   mid = vg_alloc_VgMutex();
   vg_mutexes[mid].in_use = True;
   vg_mutexes[mid].held   = False;
   vg_mutexes[mid].owner  = VG_INVALID_THREADID; /* irrelevant */
   mutex->__m_reserved = mid;
   mutex->__m_count    = 1; /* initialised */
   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
                            mutex, mid );
      print_pthread_event(tid, msg_buf);
   }
}

/* Allocate a new MutexId and write it into *mutex.  Ideally take
   notice of the attributes in *mutexattr.  */
static
void do_pthread_mutex_init ( ThreadId tid,
                             pthread_mutex_t *mutex,
                             const pthread_mutexattr_t *mutexattr)
{
   Char msg_buf[100];
   /* Paranoia ... */
   vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));

   initialise_mutex(tid, mutex);

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
                            mutex, mutex->__m_reserved );
      print_pthread_event(tid, msg_buf);
   }

   /*
   RETURN VALUE
      pthread_mutex_init always returns 0.  The other mutex functions
      return 0 on success and a non-zero error code on error.
   */
   /* THIS THREAD returns with 0. */
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   /* *mutex contains the MutexId, or one of the magic values
      PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
      now.  See comment(s) above re use of __m_count to indicate
      initialisation status.
   */

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (mutex->__m_count == 0) {
      initialise_mutex(tid, mutex);
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assert initialised. */
   vg_assert(mutex->__m_count == 1);

   /* Assume tid valid. */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (vg_mutexes[mid].held) {
      if (vg_mutexes[mid].owner == tid) {
         vg_threads[tid].m_edx = EDEADLK;
         return;
      }
      /* Someone else has it; we have to wait. */
      vg_threads[tid].status = VgTs_WaitMX;
      vg_threads[tid].waited_on_mid = mid;
      /* No assignment to %EDX, since we're blocking. */
      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p): BLOCK",
                               mid, mutex );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* We get it! */
      vg_mutexes[mid].held  = True;
      vg_mutexes[mid].owner = tid;
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }
}


static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   MutexId mid;
   Int     i;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if we don't currently hold the mutex. */
   if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].waited_on_mid == mid)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      vg_mutexes[mid].held = False;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      vg_mutexes[mid].owner = i;
      vg_threads[i].status = VgTs_Runnable;
      vg_threads[i].m_edx = 0; /* pth_lock() success */

      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d: RESUME",
                               mid );
         print_pthread_event(tid, msg_buf);
      }
   }

   /* In either case, our (tid's) pth_unlock() returns with 0
      (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}


static void do_pthread_mutex_destroy ( ThreadId tid,
                                       pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if the mutex is currently held. */
   if (vg_mutexes[mid].held) {
      vg_threads[tid].m_edx = EBUSY;
      return;
   }

   mutex->__m_count = 0; /* uninitialised */
   vg_mutexes[mid].in_use = False;
   vg_threads[tid].m_edx = 0;
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}


/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      /* Sigh ... this probably will cause huge numbers of major
         (expensive) scheduling events, for no real reason.
         Perhaps should be classified as a trivial-request. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         vg_threads[tid].m_edx = tid;
         break;

      case VG_USERREQ__PTHREAD_MUTEX_INIT:
         do_pthread_mutex_init( tid,
                                (pthread_mutex_t *)(arg[1]),
                                (pthread_mutexattr_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
         do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("do_nontrivial_clientreq: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}


1802
1803/*--------------------------------------------------------------------*/
1804/*--- end vg_scheduler.c ---*/
1805/*--------------------------------------------------------------------*/