   1
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
12 Julian_Seward@muraroa.demon.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33#include "vg_constants.h"
34
35#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
36 VG_USERREQ__DO_LEAK_CHECK */
37
  38/* BORKAGE/ISSUES as of 14 Apr 02
  39
  40Note! This pthreads implementation is so poor as to not be
41suitable for use by anyone at all!
  42
  43- Currently, when a signal is delivered, just the ThreadStatus.status fields
44 are saved in the signal frame, along with the CPU state. Question:
45 should I also save and restore:
46 ThreadStatus.joiner
47 ThreadStatus.waited_on_mid
48 ThreadStatus.awaken_at
49 ThreadStatus.retval
50 Currently unsure, and so am not doing so.
  51
  52- Signals interrupting read/write and nanosleep: SA_RESTART settings.
53 Read/write correctly return with EINTR when SA_RESTART isn't
54 specified and they are interrupted by a signal. nanosleep just
55 pretends signals don't exist -- should be fixed.
  56
  57- Read/write syscall starts: don't crap out when the initial
58 nonblocking read/write returns an error.
  59
  60- 0xDEADBEEF syscall errors ... fix.
  61
  62*/
  63
64
65/* ---------------------------------------------------------------------
66 Types and globals for the scheduler.
67 ------------------------------------------------------------------ */
68
69/* type ThreadId is defined in vg_include.h. */
70
71/* struct ThreadState is defined in vg_include.h. */
72
73/* Private globals. A statically allocated array of threads. */
74static ThreadState vg_threads[VG_N_THREADS];
75
76
77/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
78jmp_buf VG_(scheduler_jmpbuf);
79/* ... and if so, here's the signal which caused it to do so. */
80Int VG_(longjmpd_on_signal);
81
82
83/* Machinery to keep track of which threads are waiting on which
84 fds. */
85typedef
86 struct {
87 /* The thread which made the request. */
88 ThreadId tid;
89
90 /* The next two fields describe the request. */
91 /* File descriptor waited for. -1 means this slot is not in use */
92 Int fd;
93 /* The syscall number the fd is used in. */
94 Int syscall_no;
95
96 /* False => still waiting for select to tell us the fd is ready
97 to go. True => the fd is ready, but the results have not yet
98 been delivered back to the calling thread. Once the latter
99 happens, this entire record is marked as no longer in use, by
100 making the fd field be -1. */
101 Bool ready;
102 }
103 VgWaitedOnFd;
104
105static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
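/* Slot lifecycle, in outline: a slot is claimed by add_waiting_fd()
   when a read/write would block; poll_for_ready_fds() sets .ready once
   select() reports the fd usable; complete_blocked_syscalls() then
   re-issues the syscall, delivers the result, and frees the slot by
   setting .fd back to -1. */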
106
107
108
109typedef
110 struct {
111 /* Is this slot in use, or free? */
112 Bool in_use;
113 /* If in_use, is this mutex held by some thread, or not? */
114 Bool held;
115 /* if held==True, owner indicates who by. */
116 ThreadId owner;
117 }
118 VgMutex;
119
120static VgMutex vg_mutexes[VG_N_MUTEXES];
121
122/* Forwards */
123static void do_nontrivial_clientreq ( ThreadId tid );
124
125
126/* ---------------------------------------------------------------------
127 Helper functions for the scheduler.
128 ------------------------------------------------------------------ */
129
130static
131void pp_sched_status ( void )
132{
133 Int i;
134 VG_(printf)("\nsched status:\n");
135 for (i = 0; i < VG_N_THREADS; i++) {
136 if (vg_threads[i].status == VgTs_Empty) continue;
137 VG_(printf)("tid %d: ", i);
138 switch (vg_threads[i].status) {
139 case VgTs_Runnable: VG_(printf)("Runnable\n"); break;
140 case VgTs_WaitFD: VG_(printf)("WaitFD\n"); break;
141 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
142 vg_threads[i].joiner); break;
143 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
 144         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
 145         default: VG_(printf)("???"); break;
146 }
147 }
148 VG_(printf)("\n");
149}
150
151static
152void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
153{
154 Int i;
155
156 vg_assert(fd != -1); /* avoid total chaos */
157
158 for (i = 0; i < VG_N_WAITING_FDS; i++)
159 if (vg_waiting_fds[i].fd == -1)
160 break;
161
162 if (i == VG_N_WAITING_FDS)
163 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
164 /*
165 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
166 tid, fd, i);
167 */
168 vg_waiting_fds[i].fd = fd;
169 vg_waiting_fds[i].tid = tid;
170 vg_waiting_fds[i].ready = False;
171 vg_waiting_fds[i].syscall_no = syscall_no;
172}
173
174
175
176static
177void print_sched_event ( ThreadId tid, Char* what )
178{
 179   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
180}
181
182
183static
184void print_pthread_event ( ThreadId tid, Char* what )
185{
186 VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
 187}
188
189
190static
191Char* name_of_sched_event ( UInt event )
192{
193 switch (event) {
 194      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
195 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
196 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
197 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
198 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
199 default: return "??UNKNOWN??";
200 }
201}
202
203
204/* Create a translation of the client basic block beginning at
205 orig_addr, and add it to the translation cache & translation table.
206 This probably doesn't really belong here, but, hey ...
207*/
208void VG_(create_translation_for) ( Addr orig_addr )
209{
210 Addr trans_addr;
211 TTEntry tte;
212 Int orig_size, trans_size;
213 /* Ensure there is space to hold a translation. */
214 VG_(maybe_do_lru_pass)();
215 VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
216 /* Copy data at trans_addr into the translation cache.
217 Returned pointer is to the code, not to the 4-byte
218 header. */
219 /* Since the .orig_size and .trans_size fields are
220 UShort, be paranoid. */
221 vg_assert(orig_size > 0 && orig_size < 65536);
222 vg_assert(trans_size > 0 && trans_size < 65536);
223 tte.orig_size = orig_size;
224 tte.orig_addr = orig_addr;
225 tte.trans_size = trans_size;
226 tte.trans_addr = VG_(copy_to_transcache)
227 ( trans_addr, trans_size );
228 tte.mru_epoch = VG_(current_epoch);
229 /* Free the intermediary -- was allocated by VG_(emit_code). */
230 VG_(jitfree)( (void*)trans_addr );
231 /* Add to trans tab and set back pointer. */
232 VG_(add_to_trans_tab) ( &tte );
233 /* Update stats. */
234 VG_(this_epoch_in_count) ++;
235 VG_(this_epoch_in_osize) += orig_size;
236 VG_(this_epoch_in_tsize) += trans_size;
237 VG_(overall_in_count) ++;
238 VG_(overall_in_osize) += orig_size;
239 VG_(overall_in_tsize) += trans_size;
240 /* Record translated area for SMC detection. */
241 VG_(smc_mark_original) ( orig_addr, orig_size );
242}
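/* Within this file the only caller of the above is the
   VG_TRC_INNER_FASTMISS case in VG_(scheduler): when VG_(search_transtab)
   misses, the translation is made here and the lookup retried. */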
243
244
245/* Allocate a completely empty ThreadState record. */
246static
247ThreadId vg_alloc_ThreadState ( void )
248{
249 Int i;
250 for (i = 0; i < VG_N_THREADS; i++) {
251 if (vg_threads[i].status == VgTs_Empty)
252 return i;
253 }
254 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
255 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
256 VG_(panic)("VG_N_THREADS is too low");
257 /*NOTREACHED*/
258}
259
260
261ThreadState* VG_(get_thread_state) ( ThreadId tid )
262{
263 vg_assert(tid >= 0 && tid < VG_N_THREADS);
264 vg_assert(vg_threads[tid].status != VgTs_Empty);
265 return & vg_threads[tid];
266}
267
268
269/* Find an unused VgMutex record. */
270static
271MutexId vg_alloc_VgMutex ( void )
272{
273 Int i;
274 for (i = 0; i < VG_N_MUTEXES; i++) {
275 if (!vg_mutexes[i].in_use)
276 return i;
277 }
278 VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
279 VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
280 VG_(panic)("VG_N_MUTEXES is too low");
281 /*NOTREACHED*/
282}
283
284
285/* Copy the saved state of a thread into VG_(baseBlock), ready for it
286 to be run. */
287__inline__
288void VG_(load_thread_state) ( ThreadId tid )
289{
290 Int i;
291 VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
292 VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
293 VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
294 VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
295 VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
296 VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
297 VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
298 VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
299 VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
300 VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;
301
302 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
303 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];
304
305 VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
306 VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
307 VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
308 VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
309 VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
310 VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
311 VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
312 VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
313 VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
314}
315
316
317/* Copy the state of a thread from VG_(baseBlock), presumably after it
318 has been descheduled. For sanity-check purposes, fill the vacated
319 VG_(baseBlock) with garbage so as to make the system more likely to
320 fail quickly if we erroneously continue to poke around inside
321 VG_(baseBlock) without first doing a load_thread_state().
322*/
323__inline__
324void VG_(save_thread_state) ( ThreadId tid )
325{
326 Int i;
327 const UInt junk = 0xDEADBEEF;
328
329 vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
330 vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
331 vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
332 vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
333 vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
334 vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
335 vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
336 vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
337 vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
338 vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
339
340 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
341 vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
342
343 vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
344 vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
345 vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
346 vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
347 vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
348 vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
349 vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
350 vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
351 vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
352
353 /* Fill it up with junk. */
354 VG_(baseBlock)[VGOFF_(m_eax)] = junk;
355 VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
356 VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
357 VG_(baseBlock)[VGOFF_(m_edx)] = junk;
358 VG_(baseBlock)[VGOFF_(m_esi)] = junk;
359 VG_(baseBlock)[VGOFF_(m_edi)] = junk;
360 VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
361 VG_(baseBlock)[VGOFF_(m_esp)] = junk;
362 VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
363 VG_(baseBlock)[VGOFF_(m_eip)] = junk;
364
365 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
366 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
367}
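/* Besides bracketing run_thread_for_a_while(), this load/save pair also
   doubles as a copying mechanism: do_pthread_create() below loads the
   parent's state into VG_(baseBlock) and immediately saves it into the
   child's slot. */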
368
369
370/* Run the thread tid for a while, and return a VG_TRC_* value to the
371 scheduler indicating what happened. */
372static
373UInt run_thread_for_a_while ( ThreadId tid )
374{
375 UInt trc = 0;
376 vg_assert(tid >= 0 && tid < VG_N_THREADS);
377 vg_assert(vg_threads[tid].status != VgTs_Empty);
378 vg_assert(VG_(bbs_to_go) > 0);
379
380 VG_(load_thread_state) ( tid );
381 if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
382 /* try this ... */
383 trc = VG_(run_innerloop)();
384 /* We get here if the client didn't take a fault. */
385 } else {
386 /* We get here if the client took a fault, which caused our
387 signal handler to longjmp. */
388 vg_assert(trc == 0);
389 trc = VG_TRC_UNRESUMABLE_SIGNAL;
390 }
391 VG_(save_thread_state) ( tid );
392 return trc;
393}
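/* Fault-recovery protocol, roughly: vg_oursignalhandler() longjmp()s
   back to the __builtin_setjmp above when the client takes a signal we
   can't resume from, leaving the signal number in
   VG_(longjmpd_on_signal); the resulting VG_TRC_UNRESUMABLE_SIGNAL sends
   us back to Phase 1 of the scheduler, where the signal is delivered. */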
394
395
396/* Increment the LRU epoch counter. */
397static
398void increment_epoch ( void )
399{
400 VG_(current_epoch)++;
401 if (VG_(clo_verbosity) > 2) {
402 UInt tt_used, tc_used;
403 VG_(get_tt_tc_used) ( &tt_used, &tc_used );
404 VG_(message)(Vg_UserMsg,
405 "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
406 VG_(bbs_done),
407 VG_(this_epoch_in_count),
408 VG_(this_epoch_in_osize),
409 VG_(this_epoch_in_tsize),
410 VG_(this_epoch_out_count),
411 VG_(this_epoch_out_osize),
412 VG_(this_epoch_out_tsize),
413 tt_used, tc_used
414 );
415 }
416 VG_(this_epoch_in_count) = 0;
417 VG_(this_epoch_in_osize) = 0;
418 VG_(this_epoch_in_tsize) = 0;
419 VG_(this_epoch_out_count) = 0;
420 VG_(this_epoch_out_osize) = 0;
421 VG_(this_epoch_out_tsize) = 0;
422}
423
424
425/* Initialise the scheduler. Create a single "main" thread ready to
426 run, with special ThreadId of zero. This is called at startup; the
 427   caller takes care to park the client's state in
428 VG_(baseBlock).
429*/
430void VG_(scheduler_init) ( void )
431{
432 Int i;
433 Addr startup_esp;
434 ThreadId tid_main;
435
436 startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
437 if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
438 VG_(printf)("%esp at startup = %p is not near %p; aborting\n",
439 startup_esp, VG_STARTUP_STACK_MASK);
440 VG_(panic)("unexpected %esp at startup");
441 }
442
443 for (i = 0; i < VG_N_THREADS; i++) {
444 vg_threads[i].stack_size = 0;
445 vg_threads[i].stack_base = (Addr)NULL;
446 }
447
448 for (i = 0; i < VG_N_WAITING_FDS; i++)
449 vg_waiting_fds[i].fd = -1; /* not in use */
450
451 for (i = 0; i < VG_N_MUTEXES; i++)
452 vg_mutexes[i].in_use = False;
453
454 /* Assert this is thread zero, which has certain magic
455 properties. */
456 tid_main = vg_alloc_ThreadState();
457 vg_assert(tid_main == 0);
458
459 vg_threads[tid_main].status = VgTs_Runnable;
460 vg_threads[tid_main].joiner = VG_INVALID_THREADID;
461 vg_threads[tid_main].retval = NULL; /* not important */
462
463 /* Copy VG_(baseBlock) state to tid_main's slot. */
464 VG_(save_thread_state) ( tid_main );
465}
466
467
468/* What if fd isn't a valid fd? */
469static
470void set_fd_nonblocking ( Int fd )
471{
472 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
473 vg_assert(!VG_(is_kerror)(res));
474 res |= VKI_O_NONBLOCK;
475 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
476 vg_assert(!VG_(is_kerror)(res));
477}
478
479static
480void set_fd_blocking ( Int fd )
481{
482 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
483 vg_assert(!VG_(is_kerror)(res));
484 res &= ~VKI_O_NONBLOCK;
485 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
486 vg_assert(!VG_(is_kerror)(res));
487}
488
489static
490Bool fd_is_blockful ( Int fd )
491{
492 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
493 vg_assert(!VG_(is_kerror)(res));
494 return (res & VKI_O_NONBLOCK) ? False : True;
495}
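/* The three helpers above just query/flip VKI_O_NONBLOCK with
   VG_(fcntl)(F_GETFL / F_SETFL).  sched_do_syscall() below uses them
   along these lines (a sketch of the sequence it performs):

      Bool orig_fd_blockness = fd_is_blockful(fd);
      set_fd_nonblocking(fd);
      ...lodge the request with the kernel...
      if (orig_fd_blockness) set_fd_blocking(fd);
      else set_fd_nonblocking(fd);
*/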
496
497
498
499/* Do a purely thread-local request for tid, and put the result in its
500 %EDX, without changing its scheduling state in any way, nor that of
 501   any other threads; if we handled it here, return True.
502
503 If the request is non-trivial, return False; a more capable but
504 slower mechanism will deal with it.
505*/
506static
507Bool maybe_do_trivial_clientreq ( ThreadId tid )
508{
509# define SIMPLE_RETURN(vvv) \
510 { vg_threads[tid].m_edx = (vvv); \
511 return True; \
512 }
513
514 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
515 UInt req_no = arg[0];
516 switch (req_no) {
517 case VG_USERREQ__MALLOC:
518 SIMPLE_RETURN(
519 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc )
520 );
521 case VG_USERREQ__BUILTIN_NEW:
522 SIMPLE_RETURN(
523 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew )
524 );
525 case VG_USERREQ__BUILTIN_VEC_NEW:
526 SIMPLE_RETURN(
527 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec )
528 );
529 case VG_USERREQ__FREE:
530 VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc );
531 SIMPLE_RETURN(0); /* irrelevant */
532 case VG_USERREQ__BUILTIN_DELETE:
533 VG_(client_free) ( (void*)arg[1], Vg_AllocNew );
534 SIMPLE_RETURN(0); /* irrelevant */
535 case VG_USERREQ__BUILTIN_VEC_DELETE:
536 VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec );
537 SIMPLE_RETURN(0); /* irrelevant */
538 case VG_USERREQ__CALLOC:
539 SIMPLE_RETURN(
540 (UInt)VG_(client_calloc) ( arg[1], arg[2] )
541 );
542 case VG_USERREQ__REALLOC:
543 SIMPLE_RETURN(
544 (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] )
545 );
546 case VG_USERREQ__MEMALIGN:
547 SIMPLE_RETURN(
548 (UInt)VG_(client_memalign) ( arg[1], arg[2] )
549 );
550 default:
551 /* Too hard; wimp out. */
552 return False;
553 }
554# undef SIMPLE_RETURN
555}
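/* Shape of a client request, as assumed throughout this file: the
   client leaves %EAX pointing at a small array of words whose first
   element is the request code and whose remaining elements are the
   arguments, and it gets the result back in %EDX.  Illustrative only
   (the real client-side stubs live elsewhere):

      UInt arg[4];
      arg[0] = VG_USERREQ__MALLOC;   -- request code
      arg[1] = 100;                  -- number of bytes wanted
      then %EAX <- (UInt)&arg[0], trap to Valgrind, result in %EDX.
*/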
556
557
558static
559void sched_do_syscall ( ThreadId tid )
560{
561 UInt saved_eax;
562 UInt res, syscall_no;
563 UInt fd;
564 Bool might_block, assumed_nonblocking;
565 Bool orig_fd_blockness;
566 Char msg_buf[100];
567
568 vg_assert(tid >= 0 && tid < VG_N_THREADS);
569 vg_assert(vg_threads[tid].status == VgTs_Runnable);
570
571 syscall_no = vg_threads[tid].m_eax; /* syscall number */
572
573 if (syscall_no == __NR_nanosleep) {
574 ULong t_now, t_awaken;
575 struct vki_timespec* req;
576 req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
577 t_now = VG_(read_microsecond_timer)();
578 t_awaken
579 = t_now
580 + (ULong)1000000ULL * (ULong)(req->tv_sec)
581 + (ULong)( (UInt)(req->tv_nsec) / 1000 );
582 vg_threads[tid].status = VgTs_Sleeping;
583 vg_threads[tid].awaken_at = t_awaken;
 584      if (VG_(clo_trace_sched)) {
 585         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
586 t_now, t_awaken-t_now);
587 print_sched_event(tid, msg_buf);
588 }
589 /* Force the scheduler to run something else for a while. */
590 return;
591 }
592
593 switch (syscall_no) {
594 case __NR_read:
595 case __NR_write:
596 assumed_nonblocking
597 = False;
598 might_block
599 = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
600 break;
601 default:
602 might_block = False;
603 assumed_nonblocking = True;
604 }
605
606 if (assumed_nonblocking) {
607 /* We think it's non-blocking. Just do it in the normal way. */
608 VG_(perform_assumed_nonblocking_syscall)(tid);
609 /* The thread is still runnable. */
610 return;
611 }
612
613 /* It might block. Take evasive action. */
614 switch (syscall_no) {
615 case __NR_read:
616 case __NR_write:
617 fd = vg_threads[tid].m_ebx; break;
618 default:
619 vg_assert(3+3 == 7);
620 }
621
622 /* Set the fd to nonblocking, and do the syscall, which will return
623 immediately, in order to lodge a request with the Linux kernel.
624 We later poll for I/O completion using select(). */
625
626 orig_fd_blockness = fd_is_blockful(fd);
627 set_fd_nonblocking(fd);
628 vg_assert(!fd_is_blockful(fd));
629 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
630
631 /* This trashes the thread's %eax; we have to preserve it. */
632 saved_eax = vg_threads[tid].m_eax;
633 KERNEL_DO_SYSCALL(tid,res);
634
635 /* Restore original blockfulness of the fd. */
636 if (orig_fd_blockness)
637 set_fd_blocking(fd);
638 else
639 set_fd_nonblocking(fd);
640
641 if (res != -VKI_EWOULDBLOCK) {
642 /* It didn't block; it went through immediately. So finish off
643 in the normal way. Don't restore %EAX, since that now
644 (correctly) holds the result of the call. */
645 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
646 /* We're still runnable. */
647 vg_assert(vg_threads[tid].status == VgTs_Runnable);
648
649 } else {
650
651 /* It would have blocked. First, restore %EAX to what it was
652 before our speculative call. */
653 vg_threads[tid].m_eax = saved_eax;
654 /* Put this fd in a table of fds on which we are waiting for
655 completion. The arguments for select() later are constructed
656 from this table. */
657 add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
658 /* Deschedule thread until an I/O completion happens. */
659 vg_threads[tid].status = VgTs_WaitFD;
 660      if (VG_(clo_trace_sched)) {
 661         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
662 print_sched_event(tid, msg_buf);
663 }
664
665 }
666}
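/* Worked example of the above for a read() on a blocking fd:
   1. fd_is_blockful() is True, so the fd is forced non-blocking and the
      read is issued immediately via KERNEL_DO_SYSCALL.
   2. If that returns -VKI_EWOULDBLOCK, %EAX is restored to the syscall
      number, (tid, fd, __NR_read) is recorded in vg_waiting_fds[], and
      the thread is parked in VgTs_WaitFD.
   3. Later, poll_for_ready_fds() notices the fd is ready and
      complete_blocked_syscalls() re-issues the read and marks the
      thread Runnable again. */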
667
668
669/* Find out which of the fds in vg_waiting_fds are now ready to go, by
670 making enquiries with select(), and mark them as ready. We have to
 671   wait for the requesting threads to fall into the WaitFD state
672 before we can actually finally deliver the results, so this
673 procedure doesn't do that; complete_blocked_syscalls() does it.
674
675 It might seem odd that a thread which has done a blocking syscall
676 is not in WaitFD state; the way this can happen is if it initially
677 becomes WaitFD, but then a signal is delivered to it, so it becomes
678 Runnable for a while. In this case we have to wait for the
679 sighandler to return, whereupon the WaitFD state is resumed, and
680 only at that point can the I/O result be delivered to it. However,
681 this point may be long after the fd is actually ready.
682
683 So, poll_for_ready_fds() merely detects fds which are ready.
684 complete_blocked_syscalls() does the second half of the trick,
685 possibly much later: it delivers the results from ready fds to
686 threads in WaitFD state.
687*/
688void poll_for_ready_fds ( void )
689{
690 vki_ksigset_t saved_procmask;
691 vki_fd_set readfds;
692 vki_fd_set writefds;
693 vki_fd_set exceptfds;
694 struct vki_timeval timeout;
695 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
696 ThreadId tid;
697 Bool rd_ok, wr_ok, ex_ok;
698 Char msg_buf[100];
699
 700   struct vki_timespec* rem;
701 ULong t_now;
702
 703   /* Awaken any sleeping threads whose sleep has expired. */
 704   t_now = VG_(read_microsecond_timer)();
 705   for (tid = 0; tid < VG_N_THREADS; tid++) {
706 if (vg_threads[tid].status != VgTs_Sleeping)
707 continue;
708 if (t_now >= vg_threads[tid].awaken_at) {
709 /* Resume this thread. Set to zero the remaining-time (second)
710 arg of nanosleep, since it's used up all its time. */
711 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
712 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
713 if (rem != NULL) {
714 rem->tv_sec = 0;
715 rem->tv_nsec = 0;
716 }
717 /* Make the syscall return 0 (success). */
718 vg_threads[tid].m_eax = 0;
719 /* Reschedule this thread. */
720 vg_threads[tid].status = VgTs_Runnable;
 721         if (VG_(clo_trace_sched)) {
 722            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
723 t_now);
724 print_sched_event(tid, msg_buf);
725 }
726 }
727 }
 728
 729   /* And look for threads waiting on file descriptors which are now
730 ready for I/O.*/
 731   timeout.tv_sec = 0;
732 timeout.tv_usec = 0;
733
734 VKI_FD_ZERO(&readfds);
735 VKI_FD_ZERO(&writefds);
736 VKI_FD_ZERO(&exceptfds);
737 fd_max = -1;
738 for (i = 0; i < VG_N_WAITING_FDS; i++) {
739 if (vg_waiting_fds[i].fd == -1 /* not in use */)
740 continue;
741 if (vg_waiting_fds[i].ready /* already ready? */)
742 continue;
743 fd = vg_waiting_fds[i].fd;
744 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
 745      vg_assert(fd >= 0);
 746      if (fd > fd_max)
747 fd_max = fd;
748 tid = vg_waiting_fds[i].tid;
749 vg_assert(tid >= 0 && tid < VG_N_THREADS);
750 syscall_no = vg_waiting_fds[i].syscall_no;
751 switch (syscall_no) {
752 case __NR_read:
753 VKI_FD_SET(fd, &readfds); break;
754 case __NR_write:
755 VKI_FD_SET(fd, &writefds); break;
756 default:
757 VG_(panic)("poll_for_ready_fds: unexpected syscall");
758 /*NOTREACHED*/
759 break;
760 }
761 }
762
 763   /* Short cut: if no fds are waiting, give up now. */
764 if (fd_max == -1)
765 return;
766
 767   /* BLOCK ALL SIGNALS. We don't want the complication of select()
768 getting interrupted. */
769 VG_(block_all_host_signals)( &saved_procmask );
770
771 n_ready = VG_(select)
772 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
773 if (VG_(is_kerror)(n_ready)) {
774 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
775 VG_(panic)("poll_for_ready_fds: select failed?!");
776 /*NOTREACHED*/
777 }
778
779 /* UNBLOCK ALL SIGNALS */
780 VG_(restore_host_signals)( &saved_procmask );
781
782 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
783
784 if (n_ready == 0)
785 return;
786
787 /* Inspect all the fds we know about, and handle any completions that
788 have happened. */
789 /*
790 VG_(printf)("\n\n");
791 for (fd = 0; fd < 100; fd++)
792 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
793 VG_(printf)("X"); } else { VG_(printf)("."); };
794 VG_(printf)("\n\nfd_max = %d\n", fd_max);
795 */
796
797 for (fd = 0; fd <= fd_max; fd++) {
798 rd_ok = VKI_FD_ISSET(fd, &readfds);
799 wr_ok = VKI_FD_ISSET(fd, &writefds);
800 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
801
802 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
803 if (n_ok == 0)
804 continue;
805 if (n_ok > 1) {
806 VG_(printf)("offending fd = %d\n", fd);
807 VG_(panic)("poll_for_ready_fds: multiple events on fd");
808 }
809
810 /* An I/O event completed for fd. Find the thread which
811 requested this. */
812 for (i = 0; i < VG_N_WAITING_FDS; i++) {
813 if (vg_waiting_fds[i].fd == -1 /* not in use */)
814 continue;
815 if (vg_waiting_fds[i].fd == fd)
816 break;
817 }
818
819 /* And a bit more paranoia ... */
820 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
821
822 /* Mark the fd as ready. */
823 vg_assert(! vg_waiting_fds[i].ready);
824 vg_waiting_fds[i].ready = True;
825 }
826}
827
828
 829/* See comment attached to poll_for_ready_fds() for explanation. */
830void complete_blocked_syscalls ( void )
831{
832 Int fd, i, res, syscall_no;
833 ThreadId tid;
834 Char msg_buf[100];
835
836 /* Inspect all the outstanding fds we know about. */
837
838 for (i = 0; i < VG_N_WAITING_FDS; i++) {
839 if (vg_waiting_fds[i].fd == -1 /* not in use */)
840 continue;
841 if (! vg_waiting_fds[i].ready)
842 continue;
843
844 fd = vg_waiting_fds[i].fd;
845 tid = vg_waiting_fds[i].tid;
846 vg_assert(tid >= 0 && tid < VG_N_THREADS);
847
848 /* The thread actually has to be waiting for the I/O event it
849 requested before we can deliver the result! */
850 if (vg_threads[tid].status != VgTs_WaitFD)
851 continue;
852
853 /* Ok, actually do it! We can safely use %EAX as the syscall
854 number, because the speculative call made by
855 sched_do_syscall() doesn't change %EAX in the case where the
856 call would have blocked. */
857
858 syscall_no = vg_waiting_fds[i].syscall_no;
859 vg_assert(syscall_no == vg_threads[tid].m_eax);
860 KERNEL_DO_SYSCALL(tid,res);
861 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
862
863 /* Reschedule. */
864 vg_threads[tid].status = VgTs_Runnable;
865 /* Mark slot as no longer in use. */
866 vg_waiting_fds[i].fd = -1;
867 /* pp_sched_status(); */
 868      if (VG_(clo_trace_sched)) {
 869         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
870 print_sched_event(tid, msg_buf);
871 }
872 }
873}
874
875
876static
877void nanosleep_for_a_while ( void )
878{
879 Int res;
880 struct vki_timespec req;
881 struct vki_timespec rem;
882 req.tv_sec = 0;
883 req.tv_nsec = 20 * 1000 * 1000;
884 res = VG_(nanosleep)( &req, &rem );
885 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
886 vg_assert(res == 0);
887}
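/* 20 milliseconds per poll; this is the idle wait the scheduler falls
   back on when nothing is Runnable but at least one thread is blocked
   in fd-wait or sleep. */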
888
889
890/* ---------------------------------------------------------------------
891 The scheduler proper.
892 ------------------------------------------------------------------ */
893
894/* Run user-space threads until either
895 * Deadlock occurs
896 * One thread asks to shutdown Valgrind
897 * The specified number of basic blocks has gone by.
898*/
899VgSchedReturnCode VG_(scheduler) ( void )
900{
901 ThreadId tid, tid_next;
902 UInt trc;
903 UInt dispatch_ctr_SAVED;
 904   Int request_code, done_this_time, n_in_fdwait_or_sleep;
 905   Char msg_buf[100];
906 Addr trans_addr;
907
908 /* For the LRU structures, records when the epoch began. */
909 ULong lru_epoch_started_at = 0;
910
911 /* Start with the root thread. tid in general indicates the
912 currently runnable/just-finished-running thread. */
913 tid = 0;
914
915 /* This is the top level scheduler loop. It falls into three
916 phases. */
917 while (True) {
918
919 /* ======================= Phase 1 of 3 =======================
920 Handle I/O completions and signals. This may change the
921 status of various threads. Then select a new thread to run,
922 or declare deadlock, or sleep if there are no runnable
923 threads but some are blocked on I/O. */
924
925 /* Age the LRU structures if an epoch has been completed. */
926 if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
927 lru_epoch_started_at = VG_(bbs_done);
928 increment_epoch();
929 }
930
931 /* Was a debug-stop requested? */
932 if (VG_(bbs_to_go) == 0)
933 goto debug_stop;
934
935 /* Do the following loop until a runnable thread is found, or
936 deadlock is detected. */
937 while (True) {
938
939 /* For stats purposes only. */
940 VG_(num_scheduling_events_MAJOR) ++;
941
942 /* See if any I/O operations which we were waiting for have
943 completed, and, if so, make runnable the relevant waiting
944 threads. */
945 poll_for_ready_fds();
946 complete_blocked_syscalls();
947
948 /* See if there are any signals which need to be delivered. If
949 so, choose thread(s) to deliver them to, and build signal
950 delivery frames on those thread(s) stacks. */
951 VG_(deliver_signals)( 0 /*HACK*/ );
952 VG_(do_sanity_checks)(0 /*HACK*/, False);
953
954 /* Try and find a thread (tid) to run. */
955 tid_next = tid;
 956         n_in_fdwait_or_sleep = 0;
 957         while (True) {
958 tid_next++;
959 if (tid_next >= VG_N_THREADS) tid_next = 0;
 960            if (vg_threads[tid_next].status == VgTs_WaitFD
961 || vg_threads[tid_next].status == VgTs_Sleeping)
962 n_in_fdwait_or_sleep ++;
 963            if (vg_threads[tid_next].status == VgTs_Runnable)
964 break; /* We can run this one. */
965 if (tid_next == tid)
966 break; /* been all the way round */
967 }
968 tid = tid_next;
969
970 if (vg_threads[tid].status == VgTs_Runnable) {
971 /* Found a suitable candidate. Fall out of this loop, so
972 we can advance to stage 2 of the scheduler: actually
973 running the thread. */
974 break;
975 }
976
977 /* We didn't find a runnable thread. Now what? */
 978         if (n_in_fdwait_or_sleep == 0) {
979 /* No runnable threads and no prospect of any appearing
980 even if we wait for an arbitrary length of time. In
981 short, we have a deadlock. */
 982            pp_sched_status();
983 return VgSrc_Deadlock;
984 }
985
986 /* At least one thread is in a fd-wait state. Delay for a
987 while, and go round again, in the hope that eventually a
988 thread becomes runnable. */
989 nanosleep_for_a_while();
990 // pp_sched_status();
991 // VG_(printf)(".\n");
992 }
993
994
995 /* ======================= Phase 2 of 3 =======================
996 Wahey! We've finally decided that thread tid is runnable, so
997 we now do that. Run it for as much of a quanta as possible.
998 Trivial requests are handled and the thread continues. The
999 aim is not to do too many of Phase 1 since it is expensive. */
1000
1001 if (0)
1002 VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);
1003
1004 /* Figure out how many bbs to ask vg_run_innerloop to do. Note
1005 that it decrements the counter before testing it for zero, so
1006 that if VG_(dispatch_ctr) is set to N you get at most N-1
1007 iterations. Also this means that VG_(dispatch_ctr) must
1008 exceed zero before entering the innerloop. Also also, the
1009 decrement is done before the bb is actually run, so you
1010 always get at least one decrement even if nothing happens.
1011 */
1012 if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
1013 VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
1014 else
1015 VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
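      /* Example: with at least VG_SCHEDULING_QUANTUM bbs still to go,
         VG_(dispatch_ctr) becomes VG_SCHEDULING_QUANTUM + 1, which by
         the rule above allows at most VG_SCHEDULING_QUANTUM basic
         blocks before the dispatcher returns VG_TRC_INNER_COUNTERZERO. */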
1016
1017 /* ... and remember what we asked for. */
1018 dispatch_ctr_SAVED = VG_(dispatch_ctr);
1019
1020 /* Actually run thread tid. */
1021 while (True) {
1022
1023 /* For stats purposes only. */
1024 VG_(num_scheduling_events_MINOR) ++;
1025
1026 if (0)
1027 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
1028 tid, VG_(dispatch_ctr) - 1 );
1029
1030 trc = run_thread_for_a_while ( tid );
1031
1032 /* Deal quickly with trivial scheduling events, and resume the
1033 thread. */
1034
1035 if (trc == VG_TRC_INNER_FASTMISS) {
1036 vg_assert(VG_(dispatch_ctr) > 0);
1037
1038 /* Trivial event. Miss in the fast-cache. Do a full
1039 lookup for it. */
1040 trans_addr
1041 = VG_(search_transtab) ( vg_threads[tid].m_eip );
1042 if (trans_addr == (Addr)0) {
1043 /* Not found; we need to request a translation. */
1044 VG_(create_translation_for)( vg_threads[tid].m_eip );
1045 trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
1046 if (trans_addr == (Addr)0)
1047 VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
1048 }
1049 continue; /* with this thread */
1050 }
1051
1052 if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
1053 Bool is_triv = maybe_do_trivial_clientreq(tid);
1054 if (is_triv) {
1055 /* NOTE: a trivial request is something like a call to
1056 malloc() or free(). It DOES NOT change the
1057 Runnability of this thread nor the status of any
1058 other thread; it is purely thread-local. */
1059 continue; /* with this thread */
1060 }
1061 }
1062
1063 /* It's a non-trivial event. Give up running this thread and
1064 handle things the expensive way. */
1065 break;
1066 }
1067
1068 /* ======================= Phase 3 of 3 =======================
1069 Handle non-trivial thread requests, mostly pthread stuff. */
1070
1071 /* Ok, we've fallen out of the dispatcher for a
1072 non-completely-trivial reason. First, update basic-block
1073 counters. */
1074
1075 done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
1076 vg_assert(done_this_time >= 0);
1077 VG_(bbs_to_go) -= (ULong)done_this_time;
1078 VG_(bbs_done) += (ULong)done_this_time;
1079
1080 if (0 && trc != VG_TRC_INNER_FASTMISS)
1081 VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
1082 tid, done_this_time, (Int)trc );
1083
1084 if (0 && trc != VG_TRC_INNER_FASTMISS)
1085 VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
1086 tid, VG_(bbs_done),
1087 name_of_sched_event(trc) );
1088
1089 /* Examine the thread's return code to figure out why it
1090 stopped, and handle requests. */
1091
1092 switch (trc) {
1093
1094 case VG_TRC_INNER_FASTMISS:
1095 VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
1096 /*NOTREACHED*/
1097 break;
1098
1099 case VG_TRC_INNER_COUNTERZERO:
1100 /* Timeslice is out. Let a new thread be scheduled,
1101 simply by doing nothing, causing us to arrive back at
1102 Phase 1. */
1103 if (VG_(bbs_to_go) == 0) {
1104 goto debug_stop;
1105 }
1106 vg_assert(VG_(dispatch_ctr) == 0);
1107 break;
1108
1109 case VG_TRC_UNRESUMABLE_SIGNAL:
1110 /* It got a SIGSEGV/SIGBUS, which we need to deliver right
1111 away. Again, do nothing, so we wind up back at Phase
1112 1, whereupon the signal will be "delivered". */
1113 break;
1114
1115      case VG_TRC_EBP_JMP_SYSCALL:
1116 /* Do a syscall for the vthread tid. This could cause it
1117 to become non-runnable. */
1118 sched_do_syscall(tid);
1119 break;
1120
1121 case VG_TRC_EBP_JMP_CLIENTREQ:
1122 /* Do a client request for the vthread tid. Note that
1123 some requests will have been handled by
1124 maybe_do_trivial_clientreq(), so we don't expect to see
1125 those here.
1126 */
1127         /* The thread's %EAX points at an arg block, the first
1128 word of which is the request code. */
1129 request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
1130         if (0) {
1131            VG_(sprintf)(msg_buf, "request 0x%x", request_code );
1132            print_sched_event(tid, msg_buf);
1133 }
1134 /* Do a non-trivial client request for thread tid. tid's
1135 %EAX points to a short vector of argument words, the
1136 first of which is the request code. The result of the
1137 request is put in tid's %EDX. Alternatively, perhaps
1138 the request causes tid to become non-runnable and/or
1139 other blocked threads become runnable. In general we
1140 can and often do mess with the state of arbitrary
1141 threads at this point. */
1142         if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
1143 return VgSrc_Shutdown;
1144 } else {
1145 do_nontrivial_clientreq(tid);
1146 }
1147         break;
1148
1149 default:
1150 VG_(printf)("\ntrc = %d\n", trc);
1151 VG_(panic)("VG_(scheduler), phase 3: "
1152 "unexpected thread return code");
1153 /* NOTREACHED */
1154 break;
1155
1156 } /* switch (trc) */
1157
1158 /* That completes Phase 3 of 3. Return now to the top of the
1159 main scheduler loop, to Phase 1 of 3. */
1160
1161 } /* top-level scheduler loop */
1162
1163
1164 /* NOTREACHED */
1165 VG_(panic)("scheduler: post-main-loop ?!");
1166 /* NOTREACHED */
1167
1168 debug_stop:
1169 /* If we exited because of a debug stop, print the translation
1170 of the last block executed -- by translating it again, and
1171 throwing away the result. */
1172 VG_(printf)(
1173 "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
1174 VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
1175 VG_(printf)("\n");
1176 VG_(printf)(
1177 "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
1178
1179 return VgSrc_BbsDone;
1180}
1181
1182
1183/* ---------------------------------------------------------------------
1184 The pthread implementation.
1185 ------------------------------------------------------------------ */
1186
1187#include <pthread.h>
1188#include <errno.h>
1189
1190#if !defined(PTHREAD_STACK_MIN)
1191# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1192#endif
1193
1194/* /usr/include/bits/pthreadtypes.h:
1195 typedef unsigned long int pthread_t;
1196*/
1197
1198
1199static
1200void do_pthread_cancel ( ThreadId tid_canceller,
1201 pthread_t tid_cancellee )
1202{
1203 Char msg_buf[100];
1204   /* We want to make it appear that this thread has returned to
1205      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
1206      return value.  So: simple: put PTHREAD_CANCELED into %EAX
1207      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
1208   if (VG_(clo_trace_sched)) {
1209      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1210 print_sched_event(tid_cancellee, msg_buf);
1211 }
1212 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
1213   vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1214   vg_threads[tid_cancellee].status = VgTs_Runnable;
1215}
1216
1217
1218
1219/* Thread tid is exiting, by returning from the function it was
1220   created with. Or possibly due to pthread_exit or cancellation.
1221 The main complication here is to resume any thread waiting to join
1222 with this one. */
1223static
1224void handle_pthread_return ( ThreadId tid, void* retval )
1225{
1226 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1227 UInt* jnr_args;
1228 void** jnr_thread_return;
1229 Char msg_buf[100];
1230
1231 /* Mark it as not in use. Leave the stack in place so the next
1232 user of this slot doesn't reallocate it. */
1233 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1234 vg_assert(vg_threads[tid].status != VgTs_Empty);
1235
1236   vg_threads[tid].retval = retval;
1237
1238 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1239 /* No one has yet done a join on me */
1240 vg_threads[tid].status = VgTs_WaitJoiner;
1241      if (VG_(clo_trace_sched)) {
1242         VG_(sprintf)(msg_buf,
1243 "root fn returns, waiting for a call pthread_join(%d)",
1244 tid);
1245 print_sched_event(tid, msg_buf);
1246 }
1247 } else {
1248      /* Someone is waiting; make their join call return with success,
1249 putting my exit code in the place specified by the caller's
1250 thread_return param. This is all very horrible, since we
1251 need to consult the joiner's arg block -- pointed to by its
1252 %EAX -- in order to extract the 2nd param of its pthread_join
1253 call. TODO: free properly the slot (also below).
1254 */
1255 jnr = vg_threads[tid].joiner;
1256 vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
1257 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1258 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1259 jnr_thread_return = (void**)(jnr_args[2]);
1260 if (jnr_thread_return != NULL)
1261 *jnr_thread_return = vg_threads[tid].retval;
1262 vg_threads[jnr].m_edx = 0; /* success */
1263 vg_threads[jnr].status = VgTs_Runnable;
1264 vg_threads[tid].status = VgTs_Empty; /* bye! */
1265      if (VG_(clo_instrument) && tid != 0)
1266 VGM_(make_noaccess)( vg_threads[tid].stack_base,
1267 vg_threads[tid].stack_size );
1268      if (VG_(clo_trace_sched)) {
1269         VG_(sprintf)(msg_buf,
1270 "root fn returns, to find a waiting pthread_join(%d)", tid);
1271 print_sched_event(tid, msg_buf);
1272 VG_(sprintf)(msg_buf,
1273 "my pthread_join(%d) returned; resuming", tid);
1274 print_sched_event(jnr, msg_buf);
1275 }
1276 }
1277
1278 /* Return value is irrelevant; this thread will not get
1279 rescheduled. */
1280}
1281
1282
1283static
1284void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
1285{
1286 Char msg_buf[100];
1287
1288 /* jee, the joinee, is the thread specified as an arg in thread
1289 tid's call to pthread_join. So tid is the join-er. */
1290 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1291 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1292
1293 if (jee == tid) {
1294 vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
1295 vg_threads[tid].status = VgTs_Runnable;
1296 return;
1297 }
1298
1299 if (jee < 0
1300 || jee >= VG_N_THREADS
1301 || vg_threads[jee].status == VgTs_Empty) {
1302 /* Invalid thread to join to. */
1303 vg_threads[tid].m_edx = EINVAL;
1304 vg_threads[tid].status = VgTs_Runnable;
1305 return;
1306 }
1307
1308 if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
1309 /* Someone already did join on this thread */
1310 vg_threads[tid].m_edx = EINVAL;
1311 vg_threads[tid].status = VgTs_Runnable;
1312 return;
1313 }
1314
1315 /* if (vg_threads[jee].detached) ... */
1316
1317 /* Perhaps the joinee has already finished? If so return
1318 immediately with its return code, and free up the slot. TODO:
1319 free it properly (also above). */
1320 if (vg_threads[jee].status == VgTs_WaitJoiner) {
1321 vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
1322 vg_threads[tid].m_edx = 0; /* success */
1323 if (thread_return != NULL)
1324 *thread_return = vg_threads[jee].retval;
1325 vg_threads[tid].status = VgTs_Runnable;
1326 vg_threads[jee].status = VgTs_Empty; /* bye! */
1327      if (VG_(clo_instrument) && jee != 0)
1328 VGM_(make_noaccess)( vg_threads[jee].stack_base,
1329 vg_threads[jee].stack_size );
1330      if (VG_(clo_trace_sched)) {
1331         VG_(sprintf)(msg_buf,
1332 "someone called pthread_join() on me; bye!");
1333 print_sched_event(jee, msg_buf);
1334 VG_(sprintf)(msg_buf,
1335 "my pthread_join(%d) returned immediately",
1336 jee );
1337 print_sched_event(tid, msg_buf);
1338 }
1339 return;
1340 }
1341
1342 /* Ok, so we'll have to wait on jee. */
1343 vg_threads[jee].joiner = tid;
1344 vg_threads[tid].status = VgTs_WaitJoinee;
1345   if (VG_(clo_trace_sched)) {
1346      VG_(sprintf)(msg_buf,
1347 "blocking on call of pthread_join(%d)", jee );
1348 print_sched_event(tid, msg_buf);
1349 }
1350 /* So tid's join call does not return just now. */
1351}
1352
1353
1354static
1355void do_pthread_create ( ThreadId parent_tid,
1356 pthread_t* thread,
1357 pthread_attr_t* attr,
1358 void* (*start_routine)(void *),
1359 void* arg )
1360{
1361 Addr new_stack;
1362 UInt new_stk_szb;
1363 ThreadId tid;
1364 Char msg_buf[100];
1365
1366 /* Paranoia ... */
1367 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1368
1369 vg_assert(vg_threads[parent_tid].status != VgTs_Empty);
1370
1371 tid = vg_alloc_ThreadState();
1372
1373 /* If we've created the main thread's tid, we're in deep trouble :) */
1374 vg_assert(tid != 0);
1375
1376 /* Copy the parent's CPU state into the child's, in a roundabout
1377 way (via baseBlock). */
1378 VG_(load_thread_state)(parent_tid);
1379 VG_(save_thread_state)(tid);
1380
1381 /* Consider allocating the child a stack, if the one it already has
1382 is inadequate. */
1383 new_stk_szb = PTHREAD_STACK_MIN;
1384
1385 if (new_stk_szb > vg_threads[tid].stack_size) {
1386 /* Again, for good measure :) We definitely don't want to be
1387 allocating a stack for the main thread. */
1388 vg_assert(tid != 0);
1389 /* for now, we don't handle the case of anything other than
1390 assigning it for the first time. */
1391 vg_assert(vg_threads[tid].stack_size == 0);
1392 vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
1393 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
1394 vg_threads[tid].stack_base = new_stack;
1395 vg_threads[tid].stack_size = new_stk_szb;
1396 vg_threads[tid].m_esp
1397 = new_stack + new_stk_szb
1398 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1399 }
1400 if (VG_(clo_instrument))
1401 VGM_(make_noaccess)( vg_threads[tid].m_esp,
1402 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1403
1404 /* push arg */
1405 vg_threads[tid].m_esp -= 4;
1406 * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;
1407
1408 /* push (magical) return address */
1409 vg_threads[tid].m_esp -= 4;
1410   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
1411
1412 if (VG_(clo_instrument))
1413 VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );
1414
1415 /* this is where we start */
1416 vg_threads[tid].m_eip = (UInt)start_routine;
1417
1418   if (VG_(clo_trace_sched)) {
1419      VG_(sprintf)(msg_buf,
1420 "new thread, created by %d", parent_tid );
1421 print_sched_event(tid, msg_buf);
1422 }
1423
1424 /* store the thread id in *thread. */
1425 // if (VG_(clo_instrument))
1426 // ***** CHECK *thread is writable
1427 *thread = (pthread_t)tid;
1428
1429 /* return zero */
1430 vg_threads[tid].joiner = VG_INVALID_THREADID;
1431 vg_threads[tid].status = VgTs_Runnable;
1432 vg_threads[tid].m_edx = 0; /* success */
1433}
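/* The child's initial stack, as built above (addresses decreasing
   downwards):

      new_stack + new_stk_szb
         redzone (VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, no-access)
         arg                            pushed first
         VG_(pthreadreturn_bogusRA)     %ESP points here

   with %EIP set to start_routine, so that when start_routine eventually
   returns it "returns" to the bogus RA -- which is presumably what
   funnels the thread into handle_pthread_return() via
   VG_USERREQ__PTHREAD_RETURNS. */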
1434
1435
1436/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
1437 is a struct with at least 5 words:
1438 typedef struct
1439 {
1440 int __m_reserved; -- Reserved for future use
1441 int __m_count; -- Depth of recursive locking
1442 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1443 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1444 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1445 } pthread_mutex_t;
1446 Ours is just a single word, an index into vg_mutexes[].
1447 For now I'll park it in the __m_reserved field.
1448
1449 Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
1450 a zero __m_count field (see /usr/include/pthread.h). So I'll
1451 use zero to mean non-inited, and 1 to mean inited.
1452
1453 How convenient.
1454*/
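/* So, schematically, a mutex this file considers initialised looks like
      mutex->__m_count    == 1                 (the 'inited' flag)
      mutex->__m_reserved == a valid MutexId   (index into vg_mutexes[])
   and anything with __m_count == 0, e.g. PTHREAD_MUTEX_INITIALIZER,
   gets initialise_mutex() run on it at its first lock. */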
1455
1456static
1457void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
1458{
1459   MutexId mid;
1460 Char msg_buf[100];
1461   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
1462 whatever reason. */
1463   mid = vg_alloc_VgMutex();
1464 vg_mutexes[mid].in_use = True;
1465 vg_mutexes[mid].held = False;
1466 vg_mutexes[mid].owner = VG_INVALID_THREADID; /* irrelevant */
1467 mutex->__m_reserved = mid;
1468 mutex->__m_count = 1; /* initialised */
1469   if (VG_(clo_trace_pthread)) {
1470 VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
1471 mutex, mid );
1472 print_pthread_event(tid, msg_buf);
1473 }
1474}
1475
1476/* Allocate a new MutexId and write it into *mutex. Ideally take
1477 notice of the attributes in *mutexattr. */
1478static
1479void do_pthread_mutex_init ( ThreadId tid,
1480 pthread_mutex_t *mutex,
1481 const pthread_mutexattr_t *mutexattr)
1482{
1483   Char msg_buf[100];
1484   /* Paranoia ... */
sewardje663cb92002-04-12 10:26:32 +00001485 vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));
1486
1487   initialise_mutex(tid, mutex);
1488
1489 if (VG_(clo_trace_pthread)) {
1490 VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
1491 mutex, mutex->__m_reserved );
1492 print_pthread_event(tid, msg_buf);
1493 }
1494
1495   /*
1496 RETURN VALUE
1497 pthread_mutex_init always returns 0. The other mutex functions
1498 return 0 on success and a non-zero error code on error.
1499 */
1500 /* THIS THREAD returns with 0. */
1501 vg_threads[tid].m_edx = 0;
1502}
1503
1504
1505static
1506void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1507{
1508 MutexId mid;
1509 Char msg_buf[100];
1510
1511   /* *mutex contains the MutexId, or one of the magic values
1512 PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
1513 now. See comment(s) above re use of __m_count to indicated
1514 initialisation status.
1515 */
1516
1517 /* POSIX doesn't mandate this, but for sanity ... */
1518 if (mutex == NULL) {
1519 vg_threads[tid].m_edx = EINVAL;
1520 return;
1521 }
1522
1523 if (mutex->__m_count == 0) {
1524      initialise_mutex(tid, mutex);
1525   }
1526
1527 mid = mutex->__m_reserved;
1528 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1529 vg_threads[tid].m_edx = EINVAL;
1530 return;
1531 }
1532
1533   if (VG_(clo_trace_pthread)) {
1534 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d (%p)",
1535 mid, mutex );
1536 print_pthread_event(tid, msg_buf);
1537 }
1538
1539   /* Assert initialised. */
1540 vg_assert(mutex->__m_count == 1);
1541
1542 /* Assume tid valid. */
1543 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1544
1545 if (vg_mutexes[mid].held) {
1546 if (vg_mutexes[mid].owner == tid) {
1547 vg_threads[tid].m_edx = EDEADLK;
1548 return;
1549 }
1550 /* Someone else has it; we have to wait. */
1551 vg_threads[tid].status = VgTs_WaitMX;
1552 vg_threads[tid].waited_on_mid = mid;
1553 /* No assignment to %EDX, since we're blocking. */
1554      if (VG_(clo_trace_pthread)) {
1555 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d (%p): BLOCK",
1556 mid, mutex );
1557 print_pthread_event(tid, msg_buf);
1558      }
1559 } else {
1560 /* We get it! */
1561 vg_mutexes[mid].held = True;
1562 vg_mutexes[mid].owner = tid;
1563 /* return 0 (success). */
1564 vg_threads[tid].m_edx = 0;
1565 }
1566}
1567
1568
1569static
1570void do_pthread_mutex_unlock ( ThreadId tid,
1571 pthread_mutex_t *mutex )
1572{
1573 MutexId mid;
1574 Int i;
1575 Char msg_buf[100];
1576
1577   if (mutex == NULL
1578 || mutex->__m_count != 1) {
1579 vg_threads[tid].m_edx = EINVAL;
1580 return;
1581 }
1582
1583 mid = mutex->__m_reserved;
1584 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1585 vg_threads[tid].m_edx = EINVAL;
1586 return;
1587 }
1588
1589   if (VG_(clo_trace_pthread)) {
1590 VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
1591 mid, mutex );
1592 print_pthread_event(tid, msg_buf);
1593 }
1594
1595   /* Assume tid valid */
1596 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1597
1598 /* Barf if we don't currently hold the mutex. */
1599 if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
1600 vg_threads[tid].m_edx = EPERM;
1601 return;
1602 }
1603
1604 /* Find some arbitrary thread waiting on this mutex, and make it
1605 runnable. If none are waiting, mark the mutex as not held. */
1606 for (i = 0; i < VG_N_THREADS; i++) {
1607 if (vg_threads[i].status == VgTs_Empty)
1608 continue;
1609 if (vg_threads[i].status == VgTs_WaitMX
1610 && vg_threads[i].waited_on_mid == mid)
1611 break;
1612 }
1613
1614 vg_assert(i <= VG_N_THREADS);
1615 if (i == VG_N_THREADS) {
1616 /* Nobody else is waiting on it. */
1617 vg_mutexes[mid].held = False;
1618 } else {
1619 /* Notionally transfer the hold to thread i, whose
1620 pthread_mutex_lock() call now returns with 0 (success). */
1621 vg_mutexes[mid].owner = i;
1622 vg_threads[i].status = VgTs_Runnable;
1623 vg_threads[i].m_edx = 0; /* pth_lock() success */
1624
1625 if (VG_(clo_trace_pthread)) {
1626 VG_(sprintf)(msg_buf, "pthread_mutex_lock %d: RESUME",
1627 mid );
1628 print_pthread_event(tid, msg_buf);
1629      }
1630 }
1631
1632 /* In either case, our (tid's) pth_unlock() returns with 0
1633 (success). */
1634 vg_threads[tid].m_edx = 0; /* Success. */
1635}
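/* Note the hand-off scheme above: unlock does not simply mark the mutex
   free and wake everyone.  If any thread is in VgTs_WaitMX on this
   mutex, ownership is transferred directly to one arbitrarily chosen
   waiter, whose blocked pthread_mutex_lock() then returns 0. */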
1636
1637
1638static void do_pthread_mutex_destroy ( ThreadId tid,
1639 pthread_mutex_t *mutex )
1640{
1641   MutexId mid;
1642 Char msg_buf[100];
1643
1644 if (mutex == NULL
1645 || mutex->__m_count != 1) {
1646 vg_threads[tid].m_edx = EINVAL;
1647 return;
1648 }
1649
1650 mid = mutex->__m_reserved;
1651 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1652 vg_threads[tid].m_edx = EINVAL;
1653 return;
1654 }
1655
1656   if (VG_(clo_trace_pthread)) {
1657 VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
1658 mid, mutex );
1659 print_pthread_event(tid, msg_buf);
1660 }
1661
1662   /* Assume tid valid */
1663 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1664
1665 /* Barf if the mutex is currently held. */
1666 if (vg_mutexes[mid].held) {
1667 vg_threads[tid].m_edx = EBUSY;
1668 return;
1669 }
1670
1671 mutex->__m_count = 0; /* uninitialised */
1672 vg_mutexes[mid].in_use = False;
1673 vg_threads[tid].m_edx = 0;
1674}
1675
1676
1677/* vthread tid is returning from a signal handler; modify its
1678 stack/regs accordingly. */
1679static
1680void handle_signal_return ( ThreadId tid )
1681{
1682 Char msg_buf[100];
1683 Bool restart_blocked_syscalls = VG_(signal_returns)(tid);
1684
1685 if (restart_blocked_syscalls)
1686 /* Easy; we don't have to do anything. */
1687 return;
1688
1689 if (vg_threads[tid].status == VgTs_WaitFD) {
1690 vg_assert(vg_threads[tid].m_eax == __NR_read
1691 || vg_threads[tid].m_eax == __NR_write);
1692 /* read() or write() interrupted. Force a return with EINTR. */
1693 vg_threads[tid].m_eax = -VKI_EINTR;
1694 vg_threads[tid].status = VgTs_Runnable;
1695 if (VG_(clo_trace_sched)) {
1696 VG_(sprintf)(msg_buf,
1697 "read() / write() interrupted by signal; return EINTR" );
1698 print_sched_event(tid, msg_buf);
1699 }
1700 return;
1701 }
1702
1703   if (vg_threads[tid].status == VgTs_Sleeping) {
1704 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
1705 /* We interrupted a nanosleep(). The right thing to do is to
1706 write the unused time to nanosleep's second param and return
1707 EINTR, but I'm too lazy for that. */
1708 return;
1709 }
1710
1711 /* All other cases? Just return. */
1712}
1713
1714
1715/* ---------------------------------------------------------------------
1716 Handle non-trivial client requests.
1717 ------------------------------------------------------------------ */
1718
1719static
1720void do_nontrivial_clientreq ( ThreadId tid )
1721{
1722 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
1723 UInt req_no = arg[0];
1724 switch (req_no) {
1725
1726 case VG_USERREQ__PTHREAD_CREATE:
1727 do_pthread_create( tid,
1728 (pthread_t*)arg[1],
1729 (pthread_attr_t*)arg[2],
1730 (void*(*)(void*))arg[3],
1731 (void*)arg[4] );
1732 break;
1733
1734      case VG_USERREQ__PTHREAD_RETURNS:
1735 handle_pthread_return( tid, (void*)arg[1] );
1736         break;
1737
1738 case VG_USERREQ__PTHREAD_JOIN:
1739 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
1740 break;
1741
1742 /* Sigh ... this probably will cause huge numbers of major
1743 (expensive) scheduling events, for no real reason.
1744 Perhaps should be classified as a trivial-request. */
1745 case VG_USERREQ__PTHREAD_GET_THREADID:
1746 vg_threads[tid].m_edx = tid;
1747 break;
1748
1749 case VG_USERREQ__PTHREAD_MUTEX_INIT:
1750 do_pthread_mutex_init( tid,
1751 (pthread_mutex_t *)(arg[1]),
1752 (pthread_mutexattr_t *)(arg[2]) );
1753 break;
1754
1755 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
1756 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
1757 break;
1758
1759 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
1760 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
1761 break;
1762
1763 case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
1764 do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
1765 break;
1766
1767 case VG_USERREQ__PTHREAD_CANCEL:
1768 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
1769 break;
1770
1771 case VG_USERREQ__MAKE_NOACCESS:
1772 case VG_USERREQ__MAKE_WRITABLE:
1773 case VG_USERREQ__MAKE_READABLE:
1774 case VG_USERREQ__DISCARD:
1775 case VG_USERREQ__CHECK_WRITABLE:
1776 case VG_USERREQ__CHECK_READABLE:
1777 case VG_USERREQ__MAKE_NOACCESS_STACK:
1778 case VG_USERREQ__RUNNING_ON_VALGRIND:
1779 case VG_USERREQ__DO_LEAK_CHECK:
1780 vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
1781 break;
1782
1783      case VG_USERREQ__SIGNAL_RETURNS:
1784 handle_signal_return(tid);
1785 break;
1786
1787      default:
1788 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
1789 VG_(panic)("handle_private_client_pthread_request: "
1790 "unknown request");
1791 /*NOTREACHED*/
1792 break;
1793 }
1794}
1795
1796
1797/*--------------------------------------------------------------------*/
1798/*--- end vg_scheduler.c ---*/
1799/*--------------------------------------------------------------------*/