/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

   Note!  This pthreads implementation is so poor as to not be
   suitable for use by anyone at all!

   - Currently, when a signal is run, just the ThreadStatus.status
     fields are saved in the signal frame, along with the CPU state.
     Question: should I also save and restore:
        ThreadStatus.joiner
        ThreadStatus.waited_on_mid
        ThreadStatus.awaken_at
        ThreadStatus.retval
     Currently unsure, and so am not doing so.

   - Signals interrupting read/write and nanosleep: SA_RESTART settings.
     Read/write correctly return with EINTR when SA_RESTART isn't
     specified and they are interrupted by a signal.  nanosleep just
     pretends signals don't exist -- should be fixed.

   - Read/write syscall starts: don't crap out when the initial
     nonblocking read/write returns an error.

   - Get rid of restrictions re use of sigaltstack; they are no longer
     needed.
*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use. */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


typedef
   struct {
      /* Is this slot in use, or free? */
      Bool in_use;
      /* If in_use, is this mutex held by some thread, or not? */
      Bool held;
      /* If held == True, owner indicates who by. */
      ThreadId owner;
   }
   VgMutex;

static VgMutex vg_mutexes[VG_N_MUTEXES];

/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void pp_sched_status ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("tid %d:  ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default:              VG_(printf)("???\n"); break;
      }
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
void VG_(create_translation_for) ( Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


/* Find an unused VgMutex record. */
static
MutexId vg_alloc_VgMutex ( void )
{
   Int i;
   for (i = 0; i < VG_N_MUTEXES; i++) {
      if (!vg_mutexes[i].in_use)
         return i;
   }
   VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
   VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
   VG_(panic)("VG_N_MUTEXES is too low");
   /*NOTREACHED*/
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
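
/* For illustration only: this fault-catching scheme assumes the host
   SIGSEGV/SIGBUS handler (which lives in the signals machinery, not in
   this file) finishes with something along these lines, so that control
   re-emerges in the else-arm of run_thread_for_a_while above:

      // hypothetical sketch, not the actual handler
      void catch_unresumable_signal ( int sigNo )
      {
         VG_(longjmpd_on_signal) = sigNo;
         __builtin_longjmp ( VG_(scheduler_jmpbuf), 1 );
      }
*/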

/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_MUTEXES; i++)
      vg_mutexes[i].in_use = False;

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   VG_(save_thread_state) ( tid_main );
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}


/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled that way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)       \
      { tst->m_edx = (vvv);        \
        return True;               \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
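
/* For illustration: a client-side wrapper reaching the code above puts
   the address of a small argument vector in its %EAX, with the request
   code in word 0.  A minimal sketch (hypothetical; the real wrappers
   live in the client-side library, not in this file):

      UInt argblock[4];
      argblock[0] = VG_USERREQ__MALLOC;   // request code, tested above
      argblock[1] = 100;                  // request arg: size in bytes
      // ... trap to Valgrind with %EAX == (UInt)&argblock[0];
      // the result comes back in the thread's %EDX, via SIMPLE_RETURN.
*/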


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
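      /* Worked example (illustrative): a request of { tv_sec = 1,
         tv_nsec = 500000000 } gives t_awaken = t_now + 1000000
         + 500000 microseconds, i.e. 1.5 seconds from now. */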
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select(). */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table. */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state before
   we can actually finally deliver the results, so this procedure
   doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time (second)
            arg of nanosleep, since it's used up all its time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
        VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
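
/* For illustration, the life cycle of one blocking read() under the
   machinery above, assuming no signal interrupts the thread (a sketch
   of the state transitions, not additional code):

      sched_do_syscall:           fd set non-blocking; the speculative
                                  call returns -VKI_EWOULDBLOCK; thread
                                  -> VgTs_WaitFD; slot added to
                                  vg_waiting_fds[]
      poll_for_ready_fds:         select() reports the fd readable;
                                  slot marked ready = True
      complete_blocked_syscalls:  syscall re-issued, now completing;
                                  thread -> VgTs_Runnable; slot freed
                                  by setting its fd back to -1
*/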


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec  = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            pp_sched_status();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait or sleeping state.
            Delay for a while, and go round again, in the hope that
            eventually a thread becomes runnable. */
         nanosleep_for_a_while();
         /* pp_sched_status(); */
         /* VG_(printf)(".\n"); */
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive. */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
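      /* Worked example: when plenty of bbs remain, VG_(dispatch_ctr)
         becomes VG_SCHEDULING_QUANTUM + 1, and since the innerloop
         decrements before testing, the thread runs for at most
         VG_SCHEDULING_QUANTUM basic blocks before yielding. */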

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               VG_(create_translation_for)( vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done)  += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the return
      value.  So: simple: put PTHREAD_CANCELED into %EAX and
      &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].m_esp
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
   }
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   /* return zero */
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;
   vg_threads[tid].m_edx  = 0; /* success */
}
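
/* For illustration, the child's initial stack as built above (a
   sketch; the stack grows downwards):

      stack_base + stack_size
         | redzone: VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, no-access
         | arg                          <- pushed first
         | &VG_(pthreadreturn_bogusRA)  <- fake return address
      %ESP points here ----^

   so when start_routine returns, it "returns" to the bogus return
   address, whose stub hands the return value back to the scheduler
   via the VG_USERREQ__PTHREAD_RETURNS client request. */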


/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
   is a struct with at least 5 words:
      typedef struct
      {
        int __m_reserved;         -- Reserved for future use
        int __m_count;            -- Depth of recursive locking
        _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
        int __m_kind;             -- Mutex kind: fast, recursive or errcheck
        struct _pthread_fastlock __m_lock; -- Underlying fast lock
      } pthread_mutex_t;
   Ours is just a single word, an index into vg_mutexes[].
   For now I'll park it in the __m_reserved field.

   Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
   a zero __m_count field (see /usr/include/pthread.h).  So I'll
   use zero to mean non-inited, and 1 to mean inited.

   How convenient.
*/
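
/* Illustrative sketch of that encoding (hypothetical helpers; the real
   code below just reads the fields directly):

      static Bool mutex_is_inited ( pthread_mutex_t* mutex )
      { return mutex->__m_count == 1; }

      static MutexId mutex_mid ( pthread_mutex_t* mutex )
      { return (MutexId)mutex->__m_reserved; }
*/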

static
void initialise_mutex ( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];
   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
      whatever reason. */
   mid = vg_alloc_VgMutex();
   vg_mutexes[mid].in_use = True;
   vg_mutexes[mid].held   = False;
   vg_mutexes[mid].owner  = VG_INVALID_THREADID; /* irrelevant */
   mutex->__m_reserved = mid;
   mutex->__m_count    = 1; /* initialised */
   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "(initialise mutex) (%p) -> %d",
                            mutex, mid );
      print_pthread_event(tid, msg_buf);
   }
}

/* Allocate a new MutexId and write it into *mutex.  Ideally take
   notice of the attributes in *mutexattr. */
static
void do_pthread_mutex_init ( ThreadId tid,
                             pthread_mutex_t *mutex,
                             const pthread_mutexattr_t *mutexattr)
{
   Char msg_buf[100];
   /* Paranoia ... */
   vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));

   initialise_mutex(tid, mutex);

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_init (%p) -> %d",
                            mutex, mutex->__m_reserved );
      print_pthread_event(tid, msg_buf);
   }

   /*
   RETURN VALUE
      pthread_mutex_init always returns 0.  The other mutex functions
      return 0 on success and a non-zero error code on error.
   */
   /* THIS THREAD returns with 0. */
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   /* *mutex contains the MutexId, or one of the magic values
      PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
      now.  See comment(s) above re use of __m_count to indicate
      initialisation status.
   */

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (mutex->__m_count == 0) {
      initialise_mutex(tid, mutex);
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assert initialised. */
   vg_assert(mutex->__m_count == 1);

   /* Assume tid valid. */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (vg_mutexes[mid].held) {
      if (vg_mutexes[mid].owner == tid) {
         vg_threads[tid].m_edx = EDEADLK;
         return;
      }
      /* Someone else has it; we have to wait. */
      vg_threads[tid].status = VgTs_WaitMX;
      vg_threads[tid].waited_on_mid = mid;
      /* No assignment to %EDX, since we're blocking. */
      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d (%p): BLOCK",
                               mid, mutex );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* We get it! */
      vg_mutexes[mid].held  = True;
      vg_mutexes[mid].owner = tid;
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }
}

static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   MutexId mid;
   Int     i;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if we don't currently hold the mutex. */
   if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].waited_on_mid == mid)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      vg_mutexes[mid].held = False;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      vg_mutexes[mid].owner = i;
      vg_threads[i].status = VgTs_Runnable;
      vg_threads[i].m_edx = 0; /* pth_lock() success */

      if (VG_(clo_trace_pthread)) {
         VG_(sprintf)(msg_buf, "pthread_mutex_lock   %d: RESUME",
                               mid );
         print_pthread_event(tid, msg_buf);
      }
   }

   /* In either case, our (tid's) pth_unlock() returns with 0
      (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}

static void do_pthread_mutex_destroy ( ThreadId tid,
                                       pthread_mutex_t *mutex )
{
   MutexId mid;
   Char    msg_buf[100];

   if (mutex == NULL
       || mutex->__m_count != 1) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   mid = mutex->__m_reserved;
   if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   if (VG_(clo_trace_pthread)) {
      VG_(sprintf)(msg_buf, "pthread_mutex_destroy %d (%p)",
                            mid, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Assume tid valid */
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Barf if the mutex is currently held. */
   if (vg_mutexes[mid].held) {
      vg_threads[tid].m_edx = EBUSY;
      return;
   }

   mutex->__m_count = 0; /* uninitialised */
   vg_mutexes[mid].in_use = False;
   vg_threads[tid].m_edx = 0;
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* This must test for Sleeping, not WaitFD; the WaitFD case has
      already returned above. */
   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}

/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_INIT:
         do_pthread_mutex_init( tid,
                                (pthread_mutex_t *)(arg[1]),
                                (pthread_mutexattr_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
         do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("handle_private_client_pthread_request: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/