2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
12 Julian_Seward@muraroa.demon.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file LICENSE.
30*/
31
32#include "vg_include.h"
33#include "vg_constants.h"
34
35#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
36 VG_USERREQ__DO_LEAK_CHECK */
37
38/* BORKAGE as of 11 Apr 02
39
40Note! This implementation is so poor as to not be suitable for use by
41anyone at all!
42
43- properly save scheduler private state in signal delivery frames.
44
45- fd-poll optimisation (don't select with empty sets)
46
47- signals interrupting read/write and nanosleep, and take notice
48 of SA_RESTART or not
49
50- return bogus RA: %EAX trashed, so pthread_joiner gets nonsense
51 exit codes
52
53- when a thread is done mark its stack as noaccess */
54
55
56/* ---------------------------------------------------------------------
57 Types and globals for the scheduler.
58 ------------------------------------------------------------------ */
59
60/* type ThreadId is defined in vg_include.h. */
61
62/* struct ThreadState is defined in vg_include.h. */
63
64/* Private globals. A statically allocated array of threads. */
65static ThreadState vg_threads[VG_N_THREADS];
66
67
68/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
69jmp_buf VG_(scheduler_jmpbuf);
70/* ... and if so, here's the signal which caused it to do so. */
71Int VG_(longjmpd_on_signal);
72
73
74/* Machinery to keep track of which threads are waiting on which
75 fds. */
76typedef
77 struct {
78 /* The thread which made the request. */
79 ThreadId tid;
80
81 /* The next two fields describe the request. */
82 /* File descriptor waited for. -1 means this slot is not in use */
83 Int fd;
84 /* The syscall number the fd is used in. */
85 Int syscall_no;
86
87 /* False => still waiting for select to tell us the fd is ready
88 to go. True => the fd is ready, but the results have not yet
89 been delivered back to the calling thread. Once the latter
90 happens, this entire record is marked as no longer in use, by
91 making the fd field be -1. */
92 Bool ready;
93 }
94 VgWaitedOnFd;
95
96static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
97
98
99
100typedef
101 struct {
102 /* Is this slot in use, or free? */
103 Bool in_use;
104 /* If in_use, is this mutex held by some thread, or not? */
105 Bool held;
106 /* if held==True, owner indicates who by. */
107 ThreadId owner;
108 }
109 VgMutex;
110
111static VgMutex vg_mutexes[VG_N_MUTEXES];
112
113/* Forwards */
114static void do_nontrivial_clientreq ( ThreadId tid );
115
116
117/* ---------------------------------------------------------------------
118 Helper functions for the scheduler.
119 ------------------------------------------------------------------ */
120
121static
122void pp_sched_status ( void )
123{
124 Int i;
125 VG_(printf)("\nsched status:\n");
126 for (i = 0; i < VG_N_THREADS; i++) {
127 if (vg_threads[i].status == VgTs_Empty) continue;
128 VG_(printf)("tid %d: ", i);
129 switch (vg_threads[i].status) {
130 case VgTs_Runnable: VG_(printf)("Runnable\n"); break;
131 case VgTs_WaitFD: VG_(printf)("WaitFD\n"); break;
132 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
133 vg_threads[i].joiner); break;
134 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
135            default: VG_(printf)("???\n"); break;
136 }
137 }
138 VG_(printf)("\n");
139}
140
141static
142void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
143{
144 Int i;
145
146 vg_assert(fd != -1); /* avoid total chaos */
147
148 for (i = 0; i < VG_N_WAITING_FDS; i++)
149 if (vg_waiting_fds[i].fd == -1)
150 break;
151
152 if (i == VG_N_WAITING_FDS)
153 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
154 /*
155 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
156 tid, fd, i);
157 */
158 vg_waiting_fds[i].fd = fd;
159 vg_waiting_fds[i].tid = tid;
160 vg_waiting_fds[i].ready = False;
161 vg_waiting_fds[i].syscall_no = syscall_no;
162}
163
164
165
166static
167void print_sched_event ( ThreadId tid, Char* what )
168{
169 if (1)
170 VG_(message)(Vg_DebugMsg, "SCHED[%d]: %s", tid, what );
171}
172
173
174static
175Char* name_of_sched_event ( UInt event )
176{
177 switch (event) {
178 case VG_TRC_EBP_JMP_SPECIAL: return "JMP_SPECIAL";
179 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
180 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
181 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
182 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
183 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
184 default: return "??UNKNOWN??";
185 }
186}
187
188
189/* Create a translation of the client basic block beginning at
190 orig_addr, and add it to the translation cache & translation table.
191 This probably doesn't really belong here, but, hey ...
192*/
193void VG_(create_translation_for) ( Addr orig_addr )
194{
195 Addr trans_addr;
196 TTEntry tte;
197 Int orig_size, trans_size;
198 /* Ensure there is space to hold a translation. */
199 VG_(maybe_do_lru_pass)();
200 VG_(translate)( orig_addr, &orig_size, &trans_addr, &trans_size );
201 /* Copy data at trans_addr into the translation cache.
202 Returned pointer is to the code, not to the 4-byte
203 header. */
204 /* Since the .orig_size and .trans_size fields are
205 UShort, be paranoid. */
206 vg_assert(orig_size > 0 && orig_size < 65536);
207 vg_assert(trans_size > 0 && trans_size < 65536);
208 tte.orig_size = orig_size;
209 tte.orig_addr = orig_addr;
210 tte.trans_size = trans_size;
211 tte.trans_addr = VG_(copy_to_transcache)
212 ( trans_addr, trans_size );
213 tte.mru_epoch = VG_(current_epoch);
214 /* Free the intermediary -- was allocated by VG_(emit_code). */
215 VG_(jitfree)( (void*)trans_addr );
216 /* Add to trans tab and set back pointer. */
217 VG_(add_to_trans_tab) ( &tte );
218 /* Update stats. */
219 VG_(this_epoch_in_count) ++;
220 VG_(this_epoch_in_osize) += orig_size;
221 VG_(this_epoch_in_tsize) += trans_size;
222 VG_(overall_in_count) ++;
223 VG_(overall_in_osize) += orig_size;
224 VG_(overall_in_tsize) += trans_size;
225 /* Record translated area for SMC detection. */
226 VG_(smc_mark_original) ( orig_addr, orig_size );
227}
228
229
230/* Allocate a completely empty ThreadState record. */
231static
232ThreadId vg_alloc_ThreadState ( void )
233{
234 Int i;
235 for (i = 0; i < VG_N_THREADS; i++) {
236 if (vg_threads[i].status == VgTs_Empty)
237 return i;
238 }
239 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
240 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
241 VG_(panic)("VG_N_THREADS is too low");
242 /*NOTREACHED*/
243}
244
245
246ThreadState* VG_(get_thread_state) ( ThreadId tid )
247{
248 vg_assert(tid >= 0 && tid < VG_N_THREADS);
249 vg_assert(vg_threads[tid].status != VgTs_Empty);
250 return & vg_threads[tid];
251}
252
253
254/* Find an unused VgMutex record. */
255static
256MutexId vg_alloc_VgMutex ( void )
257{
258 Int i;
259 for (i = 0; i < VG_N_MUTEXES; i++) {
260 if (!vg_mutexes[i].in_use)
261 return i;
262 }
263 VG_(printf)("vg_alloc_VgMutex: no free slots available\n");
264 VG_(printf)("Increase VG_N_MUTEXES, rebuild and try again.\n");
265 VG_(panic)("VG_N_MUTEXES is too low");
266 /*NOTREACHED*/
267}
268
269
270/* Copy the saved state of a thread into VG_(baseBlock), ready for it
271 to be run. */
272__inline__
273void VG_(load_thread_state) ( ThreadId tid )
274{
275 Int i;
276 VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
277 VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
278 VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
279 VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
280 VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
281 VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
282 VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
283 VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
284 VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
285 VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;
286
287 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
288 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];
289
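   /* The sh_* fields are the shadow registers maintained by the
      instrumentation (the validity state tracked alongside the real
      CPU registers); they are switched in and out per thread in
      exactly the same way as the real state. */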
290 VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
291 VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
292 VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
293 VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
294 VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
295 VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
296 VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
297 VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
298 VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
299}
300
301
302/* Copy the state of a thread from VG_(baseBlock), presumably after it
303 has been descheduled. For sanity-check purposes, fill the vacated
304 VG_(baseBlock) with garbage so as to make the system more likely to
305 fail quickly if we erroneously continue to poke around inside
306 VG_(baseBlock) without first doing a load_thread_state().
307*/
308__inline__
309void VG_(save_thread_state) ( ThreadId tid )
310{
311 Int i;
312 const UInt junk = 0xDEADBEEF;
313
314 vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
315 vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
316 vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
317 vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
318 vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
319 vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
320 vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
321 vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
322 vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
323 vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
324
325 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
326 vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
327
328 vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
329 vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
330 vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
331 vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
332 vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
333 vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
334 vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
335 vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
336 vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
337
338 /* Fill it up with junk. */
339 VG_(baseBlock)[VGOFF_(m_eax)] = junk;
340 VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
341 VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
342 VG_(baseBlock)[VGOFF_(m_edx)] = junk;
343 VG_(baseBlock)[VGOFF_(m_esi)] = junk;
344 VG_(baseBlock)[VGOFF_(m_edi)] = junk;
345 VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
346 VG_(baseBlock)[VGOFF_(m_esp)] = junk;
347 VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
348 VG_(baseBlock)[VGOFF_(m_eip)] = junk;
349
350 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
351 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
352}
353
354
355/* Run the thread tid for a while, and return a VG_TRC_* value to the
356 scheduler indicating what happened. */
357static
358UInt run_thread_for_a_while ( ThreadId tid )
359{
360 UInt trc = 0;
361 vg_assert(tid >= 0 && tid < VG_N_THREADS);
362 vg_assert(vg_threads[tid].status != VgTs_Empty);
363 vg_assert(VG_(bbs_to_go) > 0);
364
365 VG_(load_thread_state) ( tid );
366 if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
367 /* try this ... */
368 trc = VG_(run_innerloop)();
369 /* We get here if the client didn't take a fault. */
370 } else {
371 /* We get here if the client took a fault, which caused our
372 signal handler to longjmp. */
373 vg_assert(trc == 0);
374 trc = VG_TRC_UNRESUMABLE_SIGNAL;
375 }
376 VG_(save_thread_state) ( tid );
377 return trc;
378}
379
380
381/* Increment the LRU epoch counter. */
382static
383void increment_epoch ( void )
384{
385 VG_(current_epoch)++;
386 if (VG_(clo_verbosity) > 2) {
387 UInt tt_used, tc_used;
388 VG_(get_tt_tc_used) ( &tt_used, &tc_used );
389 VG_(message)(Vg_UserMsg,
390 "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
391 VG_(bbs_done),
392 VG_(this_epoch_in_count),
393 VG_(this_epoch_in_osize),
394 VG_(this_epoch_in_tsize),
395 VG_(this_epoch_out_count),
396 VG_(this_epoch_out_osize),
397 VG_(this_epoch_out_tsize),
398 tt_used, tc_used
399 );
400 }
401 VG_(this_epoch_in_count) = 0;
402 VG_(this_epoch_in_osize) = 0;
403 VG_(this_epoch_in_tsize) = 0;
404 VG_(this_epoch_out_count) = 0;
405 VG_(this_epoch_out_osize) = 0;
406 VG_(this_epoch_out_tsize) = 0;
407}
408
409
410/* Initialise the scheduler. Create a single "main" thread ready to
411 run, with special ThreadId of zero. This is called at startup; the
412   caller takes care that the client's state is parked in
413 VG_(baseBlock).
414*/
415void VG_(scheduler_init) ( void )
416{
417 Int i;
418 Addr startup_esp;
419 ThreadId tid_main;
420
421 startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
422 if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
423      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
424 startup_esp, VG_STARTUP_STACK_MASK);
425 VG_(panic)("unexpected %esp at startup");
426 }
427
428 for (i = 0; i < VG_N_THREADS; i++) {
429 vg_threads[i].stack_size = 0;
430 vg_threads[i].stack_base = (Addr)NULL;
431 }
432
433 for (i = 0; i < VG_N_WAITING_FDS; i++)
434 vg_waiting_fds[i].fd = -1; /* not in use */
435
436 for (i = 0; i < VG_N_MUTEXES; i++)
437 vg_mutexes[i].in_use = False;
438
439 /* Assert this is thread zero, which has certain magic
440 properties. */
441 tid_main = vg_alloc_ThreadState();
442 vg_assert(tid_main == 0);
443
444 vg_threads[tid_main].status = VgTs_Runnable;
445 vg_threads[tid_main].joiner = VG_INVALID_THREADID;
446 vg_threads[tid_main].retval = NULL; /* not important */
447
448 /* Copy VG_(baseBlock) state to tid_main's slot. */
449 VG_(save_thread_state) ( tid_main );
450}
451
452
453/* What if fd isn't a valid fd? */
454static
455void set_fd_nonblocking ( Int fd )
456{
457 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
458 vg_assert(!VG_(is_kerror)(res));
459 res |= VKI_O_NONBLOCK;
460 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
461 vg_assert(!VG_(is_kerror)(res));
462}
463
464static
465void set_fd_blocking ( Int fd )
466{
467 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
468 vg_assert(!VG_(is_kerror)(res));
469 res &= ~VKI_O_NONBLOCK;
470 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
471 vg_assert(!VG_(is_kerror)(res));
472}
473
474static
475Bool fd_is_blockful ( Int fd )
476{
477 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
478 vg_assert(!VG_(is_kerror)(res));
479 return (res & VKI_O_NONBLOCK) ? False : True;
480}
481
482
483
484/* Do a purely thread-local request for tid, and put the result in its
485 %EDX, without changing its scheduling state in any way, nor that of
486   any other threads.  Return True if the request could be handled that way.
487
488 If the request is non-trivial, return False; a more capable but
489 slower mechanism will deal with it.
490*/
491static
492Bool maybe_do_trivial_clientreq ( ThreadId tid )
493{
494# define SIMPLE_RETURN(vvv) \
495 { vg_threads[tid].m_edx = (vvv); \
496 return True; \
497 }
498
499 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
500 UInt req_no = arg[0];
501 switch (req_no) {
502 case VG_USERREQ__MALLOC:
503 SIMPLE_RETURN(
504 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocMalloc )
505 );
506 case VG_USERREQ__BUILTIN_NEW:
507 SIMPLE_RETURN(
508 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNew )
509 );
510 case VG_USERREQ__BUILTIN_VEC_NEW:
511 SIMPLE_RETURN(
512 (UInt)VG_(client_malloc) ( arg[1], Vg_AllocNewVec )
513 );
514 case VG_USERREQ__FREE:
515 VG_(client_free) ( (void*)arg[1], Vg_AllocMalloc );
516 SIMPLE_RETURN(0); /* irrelevant */
517 case VG_USERREQ__BUILTIN_DELETE:
518 VG_(client_free) ( (void*)arg[1], Vg_AllocNew );
519 SIMPLE_RETURN(0); /* irrelevant */
520 case VG_USERREQ__BUILTIN_VEC_DELETE:
521 VG_(client_free) ( (void*)arg[1], Vg_AllocNewVec );
522 SIMPLE_RETURN(0); /* irrelevant */
523 case VG_USERREQ__CALLOC:
524 SIMPLE_RETURN(
525 (UInt)VG_(client_calloc) ( arg[1], arg[2] )
526 );
527 case VG_USERREQ__REALLOC:
528 SIMPLE_RETURN(
529 (UInt)VG_(client_realloc) ( (void*)arg[1], arg[2] )
530 );
531 case VG_USERREQ__MEMALIGN:
532 SIMPLE_RETURN(
533 (UInt)VG_(client_memalign) ( arg[1], arg[2] )
534 );
535 default:
536 /* Too hard; wimp out. */
537 return False;
538 }
539# undef SIMPLE_RETURN
540}
541
542
543static
544void sched_do_syscall ( ThreadId tid )
545{
546 UInt saved_eax;
547 UInt res, syscall_no;
548 UInt fd;
549 Bool might_block, assumed_nonblocking;
550 Bool orig_fd_blockness;
551 Char msg_buf[100];
552
553 vg_assert(tid >= 0 && tid < VG_N_THREADS);
554 vg_assert(vg_threads[tid].status == VgTs_Runnable);
555
556 syscall_no = vg_threads[tid].m_eax; /* syscall number */
557
558 if (syscall_no == __NR_nanosleep) {
559 ULong t_now, t_awaken;
560 struct vki_timespec* req;
561 req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
562 t_now = VG_(read_microsecond_timer)();
563 t_awaken
564 = t_now
565 + (ULong)1000000ULL * (ULong)(req->tv_sec)
566 + (ULong)( (UInt)(req->tv_nsec) / 1000 );
567 vg_threads[tid].status = VgTs_Sleeping;
568 vg_threads[tid].awaken_at = t_awaken;
569 if (1) {
570 VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
571 t_now, t_awaken-t_now);
572 print_sched_event(tid, msg_buf);
573 }
574 /* Force the scheduler to run something else for a while. */
575 return;
576 }
577
578 switch (syscall_no) {
579 case __NR_read:
580 case __NR_write:
581 assumed_nonblocking
582 = False;
583 might_block
584 = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
585 break;
586 default:
587 might_block = False;
588 assumed_nonblocking = True;
589 }
590
591 if (assumed_nonblocking) {
592 /* We think it's non-blocking. Just do it in the normal way. */
593 VG_(perform_assumed_nonblocking_syscall)(tid);
594 /* The thread is still runnable. */
595 return;
596 }
597
598 /* It might block. Take evasive action. */
599 switch (syscall_no) {
600 case __NR_read:
601 case __NR_write:
602 fd = vg_threads[tid].m_ebx; break;
603 default:
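         /* Unreachable: only read and write are classified above as
            possibly-blocking, so no other syscall number can get here.
            The always-false assertion below makes that assumption loud. */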
604 vg_assert(3+3 == 7);
605 }
606
607 /* Set the fd to nonblocking, and do the syscall, which will return
608 immediately, in order to lodge a request with the Linux kernel.
609 We later poll for I/O completion using select(). */
610
611 orig_fd_blockness = fd_is_blockful(fd);
612 set_fd_nonblocking(fd);
613 vg_assert(!fd_is_blockful(fd));
614 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
615
616 /* This trashes the thread's %eax; we have to preserve it. */
617 saved_eax = vg_threads[tid].m_eax;
618 KERNEL_DO_SYSCALL(tid,res);
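   /* (KERNEL_DO_SYSCALL is defined outside this function; judging by its
       use here, it runs the real syscall with tid's register state and
       leaves the outcome both in res and in the thread's %EAX -- hence
       the save/restore of %EAX around this speculative call.) */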
619
620 /* Restore original blockfulness of the fd. */
621 if (orig_fd_blockness)
622 set_fd_blocking(fd);
623 else
624 set_fd_nonblocking(fd);
625
626 if (res != -VKI_EWOULDBLOCK) {
627 /* It didn't block; it went through immediately. So finish off
628 in the normal way. Don't restore %EAX, since that now
629 (correctly) holds the result of the call. */
630 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
631 /* We're still runnable. */
632 vg_assert(vg_threads[tid].status == VgTs_Runnable);
633
634 } else {
635
636 /* It would have blocked. First, restore %EAX to what it was
637 before our speculative call. */
638 vg_threads[tid].m_eax = saved_eax;
639 /* Put this fd in a table of fds on which we are waiting for
640 completion. The arguments for select() later are constructed
641 from this table. */
642 add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
643 /* Deschedule thread until an I/O completion happens. */
644 vg_threads[tid].status = VgTs_WaitFD;
645 if (1) {
646 VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
647 print_sched_event(tid, msg_buf);
648 }
649
650 }
651}
652
653
654/* Find out which of the fds in vg_waiting_fds are now ready to go, by
655   making enquiries with select(), and marking them as ready.  We have to
656   wait for the requesting threads to fall into the WaitFD state
657   before we can actually deliver the results, so this
658 procedure doesn't do that; complete_blocked_syscalls() does it.
659
660 It might seem odd that a thread which has done a blocking syscall
661 is not in WaitFD state; the way this can happen is if it initially
662 becomes WaitFD, but then a signal is delivered to it, so it becomes
663 Runnable for a while. In this case we have to wait for the
664 sighandler to return, whereupon the WaitFD state is resumed, and
665 only at that point can the I/O result be delivered to it. However,
666 this point may be long after the fd is actually ready.
667
668 So, poll_for_ready_fds() merely detects fds which are ready.
669 complete_blocked_syscalls() does the second half of the trick,
670 possibly much later: it delivers the results from ready fds to
671 threads in WaitFD state.
672*/
673void poll_for_ready_fds ( void )
674{
675 vki_ksigset_t saved_procmask;
676 vki_fd_set readfds;
677 vki_fd_set writefds;
678 vki_fd_set exceptfds;
679 struct vki_timeval timeout;
680 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
681 ThreadId tid;
682 Bool rd_ok, wr_ok, ex_ok;
683 Char msg_buf[100];
684
685 /* Awaken any sleeping threads whose sleep has expired. */
686 {
687 struct vki_timespec * rem;
688 ULong t_now = VG_(read_microsecond_timer)();
689 for (tid = 0; tid < VG_N_THREADS; tid++) {
690 if (vg_threads[tid].status != VgTs_Sleeping)
691 continue;
692 if (t_now >= vg_threads[tid].awaken_at) {
693 /* Resume this thread. Set to zero the remaining-time (second)
694 arg of nanosleep, since it's used up all its time. */
695 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
696 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
697 if (rem != NULL) {
698 rem->tv_sec = 0;
699 rem->tv_nsec = 0;
700 }
701 /* Make the syscall return 0 (success). */
702 vg_threads[tid].m_eax = 0;
703 /* Reschedule this thread. */
704 vg_threads[tid].status = VgTs_Runnable;
705 if (1) {
706 VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
707 t_now);
708 print_sched_event(tid, msg_buf);
709 }
710 }
711 }
712 }
713
714 timeout.tv_sec = 0;
715 timeout.tv_usec = 0;
716
717 VKI_FD_ZERO(&readfds);
718 VKI_FD_ZERO(&writefds);
719 VKI_FD_ZERO(&exceptfds);
720 fd_max = -1;
721 for (i = 0; i < VG_N_WAITING_FDS; i++) {
722 if (vg_waiting_fds[i].fd == -1 /* not in use */)
723 continue;
724 if (vg_waiting_fds[i].ready /* already ready? */)
725 continue;
726 fd = vg_waiting_fds[i].fd;
727 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
728 if (fd > fd_max)
729 fd_max = fd;
730 tid = vg_waiting_fds[i].tid;
731 vg_assert(tid >= 0 && tid < VG_N_THREADS);
732 syscall_no = vg_waiting_fds[i].syscall_no;
733 switch (syscall_no) {
734 case __NR_read:
735 VKI_FD_SET(fd, &readfds); break;
736 case __NR_write:
737 VKI_FD_SET(fd, &writefds); break;
738 default:
739 VG_(panic)("poll_for_ready_fds: unexpected syscall");
740 /*NOTREACHED*/
741 break;
742 }
743 }
744
745 /* BLOCK ALL SIGNALS. We don't want the complication of select()
746 getting interrupted. */
747 VG_(block_all_host_signals)( &saved_procmask );
748
749 n_ready = VG_(select)
750 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
751 if (VG_(is_kerror)(n_ready)) {
752 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
753 VG_(panic)("poll_for_ready_fds: select failed?!");
754 /*NOTREACHED*/
755 }
756
757 /* UNBLOCK ALL SIGNALS */
758 VG_(restore_host_signals)( &saved_procmask );
759
760 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
761
762 if (n_ready == 0)
763 return;
764
765 /* Inspect all the fds we know about, and handle any completions that
766 have happened. */
767 /*
768 VG_(printf)("\n\n");
769 for (fd = 0; fd < 100; fd++)
770 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
771 VG_(printf)("X"); } else { VG_(printf)("."); };
772 VG_(printf)("\n\nfd_max = %d\n", fd_max);
773 */
774
775 for (fd = 0; fd <= fd_max; fd++) {
776 rd_ok = VKI_FD_ISSET(fd, &readfds);
777 wr_ok = VKI_FD_ISSET(fd, &writefds);
778 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
779
780 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
781 if (n_ok == 0)
782 continue;
783 if (n_ok > 1) {
784 VG_(printf)("offending fd = %d\n", fd);
785 VG_(panic)("poll_for_ready_fds: multiple events on fd");
786 }
787
788 /* An I/O event completed for fd. Find the thread which
789 requested this. */
790 for (i = 0; i < VG_N_WAITING_FDS; i++) {
791 if (vg_waiting_fds[i].fd == -1 /* not in use */)
792 continue;
793 if (vg_waiting_fds[i].fd == fd)
794 break;
795 }
796
797 /* And a bit more paranoia ... */
798 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
799
800 /* Mark the fd as ready. */
801 vg_assert(! vg_waiting_fds[i].ready);
802 vg_waiting_fds[i].ready = True;
803 }
804}
805
806
807/* See comment attached to poll_for_ready_fds() for explanation. */
808void complete_blocked_syscalls ( void )
809{
810 Int fd, i, res, syscall_no;
811 ThreadId tid;
812 Char msg_buf[100];
813
814 /* Inspect all the outstanding fds we know about. */
815
816 for (i = 0; i < VG_N_WAITING_FDS; i++) {
817 if (vg_waiting_fds[i].fd == -1 /* not in use */)
818 continue;
819 if (! vg_waiting_fds[i].ready)
820 continue;
821
822 fd = vg_waiting_fds[i].fd;
823 tid = vg_waiting_fds[i].tid;
824 vg_assert(tid >= 0 && tid < VG_N_THREADS);
825
826 /* The thread actually has to be waiting for the I/O event it
827 requested before we can deliver the result! */
828 if (vg_threads[tid].status != VgTs_WaitFD)
829 continue;
830
831 /* Ok, actually do it! We can safely use %EAX as the syscall
832 number, because the speculative call made by
833 sched_do_syscall() doesn't change %EAX in the case where the
834 call would have blocked. */
835
836 syscall_no = vg_waiting_fds[i].syscall_no;
837 vg_assert(syscall_no == vg_threads[tid].m_eax);
838 KERNEL_DO_SYSCALL(tid,res);
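      /* (Same KERNEL_DO_SYSCALL as in sched_do_syscall(); this time the
          fd is known to be ready, so the call should complete without
          blocking.) */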
839 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
840
841 /* Reschedule. */
842 vg_threads[tid].status = VgTs_Runnable;
843 /* Mark slot as no longer in use. */
844 vg_waiting_fds[i].fd = -1;
845 /* pp_sched_status(); */
846 if (1) {
847 VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
848 print_sched_event(tid, msg_buf);
849 }
850 }
851}
852
853
854static
855void nanosleep_for_a_while ( void )
856{
857 Int res;
858 struct vki_timespec req;
859 struct vki_timespec rem;
860 req.tv_sec = 0;
861 req.tv_nsec = 20 * 1000 * 1000;
862 res = VG_(nanosleep)( &req, &rem );
863 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
864 vg_assert(res == 0);
865}
866
867
868/* ---------------------------------------------------------------------
869 The scheduler proper.
870 ------------------------------------------------------------------ */
871
872/* Run user-space threads until either
873 * Deadlock occurs
874   * One thread asks to shut down Valgrind
875 * The specified number of basic blocks has gone by.
876*/
877VgSchedReturnCode VG_(scheduler) ( void )
878{
879 ThreadId tid, tid_next;
880 UInt trc;
881 UInt dispatch_ctr_SAVED;
882 Int done_this_time, n_in_fdwait;
883 Char msg_buf[100];
884 Addr trans_addr;
885
886 /* For the LRU structures, records when the epoch began. */
887 ULong lru_epoch_started_at = 0;
888
889 /* Start with the root thread. tid in general indicates the
890 currently runnable/just-finished-running thread. */
891 tid = 0;
892
893 /* This is the top level scheduler loop. It falls into three
894 phases. */
895 while (True) {
896
897 /* ======================= Phase 1 of 3 =======================
898 Handle I/O completions and signals. This may change the
899 status of various threads. Then select a new thread to run,
900 or declare deadlock, or sleep if there are no runnable
901 threads but some are blocked on I/O. */
902
903 /* Age the LRU structures if an epoch has been completed. */
904 if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
905 lru_epoch_started_at = VG_(bbs_done);
906 increment_epoch();
907 }
908
909 /* Was a debug-stop requested? */
910 if (VG_(bbs_to_go) == 0)
911 goto debug_stop;
912
913 /* Do the following loop until a runnable thread is found, or
914 deadlock is detected. */
915 while (True) {
916
917 /* For stats purposes only. */
918 VG_(num_scheduling_events_MAJOR) ++;
919
920 /* See if any I/O operations which we were waiting for have
921 completed, and, if so, make runnable the relevant waiting
922 threads. */
923 poll_for_ready_fds();
924 complete_blocked_syscalls();
925
926 /* See if there are any signals which need to be delivered. If
927 so, choose thread(s) to deliver them to, and build signal
928 delivery frames on those thread(s) stacks. */
929 VG_(deliver_signals)( 0 /*HACK*/ );
930 VG_(do_sanity_checks)(0 /*HACK*/, False);
931
932 /* Try and find a thread (tid) to run. */
933 tid_next = tid;
934 n_in_fdwait = 0;
935 while (True) {
936 tid_next++;
937 if (tid_next >= VG_N_THREADS) tid_next = 0;
938 if (vg_threads[tid_next].status == VgTs_WaitFD)
939 n_in_fdwait ++;
940 if (vg_threads[tid_next].status == VgTs_Runnable)
941 break; /* We can run this one. */
942 if (tid_next == tid)
943 break; /* been all the way round */
944 }
945 tid = tid_next;
946
947 if (vg_threads[tid].status == VgTs_Runnable) {
948 /* Found a suitable candidate. Fall out of this loop, so
949            we can advance to Phase 2 of the scheduler: actually
950 running the thread. */
951 break;
952 }
953
954 /* We didn't find a runnable thread. Now what? */
955 if (n_in_fdwait == 0) {
956            /* No runnable threads and none in fd-wait either.  Not
957 good. */
958 pp_sched_status();
959 return VgSrc_Deadlock;
960 }
961
962         /* At least one thread is in an fd-wait state.  Delay for a
963 while, and go round again, in the hope that eventually a
964 thread becomes runnable. */
965 nanosleep_for_a_while();
966 // pp_sched_status();
967 // VG_(printf)(".\n");
968 }
969
970
971 /* ======================= Phase 2 of 3 =======================
972 Wahey! We've finally decided that thread tid is runnable, so
973         we now do that.  Run it for as much of a quantum as possible.
974         Trivial requests are handled and the thread continues.  The
975         aim is not to go through Phase 1 too often, since it is expensive.  */
976
977 if (0)
978 VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);
979
980 /* Figure out how many bbs to ask vg_run_innerloop to do. Note
981 that it decrements the counter before testing it for zero, so
982 that if VG_(dispatch_ctr) is set to N you get at most N-1
983 iterations. Also this means that VG_(dispatch_ctr) must
984 exceed zero before entering the innerloop. Also also, the
985 decrement is done before the bb is actually run, so you
986 always get at least one decrement even if nothing happens.
987 */
988 if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
989 VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
990 else
991 VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
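      /* For example: setting VG_(dispatch_ctr) to VG_SCHEDULING_QUANTUM+1
         lets the thread run at most VG_SCHEDULING_QUANTUM basic blocks
         before the dispatcher returns VG_TRC_INNER_COUNTERZERO. */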
992
993 /* ... and remember what we asked for. */
994 dispatch_ctr_SAVED = VG_(dispatch_ctr);
995
996 /* Actually run thread tid. */
997 while (True) {
998
999 /* For stats purposes only. */
1000 VG_(num_scheduling_events_MINOR) ++;
1001
1002 if (0)
1003 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
1004 tid, VG_(dispatch_ctr) - 1 );
1005
1006 trc = run_thread_for_a_while ( tid );
1007
1008 /* Deal quickly with trivial scheduling events, and resume the
1009 thread. */
1010
1011 if (trc == VG_TRC_INNER_FASTMISS) {
1012 vg_assert(VG_(dispatch_ctr) > 0);
1013
1014 /* Trivial event. Miss in the fast-cache. Do a full
1015 lookup for it. */
1016 trans_addr
1017 = VG_(search_transtab) ( vg_threads[tid].m_eip );
1018 if (trans_addr == (Addr)0) {
1019 /* Not found; we need to request a translation. */
1020 VG_(create_translation_for)( vg_threads[tid].m_eip );
1021 trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
1022 if (trans_addr == (Addr)0)
1023 VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
1024 }
1025 continue; /* with this thread */
1026 }
1027
1028 if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
1029 Bool is_triv = maybe_do_trivial_clientreq(tid);
1030 if (is_triv) {
1031 /* NOTE: a trivial request is something like a call to
1032 malloc() or free(). It DOES NOT change the
1033 Runnability of this thread nor the status of any
1034 other thread; it is purely thread-local. */
1035 continue; /* with this thread */
1036 }
1037 }
1038
1039 /* It's a non-trivial event. Give up running this thread and
1040 handle things the expensive way. */
1041 break;
1042 }
1043
1044 /* ======================= Phase 3 of 3 =======================
1045 Handle non-trivial thread requests, mostly pthread stuff. */
1046
1047 /* Ok, we've fallen out of the dispatcher for a
1048 non-completely-trivial reason. First, update basic-block
1049 counters. */
1050
1051 done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
1052 vg_assert(done_this_time >= 0);
1053 VG_(bbs_to_go) -= (ULong)done_this_time;
1054 VG_(bbs_done) += (ULong)done_this_time;
1055
1056 if (0 && trc != VG_TRC_INNER_FASTMISS)
1057 VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
1058 tid, done_this_time, (Int)trc );
1059
1060 if (0 && trc != VG_TRC_INNER_FASTMISS)
1061 VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
1062 tid, VG_(bbs_done),
1063 name_of_sched_event(trc) );
1064
1065 /* Examine the thread's return code to figure out why it
1066 stopped, and handle requests. */
1067
1068 switch (trc) {
1069
1070 case VG_TRC_INNER_FASTMISS:
1071 VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
1072 /*NOTREACHED*/
1073 break;
1074
1075 case VG_TRC_INNER_COUNTERZERO:
1076 /* Timeslice is out. Let a new thread be scheduled,
1077 simply by doing nothing, causing us to arrive back at
1078 Phase 1. */
1079 if (VG_(bbs_to_go) == 0) {
1080 goto debug_stop;
1081 }
1082 vg_assert(VG_(dispatch_ctr) == 0);
1083 break;
1084
1085 case VG_TRC_UNRESUMABLE_SIGNAL:
1086 /* It got a SIGSEGV/SIGBUS, which we need to deliver right
1087 away. Again, do nothing, so we wind up back at Phase
1088 1, whereupon the signal will be "delivered". */
1089 break;
1090
1091 case VG_TRC_EBP_JMP_SPECIAL: {
1092 Addr next_eip = vg_threads[tid].m_eip;
1093 if (next_eip == (Addr) & VG_(signalreturn_bogusRA)) {
1094 /* vthread tid is returning from a signal handler;
1095 modify its stack/regs accordingly. */
1096 VG_(signal_returns)(tid);
1097 }
1098 else
1099 if (next_eip == (Addr) & VG_(shutdown)) {
1100 return VgSrc_Shutdown;
1101 } else {
1102 VG_(panic)("vg_schedule: VG_TRC_EBP_JMP_SPECIAL");
1103 }
1104 break;
1105 }
1106
1107 case VG_TRC_EBP_JMP_SYSCALL:
1108 /* Do a syscall for the vthread tid. This could cause it
1109 to become non-runnable. */
1110 sched_do_syscall(tid);
1111 break;
1112
1113 case VG_TRC_EBP_JMP_CLIENTREQ:
1114 /* Do a client request for the vthread tid. Note that
1115 some requests will have been handled by
1116 maybe_do_trivial_clientreq(), so we don't expect to see
1117 those here.
1118 */
1119 if (0) {
1120 VG_(sprintf)(msg_buf, "request 0x%x",
1121 vg_threads[tid].m_eax);
1122 print_sched_event(tid, msg_buf);
1123 }
1124 /* Do a non-trivial client request for thread tid. tid's
1125 %EAX points to a short vector of argument words, the
1126 first of which is the request code. The result of the
1127 request is put in tid's %EDX. Alternatively, perhaps
1128 the request causes tid to become non-runnable and/or
1129 other blocked threads become runnable. In general we
1130 can and often do mess with the state of arbitrary
1131 threads at this point. */
1132 do_nontrivial_clientreq(tid);
1133 break;
1134
1135 default:
1136 VG_(printf)("\ntrc = %d\n", trc);
1137 VG_(panic)("VG_(scheduler), phase 3: "
1138 "unexpected thread return code");
1139 /* NOTREACHED */
1140 break;
1141
1142 } /* switch (trc) */
1143
1144 /* That completes Phase 3 of 3. Return now to the top of the
1145 main scheduler loop, to Phase 1 of 3. */
1146
1147 } /* top-level scheduler loop */
1148
1149
1150 /* NOTREACHED */
1151 VG_(panic)("scheduler: post-main-loop ?!");
1152 /* NOTREACHED */
1153
1154 debug_stop:
1155 /* If we exited because of a debug stop, print the translation
1156 of the last block executed -- by translating it again, and
1157 throwing away the result. */
1158 VG_(printf)(
1159 "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
1160 VG_(translate)( vg_threads[tid].m_eip, NULL, NULL, NULL );
1161 VG_(printf)("\n");
1162 VG_(printf)(
1163 "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
1164
1165 return VgSrc_BbsDone;
1166}
1167
1168
1169/* ---------------------------------------------------------------------
1170 The pthread implementation.
1171 ------------------------------------------------------------------ */
1172
1173#include <pthread.h>
1174#include <errno.h>
1175
1176#if !defined(PTHREAD_STACK_MIN)
1177# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1178#endif
1179
1180/* /usr/include/bits/pthreadtypes.h:
1181 typedef unsigned long int pthread_t;
1182*/
1183
1184/* RUNS ON SIMD CPU!
1185 This is the return address that pthread_create uses.
1186*/
1187static
1188void do_pthread_create_bogusRA ( void )
1189{
1190 /* Tell the scheduler that this thread has returned. */
1191 Int res;
1192 VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
1193 VG_USERREQ__PTHREAD_CREATE_BOGUSRA,
1194 0, 0, 0, 0);
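   /* (VALGRIND_MAGIC_SEQUENCE, from valgrind.h, is the client-request
       mechanism: the dispatcher recognises it and passes the request to
       the scheduler, which handles it in do_pthread_create_exit_by_returning()
       and never reschedules this thread -- so the panic below should be
       unreachable.) */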
1195 VG_(panic)("do_pthread_create_bogusRA: shouldn't be still alive!");
1196}
1197
1198
1199static
1200void do_pthread_cancel ( ThreadId tid_canceller,
1201 pthread_t tid_cancellee )
1202{
1203 Char msg_buf[100];
1204   /* We want to make it appear that this thread has returned to
1205 do_pthread_create_bogusRA with PTHREAD_CANCELED as the
1206 return value. So: simple: put PTHREAD_CANCELED into %EAX
1207 and &do_pthread_create_bogusRA into %EIP and keep going! */
1208 if (1) {
1209 VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1210 print_sched_event(tid_cancellee, msg_buf);
1211 }
1212 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
1213 vg_threads[tid_cancellee].m_eip = (UInt)&do_pthread_create_bogusRA;
1214 vg_threads[tid_cancellee].status = VgTs_Runnable;
1215}
1216
1217
1218
1219/* Thread tid is exiting, by returning from the function it was
1220 created with. The main complication here is to resume any thread
1221 waiting to join with this one. */
1222static
1223void do_pthread_create_exit_by_returning ( ThreadId tid )
1224{
1225 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1226 UInt* jnr_args;
1227 void** jnr_thread_return;
1228 Char msg_buf[100];
1229
1230 /* Mark it as not in use. Leave the stack in place so the next
1231 user of this slot doesn't reallocate it. */
1232 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1233 vg_assert(vg_threads[tid].status != VgTs_Empty);
1234
1235 vg_threads[tid].retval = (void*)vg_threads[tid].m_eax;
1236
1237 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1238 /* No one has yet done a join on me */
1239 vg_threads[tid].status = VgTs_WaitJoiner;
1240 if (1) {
1241 VG_(sprintf)(msg_buf,
1242 "root fn returns, waiting for a call pthread_join(%d)",
1243 tid);
1244 print_sched_event(tid, msg_buf);
1245 }
1246 } else {
1247      /* Someone is waiting; make their join call return with success,
1248 putting my exit code in the place specified by the caller's
1249 thread_return param. This is all very horrible, since we
1250 need to consult the joiner's arg block -- pointed to by its
1251 %EAX -- in order to extract the 2nd param of its pthread_join
1252         call.  TODO: free the slot properly (also below).
1253 */
1254 jnr = vg_threads[tid].joiner;
1255 vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
1256 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1257 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1258 jnr_thread_return = (void**)(jnr_args[2]);
1259 if (jnr_thread_return != NULL)
1260 *jnr_thread_return = vg_threads[tid].retval;
1261 vg_threads[jnr].m_edx = 0; /* success */
1262 vg_threads[jnr].status = VgTs_Runnable;
1263 vg_threads[tid].status = VgTs_Empty; /* bye! */
1264 if (1) {
1265 VG_(sprintf)(msg_buf,
1266 "root fn returns, to find a waiting pthread_join(%d)", tid);
1267 print_sched_event(tid, msg_buf);
1268 VG_(sprintf)(msg_buf,
1269 "my pthread_join(%d) returned; resuming", tid);
1270 print_sched_event(jnr, msg_buf);
1271 }
1272 }
1273
1274 /* Return value is irrelevant; this thread will not get
1275 rescheduled. */
1276}
1277
1278
1279static
1280void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
1281{
1282 Char msg_buf[100];
1283
1284 /* jee, the joinee, is the thread specified as an arg in thread
1285 tid's call to pthread_join. So tid is the join-er. */
1286 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1287 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1288
1289 if (jee == tid) {
1290 vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
1291 vg_threads[tid].status = VgTs_Runnable;
1292 return;
1293 }
1294
1295 if (jee < 0
1296 || jee >= VG_N_THREADS
1297 || vg_threads[jee].status == VgTs_Empty) {
1298 /* Invalid thread to join to. */
1299 vg_threads[tid].m_edx = EINVAL;
1300 vg_threads[tid].status = VgTs_Runnable;
1301 return;
1302 }
1303
1304 if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
1305 /* Someone already did join on this thread */
1306 vg_threads[tid].m_edx = EINVAL;
1307 vg_threads[tid].status = VgTs_Runnable;
1308 return;
1309 }
1310
1311 /* if (vg_threads[jee].detached) ... */
1312
1313 /* Perhaps the joinee has already finished? If so return
1314 immediately with its return code, and free up the slot. TODO:
1315 free it properly (also above). */
1316 if (vg_threads[jee].status == VgTs_WaitJoiner) {
1317 vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
1318 vg_threads[tid].m_edx = 0; /* success */
1319 if (thread_return != NULL)
1320 *thread_return = vg_threads[jee].retval;
1321 vg_threads[tid].status = VgTs_Runnable;
1322 vg_threads[jee].status = VgTs_Empty; /* bye! */
1323 if (1) {
1324 VG_(sprintf)(msg_buf,
1325 "someone called pthread_join() on me; bye!");
1326 print_sched_event(jee, msg_buf);
1327 VG_(sprintf)(msg_buf,
1328 "my pthread_join(%d) returned immediately",
1329 jee );
1330 print_sched_event(tid, msg_buf);
1331 }
1332 return;
1333 }
1334
1335 /* Ok, so we'll have to wait on jee. */
1336 vg_threads[jee].joiner = tid;
1337 vg_threads[tid].status = VgTs_WaitJoinee;
1338 if (1) {
1339 VG_(sprintf)(msg_buf,
1340 "blocking on call of pthread_join(%d)", jee );
1341 print_sched_event(tid, msg_buf);
1342 }
1343 /* So tid's join call does not return just now. */
1344}
1345
1346
1347static
1348void do_pthread_create ( ThreadId parent_tid,
1349 pthread_t* thread,
1350 pthread_attr_t* attr,
1351 void* (*start_routine)(void *),
1352 void* arg )
1353{
1354 Addr new_stack;
1355 UInt new_stk_szb;
1356 ThreadId tid;
1357 Char msg_buf[100];
1358
1359 /* Paranoia ... */
1360 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1361
1362 vg_assert(vg_threads[parent_tid].status != VgTs_Empty);
1363
1364 tid = vg_alloc_ThreadState();
1365
1366 /* If we've created the main thread's tid, we're in deep trouble :) */
1367 vg_assert(tid != 0);
1368
1369 /* Copy the parent's CPU state into the child's, in a roundabout
1370 way (via baseBlock). */
1371 VG_(load_thread_state)(parent_tid);
1372 VG_(save_thread_state)(tid);
1373
1374 /* Consider allocating the child a stack, if the one it already has
1375 is inadequate. */
1376 new_stk_szb = PTHREAD_STACK_MIN;
1377
1378 if (new_stk_szb > vg_threads[tid].stack_size) {
1379 /* Again, for good measure :) We definitely don't want to be
1380 allocating a stack for the main thread. */
1381 vg_assert(tid != 0);
1382 /* for now, we don't handle the case of anything other than
1383 assigning it for the first time. */
1384 vg_assert(vg_threads[tid].stack_size == 0);
1385 vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
1386 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
1387 vg_threads[tid].stack_base = new_stack;
1388 vg_threads[tid].stack_size = new_stk_szb;
1389 vg_threads[tid].m_esp
1390 = new_stack + new_stk_szb
1391 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1392 }
1393 if (VG_(clo_instrument))
1394 VGM_(make_noaccess)( vg_threads[tid].m_esp,
1395 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1396
1397 /* push arg */
1398 vg_threads[tid].m_esp -= 4;
1399 * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;
1400
1401 /* push (magical) return address */
1402 vg_threads[tid].m_esp -= 4;
1403 * (UInt*)(vg_threads[tid].m_esp) = (UInt)do_pthread_create_bogusRA;
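   /* The child's stack now mimics an ordinary call frame: the fake return
      address is at %esp and arg is at %esp+4, so when start_routine
      eventually returns, control lands in do_pthread_create_bogusRA. */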
1404
1405 if (VG_(clo_instrument))
1406 VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );
1407
1408 /* this is where we start */
1409 vg_threads[tid].m_eip = (UInt)start_routine;
1410
1411 if (1) {
1412 VG_(sprintf)(msg_buf,
1413 "new thread, created by %d", parent_tid );
1414 print_sched_event(tid, msg_buf);
1415 }
1416
1417 /* store the thread id in *thread. */
1418 // if (VG_(clo_instrument))
1419 // ***** CHECK *thread is writable
1420 *thread = (pthread_t)tid;
1421
1422 /* return zero */
1423 vg_threads[tid].joiner = VG_INVALID_THREADID;
1424 vg_threads[tid].status = VgTs_Runnable;
1425 vg_threads[tid].m_edx = 0; /* success */
1426}
1427
1428
1429/* Horrible hacks to do with pthread_mutex_t: the real pthread_mutex_t
1430 is a struct with at least 5 words:
1431 typedef struct
1432 {
1433 int __m_reserved; -- Reserved for future use
1434 int __m_count; -- Depth of recursive locking
1435 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1436 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1437 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1438 } pthread_mutex_t;
1439 Ours is just a single word, an index into vg_mutexes[].
1440 For now I'll park it in the __m_reserved field.
1441
1442 Uninitialised mutexes (PTHREAD_MUTEX_INITIALIZER) all have
1443 a zero __m_count field (see /usr/include/pthread.h). So I'll
1444 use zero to mean non-inited, and 1 to mean inited.
1445
1446 How convenient.
1447*/
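
/* Schematically, the lifecycle this file assumes (a sketch, not real code):

      pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
         // __m_count == 0 : not yet known to this scheduler
      initialise_mutex(&m);
         // __m_count := 1 (initialised), __m_reserved := index into vg_mutexes[]
*/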
1448
1449static
1450void initialise_mutex ( pthread_mutex_t *mutex )
1451{
1452 MutexId mid;
1453   /* vg_alloc_VgMutex aborts if we can't allocate a mutex, for
1454 whatever reason. */
1455VG_(printf)("initialise_mutex %p\n", mutex);
1456 mid = vg_alloc_VgMutex();
1457 vg_mutexes[mid].in_use = True;
1458 vg_mutexes[mid].held = False;
1459 vg_mutexes[mid].owner = VG_INVALID_THREADID; /* irrelevant */
1460 mutex->__m_reserved = mid;
1461 mutex->__m_count = 1; /* initialised */
1462}
1463
1464/* Allocate a new MutexId and write it into *mutex. Ideally take
1465 notice of the attributes in *mutexattr. */
1466static
1467void do_pthread_mutex_init ( ThreadId tid,
1468 pthread_mutex_t *mutex,
1469 const pthread_mutexattr_t *mutexattr)
1470{
1471 /* Paranoia ... */
1472VG_(printf)("mutex_init %d %p %p\n", tid, mutex, mutexattr);
1473
1474 vg_assert(sizeof(pthread_mutex_t) >= sizeof(UInt));
1475
1476 initialise_mutex(mutex);
1477 /*
1478 RETURN VALUE
1479 pthread_mutex_init always returns 0. The other mutex functions
1480 return 0 on success and a non-zero error code on error.
1481 */
1482 /* THIS THREAD returns with 0. */
1483 vg_threads[tid].m_edx = 0;
1484}
1485
1486
1487static
1488void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1489{
1490 MutexId mid;
1491 Char msg_buf[100];
1492
1493VG_(printf)("mutex_lock %d %p\n", tid, mutex);
1494
1495 /* *mutex contains the MutexId, or one of the magic values
1496 PTHREAD_*MUTEX_INITIALIZER*, indicating we need to initialise it
1497      now.  See comment(s) above re use of __m_count to indicate
1498 initialisation status.
1499 */
1500
1501 /* POSIX doesn't mandate this, but for sanity ... */
1502 if (mutex == NULL) {
1503 vg_threads[tid].m_edx = EINVAL;
1504 return;
1505 }
1506
1507 if (mutex->__m_count == 0) {
1508 initialise_mutex(mutex);
1509 }
1510
1511 mid = mutex->__m_reserved;
1512 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1513 vg_threads[tid].m_edx = EINVAL;
1514 return;
1515 }
1516
1517 /* Assert initialised. */
1518 vg_assert(mutex->__m_count == 1);
1519
1520 /* Assume tid valid. */
1521 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1522
1523 if (vg_mutexes[mid].held) {
1524 if (vg_mutexes[mid].owner == tid) {
1525 vg_threads[tid].m_edx = EDEADLK;
1526 return;
1527 }
1528 /* Someone else has it; we have to wait. */
1529 vg_threads[tid].status = VgTs_WaitMX;
1530 vg_threads[tid].waited_on_mid = mid;
1531 /* No assignment to %EDX, since we're blocking. */
1532 if (1) {
1533 VG_(sprintf)(msg_buf, "wait for mutex %d", mid );
1534 print_sched_event(tid, msg_buf);
1535 }
1536 } else {
1537 /* We get it! */
1538 vg_mutexes[mid].held = True;
1539 vg_mutexes[mid].owner = tid;
1540 /* return 0 (success). */
1541 vg_threads[tid].m_edx = 0;
1542 }
1543}
1544
1545
1546static
1547void do_pthread_mutex_unlock ( ThreadId tid,
1548 pthread_mutex_t *mutex )
1549{
1550 MutexId mid;
1551 Int i;
1552 Char msg_buf[100];
1553
1554VG_(printf)("mutex_unlock %d %p\n", tid, mutex);
1555
1556 if (mutex == NULL
1557 || mutex->__m_count != 1) {
1558 vg_threads[tid].m_edx = EINVAL;
1559 return;
1560 }
1561
1562 mid = mutex->__m_reserved;
1563 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1564 vg_threads[tid].m_edx = EINVAL;
1565 return;
1566 }
1567
1568 /* Assume tid valid */
1569 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1570
1571 /* Barf if we don't currently hold the mutex. */
1572 if (!vg_mutexes[mid].held || vg_mutexes[mid].owner != tid) {
1573 vg_threads[tid].m_edx = EPERM;
1574 return;
1575 }
1576
1577 /* Find some arbitrary thread waiting on this mutex, and make it
1578 runnable. If none are waiting, mark the mutex as not held. */
1579 for (i = 0; i < VG_N_THREADS; i++) {
1580 if (vg_threads[i].status == VgTs_Empty)
1581 continue;
1582 if (vg_threads[i].status == VgTs_WaitMX
1583 && vg_threads[i].waited_on_mid == mid)
1584 break;
1585 }
1586
1587 vg_assert(i <= VG_N_THREADS);
1588 if (i == VG_N_THREADS) {
1589 /* Nobody else is waiting on it. */
1590 vg_mutexes[mid].held = False;
1591 } else {
1592 /* Notionally transfer the hold to thread i, whose
1593 pthread_mutex_lock() call now returns with 0 (success). */
1594 vg_mutexes[mid].owner = i;
1595 vg_threads[i].status = VgTs_Runnable;
1596 vg_threads[i].m_edx = 0; /* pth_lock() success */
1597 if (1) {
1598 VG_(sprintf)(msg_buf, "acquire mutex %d, resume", mid );
1599            print_sched_event(i, msg_buf);
1600 }
1601 }
1602
1603 /* In either case, our (tid's) pth_unlock() returns with 0
1604 (success). */
1605 vg_threads[tid].m_edx = 0; /* Success. */
1606}
1607
1608
1609static void do_pthread_mutex_destroy ( ThreadId tid,
1610 pthread_mutex_t *mutex )
1611{
1612 MutexId mid;
1613
1614VG_(printf)("mutex_destroy %d %p\n", tid, mutex);
1615
1616 if (mutex == NULL
1617 || mutex->__m_count != 1) {
1618 vg_threads[tid].m_edx = EINVAL;
1619 return;
1620 }
1621
1622 mid = mutex->__m_reserved;
1623 if (mid < 0 || mid >= VG_N_MUTEXES || !vg_mutexes[mid].in_use) {
1624 vg_threads[tid].m_edx = EINVAL;
1625 return;
1626 }
1627
1628 /* Assume tid valid */
1629 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1630
1631 /* Barf if the mutex is currently held. */
1632 if (vg_mutexes[mid].held) {
1633 vg_threads[tid].m_edx = EBUSY;
1634 return;
1635 }
1636
1637 mutex->__m_count = 0; /* uninitialised */
1638 vg_mutexes[mid].in_use = False;
1639 vg_threads[tid].m_edx = 0;
1640}
1641
1642
1643/* ---------------------------------------------------------------------
1644 Handle non-trivial client requests.
1645 ------------------------------------------------------------------ */
1646
1647static
1648void do_nontrivial_clientreq ( ThreadId tid )
1649{
1650 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
1651 UInt req_no = arg[0];
1652 switch (req_no) {
1653
1654 case VG_USERREQ__PTHREAD_CREATE:
1655 do_pthread_create( tid,
1656 (pthread_t*)arg[1],
1657 (pthread_attr_t*)arg[2],
1658 (void*(*)(void*))arg[3],
1659 (void*)arg[4] );
1660 break;
1661
1662 case VG_USERREQ__PTHREAD_CREATE_BOGUSRA:
1663 do_pthread_create_exit_by_returning( tid );
1664 break;
1665
1666 case VG_USERREQ__PTHREAD_JOIN:
1667 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
1668 break;
1669
1670 /* Sigh ... this probably will cause huge numbers of major
1671 (expensive) scheduling events, for no real reason.
1672 Perhaps should be classified as a trivial-request. */
1673 case VG_USERREQ__PTHREAD_GET_THREADID:
1674 vg_threads[tid].m_edx = tid;
1675 break;
1676
1677 case VG_USERREQ__PTHREAD_MUTEX_INIT:
1678 do_pthread_mutex_init( tid,
1679 (pthread_mutex_t *)(arg[1]),
1680 (pthread_mutexattr_t *)(arg[2]) );
1681 break;
1682
1683 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
1684 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
1685 break;
1686
1687 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
1688 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
1689 break;
1690
1691 case VG_USERREQ__PTHREAD_MUTEX_DESTROY:
1692 do_pthread_mutex_destroy( tid, (pthread_mutex_t *)(arg[1]) );
1693 break;
1694
1695 case VG_USERREQ__PTHREAD_CANCEL:
1696 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
1697 break;
1698
1699 case VG_USERREQ__MAKE_NOACCESS:
1700 case VG_USERREQ__MAKE_WRITABLE:
1701 case VG_USERREQ__MAKE_READABLE:
1702 case VG_USERREQ__DISCARD:
1703 case VG_USERREQ__CHECK_WRITABLE:
1704 case VG_USERREQ__CHECK_READABLE:
1705 case VG_USERREQ__MAKE_NOACCESS_STACK:
1706 case VG_USERREQ__RUNNING_ON_VALGRIND:
1707 case VG_USERREQ__DO_LEAK_CHECK:
1708 vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
1709 break;
1710
1711 default:
1712 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
1713 VG_(panic)("handle_private_client_pthread_request: "
1714 "unknown request");
1715 /*NOTREACHED*/
1716 break;
1717 }
1718}
1719
1720
1721/*--------------------------------------------------------------------*/
1722/*--- end vg_scheduler.c ---*/
1723/*--------------------------------------------------------------------*/