/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

   Note!  This pthreads implementation is so poor as to not be
   suitable for use by anyone at all!

   - Currently, when a signal handler is run, just the
     ThreadStatus.status field is saved in the signal frame, along
     with the CPU state.  Question: should I also save and restore:
        ThreadStatus.joiner
        ThreadStatus.waited_on_mid
        ThreadStatus.awaken_at
        ThreadStatus.retval
     Currently unsure, and so am not doing so.

   - Signals interrupting read/write and nanosleep: SA_RESTART settings.
     Read/write correctly return with EINTR when SA_RESTART isn't
     specified and they are interrupted by a signal.  nanosleep just
     pretends signals don't exist -- should be fixed.

   - Read/write syscall starts: don't crap out when the initial
     nonblocking read/write returns an error.

   - Get rid of restrictions re use of sigaltstack; they are no longer
     needed.
*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock). */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];

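/* A minimal illustrative sketch, not part of the original logic and
   not called anywhere below: locating the vg_waiting_fds slot for a
   given fd, relying on the convention above that fd == -1 marks an
   unused slot.  The scheduler performs equivalent searches inline;
   the helper name is invented purely for illustration. */
static Int find_waiting_fd_slot ( Int fd )
{
   Int i;
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == fd)
         return i;   /* slot i holds the pending request on fd */
   return -1;        /* no thread is waiting on this fd */
}
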
/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static __inline__
Bool is_valid_tid ( ThreadId tid )
{
   /* tid is unsigned, hence no < 0 test. */
   if (tid >= VG_N_THREADS) return False;
   if (vg_threads[tid].status == VgTs_Empty) return False;
   return True;
}


/* For constructing error messages only: try and identify a thread
   whose stack this address currently falls within, or return
   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
   with any currently VG_(baseBlock)-resident thread.
*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock). */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (vg_threads[tid].m_esp <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
   }
   return VG_INVALID_THREADID;
}


/* Print the scheduler status. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable\n"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD\n"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)\n",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee\n"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping\n"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, vg_threads[i].m_eip,
                              vg_threads[i].m_ebp ));
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}



static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 0; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}


ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   vg_tid_currently_in_baseBlock = tid;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}


/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of zero.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0; i < VG_N_THREADS; i++) {
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid        = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 0);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].q_next = VG_INVALID_THREADID;
   vg_threads[tid_main].retval = NULL; /* not important */
   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4 ??? */;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* So now ... */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}


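/* A small usage sketch (assumed, not part of the original code): how
   the three helpers above combine to make a speculative nonblocking
   attempt on an fd and then restore its original mode -- the pattern
   sched_do_syscall() below follows inline.  The helper name and the
   action callback are invented for illustration only. */
static void with_fd_nonblocking ( Int fd, void (*action)(Int) )
{
   Bool orig_blockness = fd_is_blockful(fd);  /* remember old mode */
   set_fd_nonblocking(fd);
   action(fd);                                /* e.g. the syscall   */
   if (orig_blockness)
      set_fd_blocking(fd);                    /* put it back        */
   else
      set_fd_nonblocking(fd);
}
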
/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)      \
      { tst->m_edx = (vvv);       \
        return True;              \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}


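/* For reference, a sketch (assumed, not taken from the client-side
   sources) of what a trivial request looks like from the client's
   end: a small word vector whose address is passed in %EAX, with the
   request code first and the result coming back in %EDX.

      UInt arg[2];
      arg[0] = VG_USERREQ__MALLOC;   (the request code)
      arg[1] = 100;                  (number of bytes wanted)
      ... %EAX := (UInt)&arg[0]; trap to valgrind; result in %EDX ...
*/
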
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select(). */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table. */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   t_now = VG_(read_microsecond_timer)();
   for (tid = 0; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Sleeping)
         continue;
      if (t_now >= vg_threads[tid].awaken_at) {
         /* Resume this thread.  Set to zero the remaining-time (second)
            arg of nanosleep, since it's used up all its time. */
         vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
         rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
         if (rem != NULL) {
            rem->tv_sec = 0;
            rem->tv_nsec = 0;
         }
         /* Make the syscall return 0 (success). */
         vg_threads[tid].m_eax = 0;
         /* Reschedule this thread. */
         vg_threads[tid].status = VgTs_Runnable;
         if (VG_(clo_trace_sched)) {
            VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                  t_now);
            print_sched_event(tid, msg_buf);
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
        VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(tid >= 0 && tid < VG_N_THREADS);

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
   vg_assert(res == 0);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 0;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */
         VG_(deliver_signals)( 0 /*HACK*/ );
         VG_(do_sanity_checks)(0 /*HACK*/, False);

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 0;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quantum as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many iterations of Phase 1, since it is
         expensive. */

      if (0)
         VG_(printf)("SCHED: tid %d, used %d\n", tid, VG_N_THREADS);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done)  += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

 debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


/* -----------------------------------------------------------
   Thread CREATION, JOINAGE and CANCELLATION.
   -------------------------------------------------------- */

static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      its magical return address with PTHREAD_CANCELED as the
      return value.  So: simple: put PTHREAD_CANCELED into %EAX
      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}



/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call to pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(jnr >= 0 && jnr < VG_N_THREADS);
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}


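/* Roughly, the join handshake implemented by do_pthread_join() above
   together with handle_pthread_return() earlier:

      joinee exits first:  joinee -> VgTs_WaitJoiner, parks retval;
                           joiner arrives later, collects retval,
                           joinee -> VgTs_Empty.
      joiner calls first:  joiner -> VgTs_WaitJoinee and
                           joinee.joiner := joiner; when the joinee
                           exits it wakes the joiner, delivers retval,
                           joinee -> VgTs_Empty.
*/
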
static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 0);

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 0);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB /* -4 ??? */;
   }

   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   vg_threads[tid].q_next = VG_INVALID_THREADID;
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}

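/* A sketch of the child stack just built above (addresses grow
   downwards; offsets follow from the code, not from a separate spec):

      stack_base + stack_size
         | redzone, VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes (no-access)
         | arg                           (pushed first)
         | VG_(pthreadreturn_bogusRA)    (fake return address)
      m_esp points here
*/
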
sewardj604ec3c2002-04-18 22:38:41 +00001512/* -----------------------------------------------------------
1513 MUTEXes
1514 -------------------------------------------------------- */
1515
1516/* Add a tid to the end of a queue threaded through the vg_threads
1517 entries on the q_next fields. */
1518static
1519void add_to_queue ( ThreadId q_start, ThreadId tid_to_add )
1520{
1521 vg_assert(is_valid_tid(q_start));
1522 vg_assert(is_valid_tid(tid_to_add));
1523 vg_assert(vg_threads[tid_to_add].q_next = VG_INVALID_THREADID);
1524 while (vg_threads[q_start].q_next != VG_INVALID_THREADID) {
1525 q_start = vg_threads[q_start].q_next;
1526 vg_assert(is_valid_tid(q_start));
1527 }
1528 vg_threads[q_start].q_next = tid_to_add;
1529}
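
/* Illustrative picture: if thread O owns a mutex and threads A and B
   are already blocked on it (in that order), the chain is

      vg_threads[O].q_next == A
      vg_threads[A].q_next == B
      vg_threads[B].q_next == VG_INVALID_THREADID

   so add_to_queue(O, C) walks O -> A -> B and links C after B. */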
1530
1531
1532 /* pthread_mutex_t is a struct with at least 5 words:
sewardje663cb92002-04-12 10:26:32 +00001533 typedef struct
1534 {
1535 int __m_reserved; -- Reserved for future use
1536 int __m_count; -- Depth of recursive locking
1537 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1538 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1539 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1540 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001541
1542 How we use it: __m_kind never changes and indicates whether or not
1543 it is recursive. __m_count indicates the lock count; if 0, the
1544 mutex is not owned by anybody. __m_owner has a ThreadId value
1545 stuffed into it, but this is only meaningful if __m_count > 0,
1546 since otherwise the mutex is actually unowned.
1547
1548 This is important to make the static initialisers work properly.
1549 They set __m_reserved, __m_count and __m_owner to zero, and
1550 __m_kind to the relevant kind. The problem is that __m_owner == 0
1551 is a valid ThreadId, but we distinguish the unowned case by
1552 __m_count == 0 rather than __m_owner being some special value.
1553
sewardje663cb92002-04-12 10:26:32 +00001554 (An earlier scheme parked an index into a vg_mutexes[] table in
1555 the __m_reserved field; that is no longer done, so __m_reserved is unused.)
1556
sewardj604ec3c2002-04-18 22:38:41 +00001557 We don't have to deal with mutex initialisation; the client side
1558 deals with that for us. */
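
/* Example (a sketch, assuming the usual glibc/LinuxThreads static
   initialisers):

      pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
      pthread_mutex_t m2 = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

   Both zero __m_count and __m_owner, so both start out unowned here;
   this is exactly why unownedness is keyed on __m_count == 0 rather
   than on some special __m_owner value. */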
sewardje663cb92002-04-12 10:26:32 +00001559
1560
1561static
1562void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1563{
sewardj604ec3c2002-04-18 22:38:41 +00001564 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001565
sewardj604ec3c2002-04-18 22:38:41 +00001566 if (VG_(clo_trace_pthread_level) >= 2) {
1567 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p", mutex );
1568 print_pthread_event(tid, msg_buf);
1569 }
1570
1571 /* Paranoia ... */
1572 vg_assert(is_valid_tid(tid)
1573 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001574
1575 /* POSIX doesn't mandate this, but for sanity ... */
1576 if (mutex == NULL) {
1577 vg_threads[tid].m_edx = EINVAL;
1578 return;
1579 }
1580
sewardj604ec3c2002-04-18 22:38:41 +00001581 /* More paranoia ... */
1582 switch (mutex->__m_kind) {
1583 case PTHREAD_MUTEX_TIMED_NP:
1584 case PTHREAD_MUTEX_RECURSIVE_NP:
1585 case PTHREAD_MUTEX_ERRORCHECK_NP:
1586 case PTHREAD_MUTEX_ADAPTIVE_NP:
1587 if (mutex->__m_count >= 0) break;
1588 /* else fall thru */
1589 default:
1590 vg_threads[tid].m_edx = EINVAL;
1591 return;
sewardje663cb92002-04-12 10:26:32 +00001592 }
1593
sewardj604ec3c2002-04-18 22:38:41 +00001594 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001595
sewardj604ec3c2002-04-18 22:38:41 +00001596 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001597
1598 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001599 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001600 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001601 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001602 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001603 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001604 vg_threads[tid].m_edx = 0;
sewardj604ec3c2002-04-18 22:38:41 +00001605             if (VG_(clo_trace_pthread_level) >= 2)
1606                VG_(printf)("!!!!!! tid %d, mutex %p -> locked %d\n", tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001607 return;
1608 } else {
1609 vg_threads[tid].m_edx = EDEADLK;
1610 return;
1611 }
1612 } else {
sewardj604ec3c2002-04-18 22:38:41 +00001613 /* Someone else has it; we have to wait. Add ourselves to
1614 the end of the list of threads waiting for this mutex. */
sewardjf8f819e2002-04-17 23:21:37 +00001615 vg_threads[tid].status = VgTs_WaitMX;
sewardj604ec3c2002-04-18 22:38:41 +00001616 add_to_queue((ThreadId)mutex->__m_owner, tid);
sewardjf8f819e2002-04-17 23:21:37 +00001617 /* No assignment to %EDX, since we're blocking. */
1618 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj604ec3c2002-04-18 22:38:41 +00001619 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p: BLOCK",
1620 mutex );
sewardjf8f819e2002-04-17 23:21:37 +00001621 print_pthread_event(tid, msg_buf);
1622 }
sewardje663cb92002-04-12 10:26:32 +00001623 return;
1624 }
sewardjf8f819e2002-04-17 23:21:37 +00001625
sewardje663cb92002-04-12 10:26:32 +00001626 } else {
sewardjf8f819e2002-04-17 23:21:37 +00001627 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001628 mutex->__m_count = 1;
1629 mutex->__m_owner = (_pthread_descr)tid;
1630 vg_threads[tid].q_next = VG_INVALID_THREADID;
sewardje663cb92002-04-12 10:26:32 +00001631 /* return 0 (success). */
1632 vg_threads[tid].m_edx = 0;
1633 }
sewardjf8f819e2002-04-17 23:21:37 +00001634
sewardje663cb92002-04-12 10:26:32 +00001635}
1636
1637
1638static
1639void do_pthread_mutex_unlock ( ThreadId tid,
1640 pthread_mutex_t *mutex )
1641{
sewardje663cb92002-04-12 10:26:32 +00001642 Int i;
1643 Char msg_buf[100];
1644
sewardj45b4b372002-04-16 22:50:32 +00001645 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj604ec3c2002-04-18 22:38:41 +00001646 VG_(sprintf)(msg_buf, "pthread_mutex_unlock %p", mutex );
sewardj8937c812002-04-12 20:12:20 +00001647 print_pthread_event(tid, msg_buf);
1648 }
1649
sewardj604ec3c2002-04-18 22:38:41 +00001650 /* Paranoia ... */
1651 vg_assert(is_valid_tid(tid)
1652 && vg_threads[tid].status == VgTs_Runnable);
1653
1654 if (mutex == NULL) {
1655 vg_threads[tid].m_edx = EINVAL;
1656 return;
1657 }
1658
1659 /* More paranoia ... */
1660 switch (mutex->__m_kind) {
1661 case PTHREAD_MUTEX_TIMED_NP:
1662 case PTHREAD_MUTEX_RECURSIVE_NP:
1663 case PTHREAD_MUTEX_ERRORCHECK_NP:
1664 case PTHREAD_MUTEX_ADAPTIVE_NP:
1665 if (mutex->__m_count >= 0) break;
1666 /* else fall thru */
1667 default:
1668 vg_threads[tid].m_edx = EINVAL;
1669 return;
1670 }
sewardje663cb92002-04-12 10:26:32 +00001671
1672 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00001673 if (mutex->__m_count == 0 /* nobody holds it */
1674 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00001675 vg_threads[tid].m_edx = EPERM;
1676 return;
1677 }
1678
sewardjf8f819e2002-04-17 23:21:37 +00001679 /* If it's a multiply-locked recursive mutex, just decrement the
1680 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00001681 if (mutex->__m_count > 1) {
1682 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
1683 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00001684 vg_threads[tid].m_edx = 0; /* success */
1685 return;
1686 }
1687
sewardj604ec3c2002-04-18 22:38:41 +00001688 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00001689 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001690 vg_assert(mutex->__m_count == 1);
sewardjf8f819e2002-04-17 23:21:37 +00001691
sewardj604ec3c2002-04-18 22:38:41 +00001692 /* Hand ownership of the mutex to the next in the queue waiting for
1693 it. This queue starts from our .q_next field. If none are
1694 waiting, mark the mutex as not held. */
sewardje663cb92002-04-12 10:26:32 +00001695
sewardj604ec3c2002-04-18 22:38:41 +00001696 if (vg_threads[tid].q_next == VG_INVALID_THREADID) {
sewardje663cb92002-04-12 10:26:32 +00001697 /* Nobody else is waiting on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001698 mutex->__m_count = 0;
sewardje663cb92002-04-12 10:26:32 +00001699 } else {
1700 /* Notionally transfer the hold to thread i, whose
1701 pthread_mutex_lock() call now returns with 0 (success). */
sewardjf8f819e2002-04-17 23:21:37 +00001702 /* The .count is already == 1. */
sewardj604ec3c2002-04-18 22:38:41 +00001703 i = vg_threads[tid].q_next;
1704 vg_assert(is_valid_tid(i));
1705 mutex->__m_owner = (_pthread_descr)i;
sewardje663cb92002-04-12 10:26:32 +00001706 vg_threads[i].status = VgTs_Runnable;
1707 vg_threads[i].m_edx = 0; /* pth_lock() success */
sewardj604ec3c2002-04-18 22:38:41 +00001708       /* remove ourselves (tid) from the queue for the mutex */
1709 vg_threads[tid].q_next = VG_INVALID_THREADID;
sewardj8937c812002-04-12 20:12:20 +00001710
sewardj45b4b372002-04-16 22:50:32 +00001711 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj604ec3c2002-04-18 22:38:41 +00001712 VG_(sprintf)(msg_buf, "pthread_mutex_lock %p: RESUME",
1713 mutex );
1714 print_pthread_event(i, msg_buf);
sewardje663cb92002-04-12 10:26:32 +00001715 }
1716 }
1717
1718 /* In either case, our (tid's) pth_unlock() returns with 0
1719 (success). */
1720 vg_threads[tid].m_edx = 0; /* Success. */
1721}
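
/* Worked example (a sketch): suppose T1 holds mutex m and T2 then
   calls pthread_mutex_lock(m).  T2 is queued after T1 via q_next and
   parked in VgTs_WaitMX, with no return value assigned.  When T1
   later calls pthread_mutex_unlock(m), ownership passes directly to
   T2: __m_owner becomes T2, T2 becomes VgTs_Runnable, and T2's m_edx
   is set to 0, so T2's original lock call now completes successfully.
   __m_count stays at 1 throughout the handoff. */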
1722
1723
sewardje663cb92002-04-12 10:26:32 +00001724
sewardj77e466c2002-04-14 02:29:29 +00001725/* vthread tid is returning from a signal handler; modify its
1726 stack/regs accordingly. */
1727static
1728void handle_signal_return ( ThreadId tid )
1729{
1730 Char msg_buf[100];
1731 Bool restart_blocked_syscalls = VG_(signal_returns)(tid);
1732
1733 if (restart_blocked_syscalls)
1734 /* Easy; we don't have to do anything. */
1735 return;
1736
1737 if (vg_threads[tid].status == VgTs_WaitFD) {
1738 vg_assert(vg_threads[tid].m_eax == __NR_read
1739 || vg_threads[tid].m_eax == __NR_write);
1740 /* read() or write() interrupted. Force a return with EINTR. */
1741 vg_threads[tid].m_eax = -VKI_EINTR;
1742 vg_threads[tid].status = VgTs_Runnable;
1743 if (VG_(clo_trace_sched)) {
1744 VG_(sprintf)(msg_buf,
1745 "read() / write() interrupted by signal; return EINTR" );
1746 print_sched_event(tid, msg_buf);
1747 }
1748 return;
1749 }
1750
1751    if (vg_threads[tid].status == VgTs_Sleeping) {
1752 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
1753 /* We interrupted a nanosleep(). The right thing to do is to
1754 write the unused time to nanosleep's second param and return
1755 EINTR, but I'm too lazy for that. */
1756 return;
1757 }
1758
1759 /* All other cases? Just return. */
1760}
1761
1762
sewardje663cb92002-04-12 10:26:32 +00001763/* ---------------------------------------------------------------------
1764 Handle non-trivial client requests.
1765 ------------------------------------------------------------------ */
1766
1767static
1768void do_nontrivial_clientreq ( ThreadId tid )
1769{
1770 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
1771 UInt req_no = arg[0];
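
   /* The request arrives as a block of words pointed to by %EAX:
      arg[0] is the request code and arg[1..] carry its operands --
      e.g. the PTHREAD_CREATE case below consumes arg[1..4]. */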
1772 switch (req_no) {
1773
1774 case VG_USERREQ__PTHREAD_CREATE:
1775 do_pthread_create( tid,
1776 (pthread_t*)arg[1],
1777 (pthread_attr_t*)arg[2],
1778 (void*(*)(void*))arg[3],
1779 (void*)arg[4] );
1780 break;
1781
sewardjbc5b99f2002-04-13 00:08:51 +00001782 case VG_USERREQ__PTHREAD_RETURNS:
1783 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00001784 break;
1785
1786 case VG_USERREQ__PTHREAD_JOIN:
1787 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
1788 break;
1789
sewardje663cb92002-04-12 10:26:32 +00001790 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
1791 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
1792 break;
1793
1794 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
1795 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
1796 break;
1797
sewardje663cb92002-04-12 10:26:32 +00001798 case VG_USERREQ__PTHREAD_CANCEL:
1799 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
1800 break;
1801
1802 case VG_USERREQ__MAKE_NOACCESS:
1803 case VG_USERREQ__MAKE_WRITABLE:
1804 case VG_USERREQ__MAKE_READABLE:
1805 case VG_USERREQ__DISCARD:
1806 case VG_USERREQ__CHECK_WRITABLE:
1807 case VG_USERREQ__CHECK_READABLE:
1808 case VG_USERREQ__MAKE_NOACCESS_STACK:
1809 case VG_USERREQ__RUNNING_ON_VALGRIND:
1810 case VG_USERREQ__DO_LEAK_CHECK:
sewardj8c824512002-04-14 04:16:48 +00001811 vg_threads[tid].m_edx
1812 = VG_(handle_client_request) ( &vg_threads[tid], arg );
sewardje663cb92002-04-12 10:26:32 +00001813 break;
1814
sewardj77e466c2002-04-14 02:29:29 +00001815 case VG_USERREQ__SIGNAL_RETURNS:
1816 handle_signal_return(tid);
1817 break;
sewardj54cacf02002-04-12 23:24:59 +00001818
sewardje663cb92002-04-12 10:26:32 +00001819 default:
1820 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
1821 VG_(panic)("handle_private_client_pthread_request: "
1822 "unknown request");
1823 /*NOTREACHED*/
1824 break;
1825 }
1826}
1827
1828
1829/*--------------------------------------------------------------------*/
1830/*--- end vg_scheduler.c ---*/
1831/*--------------------------------------------------------------------*/