blob: da8143cbee13229c0c967af1d7659427e2769edb [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
33
34#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
35 VG_USERREQ__DO_LEAK_CHECK */
36
sewardj77e466c2002-04-14 02:29:29 +000037/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000038
sewardj77e466c2002-04-14 02:29:29 +000039Note! This pthreads implementation is so poor as to not be
40suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000041
sewardj77e466c2002-04-14 02:29:29 +000042- Currently, when a signal is run, just the ThreadStatus.status fields
43 are saved in the signal frame, along with the CPU state. Question:
44 should I also save and restore:
45 ThreadStatus.joiner
46 ThreadStatus.waited_on_mid
47 ThreadStatus.awaken_at
48 ThreadStatus.retval
49 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000050
sewardj77e466c2002-04-14 02:29:29 +000051- Signals interrupting read/write and nanosleep: SA_RESTART settings.
52 Read/write correctly return with EINTR when SA_RESTART isn't
53 specified and they are interrupted by a signal. nanosleep just
54 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000055
sewardj75fe1892002-04-14 02:46:33 +000056- Read/write syscall starts: don't crap out when the initial
57 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000058
sewardj9a199dc2002-04-14 13:01:38 +000059- Get rid of restrictions re use of sigaltstack; they are no longer
60 needed.
61
sewardj6072c362002-04-19 14:40:57 +000062- Fix signals properly, so that each thread has its own blocking mask.
63 Currently this isn't done, and (worse?) signals are delivered to
64 Thread 1 (the root thread) regardless.
65
66 So, what's the deal with signals and mutexes? If a thread is
67 blocked on a mutex, or for a condition variable for that matter, can
68 signals still be delivered to it? This has serious consequences --
69 deadlocks, etc.
70
sewardje462e202002-04-13 04:09:07 +000071*/
sewardje663cb92002-04-12 10:26:32 +000072
73
74/* ---------------------------------------------------------------------
75 Types and globals for the scheduler.
76 ------------------------------------------------------------------ */
77
78/* type ThreadId is defined in vg_include.h. */
79
80/* struct ThreadState is defined in vg_include.h. */
81
/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock), or
   VG_INVALID_THREADID if no thread's state is currently loaded
   there. */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

/* Table of outstanding blocked-fd requests; slots with fd == -1 are
   free.  Filled by add_waiting_fd(), scanned by poll_for_ready_fds(). */
static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


/* Forwards */
static void do_nontrivial_clientreq ( ThreadId tid );

static void scheduler_sanity ( void );
sewardje663cb92002-04-12 10:26:32 +0000127
128/* ---------------------------------------------------------------------
129 Helper functions for the scheduler.
130 ------------------------------------------------------------------ */
131
sewardj604ec3c2002-04-18 22:38:41 +0000132static __inline__
133Bool is_valid_tid ( ThreadId tid )
134{
135 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000136 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000137 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000138 return True;
139}
140
141
sewardj1e8cdc92002-04-18 11:37:52 +0000142/* For constructing error messages only: try and identify a thread
143 whose stack this address currently falls within, or return
144 VG_INVALID_THREADID if it doesn't. A small complication is dealing
145 with any currently VG_(baseBlock)-resident thread.
146*/
147ThreadId VG_(identify_stack_addr)( Addr a )
148{
149 ThreadId tid, tid_to_skip;
150
151 tid_to_skip = VG_INVALID_THREADID;
152
153 /* First check to see if there's a currently-loaded thread in
154 VG_(baseBlock). */
155 if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
156 tid = vg_tid_currently_in_baseBlock;
157 if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
158 && a <= vg_threads[tid].stack_highest_word)
159 return tid;
160 else
161 tid_to_skip = tid;
162 }
163
sewardj6072c362002-04-19 14:40:57 +0000164 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj1e8cdc92002-04-18 11:37:52 +0000165 if (vg_threads[tid].status == VgTs_Empty) continue;
166 if (tid == tid_to_skip) continue;
167 if (vg_threads[tid].m_esp <= a
168 && a <= vg_threads[tid].stack_highest_word)
169 return tid;
170 }
171 return VG_INVALID_THREADID;
172}
173
174
sewardj15a43e12002-04-17 19:35:12 +0000175/* Print the scheduler status. */
176void VG_(pp_sched_status) ( void )
sewardje663cb92002-04-12 10:26:32 +0000177{
178 Int i;
179 VG_(printf)("\nsched status:\n");
sewardj6072c362002-04-19 14:40:57 +0000180 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000181 if (vg_threads[i].status == VgTs_Empty) continue;
sewardj15a43e12002-04-17 19:35:12 +0000182 VG_(printf)("\nThread %d: status = ", i);
sewardje663cb92002-04-12 10:26:32 +0000183 switch (vg_threads[i].status) {
sewardj6072c362002-04-19 14:40:57 +0000184 case VgTs_Runnable: VG_(printf)("Runnable"); break;
185 case VgTs_WaitFD: VG_(printf)("WaitFD"); break;
186 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
sewardje663cb92002-04-12 10:26:32 +0000187 vg_threads[i].joiner); break;
sewardj6072c362002-04-19 14:40:57 +0000188 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
189 case VgTs_Sleeping: VG_(printf)("Sleeping"); break;
190 case VgTs_WaitMX: VG_(printf)("WaitMX"); break;
sewardj3b5d8862002-04-20 13:53:23 +0000191 case VgTs_WaitCV: VG_(printf)("WaitCV"); break;
sewardje663cb92002-04-12 10:26:32 +0000192 default: VG_(printf)("???"); break;
193 }
sewardj3b5d8862002-04-20 13:53:23 +0000194 VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
195 vg_threads[i].associated_mx,
196 vg_threads[i].associated_cv );
sewardj15a43e12002-04-17 19:35:12 +0000197 VG_(pp_ExeContext)(
198 VG_(get_ExeContext)( False, vg_threads[i].m_eip,
199 vg_threads[i].m_ebp ));
sewardje663cb92002-04-12 10:26:32 +0000200 }
201 VG_(printf)("\n");
202}
203
204static
205void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
206{
207 Int i;
208
209 vg_assert(fd != -1); /* avoid total chaos */
210
211 for (i = 0; i < VG_N_WAITING_FDS; i++)
212 if (vg_waiting_fds[i].fd == -1)
213 break;
214
215 if (i == VG_N_WAITING_FDS)
216 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
217 /*
218 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
219 tid, fd, i);
220 */
221 vg_waiting_fds[i].fd = fd;
222 vg_waiting_fds[i].tid = tid;
223 vg_waiting_fds[i].ready = False;
224 vg_waiting_fds[i].syscall_no = syscall_no;
225}
226
227
228
/* Emit a scheduler-level trace line for thread tid to the debug log. */
static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}
234
235
/* Emit a pthread-level trace line for thread tid to the debug log. */
static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}
241
242
243static
244Char* name_of_sched_event ( UInt event )
245{
246 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000247 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
248 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
249 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
250 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
251 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
252 default: return "??UNKNOWN??";
253 }
254}
255
256
/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
   tid identifies the thread whose state VG_(translate) should use. */
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats: counts/sizes for this LRU epoch and overall. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC (self-modifying code) detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}
298
299
300/* Allocate a completely empty ThreadState record. */
301static
302ThreadId vg_alloc_ThreadState ( void )
303{
304 Int i;
sewardj6072c362002-04-19 14:40:57 +0000305 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000306 if (vg_threads[i].status == VgTs_Empty)
307 return i;
308 }
309 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
310 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
311 VG_(panic)("VG_N_THREADS is too low");
312 /*NOTREACHED*/
313}
314
315
/* Return a pointer to tid's ThreadState record.  tid must be a valid
   thread id whose slot is in use. */
ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}
322
323
/* ThreadState of the thread currently resident in VG_(baseBlock).
   Only callable while some thread is actually resident there. */
ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}
329
330
/* ThreadId of the thread currently resident in VG_(baseBlock).
   Only callable while some thread is actually resident there. */
ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}
336
337
/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run.  Requires that no other thread is currently resident
   (caller must have done a save_thread_state first). */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   /* FPU state, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   /* Shadow (validity-tracking) registers. */
   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   /* Mark tid as the resident thread. */
   vg_tid_currently_in_baseBlock = tid;
}
372
373
/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   /* FPU state, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   /* Shadow (validity-tracking) registers. */
   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   /* No thread is resident any more. */
   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}
429
430
/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened.  The thread must be Runnable.
   Note: if the client takes a fault, vg_oursignalhandler longjmps
   back through VG_(scheduler_jmpbuf), which is why run_innerloop's
   return is guarded by setjmp here. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
455
456
/* Increment the LRU epoch counter; at high verbosity, report the
   translation statistics of the epoch just ended; then reset the
   per-epoch counters for the new epoch. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   /* Start the new epoch with all per-epoch counters at zero. */
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}
484
485
/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care that the client's state is parked in
   VG_(baseBlock) beforehand.
*/
void VG_(scheduler_init) ( void )
{
   Int i;
   Addr startup_esp;
   ThreadId tid_main;

   /* Sanity-check the startup stack pointer against the expected
      startup-stack region. */
   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   /* Mark every slot empty -- including slot 0, which is never
      allocated but should still be in a sane state. */
   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      vg_threads[i].status = VgTs_Empty;
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   /* The first allocation must yield ThreadId 1, the main thread,
      which has certain magic properties (slot 0 is never used). */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   vg_threads[tid_main].status = VgTs_Runnable;
   vg_threads[tid_main].joiner = VG_INVALID_THREADID;
   vg_threads[tid_main].associated_mx = NULL;
   vg_threads[tid_main].associated_cv = NULL;
   vg_threads[tid_main].retval = NULL; /* not important */
   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4 ??? */;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* So now ... no thread is resident in VG_(baseBlock). */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}
534
535
536/* What if fd isn't a valid fd? */
537static
538void set_fd_nonblocking ( Int fd )
539{
540 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
541 vg_assert(!VG_(is_kerror)(res));
542 res |= VKI_O_NONBLOCK;
543 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
544 vg_assert(!VG_(is_kerror)(res));
545}
546
547static
548void set_fd_blocking ( Int fd )
549{
550 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
551 vg_assert(!VG_(is_kerror)(res));
552 res &= ~VKI_O_NONBLOCK;
553 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
554 vg_assert(!VG_(is_kerror)(res));
555}
556
557static
558Bool fd_is_blockful ( Int fd )
559{
560 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
561 vg_assert(!VG_(is_kerror)(res));
562 return (res & VKI_O_NONBLOCK) ? False : True;
563}
564
565
566
/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if so.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.

   The request block is pointed to by the thread's %EAX: arg[0] is the
   request number, arg[1..] its arguments.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)             \
   { tst->m_edx = (vvv);                 \
     return True;                        \
   }

   ThreadState* tst = &vg_threads[tid];
   UInt* arg = (UInt*)(tst->m_eax);
   UInt req_no = arg[0];

   switch (req_no) {
      /* Allocation family: result (pointer) goes in %EDX. */
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      /* Deallocation family: no meaningful result. */
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
635
636
sewardj6072c362002-04-19 14:40:57 +0000637/* vthread tid is returning from a signal handler; modify its
638 stack/regs accordingly. */
639static
640void handle_signal_return ( ThreadId tid )
641{
642 Char msg_buf[100];
643 Bool restart_blocked_syscalls;
644
645 vg_assert(is_valid_tid(tid));
646
647 restart_blocked_syscalls = VG_(signal_returns)(tid);
648
649 if (restart_blocked_syscalls)
650 /* Easy; we don't have to do anything. */
651 return;
652
653 if (vg_threads[tid].status == VgTs_WaitFD) {
654 vg_assert(vg_threads[tid].m_eax == __NR_read
655 || vg_threads[tid].m_eax == __NR_write);
656 /* read() or write() interrupted. Force a return with EINTR. */
657 vg_threads[tid].m_eax = -VKI_EINTR;
658 vg_threads[tid].status = VgTs_Runnable;
659 if (VG_(clo_trace_sched)) {
660 VG_(sprintf)(msg_buf,
661 "read() / write() interrupted by signal; return EINTR" );
662 print_sched_event(tid, msg_buf);
663 }
664 return;
665 }
666
667 if (vg_threads[tid].status == VgTs_WaitFD) {
668 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
669 /* We interrupted a nanosleep(). The right thing to do is to
670 write the unused time to nanosleep's second param and return
671 EINTR, but I'm too lazy for that. */
672 return;
673 }
674
675 /* All other cases? Just return. */
676}
677
678
/* Schedule-aware syscall dispatch for thread tid (which must be
   Runnable).  Three cases:
     - nanosleep: deschedule the thread (VgTs_Sleeping) until
       awaken_at; poll_for_ready_fds() wakes it up later.
     - read/write on a blocking fd: lodge the request with the kernel
       non-blockingly; if it would block, park the thread in
       VgTs_WaitFD and record the fd in vg_waiting_fds.
     - everything else: assumed non-blocking, performed immediately.
*/
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      ULong t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_microsecond_timer)();
      /* Absolute wakeup time, in microseconds. */
      t_awaken
         = t_now
           + (ULong)1000000ULL * (ULong)(req->tv_sec)
           + (ULong)( (UInt)(req->tv_nsec) / 1000 );
      vg_threads[tid].status = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   /* Classify: only read/write on a blockful fd can block. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         /* Unreachable: only read/write get here (see classification
            above); always-false assert. */
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion. The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}
788
789
790/* Find out which of the fds in vg_waiting_fds are now ready to go, by
791 making enquiries with select(), and mark them as ready. We have to
   wait for the requesting threads to fall into the WaitFD state
793 before we can actually finally deliver the results, so this
794 procedure doesn't do that; complete_blocked_syscalls() does it.
795
796 It might seem odd that a thread which has done a blocking syscall
797 is not in WaitFD state; the way this can happen is if it initially
798 becomes WaitFD, but then a signal is delivered to it, so it becomes
799 Runnable for a while. In this case we have to wait for the
800 sighandler to return, whereupon the WaitFD state is resumed, and
801 only at that point can the I/O result be delivered to it. However,
802 this point may be long after the fd is actually ready.
803
804 So, poll_for_ready_fds() merely detects fds which are ready.
805 complete_blocked_syscalls() does the second half of the trick,
806 possibly much later: it delivers the results from ready fds to
807 threads in WaitFD state.
808*/
sewardj9a199dc2002-04-14 13:01:38 +0000809static
sewardje663cb92002-04-12 10:26:32 +0000810void poll_for_ready_fds ( void )
811{
812 vki_ksigset_t saved_procmask;
813 vki_fd_set readfds;
814 vki_fd_set writefds;
815 vki_fd_set exceptfds;
816 struct vki_timeval timeout;
817 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
818 ThreadId tid;
819 Bool rd_ok, wr_ok, ex_ok;
820 Char msg_buf[100];
821
sewardje462e202002-04-13 04:09:07 +0000822 struct vki_timespec* rem;
823 ULong t_now;
824
sewardje663cb92002-04-12 10:26:32 +0000825 /* Awaken any sleeping threads whose sleep has expired. */
sewardj6072c362002-04-19 14:40:57 +0000826 for (tid = 1; tid < VG_N_THREADS; tid++)
827 if (vg_threads[tid].status == VgTs_Sleeping)
828 break;
829
830 /* Avoid pointless calls to VG_(read_microsecond_timer). */
831 if (tid < VG_N_THREADS) {
832 t_now = VG_(read_microsecond_timer)();
833 for (tid = 1; tid < VG_N_THREADS; tid++) {
834 if (vg_threads[tid].status != VgTs_Sleeping)
835 continue;
836 if (t_now >= vg_threads[tid].awaken_at) {
837 /* Resume this thread. Set to zero the remaining-time
838 (second) arg of nanosleep, since it's used up all its
839 time. */
840 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
841 rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
842 if (rem != NULL) {
843 rem->tv_sec = 0;
844 rem->tv_nsec = 0;
845 }
846 /* Make the syscall return 0 (success). */
847 vg_threads[tid].m_eax = 0;
848 /* Reschedule this thread. */
849 vg_threads[tid].status = VgTs_Runnable;
850 if (VG_(clo_trace_sched)) {
851 VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
852 t_now);
853 print_sched_event(tid, msg_buf);
854 }
sewardje663cb92002-04-12 10:26:32 +0000855 }
856 }
857 }
sewardje663cb92002-04-12 10:26:32 +0000858
sewardje462e202002-04-13 04:09:07 +0000859 /* And look for threads waiting on file descriptors which are now
860 ready for I/O.*/
sewardje663cb92002-04-12 10:26:32 +0000861 timeout.tv_sec = 0;
862 timeout.tv_usec = 0;
863
864 VKI_FD_ZERO(&readfds);
865 VKI_FD_ZERO(&writefds);
866 VKI_FD_ZERO(&exceptfds);
867 fd_max = -1;
868 for (i = 0; i < VG_N_WAITING_FDS; i++) {
869 if (vg_waiting_fds[i].fd == -1 /* not in use */)
870 continue;
871 if (vg_waiting_fds[i].ready /* already ready? */)
872 continue;
873 fd = vg_waiting_fds[i].fd;
874 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
sewardje462e202002-04-13 04:09:07 +0000875 vg_assert(fd >= 0);
sewardje663cb92002-04-12 10:26:32 +0000876 if (fd > fd_max)
877 fd_max = fd;
878 tid = vg_waiting_fds[i].tid;
sewardj6072c362002-04-19 14:40:57 +0000879 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000880 syscall_no = vg_waiting_fds[i].syscall_no;
881 switch (syscall_no) {
882 case __NR_read:
883 VKI_FD_SET(fd, &readfds); break;
884 case __NR_write:
885 VKI_FD_SET(fd, &writefds); break;
886 default:
887 VG_(panic)("poll_for_ready_fds: unexpected syscall");
888 /*NOTREACHED*/
889 break;
890 }
891 }
892
sewardje462e202002-04-13 04:09:07 +0000893 /* Short cut: if no fds are waiting, give up now. */
894 if (fd_max == -1)
895 return;
896
sewardje663cb92002-04-12 10:26:32 +0000897 /* BLOCK ALL SIGNALS. We don't want the complication of select()
898 getting interrupted. */
899 VG_(block_all_host_signals)( &saved_procmask );
900
901 n_ready = VG_(select)
902 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
903 if (VG_(is_kerror)(n_ready)) {
904 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
905 VG_(panic)("poll_for_ready_fds: select failed?!");
906 /*NOTREACHED*/
907 }
908
909 /* UNBLOCK ALL SIGNALS */
910 VG_(restore_host_signals)( &saved_procmask );
911
912 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
913
914 if (n_ready == 0)
915 return;
916
917 /* Inspect all the fds we know about, and handle any completions that
918 have happened. */
919 /*
920 VG_(printf)("\n\n");
921 for (fd = 0; fd < 100; fd++)
922 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
923 VG_(printf)("X"); } else { VG_(printf)("."); };
924 VG_(printf)("\n\nfd_max = %d\n", fd_max);
925 */
926
927 for (fd = 0; fd <= fd_max; fd++) {
928 rd_ok = VKI_FD_ISSET(fd, &readfds);
929 wr_ok = VKI_FD_ISSET(fd, &writefds);
930 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
931
932 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
933 if (n_ok == 0)
934 continue;
935 if (n_ok > 1) {
936 VG_(printf)("offending fd = %d\n", fd);
937 VG_(panic)("poll_for_ready_fds: multiple events on fd");
938 }
939
940 /* An I/O event completed for fd. Find the thread which
941 requested this. */
942 for (i = 0; i < VG_N_WAITING_FDS; i++) {
943 if (vg_waiting_fds[i].fd == -1 /* not in use */)
944 continue;
945 if (vg_waiting_fds[i].fd == fd)
946 break;
947 }
948
949 /* And a bit more paranoia ... */
950 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
951
952 /* Mark the fd as ready. */
953 vg_assert(! vg_waiting_fds[i].ready);
954 vg_waiting_fds[i].ready = True;
955 }
956}
957
958
 959/* See comment attached to poll_for_ready_fds() for explanation. */
/* Second half of the deferred-I/O mechanism: for every waiting-fd
   slot that poll_for_ready_fds() marked ready, AND whose owner thread
   has returned to the VgTs_WaitFD state, actually perform the
   (now non-blocking) syscall, deliver the result, and make the thread
   runnable again. */
static
void complete_blocked_syscalls ( void )
{
   Int        fd, i, res, syscall_no;
   ThreadId   tid;
   Char       msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result!  (It may be
         temporarily Runnable while running a signal handler; in that
         case the slot stays ready and we retry on a later pass.) */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
1005
1006
1007static
1008void nanosleep_for_a_while ( void )
1009{
1010 Int res;
1011 struct vki_timespec req;
1012 struct vki_timespec rem;
1013 req.tv_sec = 0;
1014 req.tv_nsec = 20 * 1000 * 1000;
1015 res = VG_(nanosleep)( &req, &rem );
1016 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
1017 vg_assert(res == 0);
1018}
1019
1020
1021/* ---------------------------------------------------------------------
1022 The scheduler proper.
1023 ------------------------------------------------------------------ */
1024
1025/* Run user-space threads until either
1026 * Deadlock occurs
1027 * One thread asks to shutdown Valgrind
1028 * The specified number of basic blocks has gone by.
1029*/
/* The top-level scheduler.  Round-robins over the thread table,
   running each runnable thread for up to a quantum of basic blocks,
   handling I/O completion, signal delivery, syscalls and client
   (pthread) requests as threads fall out of the dispatcher.
   Returns the reason the whole run ended. */
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;                 /* thread return code from dispatcher */
   UInt     dispatch_ctr_SAVED;  /* quantum requested, for bb accounting */
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
      scheduler_sanity();

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour. */
         VG_(deliver_signals)( 1 /*HACK*/ );
         VG_(do_sanity_checks)( 1 /*HACK*/, False );

         /* Try and find a thread (tid) to run, round-robin, starting
            just after the thread that last ran. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason. First, update basic-block
         counters.  The extra -1 accounts for the dispatcher's
         pre-decrement of VG_(dispatch_ctr) (see comment above). */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done) += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            /* Handled inline in Phase 2; seeing it here is a bug. */
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}
1329
1330
1331/* ---------------------------------------------------------------------
1332 The pthread implementation.
1333 ------------------------------------------------------------------ */
1334
1335#include <pthread.h>
1336#include <errno.h>
1337
1338#if !defined(PTHREAD_STACK_MIN)
1339# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1340#endif
1341
1342/* /usr/include/bits/pthreadtypes.h:
1343 typedef unsigned long int pthread_t;
1344*/
1345
sewardje663cb92002-04-12 10:26:32 +00001346
sewardj604ec3c2002-04-18 22:38:41 +00001347/* -----------------------------------------------------------
1348 Thread CREATION, JOINAGE and CANCELLATION.
1349 -------------------------------------------------------- */
1350
sewardje663cb92002-04-12 10:26:32 +00001351static
1352void do_pthread_cancel ( ThreadId tid_canceller,
1353 pthread_t tid_cancellee )
1354{
1355 Char msg_buf[100];
1356 /* We want make is appear that this thread has returned to
1357 do_pthread_create_bogusRA with PTHREAD_CANCELED as the
1358 return value. So: simple: put PTHREAD_CANCELED into %EAX
1359 and &do_pthread_create_bogusRA into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001360 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001361 VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1362 print_sched_event(tid_cancellee, msg_buf);
1363 }
1364 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001365 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001366 vg_threads[tid_cancellee].status = VgTs_Runnable;
1367}
1368
1369
sewardj3b5d8862002-04-20 13:53:23 +00001370static
1371void do_pthread_exit ( ThreadId tid, void* retval )
1372{
1373 Char msg_buf[100];
1374 /* We want make is appear that this thread has returned to
1375 do_pthread_create_bogusRA with retval as the
1376 return value. So: simple: put retval into %EAX
1377 and &do_pthread_create_bogusRA into %EIP and keep going! */
1378 if (VG_(clo_trace_sched)) {
1379 VG_(sprintf)(msg_buf, "exiting with %p", retval);
1380 print_sched_event(tid, msg_buf);
1381 }
1382 vg_threads[tid].m_eax = (UInt)retval;
1383 vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1384 vg_threads[tid].status = VgTs_Runnable;
1385}
1386
sewardje663cb92002-04-12 10:26:32 +00001387
1388/* Thread tid is exiting, by returning from the function it was
sewardjbc5b99f2002-04-13 00:08:51 +00001389 created with. Or possibly due to pthread_exit or cancellation.
1390 The main complication here is to resume any thread waiting to join
1391 with this one. */
/* Thread tid has finished (root-fn return, pthread_exit, or
   cancellation) with result retval.  If a joiner is already waiting,
   hand the result over and free this slot; otherwise park the thread
   in VgTs_WaitJoiner until someone calls pthread_join on it. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt* jnr_args;
   void** jnr_thread_return;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   /* Park the exit code where a later pthread_join can find it. */
   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(is_valid_tid(jnr));
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      /* args[2] is pthread_join's void** thread_return parameter. */
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      /* Poison the dead thread's stack so late accesses are reported.
         The stack itself is kept for reuse by the next occupant of
         this slot. */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}
1450
1451
/* Implement pthread_join: thread tid joins on thread jee, with the
   joinee's exit code to be stored through thread_return (if non-NULL).
   Error results (EDEADLK/EINVAL) and success (0) are returned to the
   caller via tid's %EDX. */
static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Joining with oneself would deadlock. */
   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* NOTE(review): if ThreadId is unsigned the `jee < 0` test is
      vacuous -- confirm ThreadId's declaration. */
   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      /* Poison the joinee's now-dead stack so stray accesses get
         reported. */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee.  handle_pthread_return() will
      resume us when jee finishes. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
1521
1522
/* Implement pthread_create on behalf of parent_tid.  Allocates a
   thread slot, clones the parent's CPU state into it, carves out a
   stack (with a no-access redzone), pushes arg and the magic return
   address, and points %EIP at start_routine.  The new thread id is
   stored through *thread and 0 (success) returned in the parent's
   %EDX.  NOTE(review): the attr parameter is currently ignored --
   custom stack sizes/detach state are not honoured. */
static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... pthread_t values are stuffed into UInt-sized
      registers below, so the sizes must agree. */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(is_valid_tid(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate.  (A reused slot may still own the previous
      occupant's stack.) */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
   }

   /* Point %ESP just below the redzone at the top of the stack. */
   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address, so that returning from
      start_routine lands in the thread-exit machinery. */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   /* The two words just pushed (arg + return address) are valid. */
   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   //   if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   vg_threads[tid].associated_mx = NULL;
   vg_threads[tid].associated_cv = NULL;
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}
1613
1614
sewardj604ec3c2002-04-18 22:38:41 +00001615/* -----------------------------------------------------------
1616 MUTEXes
1617 -------------------------------------------------------- */
1618
sewardj604ec3c2002-04-18 22:38:41 +00001619/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00001620 typedef struct
1621 {
1622 int __m_reserved; -- Reserved for future use
1623 int __m_count; -- Depth of recursive locking
1624 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1625 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1626 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1627 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001628
sewardj6072c362002-04-19 14:40:57 +00001629 #define PTHREAD_MUTEX_INITIALIZER \
1630 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1631 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1632 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1633 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1634 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1635 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1636 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001637
sewardj6072c362002-04-19 14:40:57 +00001638 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001639
sewardj6072c362002-04-19 14:40:57 +00001640 __m_kind never changes and indicates whether or not it is recursive.
1641
1642 __m_count indicates the lock count; if 0, the mutex is not owned by
1643 anybody.
1644
1645 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1646 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1647 statically initialised mutexes correctly appear
1648 to belong to nobody.
1649
   In summary, a not-in-use mutex is distinguished by having __m_owner
1651 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1652 conditions holds, the other should too.
1653
1654 There is no linked list of threads waiting for this mutex. Instead
1655 a thread in WaitMX state points at the mutex with its waited_on_mx
1656 field. This makes _unlock() inefficient, but simple to implement the
   right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001658
sewardj604ec3c2002-04-18 22:38:41 +00001659 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001660 deals with that for us.
1661*/
sewardje663cb92002-04-12 10:26:32 +00001662
sewardj3b5d8862002-04-20 13:53:23 +00001663/* Helper fns ... */
1664static
1665void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1666 Char* caller )
1667{
1668 Int i;
1669 Char msg_buf[100];
1670
1671 /* Find some arbitrary thread waiting on this mutex, and make it
1672 runnable. If none are waiting, mark the mutex as not held. */
1673 for (i = 1; i < VG_N_THREADS; i++) {
1674 if (vg_threads[i].status == VgTs_Empty)
1675 continue;
1676 if (vg_threads[i].status == VgTs_WaitMX
1677 && vg_threads[i].associated_mx == mutex)
1678 break;
1679 }
1680
1681 vg_assert(i <= VG_N_THREADS);
1682 if (i == VG_N_THREADS) {
1683 /* Nobody else is waiting on it. */
1684 mutex->__m_count = 0;
1685 mutex->__m_owner = VG_INVALID_THREADID;
1686 } else {
1687 /* Notionally transfer the hold to thread i, whose
1688 pthread_mutex_lock() call now returns with 0 (success). */
1689 /* The .count is already == 1. */
1690 vg_assert(vg_threads[i].associated_mx == mutex);
1691 mutex->__m_owner = (_pthread_descr)i;
1692 vg_threads[i].status = VgTs_Runnable;
1693 vg_threads[i].associated_mx = NULL;
1694 vg_threads[i].m_edx = 0; /* pth_lock() success */
1695
1696 if (VG_(clo_trace_pthread_level) >= 1) {
1697 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1698 caller, mutex );
1699 print_pthread_event(i, msg_buf);
1700 }
1701 }
1702}
1703
sewardje663cb92002-04-12 10:26:32 +00001704
1705static
1706void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex )
1707{
sewardj604ec3c2002-04-18 22:38:41 +00001708 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001709
sewardj604ec3c2002-04-18 22:38:41 +00001710 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001711 VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p ...", mutex );
sewardj604ec3c2002-04-18 22:38:41 +00001712 print_pthread_event(tid, msg_buf);
1713 }
1714
1715 /* Paranoia ... */
1716 vg_assert(is_valid_tid(tid)
1717 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001718
1719 /* POSIX doesn't mandate this, but for sanity ... */
1720 if (mutex == NULL) {
1721 vg_threads[tid].m_edx = EINVAL;
1722 return;
1723 }
1724
sewardj604ec3c2002-04-18 22:38:41 +00001725 /* More paranoia ... */
1726 switch (mutex->__m_kind) {
1727 case PTHREAD_MUTEX_TIMED_NP:
1728 case PTHREAD_MUTEX_RECURSIVE_NP:
1729 case PTHREAD_MUTEX_ERRORCHECK_NP:
1730 case PTHREAD_MUTEX_ADAPTIVE_NP:
1731 if (mutex->__m_count >= 0) break;
1732 /* else fall thru */
1733 default:
1734 vg_threads[tid].m_edx = EINVAL;
1735 return;
sewardje663cb92002-04-12 10:26:32 +00001736 }
1737
sewardj604ec3c2002-04-18 22:38:41 +00001738 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001739
sewardj604ec3c2002-04-18 22:38:41 +00001740 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001741
1742 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001743 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001744 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001745 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001746 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001747 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001748 vg_threads[tid].m_edx = 0;
sewardj3b5d8862002-04-20 13:53:23 +00001749 VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
sewardj604ec3c2002-04-18 22:38:41 +00001750 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001751 return;
1752 } else {
1753 vg_threads[tid].m_edx = EDEADLK;
1754 return;
1755 }
1756 } else {
sewardj6072c362002-04-19 14:40:57 +00001757 /* Someone else has it; we have to wait. Mark ourselves
1758 thusly. */
sewardj3b5d8862002-04-20 13:53:23 +00001759 vg_threads[tid].status = VgTs_WaitMX;
1760 vg_threads[tid].associated_mx = mutex;
sewardjf8f819e2002-04-17 23:21:37 +00001761 /* No assignment to %EDX, since we're blocking. */
1762 if (VG_(clo_trace_pthread_level) >= 1) {
sewardj3b5d8862002-04-20 13:53:23 +00001763 VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p: BLOCK",
sewardj604ec3c2002-04-18 22:38:41 +00001764 mutex );
sewardjf8f819e2002-04-17 23:21:37 +00001765 print_pthread_event(tid, msg_buf);
1766 }
sewardje663cb92002-04-12 10:26:32 +00001767 return;
1768 }
sewardjf8f819e2002-04-17 23:21:37 +00001769
sewardje663cb92002-04-12 10:26:32 +00001770 } else {
sewardj6072c362002-04-19 14:40:57 +00001771 /* Nobody owns it. Sanity check ... */
1772 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001773 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001774 mutex->__m_count = 1;
1775 mutex->__m_owner = (_pthread_descr)tid;
sewardj3b5d8862002-04-20 13:53:23 +00001776 vg_assert(vg_threads[tid].associated_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001777 /* return 0 (success). */
1778 vg_threads[tid].m_edx = 0;
1779 }
sewardjf8f819e2002-04-17 23:21:37 +00001780
sewardje663cb92002-04-12 10:26:32 +00001781}
1782
1783
1784static
1785void do_pthread_mutex_unlock ( ThreadId tid,
1786 pthread_mutex_t *mutex )
1787{
sewardj3b5d8862002-04-20 13:53:23 +00001788 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001789
sewardj45b4b372002-04-16 22:50:32 +00001790 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001791 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00001792 print_pthread_event(tid, msg_buf);
1793 }
1794
sewardj604ec3c2002-04-18 22:38:41 +00001795 /* Paranoia ... */
1796 vg_assert(is_valid_tid(tid)
1797 && vg_threads[tid].status == VgTs_Runnable);
1798
1799 if (mutex == NULL) {
1800 vg_threads[tid].m_edx = EINVAL;
1801 return;
1802 }
1803
1804 /* More paranoia ... */
1805 switch (mutex->__m_kind) {
1806 case PTHREAD_MUTEX_TIMED_NP:
1807 case PTHREAD_MUTEX_RECURSIVE_NP:
1808 case PTHREAD_MUTEX_ERRORCHECK_NP:
1809 case PTHREAD_MUTEX_ADAPTIVE_NP:
1810 if (mutex->__m_count >= 0) break;
1811 /* else fall thru */
1812 default:
1813 vg_threads[tid].m_edx = EINVAL;
1814 return;
1815 }
sewardje663cb92002-04-12 10:26:32 +00001816
1817 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00001818 if (mutex->__m_count == 0 /* nobody holds it */
1819 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00001820 vg_threads[tid].m_edx = EPERM;
1821 return;
1822 }
1823
sewardjf8f819e2002-04-17 23:21:37 +00001824 /* If it's a multiply-locked recursive mutex, just decrement the
1825 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00001826 if (mutex->__m_count > 1) {
1827 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
1828 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00001829 vg_threads[tid].m_edx = 0; /* success */
1830 return;
1831 }
1832
sewardj604ec3c2002-04-18 22:38:41 +00001833 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00001834 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001835 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00001836 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00001837
sewardj3b5d8862002-04-20 13:53:23 +00001838 /* Release at max one thread waiting on this mutex. */
1839 release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
sewardje663cb92002-04-12 10:26:32 +00001840
sewardj3b5d8862002-04-20 13:53:23 +00001841 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardje663cb92002-04-12 10:26:32 +00001842 vg_threads[tid].m_edx = 0; /* Success. */
1843}
1844
1845
sewardj6072c362002-04-19 14:40:57 +00001846/* -----------------------------------------------------------
1847 CONDITION VARIABLES
1848 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00001849
sewardj6072c362002-04-19 14:40:57 +00001850/* The relevant native types are as follows:
1851 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00001852
sewardj6072c362002-04-19 14:40:57 +00001853 -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER
1854 typedef struct
1855 {
1856 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
1857 _pthread_descr __c_waiting; -- Threads waiting on this condition
1858 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00001859
sewardj6072c362002-04-19 14:40:57 +00001860 -- Attribute for conditionally variables.
1861 typedef struct
1862 {
1863 int __dummy;
1864 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00001865
sewardj6072c362002-04-19 14:40:57 +00001866 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00001867
sewardj3b5d8862002-04-20 13:53:23 +00001868 We don't use any fields of pthread_cond_t for anything at all.
1869 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00001870
1871 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00001872 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00001873
sewardj77e466c2002-04-14 02:29:29 +00001874
sewardj3b5d8862002-04-20 13:53:23 +00001875static
1876void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
1877 Int n_to_release,
1878 Char* caller )
1879{
1880 Int i;
1881 Char msg_buf[100];
1882 pthread_mutex_t* mx;
1883
1884 while (True) {
1885 if (n_to_release == 0)
1886 return;
1887
1888 /* Find a thread waiting on this CV. */
1889 for (i = 1; i < VG_N_THREADS; i++) {
1890 if (vg_threads[i].status == VgTs_Empty)
1891 continue;
1892 if (vg_threads[i].status == VgTs_WaitCV
1893 && vg_threads[i].associated_cv == cond)
1894 break;
1895 }
1896 vg_assert(i <= VG_N_THREADS);
1897
1898 if (i == VG_N_THREADS) {
1899 /* Nobody else is waiting on it. */
1900 return;
1901 }
1902
1903 mx = vg_threads[i].associated_mx;
1904 vg_assert(mx != NULL);
1905
1906 if (mx->__m_owner == VG_INVALID_THREADID) {
1907 /* Currently unheld; hand it out to thread i. */
1908 vg_assert(mx->__m_count == 0);
1909 vg_threads[i].status = VgTs_Runnable;
1910 vg_threads[i].associated_cv = NULL;
1911 vg_threads[i].associated_mx = NULL;
1912 mx->__m_owner = (_pthread_descr)i;
1913 mx->__m_count = 1;
1914 vg_threads[i].m_edx = 0; /* pthread_cond_wait returns success */
1915
1916 if (VG_(clo_trace_pthread_level) >= 1) {
1917 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
1918 caller, cond, mx );
1919 print_pthread_event(i, msg_buf);
1920 }
1921
1922 } else {
1923 /* Currently held. Make thread i be blocked on it. */
1924 vg_threads[i].status = VgTs_WaitMX;
1925 vg_threads[i].associated_cv = NULL;
1926 vg_threads[i].associated_mx = mx;
1927
1928 if (VG_(clo_trace_pthread_level) >= 1) {
1929 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
1930 caller, cond, mx );
1931 print_pthread_event(i, msg_buf);
1932 }
1933
1934 }
1935
1936 n_to_release--;
1937 }
1938}
1939
1940
/* Implement pthread_cond_wait() on behalf of thread 'tid': validate
   'mutex' and 'cond', verify that 'tid' holds the mutex, then
   atomically (from the client's viewpoint) move 'tid' onto the CV
   and release the mutex.  Error results (EINVAL) go in %EDX; on the
   blocking path no result is assigned -- it is written when the
   thread is woken by release_N_threads_waiting_on_cond(). */
static
void do_pthread_cond_wait ( ThreadId tid,
                            pthread_cond_t *cond,
                            pthread_mutex_t *mutex )
{
   Char msg_buf[100];

   /* pre: mutex should be a valid mutex and owned by tid. */
   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p ...",
                            cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (mutex == NULL || cond == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* More paranoia: reject unknown mutex kinds and negative
      (corrupted) lock counts. */
   switch (mutex->__m_kind) {
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         vg_threads[tid].m_edx = EINVAL;
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* Queue ourselves on the condition.  NOTE: this must happen
      BEFORE releasing the mutex below -- the release scans for
      VgTs_WaitMX threads, so by being in VgTs_WaitCV we cannot be
      (wrongly) chosen to receive the mutex we are giving up. */
   vg_threads[tid].status = VgTs_WaitCV;
   vg_threads[tid].associated_cv = cond;
   vg_threads[tid].associated_mx = mutex;

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf,
                   "pthread_cond_wait cv %p, mx %p: BLOCK",
                   cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Release the mutex, waking at most one VgTs_WaitMX waiter. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
}
1999
2000
2001static
2002void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2003 Bool broadcast,
2004 pthread_cond_t *cond )
2005{
2006 Char msg_buf[100];
2007 Char* caller
2008 = broadcast ? "pthread_cond_broadcast"
2009 : "pthread_cond_signal ";
2010
2011 if (VG_(clo_trace_pthread_level) >= 2) {
2012 VG_(sprintf)(msg_buf, "%s cv %p ...",
2013 caller, cond );
2014 print_pthread_event(tid, msg_buf);
2015 }
2016
2017 /* Paranoia ... */
2018 vg_assert(is_valid_tid(tid)
2019 && vg_threads[tid].status == VgTs_Runnable);
2020
2021 if (cond == NULL) {
2022 vg_threads[tid].m_edx = EINVAL;
2023 return;
2024 }
2025
2026 release_N_threads_waiting_on_cond (
2027 cond,
2028 broadcast ? VG_N_THREADS : 1,
2029 caller
2030 );
2031
2032 vg_threads[tid].m_edx = 0; /* success */
2033}
2034
sewardj77e466c2002-04-14 02:29:29 +00002035
sewardje663cb92002-04-12 10:26:32 +00002036/* ---------------------------------------------------------------------
2037 Handle non-trivial client requests.
2038 ------------------------------------------------------------------ */
2039
2040static
2041void do_nontrivial_clientreq ( ThreadId tid )
2042{
2043 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
2044 UInt req_no = arg[0];
2045 switch (req_no) {
2046
2047 case VG_USERREQ__PTHREAD_CREATE:
2048 do_pthread_create( tid,
2049 (pthread_t*)arg[1],
2050 (pthread_attr_t*)arg[2],
2051 (void*(*)(void*))arg[3],
2052 (void*)arg[4] );
2053 break;
2054
sewardjbc5b99f2002-04-13 00:08:51 +00002055 case VG_USERREQ__PTHREAD_RETURNS:
2056 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00002057 break;
2058
2059 case VG_USERREQ__PTHREAD_JOIN:
2060 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
2061 break;
2062
sewardje663cb92002-04-12 10:26:32 +00002063 case VG_USERREQ__PTHREAD_MUTEX_LOCK:
2064 do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) );
2065 break;
2066
2067 case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
2068 do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
2069 break;
2070
sewardje663cb92002-04-12 10:26:32 +00002071 case VG_USERREQ__PTHREAD_CANCEL:
2072 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
2073 break;
2074
sewardj3b5d8862002-04-20 13:53:23 +00002075 case VG_USERREQ__PTHREAD_EXIT:
2076 do_pthread_exit( tid, (void*)(arg[1]) );
2077 break;
2078
2079 case VG_USERREQ__PTHREAD_COND_WAIT:
2080 do_pthread_cond_wait( tid,
2081 (pthread_cond_t *)(arg[1]),
2082 (pthread_mutex_t *)(arg[2]) );
2083 break;
2084
2085 case VG_USERREQ__PTHREAD_COND_SIGNAL:
2086 do_pthread_cond_signal_or_broadcast(
2087 tid,
2088 False, /* signal, not broadcast */
2089 (pthread_cond_t *)(arg[1]) );
2090 break;
2091
2092 case VG_USERREQ__PTHREAD_COND_BROADCAST:
2093 do_pthread_cond_signal_or_broadcast(
2094 tid,
2095 True, /* broadcast, not signal */
2096 (pthread_cond_t *)(arg[1]) );
2097 break;
2098
sewardje663cb92002-04-12 10:26:32 +00002099 case VG_USERREQ__MAKE_NOACCESS:
2100 case VG_USERREQ__MAKE_WRITABLE:
2101 case VG_USERREQ__MAKE_READABLE:
2102 case VG_USERREQ__DISCARD:
2103 case VG_USERREQ__CHECK_WRITABLE:
2104 case VG_USERREQ__CHECK_READABLE:
2105 case VG_USERREQ__MAKE_NOACCESS_STACK:
2106 case VG_USERREQ__RUNNING_ON_VALGRIND:
2107 case VG_USERREQ__DO_LEAK_CHECK:
sewardj8c824512002-04-14 04:16:48 +00002108 vg_threads[tid].m_edx
2109 = VG_(handle_client_request) ( &vg_threads[tid], arg );
sewardje663cb92002-04-12 10:26:32 +00002110 break;
2111
sewardj77e466c2002-04-14 02:29:29 +00002112 case VG_USERREQ__SIGNAL_RETURNS:
2113 handle_signal_return(tid);
2114 break;
sewardj54cacf02002-04-12 23:24:59 +00002115
sewardje663cb92002-04-12 10:26:32 +00002116 default:
2117 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
2118 VG_(panic)("handle_private_client_pthread_request: "
2119 "unknown request");
2120 /*NOTREACHED*/
2121 break;
2122 }
2123}
2124
2125
sewardj6072c362002-04-19 14:40:57 +00002126/* ---------------------------------------------------------------------
2127 Sanity checking.
2128 ------------------------------------------------------------------ */
2129
2130/* Internal consistency checks on the sched/pthread structures. */
2131static
2132void scheduler_sanity ( void )
2133{
sewardj3b5d8862002-04-20 13:53:23 +00002134 pthread_mutex_t* mx;
2135 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002136 Int i;
2137 /* VG_(printf)("scheduler_sanity\n"); */
2138 for (i = 1; i < VG_N_THREADS; i++) {
sewardj3b5d8862002-04-20 13:53:23 +00002139 mx = vg_threads[i].associated_mx;
2140 cv = vg_threads[i].associated_cv;
sewardj6072c362002-04-19 14:40:57 +00002141 if (vg_threads[i].status == VgTs_WaitMX) {
sewardj3b5d8862002-04-20 13:53:23 +00002142 vg_assert(cv == NULL);
2143 vg_assert(mx != NULL);
2144 vg_assert(mx->__m_count > 0);
2145 vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
2146 vg_assert(i != (ThreadId)mx->__m_owner);
2147 /* otherwise thread i would be deadlocked. */
2148 } else
2149 if (vg_threads[i].status == VgTs_WaitCV) {
2150 vg_assert(cv != NULL);
2151 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002152 } else {
sewardj3b5d8862002-04-20 13:53:23 +00002153 vg_assert(cv == NULL);
2154 vg_assert(mx == NULL);
sewardj6072c362002-04-19 14:40:57 +00002155 }
2156 }
2157}
2158
2159
sewardje663cb92002-04-12 10:26:32 +00002160/*--------------------------------------------------------------------*/
2161/*--- end vg_scheduler.c ---*/
2162/*--------------------------------------------------------------------*/