blob: 68dbf19a4f6d2e0e4f8b0e17cc9256d7584a39f9 [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
33
34#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
35 VG_USERREQ__DO_LEAK_CHECK */
36
sewardj77e466c2002-04-14 02:29:29 +000037/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000038
sewardj77e466c2002-04-14 02:29:29 +000039Note! This pthreads implementation is so poor as to not be
40suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000041
sewardj77e466c2002-04-14 02:29:29 +000042- Currently, when a signal is run, just the ThreadStatus.status fields
43 are saved in the signal frame, along with the CPU state. Question:
44 should I also save and restore:
45 ThreadStatus.joiner
46 ThreadStatus.waited_on_mid
47 ThreadStatus.awaken_at
48 ThreadStatus.retval
49 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000050
sewardj77e466c2002-04-14 02:29:29 +000051- Signals interrupting read/write and nanosleep: SA_RESTART settings.
52 Read/write correctly return with EINTR when SA_RESTART isn't
53 specified and they are interrupted by a signal. nanosleep just
54 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000055
sewardj75fe1892002-04-14 02:46:33 +000056- Read/write syscall starts: don't crap out when the initial
57 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000058
sewardj9a199dc2002-04-14 13:01:38 +000059- Get rid of restrictions re use of sigaltstack; they are no longer
60 needed.
61
sewardj6072c362002-04-19 14:40:57 +000062- Fix signals properly, so that each thread has its own blocking mask.
63 Currently this isn't done, and (worse?) signals are delivered to
64 Thread 1 (the root thread) regardless.
65
66 So, what's the deal with signals and mutexes? If a thread is
67 blocked on a mutex, or for a condition variable for that matter, can
68 signals still be delivered to it? This has serious consequences --
69 deadlocks, etc.
70
sewardje462e202002-04-13 04:09:07 +000071*/
sewardje663cb92002-04-12 10:26:32 +000072
73
74/* ---------------------------------------------------------------------
75 Types and globals for the scheduler.
76 ------------------------------------------------------------------ */
77
78/* type ThreadId is defined in vg_include.h. */
79
80/* struct ThreadState is defined in vg_include.h. */
81
sewardj6072c362002-04-19 14:40:57 +000082/* Private globals. A statically allocated array of threads. NOTE:
83 [0] is never used, to simplify the simulation of initialisers for
84 LinuxThreads. */
sewardje663cb92002-04-12 10:26:32 +000085static ThreadState vg_threads[VG_N_THREADS];
86
sewardj1e8cdc92002-04-18 11:37:52 +000087/* The tid of the thread currently in VG_(baseBlock). */
88static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
89
sewardje663cb92002-04-12 10:26:32 +000090
91/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
92jmp_buf VG_(scheduler_jmpbuf);
93/* ... and if so, here's the signal which caused it to do so. */
94Int VG_(longjmpd_on_signal);
95
96
97/* Machinery to keep track of which threads are waiting on which
98 fds. */
99typedef
100 struct {
101 /* The thread which made the request. */
102 ThreadId tid;
103
104 /* The next two fields describe the request. */
105 /* File descriptor waited for. -1 means this slot is not in use */
106 Int fd;
107 /* The syscall number the fd is used in. */
108 Int syscall_no;
109
110 /* False => still waiting for select to tell us the fd is ready
111 to go. True => the fd is ready, but the results have not yet
112 been delivered back to the calling thread. Once the latter
113 happens, this entire record is marked as no longer in use, by
114 making the fd field be -1. */
115 Bool ready;
116 }
117 VgWaitedOnFd;
118
119static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
120
121
sewardje663cb92002-04-12 10:26:32 +0000122/* Forwards */
123static void do_nontrivial_clientreq ( ThreadId tid );
124
sewardj6072c362002-04-19 14:40:57 +0000125static void scheduler_sanity ( void );
126
sewardje663cb92002-04-12 10:26:32 +0000127
128/* ---------------------------------------------------------------------
129 Helper functions for the scheduler.
130 ------------------------------------------------------------------ */
131
sewardj604ec3c2002-04-18 22:38:41 +0000132static __inline__
133Bool is_valid_tid ( ThreadId tid )
134{
135 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000136 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000137 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000138 return True;
139}
140
141
sewardj1e8cdc92002-04-18 11:37:52 +0000142/* For constructing error messages only: try and identify a thread
143 whose stack this address currently falls within, or return
144 VG_INVALID_THREADID if it doesn't. A small complication is dealing
145 with any currently VG_(baseBlock)-resident thread.
146*/
147ThreadId VG_(identify_stack_addr)( Addr a )
148{
149 ThreadId tid, tid_to_skip;
150
151 tid_to_skip = VG_INVALID_THREADID;
152
153 /* First check to see if there's a currently-loaded thread in
154 VG_(baseBlock). */
155 if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
156 tid = vg_tid_currently_in_baseBlock;
157 if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
158 && a <= vg_threads[tid].stack_highest_word)
159 return tid;
160 else
161 tid_to_skip = tid;
162 }
163
sewardj6072c362002-04-19 14:40:57 +0000164 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj1e8cdc92002-04-18 11:37:52 +0000165 if (vg_threads[tid].status == VgTs_Empty) continue;
166 if (tid == tid_to_skip) continue;
167 if (vg_threads[tid].m_esp <= a
168 && a <= vg_threads[tid].stack_highest_word)
169 return tid;
170 }
171 return VG_INVALID_THREADID;
172}
173
174
sewardj15a43e12002-04-17 19:35:12 +0000175/* Print the scheduler status. */
176void VG_(pp_sched_status) ( void )
sewardje663cb92002-04-12 10:26:32 +0000177{
178 Int i;
179 VG_(printf)("\nsched status:\n");
sewardj6072c362002-04-19 14:40:57 +0000180 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000181 if (vg_threads[i].status == VgTs_Empty) continue;
sewardj15a43e12002-04-17 19:35:12 +0000182 VG_(printf)("\nThread %d: status = ", i);
sewardje663cb92002-04-12 10:26:32 +0000183 switch (vg_threads[i].status) {
sewardj6072c362002-04-19 14:40:57 +0000184 case VgTs_Runnable: VG_(printf)("Runnable"); break;
185 case VgTs_WaitFD: VG_(printf)("WaitFD"); break;
186 case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
sewardje663cb92002-04-12 10:26:32 +0000187 vg_threads[i].joiner); break;
sewardj6072c362002-04-19 14:40:57 +0000188 case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
189 case VgTs_Sleeping: VG_(printf)("Sleeping"); break;
190 case VgTs_WaitMX: VG_(printf)("WaitMX"); break;
sewardj3b5d8862002-04-20 13:53:23 +0000191 case VgTs_WaitCV: VG_(printf)("WaitCV"); break;
sewardje663cb92002-04-12 10:26:32 +0000192 default: VG_(printf)("???"); break;
193 }
sewardj3b5d8862002-04-20 13:53:23 +0000194 VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
195 vg_threads[i].associated_mx,
196 vg_threads[i].associated_cv );
sewardj15a43e12002-04-17 19:35:12 +0000197 VG_(pp_ExeContext)(
198 VG_(get_ExeContext)( False, vg_threads[i].m_eip,
199 vg_threads[i].m_ebp ));
sewardje663cb92002-04-12 10:26:32 +0000200 }
201 VG_(printf)("\n");
202}
203
204static
205void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
206{
207 Int i;
208
209 vg_assert(fd != -1); /* avoid total chaos */
210
211 for (i = 0; i < VG_N_WAITING_FDS; i++)
212 if (vg_waiting_fds[i].fd == -1)
213 break;
214
215 if (i == VG_N_WAITING_FDS)
216 VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
217 /*
218 VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
219 tid, fd, i);
220 */
221 vg_waiting_fds[i].fd = fd;
222 vg_waiting_fds[i].tid = tid;
223 vg_waiting_fds[i].ready = False;
224 vg_waiting_fds[i].syscall_no = syscall_no;
225}
226
227
228
229static
230void print_sched_event ( ThreadId tid, Char* what )
231{
sewardj45b4b372002-04-16 22:50:32 +0000232 VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what );
sewardj8937c812002-04-12 20:12:20 +0000233}
234
235
236static
237void print_pthread_event ( ThreadId tid, Char* what )
238{
239 VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
sewardje663cb92002-04-12 10:26:32 +0000240}
241
242
243static
244Char* name_of_sched_event ( UInt event )
245{
246 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000247 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
248 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
249 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
250 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
251 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
252 default: return "??UNKNOWN??";
253 }
254}
255
256
257/* Create a translation of the client basic block beginning at
258 orig_addr, and add it to the translation cache & translation table.
259 This probably doesn't really belong here, but, hey ...
260*/
sewardj1e8cdc92002-04-18 11:37:52 +0000261static
262void create_translation_for ( ThreadId tid, Addr orig_addr )
sewardje663cb92002-04-12 10:26:32 +0000263{
264 Addr trans_addr;
265 TTEntry tte;
266 Int orig_size, trans_size;
267 /* Ensure there is space to hold a translation. */
268 VG_(maybe_do_lru_pass)();
sewardj1e8cdc92002-04-18 11:37:52 +0000269 VG_(translate)( &vg_threads[tid],
270 orig_addr, &orig_size, &trans_addr, &trans_size );
sewardje663cb92002-04-12 10:26:32 +0000271 /* Copy data at trans_addr into the translation cache.
272 Returned pointer is to the code, not to the 4-byte
273 header. */
274 /* Since the .orig_size and .trans_size fields are
275 UShort, be paranoid. */
276 vg_assert(orig_size > 0 && orig_size < 65536);
277 vg_assert(trans_size > 0 && trans_size < 65536);
278 tte.orig_size = orig_size;
279 tte.orig_addr = orig_addr;
280 tte.trans_size = trans_size;
281 tte.trans_addr = VG_(copy_to_transcache)
282 ( trans_addr, trans_size );
283 tte.mru_epoch = VG_(current_epoch);
284 /* Free the intermediary -- was allocated by VG_(emit_code). */
285 VG_(jitfree)( (void*)trans_addr );
286 /* Add to trans tab and set back pointer. */
287 VG_(add_to_trans_tab) ( &tte );
288 /* Update stats. */
289 VG_(this_epoch_in_count) ++;
290 VG_(this_epoch_in_osize) += orig_size;
291 VG_(this_epoch_in_tsize) += trans_size;
292 VG_(overall_in_count) ++;
293 VG_(overall_in_osize) += orig_size;
294 VG_(overall_in_tsize) += trans_size;
295 /* Record translated area for SMC detection. */
296 VG_(smc_mark_original) ( orig_addr, orig_size );
297}
298
299
300/* Allocate a completely empty ThreadState record. */
301static
302ThreadId vg_alloc_ThreadState ( void )
303{
304 Int i;
sewardj6072c362002-04-19 14:40:57 +0000305 for (i = 1; i < VG_N_THREADS; i++) {
sewardje663cb92002-04-12 10:26:32 +0000306 if (vg_threads[i].status == VgTs_Empty)
307 return i;
308 }
309 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
310 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
311 VG_(panic)("VG_N_THREADS is too low");
312 /*NOTREACHED*/
313}
314
315
316ThreadState* VG_(get_thread_state) ( ThreadId tid )
317{
sewardj6072c362002-04-19 14:40:57 +0000318 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000319 vg_assert(vg_threads[tid].status != VgTs_Empty);
320 return & vg_threads[tid];
321}
322
323
sewardj1e8cdc92002-04-18 11:37:52 +0000324ThreadState* VG_(get_current_thread_state) ( void )
325{
326 vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
sewardj6072c362002-04-19 14:40:57 +0000327 return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
sewardj1e8cdc92002-04-18 11:37:52 +0000328}
329
330
331ThreadId VG_(get_current_tid) ( void )
332{
333 vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
334 return vg_tid_currently_in_baseBlock;
335}
336
337
sewardje663cb92002-04-12 10:26:32 +0000338/* Copy the saved state of a thread into VG_(baseBlock), ready for it
339 to be run. */
340__inline__
341void VG_(load_thread_state) ( ThreadId tid )
342{
343 Int i;
sewardj1e8cdc92002-04-18 11:37:52 +0000344 vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
345
sewardje663cb92002-04-12 10:26:32 +0000346 VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
347 VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
348 VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
349 VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
350 VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
351 VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
352 VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
353 VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
354 VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
355 VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;
356
357 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
358 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];
359
360 VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
361 VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
362 VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
363 VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
364 VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
365 VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
366 VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
367 VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
368 VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;
sewardj1e8cdc92002-04-18 11:37:52 +0000369
370 vg_tid_currently_in_baseBlock = tid;
sewardje663cb92002-04-12 10:26:32 +0000371}
372
373
374/* Copy the state of a thread from VG_(baseBlock), presumably after it
375 has been descheduled. For sanity-check purposes, fill the vacated
376 VG_(baseBlock) with garbage so as to make the system more likely to
377 fail quickly if we erroneously continue to poke around inside
378 VG_(baseBlock) without first doing a load_thread_state().
379*/
380__inline__
381void VG_(save_thread_state) ( ThreadId tid )
382{
383 Int i;
384 const UInt junk = 0xDEADBEEF;
385
sewardj1e8cdc92002-04-18 11:37:52 +0000386 vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
387
sewardje663cb92002-04-12 10:26:32 +0000388 vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
389 vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
390 vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
391 vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
392 vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
393 vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
394 vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
395 vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
396 vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
397 vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
398
399 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
400 vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
401
402 vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
403 vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
404 vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
405 vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
406 vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
407 vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
408 vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
409 vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
410 vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
411
412 /* Fill it up with junk. */
413 VG_(baseBlock)[VGOFF_(m_eax)] = junk;
414 VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
415 VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
416 VG_(baseBlock)[VGOFF_(m_edx)] = junk;
417 VG_(baseBlock)[VGOFF_(m_esi)] = junk;
418 VG_(baseBlock)[VGOFF_(m_edi)] = junk;
419 VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
420 VG_(baseBlock)[VGOFF_(m_esp)] = junk;
421 VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
422 VG_(baseBlock)[VGOFF_(m_eip)] = junk;
423
424 for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
425 VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;
sewardj1e8cdc92002-04-18 11:37:52 +0000426
427 vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
sewardje663cb92002-04-12 10:26:32 +0000428}
429
430
431/* Run the thread tid for a while, and return a VG_TRC_* value to the
432 scheduler indicating what happened. */
sewardj6072c362002-04-19 14:40:57 +0000433static
sewardje663cb92002-04-12 10:26:32 +0000434UInt run_thread_for_a_while ( ThreadId tid )
435{
436 UInt trc = 0;
sewardj6072c362002-04-19 14:40:57 +0000437 vg_assert(is_valid_tid(tid));
438 vg_assert(vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +0000439 vg_assert(VG_(bbs_to_go) > 0);
440
441 VG_(load_thread_state) ( tid );
442 if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
443 /* try this ... */
444 trc = VG_(run_innerloop)();
445 /* We get here if the client didn't take a fault. */
446 } else {
447 /* We get here if the client took a fault, which caused our
448 signal handler to longjmp. */
449 vg_assert(trc == 0);
450 trc = VG_TRC_UNRESUMABLE_SIGNAL;
451 }
452 VG_(save_thread_state) ( tid );
453 return trc;
454}
455
456
457/* Increment the LRU epoch counter. */
458static
459void increment_epoch ( void )
460{
461 VG_(current_epoch)++;
462 if (VG_(clo_verbosity) > 2) {
463 UInt tt_used, tc_used;
464 VG_(get_tt_tc_used) ( &tt_used, &tc_used );
465 VG_(message)(Vg_UserMsg,
466 "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
467 VG_(bbs_done),
468 VG_(this_epoch_in_count),
469 VG_(this_epoch_in_osize),
470 VG_(this_epoch_in_tsize),
471 VG_(this_epoch_out_count),
472 VG_(this_epoch_out_osize),
473 VG_(this_epoch_out_tsize),
474 tt_used, tc_used
475 );
476 }
477 VG_(this_epoch_in_count) = 0;
478 VG_(this_epoch_in_osize) = 0;
479 VG_(this_epoch_in_tsize) = 0;
480 VG_(this_epoch_out_count) = 0;
481 VG_(this_epoch_out_osize) = 0;
482 VG_(this_epoch_out_tsize) = 0;
483}
484
485
486/* Initialise the scheduler. Create a single "main" thread ready to
sewardj6072c362002-04-19 14:40:57 +0000487 run, with special ThreadId of one. This is called at startup; the
   caller takes care to park the client's state in
   VG_(baseBlock).
490*/
491void VG_(scheduler_init) ( void )
492{
493 Int i;
494 Addr startup_esp;
495 ThreadId tid_main;
496
497 startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
498 if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
sewardj9a199dc2002-04-14 13:01:38 +0000499 VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
500 (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
sewardje663cb92002-04-12 10:26:32 +0000501 VG_(panic)("unexpected %esp at startup");
502 }
503
sewardj6072c362002-04-19 14:40:57 +0000504 for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
505 vg_threads[i].status = VgTs_Empty;
sewardje663cb92002-04-12 10:26:32 +0000506 vg_threads[i].stack_size = 0;
507 vg_threads[i].stack_base = (Addr)NULL;
sewardj1e8cdc92002-04-18 11:37:52 +0000508 vg_threads[i].tid = i;
sewardje663cb92002-04-12 10:26:32 +0000509 }
510
511 for (i = 0; i < VG_N_WAITING_FDS; i++)
512 vg_waiting_fds[i].fd = -1; /* not in use */
513
sewardje663cb92002-04-12 10:26:32 +0000514 /* Assert this is thread zero, which has certain magic
515 properties. */
516 tid_main = vg_alloc_ThreadState();
sewardj6072c362002-04-19 14:40:57 +0000517 vg_assert(tid_main == 1);
sewardje663cb92002-04-12 10:26:32 +0000518
sewardj3b5d8862002-04-20 13:53:23 +0000519 vg_threads[tid_main].status = VgTs_Runnable;
520 vg_threads[tid_main].joiner = VG_INVALID_THREADID;
521 vg_threads[tid_main].associated_mx = NULL;
522 vg_threads[tid_main].associated_cv = NULL;
523 vg_threads[tid_main].retval = NULL; /* not important */
sewardj1e8cdc92002-04-18 11:37:52 +0000524 vg_threads[tid_main].stack_highest_word
525 = vg_threads[tid_main].m_esp /* -4 ??? */;
sewardje663cb92002-04-12 10:26:32 +0000526
527 /* Copy VG_(baseBlock) state to tid_main's slot. */
sewardj1e8cdc92002-04-18 11:37:52 +0000528 vg_tid_currently_in_baseBlock = tid_main;
sewardje663cb92002-04-12 10:26:32 +0000529 VG_(save_thread_state) ( tid_main );
sewardj1e8cdc92002-04-18 11:37:52 +0000530
531 /* So now ... */
532 vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
sewardje663cb92002-04-12 10:26:32 +0000533}
534
535
536/* What if fd isn't a valid fd? */
537static
538void set_fd_nonblocking ( Int fd )
539{
540 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
541 vg_assert(!VG_(is_kerror)(res));
542 res |= VKI_O_NONBLOCK;
543 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
544 vg_assert(!VG_(is_kerror)(res));
545}
546
547static
548void set_fd_blocking ( Int fd )
549{
550 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
551 vg_assert(!VG_(is_kerror)(res));
552 res &= ~VKI_O_NONBLOCK;
553 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
554 vg_assert(!VG_(is_kerror)(res));
555}
556
557static
558Bool fd_is_blockful ( Int fd )
559{
560 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
561 vg_assert(!VG_(is_kerror)(res));
562 return (res & VKI_O_NONBLOCK) ? False : True;
563}
564
565
566
567/* Do a purely thread-local request for tid, and put the result in its
568 %EDX, without changing its scheduling state in any way, nor that of
569 any other threads. Return True if so.
570
571 If the request is non-trivial, return False; a more capable but
572 slower mechanism will deal with it.
573*/
574static
575Bool maybe_do_trivial_clientreq ( ThreadId tid )
576{
577# define SIMPLE_RETURN(vvv) \
sewardj8c824512002-04-14 04:16:48 +0000578 { tst->m_edx = (vvv); \
sewardje663cb92002-04-12 10:26:32 +0000579 return True; \
580 }
581
sewardj8c824512002-04-14 04:16:48 +0000582 ThreadState* tst = &vg_threads[tid];
583 UInt* arg = (UInt*)(tst->m_eax);
584 UInt req_no = arg[0];
585
sewardje663cb92002-04-12 10:26:32 +0000586 switch (req_no) {
587 case VG_USERREQ__MALLOC:
588 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000589 (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
sewardje663cb92002-04-12 10:26:32 +0000590 );
591 case VG_USERREQ__BUILTIN_NEW:
592 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000593 (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
sewardje663cb92002-04-12 10:26:32 +0000594 );
595 case VG_USERREQ__BUILTIN_VEC_NEW:
596 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000597 (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
sewardje663cb92002-04-12 10:26:32 +0000598 );
599 case VG_USERREQ__FREE:
sewardj8c824512002-04-14 04:16:48 +0000600 VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
sewardje663cb92002-04-12 10:26:32 +0000601 SIMPLE_RETURN(0); /* irrelevant */
602 case VG_USERREQ__BUILTIN_DELETE:
sewardj8c824512002-04-14 04:16:48 +0000603 VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
sewardje663cb92002-04-12 10:26:32 +0000604 SIMPLE_RETURN(0); /* irrelevant */
605 case VG_USERREQ__BUILTIN_VEC_DELETE:
sewardj8c824512002-04-14 04:16:48 +0000606 VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
sewardje663cb92002-04-12 10:26:32 +0000607 SIMPLE_RETURN(0); /* irrelevant */
608 case VG_USERREQ__CALLOC:
609 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000610 (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
sewardje663cb92002-04-12 10:26:32 +0000611 );
612 case VG_USERREQ__REALLOC:
613 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000614 (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
sewardje663cb92002-04-12 10:26:32 +0000615 );
616 case VG_USERREQ__MEMALIGN:
617 SIMPLE_RETURN(
sewardj8c824512002-04-14 04:16:48 +0000618 (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
sewardje663cb92002-04-12 10:26:32 +0000619 );
sewardj9650c992002-04-16 03:44:31 +0000620
621 /* These are heavily used. */
622 case VG_USERREQ__PTHREAD_GET_THREADID:
623 SIMPLE_RETURN(tid);
624 case VG_USERREQ__RUNNING_ON_VALGRIND:
625 SIMPLE_RETURN(1);
sewardj45b4b372002-04-16 22:50:32 +0000626 case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
627 SIMPLE_RETURN(VG_(clo_trace_pthread_level));
sewardj9650c992002-04-16 03:44:31 +0000628
sewardje663cb92002-04-12 10:26:32 +0000629 default:
630 /* Too hard; wimp out. */
631 return False;
632 }
633# undef SIMPLE_RETURN
634}
635
636
sewardj6072c362002-04-19 14:40:57 +0000637/* vthread tid is returning from a signal handler; modify its
638 stack/regs accordingly. */
639static
640void handle_signal_return ( ThreadId tid )
641{
642 Char msg_buf[100];
643 Bool restart_blocked_syscalls;
644
645 vg_assert(is_valid_tid(tid));
646
647 restart_blocked_syscalls = VG_(signal_returns)(tid);
648
649 if (restart_blocked_syscalls)
650 /* Easy; we don't have to do anything. */
651 return;
652
653 if (vg_threads[tid].status == VgTs_WaitFD) {
654 vg_assert(vg_threads[tid].m_eax == __NR_read
655 || vg_threads[tid].m_eax == __NR_write);
656 /* read() or write() interrupted. Force a return with EINTR. */
657 vg_threads[tid].m_eax = -VKI_EINTR;
658 vg_threads[tid].status = VgTs_Runnable;
659 if (VG_(clo_trace_sched)) {
660 VG_(sprintf)(msg_buf,
661 "read() / write() interrupted by signal; return EINTR" );
662 print_sched_event(tid, msg_buf);
663 }
664 return;
665 }
666
667 if (vg_threads[tid].status == VgTs_WaitFD) {
668 vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
669 /* We interrupted a nanosleep(). The right thing to do is to
670 write the unused time to nanosleep's second param and return
671 EINTR, but I'm too lazy for that. */
672 return;
673 }
674
675 /* All other cases? Just return. */
676}
677
678
sewardje663cb92002-04-12 10:26:32 +0000679static
680void sched_do_syscall ( ThreadId tid )
681{
682 UInt saved_eax;
683 UInt res, syscall_no;
684 UInt fd;
685 Bool might_block, assumed_nonblocking;
686 Bool orig_fd_blockness;
687 Char msg_buf[100];
688
sewardj6072c362002-04-19 14:40:57 +0000689 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +0000690 vg_assert(vg_threads[tid].status == VgTs_Runnable);
691
692 syscall_no = vg_threads[tid].m_eax; /* syscall number */
693
694 if (syscall_no == __NR_nanosleep) {
695 ULong t_now, t_awaken;
696 struct vki_timespec* req;
697 req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
698 t_now = VG_(read_microsecond_timer)();
699 t_awaken
700 = t_now
701 + (ULong)1000000ULL * (ULong)(req->tv_sec)
702 + (ULong)( (UInt)(req->tv_nsec) / 1000 );
703 vg_threads[tid].status = VgTs_Sleeping;
704 vg_threads[tid].awaken_at = t_awaken;
sewardj8937c812002-04-12 20:12:20 +0000705 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +0000706 VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu",
707 t_now, t_awaken-t_now);
708 print_sched_event(tid, msg_buf);
709 }
710 /* Force the scheduler to run something else for a while. */
711 return;
712 }
713
714 switch (syscall_no) {
715 case __NR_read:
716 case __NR_write:
717 assumed_nonblocking
718 = False;
719 might_block
720 = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
721 break;
722 default:
723 might_block = False;
724 assumed_nonblocking = True;
725 }
726
727 if (assumed_nonblocking) {
728 /* We think it's non-blocking. Just do it in the normal way. */
729 VG_(perform_assumed_nonblocking_syscall)(tid);
730 /* The thread is still runnable. */
731 return;
732 }
733
734 /* It might block. Take evasive action. */
735 switch (syscall_no) {
736 case __NR_read:
737 case __NR_write:
738 fd = vg_threads[tid].m_ebx; break;
739 default:
740 vg_assert(3+3 == 7);
741 }
742
743 /* Set the fd to nonblocking, and do the syscall, which will return
744 immediately, in order to lodge a request with the Linux kernel.
745 We later poll for I/O completion using select(). */
746
747 orig_fd_blockness = fd_is_blockful(fd);
748 set_fd_nonblocking(fd);
749 vg_assert(!fd_is_blockful(fd));
750 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
751
752 /* This trashes the thread's %eax; we have to preserve it. */
753 saved_eax = vg_threads[tid].m_eax;
754 KERNEL_DO_SYSCALL(tid,res);
755
756 /* Restore original blockfulness of the fd. */
757 if (orig_fd_blockness)
758 set_fd_blocking(fd);
759 else
760 set_fd_nonblocking(fd);
761
762 if (res != -VKI_EWOULDBLOCK) {
763 /* It didn't block; it went through immediately. So finish off
764 in the normal way. Don't restore %EAX, since that now
765 (correctly) holds the result of the call. */
766 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
767 /* We're still runnable. */
768 vg_assert(vg_threads[tid].status == VgTs_Runnable);
769
770 } else {
771
772 /* It would have blocked. First, restore %EAX to what it was
773 before our speculative call. */
774 vg_threads[tid].m_eax = saved_eax;
775 /* Put this fd in a table of fds on which we are waiting for
776 completion. The arguments for select() later are constructed
777 from this table. */
778 add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
779 /* Deschedule thread until an I/O completion happens. */
780 vg_threads[tid].status = VgTs_WaitFD;
sewardj8937c812002-04-12 20:12:20 +0000781 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +0000782 VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
783 print_sched_event(tid, msg_buf);
784 }
785
786 }
787}
788
789
790/* Find out which of the fds in vg_waiting_fds are now ready to go, by
791 making enquiries with select(), and mark them as ready. We have to
   wait for the requesting threads to fall into the WaitFD state
793 before we can actually finally deliver the results, so this
794 procedure doesn't do that; complete_blocked_syscalls() does it.
795
796 It might seem odd that a thread which has done a blocking syscall
797 is not in WaitFD state; the way this can happen is if it initially
798 becomes WaitFD, but then a signal is delivered to it, so it becomes
799 Runnable for a while. In this case we have to wait for the
800 sighandler to return, whereupon the WaitFD state is resumed, and
801 only at that point can the I/O result be delivered to it. However,
802 this point may be long after the fd is actually ready.
803
804 So, poll_for_ready_fds() merely detects fds which are ready.
805 complete_blocked_syscalls() does the second half of the trick,
806 possibly much later: it delivers the results from ready fds to
807 threads in WaitFD state.
808*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t saved_procmask;   /* host signal mask saved around select() */
   vki_fd_set readfds;
   vki_fd_set writefds;
   vki_fd_set exceptfds;
   struct vki_timeval timeout;
   Int fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId tid;
   Bool rd_ok, wr_ok, ex_ok;
   Char msg_buf[100];

   struct vki_timespec* rem;
   ULong t_now;

   /* Awaken any sleeping threads whose sleep has expired.  First do a
      cheap probe: does any thread sleep at all?  If the loop runs off
      the end, tid == VG_N_THREADS and the timer read is skipped. */
   for (tid = 1; tid < VG_N_THREADS; tid++)
      if (vg_threads[tid].status == VgTs_Sleeping)
         break;

   /* Avoid pointless calls to VG_(read_microsecond_timer). */
   if (tid < VG_N_THREADS) {
      t_now = VG_(read_microsecond_timer)();
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         if (vg_threads[tid].status != VgTs_Sleeping)
            continue;
         if (t_now >= vg_threads[tid].awaken_at) {
            /* Resume this thread.  Set to zero the remaining-time
               (second) arg of nanosleep, since it's used up all its
               time. */
            vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
            rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
            if (rem != NULL) {
               rem->tv_sec = 0;
               rem->tv_nsec = 0;
            }
            /* Make the syscall return 0 (success). */
            vg_threads[tid].m_eax = 0;
            /* Reschedule this thread. */
            vg_threads[tid].status = VgTs_Runnable;
            if (VG_(clo_trace_sched)) {
               VG_(sprintf)(msg_buf, "at %lu: nanosleep done",
                                     t_now);
               print_sched_event(tid, msg_buf);
            }
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O.  A zero timeout makes the select() below a pure
      non-blocking poll. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   /* Build the fd sets to query from the not-yet-ready entries of
      vg_waiting_fds.  fd_max stays -1 if there is nothing to ask. */
   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));
      syscall_no = vg_waiting_fds[i].syscall_no;
      /* Only read- and write-style blocking syscalls are ever parked
         in vg_waiting_fds; anything else is a logic error. */
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
       VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      /* A given fd is expected to be ready for at most one kind of
         event, since each waiting-fd slot registered it in exactly
         one of the sets above. */
      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      /* NOTE(review): this takes the first slot matching fd; it
         assumes each fd appears at most once in vg_waiting_fds --
         TODO confirm, otherwise the assert below can fire. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready.  Delivery of the result to the waiting
         thread happens later, in complete_blocked_syscalls(). */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}
957
958
/* See comment attached to poll_for_ready_fds() for explanation.
   This is the second half of the blocked-syscall trick: for every
   waiting-fd slot which poll_for_ready_fds() has marked ready, and
   whose owning thread has actually reached the VgTs_WaitFD state,
   (re-)issue the original syscall -- which should now complete
   without blocking -- deliver its result, and make the thread
   runnable again. */
static
void complete_blocked_syscalls ( void )
{
   Int fd, i, res, syscall_no;
   ThreadId tid;
   Char msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result!  (It may instead
         be temporarily Runnable, running a signal handler; see the
         comment on poll_for_ready_fds().) */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      /* Run the syscall for real; its result lands in res and in the
         thread's saved %EAX. */
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}
1005
1006
1007static
1008void nanosleep_for_a_while ( void )
1009{
1010 Int res;
1011 struct vki_timespec req;
1012 struct vki_timespec rem;
1013 req.tv_sec = 0;
1014 req.tv_nsec = 20 * 1000 * 1000;
1015 res = VG_(nanosleep)( &req, &rem );
1016 /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */
1017 vg_assert(res == 0);
1018}
1019
1020
1021/* ---------------------------------------------------------------------
1022 The scheduler proper.
1023 ------------------------------------------------------------------ */
1024
1025/* Run user-space threads until either
1026 * Deadlock occurs
1027 * One thread asks to shutdown Valgrind
1028 * The specified number of basic blocks has gone by.
1029*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;       /* current thread; round-robin cursor */
   UInt trc;                     /* thread-return code from the dispatcher */
   UInt dispatch_ctr_SAVED;      /* bb budget granted, for accounting below */
   Int request_code, done_this_time, n_in_fdwait_or_sleep;
   Char msg_buf[100];
   Addr trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
      scheduler_sanity();

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour.  */
         VG_(deliver_signals)( 1 /*HACK*/ );
         VG_(do_sanity_checks)( 1 /*HACK*/, False );

         /* Try and find a thread (tid) to run: simple round-robin
            starting just after the thread that ran last.  Along the
            way, count threads blocked in fd-waits or sleeps, to
            distinguish "wait and retry" from true deadlock below. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid.  Loop here as long as the events
         that come back are trivial (fast-cache misses, trivial client
         requests); break out for anything that needs Phase 3. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason. First, update basic-block
         counters.  The "- 1" compensates for the dispatcher's
         decrement-before-test behaviour described above. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done) += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            /* Handled in the Phase-2 loop above; seeing it here means
               the loop logic is broken. */
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

   /* Reached only via the two "goto debug_stop"s above, when
      VG_(bbs_to_go) hits zero. */
 debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}
1329
1330
1331/* ---------------------------------------------------------------------
1332 The pthread implementation.
1333 ------------------------------------------------------------------ */
1334
1335#include <pthread.h>
1336#include <errno.h>
1337
1338#if !defined(PTHREAD_STACK_MIN)
1339# define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
1340#endif
1341
1342/* /usr/include/bits/pthreadtypes.h:
1343 typedef unsigned long int pthread_t;
1344*/
1345
sewardje663cb92002-04-12 10:26:32 +00001346
sewardj604ec3c2002-04-18 22:38:41 +00001347/* -----------------------------------------------------------
1348 Thread CREATION, JOINAGE and CANCELLATION.
1349 -------------------------------------------------------- */
1350
sewardje663cb92002-04-12 10:26:32 +00001351static
1352void do_pthread_cancel ( ThreadId tid_canceller,
1353 pthread_t tid_cancellee )
1354{
1355 Char msg_buf[100];
1356 /* We want make is appear that this thread has returned to
1357 do_pthread_create_bogusRA with PTHREAD_CANCELED as the
1358 return value. So: simple: put PTHREAD_CANCELED into %EAX
1359 and &do_pthread_create_bogusRA into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001360 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001361 VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
1362 print_sched_event(tid_cancellee, msg_buf);
1363 }
1364 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001365 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001366 vg_threads[tid_cancellee].status = VgTs_Runnable;
1367}
1368
1369
sewardj3b5d8862002-04-20 13:53:23 +00001370static
1371void do_pthread_exit ( ThreadId tid, void* retval )
1372{
1373 Char msg_buf[100];
1374 /* We want make is appear that this thread has returned to
1375 do_pthread_create_bogusRA with retval as the
1376 return value. So: simple: put retval into %EAX
1377 and &do_pthread_create_bogusRA into %EIP and keep going! */
1378 if (VG_(clo_trace_sched)) {
1379 VG_(sprintf)(msg_buf, "exiting with %p", retval);
1380 print_sched_event(tid, msg_buf);
1381 }
1382 vg_threads[tid].m_eax = (UInt)retval;
1383 vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1384 vg_threads[tid].status = VgTs_Runnable;
1385}
1386
sewardje663cb92002-04-12 10:26:32 +00001387
/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   retval is the thread's result.  The main complication here is to
   resume any thread waiting to join with this one: either park the
   result for a future joiner, or hand it straight to a joiner that
   is already blocked in pthread_join. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt* jnr_args;
   void** jnr_thread_return;
   Char msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   /* Park the result where a joiner can later pick it up. */
   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me; wait for one to arrive. */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free properly the slot (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(is_valid_tid(jnr));
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      /* jnr_args[2] is the joiner's thread_return argument; element 0
         is the client-request code (see VG_TRC_EBP_JMP_CLIENTREQ
         handling in VG_(scheduler)). */
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      /* Poison the dead thread's stack so stray accesses to it get
         reported (skip thread 0, which uses the client's own stack). */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}
1450
1451
/* Implement pthread_join for thread tid, joining on thread jee.
   Outcomes: EDEADLK for self-join; EINVAL for a bad or already-joined
   joinee; immediate success if the joinee has already finished
   (VgTs_WaitJoiner); otherwise block tid in VgTs_WaitJoinee until
   handle_pthread_return() completes the handshake.  Results are
   delivered in tid's %EDX, the client-request return register. */
static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   /* Joining with oneself would deadlock. */
   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* NOTE(review): if ThreadId is an unsigned type, the jee < 0 test
      is vacuous -- TODO confirm ThreadId's signedness. */
   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot. TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      /* Poison the dead joinee's stack (skip thread 0, which uses the
         client's own stack). */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee.  handle_pthread_return() will
      wake us and deliver the result when jee finishes. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
1521
1522
/* Implement pthread_create on behalf of parent_tid: allocate a
   thread slot, clone the parent's CPU state into it, give the child
   a stack if needed, and fake a call frame so the child starts at
   start_routine(arg) and "returns" to VG_(pthreadreturn_bogusRA).
   The new ThreadId is written to *thread; the parent's request
   returns 0 in %EDX.  attr is accepted but currently ignored (never
   read below). */
static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Addr new_stack;
   UInt new_stk_szb;
   ThreadId tid;
   Char msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(is_valid_tid(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
                     - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
   }

   /* Point %ESP just below the redzone at the top of the stack. */
   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   /* Make the redzone inaccessible so overruns are reported. */
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address, so that when start_routine
      returns, control lands in VG_(pthreadreturn_bogusRA) and the
      thread's exit is handled. */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   /* Mark the two pushed words (arg + return address) as readable. */
   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   //   if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   /* Fresh thread: not waiting on any mutex or condvar, no joiner
      yet, and ready to be scheduled. */
   vg_threads[tid].associated_mx = NULL;
   vg_threads[tid].associated_cv = NULL;
   vg_threads[tid].joiner = VG_INVALID_THREADID;
   vg_threads[tid].status = VgTs_Runnable;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}
1613
1614
sewardj604ec3c2002-04-18 22:38:41 +00001615/* -----------------------------------------------------------
1616 MUTEXes
1617 -------------------------------------------------------- */
1618
sewardj604ec3c2002-04-18 22:38:41 +00001619/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00001620 typedef struct
1621 {
1622 int __m_reserved; -- Reserved for future use
1623 int __m_count; -- Depth of recursive locking
1624 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1625 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1626 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1627 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001628
sewardj6072c362002-04-19 14:40:57 +00001629 #define PTHREAD_MUTEX_INITIALIZER \
1630 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1631 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1632 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1633 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1634 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1635 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1636 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001637
sewardj6072c362002-04-19 14:40:57 +00001638 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001639
sewardj6072c362002-04-19 14:40:57 +00001640 __m_kind never changes and indicates whether or not it is recursive.
1641
1642 __m_count indicates the lock count; if 0, the mutex is not owned by
1643 anybody.
1644
1645 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1646 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1647 statically initialised mutexes correctly appear
1648 to belong to nobody.
1649
1650 In summary, a not-in-use mutex is distinguished by having __m_owner
1651 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1652 conditions holds, the other should too.
1653
1654 There is no linked list of threads waiting for this mutex. Instead
1655 a thread in WaitMX state points at the mutex with its waited_on_mx
1656 field. This makes _unlock() inefficient, but simple to implement the
1657 right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001658
sewardj604ec3c2002-04-18 22:38:41 +00001659 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001660 deals with that for us.
1661*/
sewardje663cb92002-04-12 10:26:32 +00001662
sewardj3b5d8862002-04-20 13:53:23 +00001663/* Helper fns ... */
1664static
1665void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1666 Char* caller )
1667{
1668 Int i;
1669 Char msg_buf[100];
1670
1671 /* Find some arbitrary thread waiting on this mutex, and make it
1672 runnable. If none are waiting, mark the mutex as not held. */
1673 for (i = 1; i < VG_N_THREADS; i++) {
1674 if (vg_threads[i].status == VgTs_Empty)
1675 continue;
1676 if (vg_threads[i].status == VgTs_WaitMX
1677 && vg_threads[i].associated_mx == mutex)
1678 break;
1679 }
1680
1681 vg_assert(i <= VG_N_THREADS);
1682 if (i == VG_N_THREADS) {
1683 /* Nobody else is waiting on it. */
1684 mutex->__m_count = 0;
1685 mutex->__m_owner = VG_INVALID_THREADID;
1686 } else {
1687 /* Notionally transfer the hold to thread i, whose
1688 pthread_mutex_lock() call now returns with 0 (success). */
1689 /* The .count is already == 1. */
1690 vg_assert(vg_threads[i].associated_mx == mutex);
1691 mutex->__m_owner = (_pthread_descr)i;
1692 vg_threads[i].status = VgTs_Runnable;
1693 vg_threads[i].associated_mx = NULL;
1694 vg_threads[i].m_edx = 0; /* pth_lock() success */
1695
1696 if (VG_(clo_trace_pthread_level) >= 1) {
1697 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1698 caller, mutex );
1699 print_pthread_event(i, msg_buf);
1700 }
1701 }
1702}
1703
sewardje663cb92002-04-12 10:26:32 +00001704
1705static
sewardj30671ff2002-04-21 00:13:57 +00001706void do_pthread_mutex_lock( ThreadId tid,
1707 Bool is_trylock,
1708 pthread_mutex_t *mutex )
sewardje663cb92002-04-12 10:26:32 +00001709{
sewardj30671ff2002-04-21 00:13:57 +00001710 Char msg_buf[100];
1711 Char* caller
1712 = is_trylock ? "pthread_mutex_lock "
1713 : "pthread_mutex_trylock";
sewardje663cb92002-04-12 10:26:32 +00001714
sewardj604ec3c2002-04-18 22:38:41 +00001715 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj30671ff2002-04-21 00:13:57 +00001716 VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
sewardj604ec3c2002-04-18 22:38:41 +00001717 print_pthread_event(tid, msg_buf);
1718 }
1719
1720 /* Paranoia ... */
1721 vg_assert(is_valid_tid(tid)
1722 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001723
1724 /* POSIX doesn't mandate this, but for sanity ... */
1725 if (mutex == NULL) {
1726 vg_threads[tid].m_edx = EINVAL;
1727 return;
1728 }
1729
sewardj604ec3c2002-04-18 22:38:41 +00001730 /* More paranoia ... */
1731 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001732# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00001733 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001734 case PTHREAD_MUTEX_ADAPTIVE_NP:
1735# endif
sewardj604ec3c2002-04-18 22:38:41 +00001736 case PTHREAD_MUTEX_RECURSIVE_NP:
1737 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00001738 if (mutex->__m_count >= 0) break;
1739 /* else fall thru */
1740 default:
1741 vg_threads[tid].m_edx = EINVAL;
1742 return;
sewardje663cb92002-04-12 10:26:32 +00001743 }
1744
sewardj604ec3c2002-04-18 22:38:41 +00001745 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001746
sewardj604ec3c2002-04-18 22:38:41 +00001747 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001748
1749 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001750 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001751 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001752 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001753 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001754 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001755 vg_threads[tid].m_edx = 0;
sewardj3b5d8862002-04-20 13:53:23 +00001756 VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
sewardj604ec3c2002-04-18 22:38:41 +00001757 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001758 return;
1759 } else {
sewardj30671ff2002-04-21 00:13:57 +00001760 if (is_trylock)
1761 vg_threads[tid].m_edx = EBUSY;
1762 else
1763 vg_threads[tid].m_edx = EDEADLK;
sewardjf8f819e2002-04-17 23:21:37 +00001764 return;
1765 }
1766 } else {
sewardj6072c362002-04-19 14:40:57 +00001767 /* Someone else has it; we have to wait. Mark ourselves
1768 thusly. */
sewardj05553872002-04-20 20:53:17 +00001769 /* GUARD: __m_count > 0 && __m_owner is valid */
sewardj30671ff2002-04-21 00:13:57 +00001770 if (is_trylock) {
1771 /* caller is polling; so return immediately. */
1772 vg_threads[tid].m_edx = EBUSY;
1773 } else {
1774 vg_threads[tid].status = VgTs_WaitMX;
1775 vg_threads[tid].associated_mx = mutex;
1776 /* No assignment to %EDX, since we're blocking. */
1777 if (VG_(clo_trace_pthread_level) >= 1) {
1778 VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
1779 caller, mutex );
1780 print_pthread_event(tid, msg_buf);
1781 }
1782 }
sewardje663cb92002-04-12 10:26:32 +00001783 return;
1784 }
sewardjf8f819e2002-04-17 23:21:37 +00001785
sewardje663cb92002-04-12 10:26:32 +00001786 } else {
sewardj6072c362002-04-19 14:40:57 +00001787 /* Nobody owns it. Sanity check ... */
1788 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001789 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001790 mutex->__m_count = 1;
1791 mutex->__m_owner = (_pthread_descr)tid;
sewardj3b5d8862002-04-20 13:53:23 +00001792 vg_assert(vg_threads[tid].associated_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001793 /* return 0 (success). */
1794 vg_threads[tid].m_edx = 0;
1795 }
sewardjf8f819e2002-04-17 23:21:37 +00001796
sewardje663cb92002-04-12 10:26:32 +00001797}
1798
1799
1800static
1801void do_pthread_mutex_unlock ( ThreadId tid,
1802 pthread_mutex_t *mutex )
1803{
sewardj3b5d8862002-04-20 13:53:23 +00001804 Char msg_buf[100];
sewardje663cb92002-04-12 10:26:32 +00001805
sewardj45b4b372002-04-16 22:50:32 +00001806 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001807 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00001808 print_pthread_event(tid, msg_buf);
1809 }
1810
sewardj604ec3c2002-04-18 22:38:41 +00001811 /* Paranoia ... */
1812 vg_assert(is_valid_tid(tid)
1813 && vg_threads[tid].status == VgTs_Runnable);
1814
1815 if (mutex == NULL) {
1816 vg_threads[tid].m_edx = EINVAL;
1817 return;
1818 }
1819
1820 /* More paranoia ... */
1821 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001822# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00001823 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001824 case PTHREAD_MUTEX_ADAPTIVE_NP:
1825# endif
sewardj604ec3c2002-04-18 22:38:41 +00001826 case PTHREAD_MUTEX_RECURSIVE_NP:
1827 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00001828 if (mutex->__m_count >= 0) break;
1829 /* else fall thru */
1830 default:
1831 vg_threads[tid].m_edx = EINVAL;
1832 return;
1833 }
sewardje663cb92002-04-12 10:26:32 +00001834
1835 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00001836 if (mutex->__m_count == 0 /* nobody holds it */
1837 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00001838 vg_threads[tid].m_edx = EPERM;
1839 return;
1840 }
1841
sewardjf8f819e2002-04-17 23:21:37 +00001842 /* If it's a multiply-locked recursive mutex, just decrement the
1843 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00001844 if (mutex->__m_count > 1) {
1845 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
1846 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00001847 vg_threads[tid].m_edx = 0; /* success */
1848 return;
1849 }
1850
sewardj604ec3c2002-04-18 22:38:41 +00001851 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00001852 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00001853 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00001854 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00001855
sewardj3b5d8862002-04-20 13:53:23 +00001856 /* Release at max one thread waiting on this mutex. */
1857 release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
sewardje663cb92002-04-12 10:26:32 +00001858
sewardj3b5d8862002-04-20 13:53:23 +00001859 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardje663cb92002-04-12 10:26:32 +00001860 vg_threads[tid].m_edx = 0; /* Success. */
1861}
1862
1863
sewardj6072c362002-04-19 14:40:57 +00001864/* -----------------------------------------------------------
1865 CONDITION VARIABLES
1866 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00001867
sewardj6072c362002-04-19 14:40:57 +00001868/* The relevant native types are as follows:
1869 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00001870
sewardj6072c362002-04-19 14:40:57 +00001871   -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER)
1872 typedef struct
1873 {
1874 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
1875 _pthread_descr __c_waiting; -- Threads waiting on this condition
1876 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00001877
sewardj6072c362002-04-19 14:40:57 +00001878   -- Attribute for condition variables.
1879 typedef struct
1880 {
1881 int __dummy;
1882 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00001883
sewardj6072c362002-04-19 14:40:57 +00001884 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00001885
sewardj3b5d8862002-04-20 13:53:23 +00001886 We don't use any fields of pthread_cond_t for anything at all.
1887 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00001888
1889 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00001890 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00001891
sewardj77e466c2002-04-14 02:29:29 +00001892
/* Helper: wake up to 'n_to_release' threads blocked in
   pthread_cond_wait() on the CV 'cond'.  Each released thread must
   reacquire the mutex it recorded when it started waiting: if that
   mutex is free it is handed over immediately and the thread becomes
   Runnable (its cond_wait returns 0); otherwise the thread is moved
   onto the mutex's wait set (VgTs_WaitMX).  Stops early when no more
   waiters exist.  'caller' is used only to label trace messages. */
static
void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
                                         Int n_to_release,
                                         Char* caller )
{
   Int i;
   Char msg_buf[100];
   pthread_mutex_t* mx;

   while (True) {
      if (n_to_release == 0)
         return;

      /* Find a thread waiting on this CV. */
      for (i = 1; i < VG_N_THREADS; i++) {
         if (vg_threads[i].status == VgTs_Empty)
            continue;
         if (vg_threads[i].status == VgTs_WaitCV
             && vg_threads[i].associated_cv == cond)
            break;
      }
      vg_assert(i <= VG_N_THREADS);

      if (i == VG_N_THREADS) {
         /* Nobody else is waiting on it. */
         return;
      }

      /* The mutex thread i must reacquire before its wait completes;
         recorded by do_pthread_cond_wait. */
      mx = vg_threads[i].associated_mx;
      vg_assert(mx != NULL);
      /* NOTE(review): these two asserts demand that the mutex is
         currently held by a valid thread, which appears to make the
         "currently unheld" branch just below unreachable (presumably
         is_valid_tid() rejects VG_INVALID_THREADID) -- confirm which
         of the two is the intended invariant. */
      vg_assert(mx->__m_count > 0);
      vg_assert(is_valid_tid((ThreadId)mx->__m_owner));

      if (mx->__m_owner == VG_INVALID_THREADID) {
         /* Currently unheld; hand it out to thread i. */
         vg_assert(mx->__m_count == 0);
         vg_threads[i].status        = VgTs_Runnable;
         vg_threads[i].associated_cv = NULL;
         vg_threads[i].associated_mx = NULL;
         mx->__m_owner = (_pthread_descr)i;
         mx->__m_count = 1;
         vg_threads[i].m_edx = 0; /* pthread_cond_wait returns success */

         if (VG_(clo_trace_pthread_level) >= 1) {
            VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
                                  caller, cond, mx );
            print_pthread_event(i, msg_buf);
         }

      } else {
         /* Currently held.  Make thread i be blocked on it. */
         vg_threads[i].status        = VgTs_WaitMX;
         vg_threads[i].associated_cv = NULL;
         vg_threads[i].associated_mx = mx;

         if (VG_(clo_trace_pthread_level) >= 1) {
            VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
                                  caller, cond, mx );
            print_pthread_event(i, msg_buf);
         }

      }

      n_to_release--;
   }
}
1959
1960
1961static
1962void do_pthread_cond_wait ( ThreadId tid,
1963 pthread_cond_t *cond,
1964 pthread_mutex_t *mutex )
1965{
1966 Char msg_buf[100];
1967
1968 /* pre: mutex should be a valid mutex and owned by tid. */
1969 if (VG_(clo_trace_pthread_level) >= 2) {
1970 VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p ...",
1971 cond, mutex );
1972 print_pthread_event(tid, msg_buf);
1973 }
1974
1975 /* Paranoia ... */
1976 vg_assert(is_valid_tid(tid)
1977 && vg_threads[tid].status == VgTs_Runnable);
1978
1979 if (mutex == NULL || cond == NULL) {
1980 vg_threads[tid].m_edx = EINVAL;
1981 return;
1982 }
1983
1984 /* More paranoia ... */
1985 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001986# ifndef GLIBC_2_1
sewardj3b5d8862002-04-20 13:53:23 +00001987 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001988 case PTHREAD_MUTEX_ADAPTIVE_NP:
1989# endif
sewardj3b5d8862002-04-20 13:53:23 +00001990 case PTHREAD_MUTEX_RECURSIVE_NP:
1991 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj3b5d8862002-04-20 13:53:23 +00001992 if (mutex->__m_count >= 0) break;
1993 /* else fall thru */
1994 default:
1995 vg_threads[tid].m_edx = EINVAL;
1996 return;
1997 }
1998
1999 /* Barf if we don't currently hold the mutex. */
2000 if (mutex->__m_count == 0 /* nobody holds it */
2001 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
2002 vg_threads[tid].m_edx = EINVAL;
2003 return;
2004 }
2005
2006 /* Queue ourselves on the condition. */
2007 vg_threads[tid].status = VgTs_WaitCV;
2008 vg_threads[tid].associated_cv = cond;
2009 vg_threads[tid].associated_mx = mutex;
2010
2011 if (VG_(clo_trace_pthread_level) >= 1) {
2012 VG_(sprintf)(msg_buf,
2013 "pthread_cond_wait cv %p, mx %p: BLOCK",
2014 cond, mutex );
2015 print_pthread_event(tid, msg_buf);
2016 }
2017
2018 /* Release the mutex. */
2019 release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
2020}
2021
2022
2023static
2024void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2025 Bool broadcast,
2026 pthread_cond_t *cond )
2027{
2028 Char msg_buf[100];
2029 Char* caller
2030 = broadcast ? "pthread_cond_broadcast"
2031 : "pthread_cond_signal ";
2032
2033 if (VG_(clo_trace_pthread_level) >= 2) {
2034 VG_(sprintf)(msg_buf, "%s cv %p ...",
2035 caller, cond );
2036 print_pthread_event(tid, msg_buf);
2037 }
2038
2039 /* Paranoia ... */
2040 vg_assert(is_valid_tid(tid)
2041 && vg_threads[tid].status == VgTs_Runnable);
2042
2043 if (cond == NULL) {
2044 vg_threads[tid].m_edx = EINVAL;
2045 return;
2046 }
2047
2048 release_N_threads_waiting_on_cond (
2049 cond,
2050 broadcast ? VG_N_THREADS : 1,
2051 caller
2052 );
2053
2054 vg_threads[tid].m_edx = 0; /* success */
2055}
2056
sewardj77e466c2002-04-14 02:29:29 +00002057
sewardje663cb92002-04-12 10:26:32 +00002058/* ---------------------------------------------------------------------
2059 Handle non-trivial client requests.
2060 ------------------------------------------------------------------ */
2061
/* Dispatch a non-trivial client request issued by thread 'tid'.
   The address of the request block arrived in the thread's %EAX:
   arg[0] is the request code, arg[1..4] are the arguments.  Results,
   where any, are delivered back via the thread's %EDX. */
static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      /* pthread_create(thread, attr, start_routine, arg) */
      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      /* A thread's root function returned; arg[1] is its return
         value. */
      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      /* The Bool argument distinguishes blocking lock (False) from
         trylock (True). */
      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_EXIT:
         do_pthread_exit( tid, (void*)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_COND_WAIT:
         do_pthread_cond_wait( tid,
                               (pthread_cond_t *)(arg[1]),
                               (pthread_mutex_t *)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_COND_SIGNAL:
         do_pthread_cond_signal_or_broadcast(
            tid,
            False, /* signal, not broadcast */
            (pthread_cond_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_COND_BROADCAST:
         do_pthread_cond_signal_or_broadcast(
            tid,
            True, /* broadcast, not signal */
            (pthread_cond_t *)(arg[1]) );
         break;

      /* Memory-state requests: forwarded en bloc to the core
         client-request handler, which computes the %EDX result. */
      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      /* A signal handler has finished and returned. */
      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("handle_private_client_pthread_request: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}
2150
2151
sewardj6072c362002-04-19 14:40:57 +00002152/* ---------------------------------------------------------------------
2153 Sanity checking.
2154 ------------------------------------------------------------------ */
2155
2156/* Internal consistency checks on the sched/pthread structures. */
2157static
2158void scheduler_sanity ( void )
2159{
sewardj3b5d8862002-04-20 13:53:23 +00002160 pthread_mutex_t* mx;
2161 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002162 Int i;
2163 /* VG_(printf)("scheduler_sanity\n"); */
2164 for (i = 1; i < VG_N_THREADS; i++) {
sewardj3b5d8862002-04-20 13:53:23 +00002165 mx = vg_threads[i].associated_mx;
2166 cv = vg_threads[i].associated_cv;
sewardj6072c362002-04-19 14:40:57 +00002167 if (vg_threads[i].status == VgTs_WaitMX) {
sewardj05553872002-04-20 20:53:17 +00002168 /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
2169 it's actually held by someone, since otherwise this thread
2170 is deadlocked, (4) the mutex's owner is not us, since
2171 otherwise this thread is also deadlocked. The logic in
2172 do_pthread_mutex_lock rejects attempts by a thread to lock
2173 a (non-recursive) mutex which it already owns.
2174
2175 (2) has been seen to fail sometimes. I don't know why.
2176 Possibly to do with signals. */
sewardj3b5d8862002-04-20 13:53:23 +00002177 vg_assert(cv == NULL);
sewardj05553872002-04-20 20:53:17 +00002178 /* 1 */ vg_assert(mx != NULL);
2179 /* 2 */ vg_assert(mx->__m_count > 0);
2180 /* 3 */ vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
2181 /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
sewardj3b5d8862002-04-20 13:53:23 +00002182 } else
2183 if (vg_threads[i].status == VgTs_WaitCV) {
2184 vg_assert(cv != NULL);
2185 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002186 } else {
sewardj05553872002-04-20 20:53:17 +00002187 /* Unfortunately these don't hold true when a sighandler is
2188 running. To be fixed. */
2189 /* vg_assert(cv == NULL); */
2190 /* vg_assert(mx == NULL); */
sewardj6072c362002-04-19 14:40:57 +00002191 }
2192 }
2193}
2194
2195
sewardje663cb92002-04-12 10:26:32 +00002196/*--------------------------------------------------------------------*/
2197/*--- end vg_scheduler.c ---*/
2198/*--------------------------------------------------------------------*/