
/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator 
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward 
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

Note!  This pthreads implementation is so poor as to not be
suitable for use by anyone at all!

- Currently, when a signal handler is run, only the ThreadStatus.status
  fields are saved in the signal frame, along with the CPU state.
  Question: should I also save and restore:
     ThreadStatus.joiner
     ThreadStatus.waited_on_mid
     ThreadStatus.awaken_at
     ThreadStatus.retval
  Currently unsure, and so am not doing so.

- Signals interrupting read/write and nanosleep: SA_RESTART settings.
  Read/write correctly return with EINTR when SA_RESTART isn't
  specified and they are interrupted by a signal.  nanosleep just
  pretends signals don't exist -- should be fixed.

- Read/write syscall starts: don't crap out when the initial
  nonblocking read/write returns an error.

- Get rid of restrictions re use of sigaltstack; they are no longer
  needed.

- Fix signals properly, so that each thread has its own blocking mask.
  Currently this isn't done, and (worse?) signals are delivered to
  Thread 1 (the root thread) regardless.

  So, what's the deal with signals and mutexes?  If a thread is
  blocked on a mutex, or for a condition variable for that matter, can
  signals still be delivered to it?  This has serious consequences --
  deadlocks, etc.

*/


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock). */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);

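/* A sketch of the handler-side of this protocol, as assumed here (the
   real vg_oursignalhandler() lives in the signal machinery, not in
   this file): on an unresumable fault it records the signal number
   and jumps back to the setjmp point in run_thread_for_a_while():

      VG_(longjmpd_on_signal) = sigNo;
      __builtin_longjmp ( VG_(scheduler_jmpbuf), 1 );
*/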

/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int      fd;
      /* The syscall number the fd is used in. */
      Int      syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool     ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


/* Keeping track of keys. */
typedef
   struct {
      /* Has this key been allocated ? */
      Bool inuse;
      /* If .inuse==True, records the address of the associated
         destructor, or NULL if none. */
      void (*destructor)(void*);
   }
   ThreadKeyState;

/* And our array of thread keys. */
static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS];

typedef UInt ThreadKey;

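/* Illustrative sketch only (not code from this file): with this
   representation, allocating a fresh key is just a linear scan for a
   free slot, roughly

      Int k;
      for (k = 0; k < VG_N_THREAD_KEYS; k++)
         if (!vg_thread_keys[k].inuse)
            break;
      ... if k == VG_N_THREAD_KEYS, fail: all keys in use ...
      vg_thread_keys[k].inuse      = True;
      vg_thread_keys[k].destructor = dtor;   (dtor: hypothetical arg)

   after which k indexes each thread's .specifics[] array (see
   VG_(scheduler_init) below). */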

/* Forwards */
static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );

static void do_nontrivial_clientreq ( ThreadId tid );

static void scheduler_sanity ( void );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static __inline__
Bool is_valid_tid ( ThreadId tid )
{
   /* tid is unsigned, hence no < 0 test. */
   if (tid == 0) return False;
   if (tid >= VG_N_THREADS) return False;
   return True;
}


/* For constructing error messages only: try to identify a thread
   whose stack this address currently falls within, or return
   VG_INVALID_THREADID if it doesn't fall within any.  A small
   complication is dealing with any currently VG_(baseBlock)-resident
   thread.
*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock). */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (vg_threads[tid].m_esp <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
   }
   return VG_INVALID_THREADID;
}


/* Print the scheduler status. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
                  vg_threads[i].associated_mx,
                  vg_threads[i].associated_cv );
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, vg_threads[i].m_eip,
                              vg_threads[i].m_ebp ));
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0;  i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}



static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}


ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   vg_tid_currently_in_baseBlock = tid;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}


/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      vg_threads[i].status     = VgTs_Empty;
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid        = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      vg_thread_keys[i].inuse      = False;
      vg_thread_keys[i].destructor = NULL;
   }

   /* Assert this is thread one, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   vg_threads[tid_main].status        = VgTs_Runnable;
   vg_threads[tid_main].joiner        = VG_INVALID_THREADID;
   vg_threads[tid_main].associated_mx = NULL;
   vg_threads[tid_main].associated_cv = NULL;
   vg_threads[tid_main].retval        = NULL; /* not important */
   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4  ??? */;
   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      vg_threads[tid_main].specifics[i] = NULL;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* So now ... */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}
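
/* Typical usage pattern for the three helpers above, as employed by
   sched_do_syscall() later in this file: remember the fd's original
   blockness, force it nonblocking for a speculative syscall, and then
   restore it:

      Bool orig = fd_is_blockful(fd);
      set_fd_nonblocking(fd);
      ... do the speculative syscall ...
      if (orig) set_fd_blocking(fd); else set_fd_nonblocking(fd);
*/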


/* Do a purely thread-local request for tid, and put the result in its
   %EDX, without changing its scheduling state in any way, nor that of
   any other threads.  Return True if the request was handled this way.

   If the request is non-trivial, return False; a more capable but
   slower mechanism will deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)             \
       { tst->m_edx = (vvv);             \
         return True;                    \
       }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used -- or at least we want them to be
         cheap. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));
      case VG_USERREQ__READ_MILLISECOND_TIMER:
         SIMPLE_RETURN(VG_(read_millisecond_timer)());

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
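
/* For reference, the client-side half of the protocol assumed above:
   the client leaves the address of an argument block in %EAX, whose
   first word is the request code, and reads the reply back from %EDX.
   Schematically, for a malloc request (nbytes being a hypothetical
   client variable):

      UInt arg[2];
      arg[0] = VG_USERREQ__MALLOC;
      arg[1] = nbytes;
      ... set %EAX = (UInt)arg, enter Valgrind, read result from %EDX ...

   The actual trap sequence the client uses is defined in valgrind.h,
   not here. */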


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls;

   vg_assert(is_valid_tid(tid));

   restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD) {
      vg_assert(vg_threads[tid].m_eax == __NR_read
                || vg_threads[tid].m_eax == __NR_write);
      /* read() or write() interrupted.  Force a return with EINTR. */
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_Sleeping) {
      vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   /* All other cases?  Just return. */
}


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool might_block, assumed_nonblocking;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      UInt t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_millisecond_timer)();
      t_awaken
         = t_now
           + (UInt)1000ULL * (UInt)(req->tv_sec)
           + (UInt)(req->tv_nsec) / 1000000;
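      /* Worked example: a request of { tv_sec = 1, tv_nsec = 500000000 }
         gives t_awaken = t_now + 1000 + 500, i.e. 1500 ms from now. */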
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %d: nanosleep for %d",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         assumed_nonblocking
            = False;
         might_block
            = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */);
         break;
      default:
         might_block = False;
         assumed_nonblocking = True;
   }

   if (assumed_nonblocking) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* It might block.  Take evasive action. */
   switch (syscall_no) {
      case __NR_read:
      case __NR_write:
         fd = vg_threads[tid].m_ebx; break;
      default:
         vg_assert(3+3 == 7);
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK) {
      /* It didn't block; it went through immediately.  So finish off
         in the normal way.  Don't restore %EAX, since that now
         (correctly) holds the result of the call. */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
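
/* A concrete timeline of the two-phase mechanism just described
   (illustrative tid/fd numbers only):

      t0: thread 3 does read() on fd 7, which would block; a slot
          { tid=3, fd=7, syscall_no=__NR_read, ready=False } is added
          and thread 3 moves to VgTs_WaitFD.
      t1: a signal is delivered to thread 3, so it is temporarily
          Runnable whilst the handler runs.
      t2: poll_for_ready_fds(): select() reports fd 7 ready; the slot
          is marked ready=True, but no result is delivered yet.
      t3: the handler returns; thread 3 resumes VgTs_WaitFD.
      t4: complete_blocked_syscalls(): the read() is finally done, the
          result delivered, the slot retired (fd=-1), and thread 3
          becomes Runnable again.
*/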
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   UInt t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   for (tid = 1; tid < VG_N_THREADS; tid++)
      if (vg_threads[tid].status == VgTs_Sleeping)
         break;

   /* Avoid pointless calls to VG_(read_millisecond_timer). */
   if (tid < VG_N_THREADS) {
      t_now = VG_(read_millisecond_timer)();
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         if (vg_threads[tid].status != VgTs_Sleeping)
            continue;
         if (t_now >= vg_threads[tid].awaken_at) {
            /* Resume this thread.  Set to zero the remaining-time
               (second) arg of nanosleep, since it's used up all its
               time. */
            vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
            rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
            if (rem != NULL) {
               rem->tv_sec = 0;
               rem->tv_nsec = 0;
            }
            /* Make the syscall return 0 (success). */
            vg_threads[tid].m_eax = 0;
            /* Reschedule this thread. */
            vg_threads[tid].status = VgTs_Runnable;
            if (VG_(clo_trace_sched)) {
               VG_(sprintf)(msg_buf, "at %d: nanosleep done",
                                     t_now);
               print_sched_event(tid, msg_buf);
            }
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O.*/
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
       VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void check_for_pthread_cond_timedwait ( void )
{
   Int i;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status != VgTs_WaitCV)
         continue;
      if (vg_threads[i].awaken_at == 0xFFFFFFFF /* no timeout */)
         continue;
      if (VG_(read_millisecond_timer)() >= vg_threads[i].awaken_at)
         do_pthread_cond_timedwait_TIMEOUT(i);
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec = 0;
   req.tv_nsec = 50 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
   Char     msg_buf[100];
   Addr     trans_addr;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
      scheduler_sanity();

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();
         check_for_pthread_cond_timedwait();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour.  */
         VG_(deliver_signals)( 1 /*HACK*/ );
         VG_(do_sanity_checks)( 1 /*HACK*/, False );

         /* Try to find a thread (tid) to run. */
         tid_next = tid;
         n_in_fdwait_or_sleep = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping)
               n_in_fdwait_or_sleep ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_fdwait_or_sleep == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quantum as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);
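      /* Worked example, with assumed numbers: if VG_SCHEDULING_QUANTUM
         is 50000, VG_(dispatch_ctr) is set to 50001, so at most 50000
         bbs run.  If the innerloop counts all the way down to 0,
         done_this_time (computed below) = 50001 - 0 - 1 = 50000. */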

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );

         trc = run_thread_for_a_while ( tid );

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool is_triv = maybe_do_trivial_clientreq(tid);
            if (is_triv) {
               /* NOTE: a trivial request is something like a call to
                  malloc() or free().  It DOES NOT change the
                  Runnability of this thread nor the status of any
                  other thread; it is purely thread-local. */
               continue; /* with this thread */
            }
         }

         /* It's a non-trivial event.  Give up running this thread and
            handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go)   -= (ULong)done_this_time;
      VG_(bbs_done)    += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d:   completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d:  %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler):  VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


/* -----------------------------------------------------------
   Thread CREATION, JOINAGE and CANCELLATION.
   -------------------------------------------------------- */

static
void do_pthread_cancel ( ThreadId  tid_canceller,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
      return value.  So: simple: put PTHREAD_CANCELED into %EAX
      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller);
      print_sched_event(tid_cancellee, msg_buf);
   }
   vg_threads[tid_cancellee].m_eax  = (UInt)PTHREAD_CANCELED;
   vg_threads[tid_cancellee].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid_cancellee].status = VgTs_Runnable;
}


static
void do_pthread_exit ( ThreadId tid, void* retval )
{
   Char msg_buf[100];
   /* We want to make it appear that this thread has returned to
      VG_(pthreadreturn_bogusRA) with retval as the return value.
      So: simple: put retval into %EAX and
      &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "exiting with %p", retval);
      print_sched_event(tid, msg_buf);
   }
   vg_threads[tid].m_eax  = (UInt)retval;
   vg_threads[tid].m_eip  = (UInt)&VG_(pthreadreturn_bogusRA);
   vg_threads[tid].status = VgTs_Runnable;
}
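
/* Assumed contract of VG_(pthreadreturn_bogusRA), which is defined
   elsewhere: it is the synthetic return address at which every
   thread's root function "returns", carrying the return value in
   %EAX, and it funnels into handle_pthread_return() below.
   do_pthread_cancel and do_pthread_exit above reuse it by faking up
   %EIP/%EAX. */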
1430
sewardje663cb92002-04-12 10:26:32 +00001431
/* Thread tid is exiting, by returning from the function it was
   created with.  Or possibly due to pthread_exit or cancellation.
   The main complication here is to resume any thread waiting to join
   with this one. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt*    jnr_args;
   void**   jnr_thread_return;
   Char     msg_buf[100];

   /* Mark it as not in use.  Leave the stack in place so the next
      user of this slot doesn't reallocate it. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   vg_threads[tid].retval = retval;

   if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me */
      vg_threads[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call to pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is waiting; make their join call return with success,
         putting my exit code in the place specified by the caller's
         thread_return param.  This is all very horrible, since we
         need to consult the joiner's arg block -- pointed to by its
         %EAX -- in order to extract the 2nd param of its pthread_join
         call.  TODO: free the slot properly (also below).
      */
      jnr = vg_threads[tid].joiner;
      vg_assert(is_valid_tid(jnr));
      vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)vg_threads[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = vg_threads[tid].retval;
      vg_threads[jnr].m_edx  = 0; /* success */
      vg_threads[jnr].status = VgTs_Runnable;
      vg_threads[tid].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && tid != 0)
         VGM_(make_noaccess)( vg_threads[tid].stack_base,
                              vg_threads[tid].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}
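
/* To recap the layout relied on just above (a sketch; see the client
   request dispatcher at the bottom of this file): a thread blocked in
   pthread_join has in its %EAX a pointer to an arg block of the form

      arg[0] == VG_USERREQ__PTHREAD_JOIN
      arg[1] == (UInt)jee             -- the thread being joined
      arg[2] == (UInt)thread_return   -- where to put the exit code

   which is why jnr_args[2] is the right slot to poke the retval
   into. */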


static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   if (jee == tid) {
      vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (jee < 0
       || jee >= VG_N_THREADS
       || vg_threads[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      vg_threads[tid].m_edx = EINVAL;
      vg_threads[tid].status = VgTs_Runnable;
      return;
   }

   /* if (vg_threads[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (vg_threads[jee].status == VgTs_WaitJoiner) {
      vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
      vg_threads[tid].m_edx = 0; /* success */
      if (thread_return != NULL)
         *thread_return = vg_threads[jee].retval;
      vg_threads[tid].status = VgTs_Runnable;
      vg_threads[jee].status = VgTs_Empty; /* bye! */
      if (VG_(clo_instrument) && jee != 0)
         VGM_(make_noaccess)( vg_threads[jee].stack_base,
                              vg_threads[jee].stack_size );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee. */
   vg_threads[jee].joiner = tid;
   vg_threads[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
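
/* A hedged sketch of the client-visible semantics the checks above
   give (this fragment is illustrative only and is not compiled as
   part of this file):

      #include <pthread.h>
      #include <errno.h>

      static void* worker ( void* arg ) { return arg; }

      static void join_examples ( void )
      {
         pthread_t th;
         void*     res;
         pthread_create(&th, NULL, worker, (void*)42);
         pthread_join(th, &res);   -- 0 (success); res == (void*)42
         pthread_join(pthread_self(), &res);
                                   -- EDEADLK: self-join is rejected
      }

   A second thread joining the same joinee while the first is still
   blocked would get EINVAL, since the joinee's .joiner field is
   already occupied. */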


static
void do_pthread_create ( ThreadId parent_tid,
                         pthread_t* thread,
                         pthread_attr_t* attr,
                         void* (*start_routine)(void *),
                         void* arg )
{
   Int      i;
   Addr     new_stack;
   UInt     new_stk_szb;
   ThreadId tid;
   Char     msg_buf[100];

   /* Paranoia ... */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(vg_threads[parent_tid].status != VgTs_Empty);

   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(is_valid_tid(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = PTHREAD_STACK_MIN;

   if (new_stk_szb > vg_threads[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(vg_threads[tid].stack_size == 0);
      vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      vg_threads[tid].stack_base = new_stack;
      vg_threads[tid].stack_size = new_stk_szb;
      vg_threads[tid].stack_highest_word
         = new_stack + new_stk_szb
           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */
   }

   vg_threads[tid].m_esp
      = vg_threads[tid].stack_base
        + vg_threads[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   if (VG_(clo_instrument))
      VGM_(make_noaccess)( vg_threads[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;

   /* push (magical) return address */
   vg_threads[tid].m_esp -= 4;
   * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);

   if (VG_(clo_instrument))
      VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );

   /* this is where we start */
   vg_threads[tid].m_eip = (UInt)start_routine;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* store the thread id in *thread. */
   // if (VG_(clo_instrument))
   // ***** CHECK *thread is writable
   *thread = (pthread_t)tid;

   vg_threads[tid].associated_mx = NULL;
   vg_threads[tid].associated_cv = NULL;
   vg_threads[tid].joiner        = VG_INVALID_THREADID;
   vg_threads[tid].status        = VgTs_Runnable;

   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      vg_threads[tid].specifics[i] = NULL;

   /* return zero */
   vg_threads[tid].m_edx = 0; /* success */
}
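
/* For reference, the child's initial stack as set up above (a sketch,
   high addresses at the top):

      stack_base + stack_size
         | redzone, VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes (no-access)
         | (UInt)arg                          <- m_esp + 4
         | (UInt)VG_(pthreadreturn_bogusRA)   <- m_esp
         v

   So start_routine begins with a conventional one-argument frame, and
   when it returns, it "returns" to the magic address, whose code
   raises VG_USERREQ__PTHREAD_RETURNS and lands us in
   handle_pthread_return above. */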


/* -----------------------------------------------------------
   MUTEXes
   -------------------------------------------------------- */

/* pthread_mutex_t is a struct with 5 words:
     typedef struct
     {
       int __m_reserved;          -- Reserved for future use
       int __m_count;             -- Depth of recursive locking
       _pthread_descr __m_owner;  -- Owner thread (if recursive or errcheck)
       int __m_kind;              -- Mutex kind: fast, recursive or errcheck
       struct _pthread_fastlock __m_lock;  -- Underlying fast lock
     } pthread_mutex_t;

   #define PTHREAD_MUTEX_INITIALIZER \
     {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
   # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
     {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
   # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
     {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
   # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
     {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}

   How we use it:

   __m_kind never changes and indicates whether or not it is recursive.

   __m_count indicates the lock count; if 0, the mutex is not owned by
   anybody.

   __m_owner has a ThreadId value stuffed into it.  We carefully
   arrange that ThreadId == 0 is invalid (VG_INVALID_THREADID), so
   that statically initialised mutexes correctly appear to belong to
   nobody.

   In summary, a not-in-use mutex is distinguished by having __m_owner
   == 0 (VG_INVALID_THREADID) and __m_count == 0 too.  If one of those
   conditions holds, the other should too.

   There is no linked list of threads waiting for this mutex.  Instead
   a thread in WaitMX state points at the mutex with its associated_mx
   field.  This makes _unlock() inefficient, but simple to implement
   the right semantics vis-a-vis signals.

   We don't have to deal with mutex initialisation; the client side
   deals with that for us.
*/
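
/* A worked example of the encoding (a sketch; assume thread 3 holds a
   recursive mutex which it has locked twice):

      mutex->__m_kind  == PTHREAD_MUTEX_RECURSIVE_NP
      mutex->__m_count == 2
      mutex->__m_owner == (_pthread_descr)3

   After two unlocks, __m_count == 0 and __m_owner ==
   VG_INVALID_THREADID again, which is indistinguishable from a
   statically initialised, never-used mutex -- exactly as intended. */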

/* Helper fns ... */
static
void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
                                           Char* caller )
{
   Int  i;
   Char msg_buf[100];

   /* Find some arbitrary thread waiting on this mutex, and make it
      runnable.  If none are waiting, mark the mutex as not held. */
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         continue;
      if (vg_threads[i].status == VgTs_WaitMX
          && vg_threads[i].associated_mx == mutex)
         break;
   }

   vg_assert(i <= VG_N_THREADS);
   if (i == VG_N_THREADS) {
      /* Nobody else is waiting on it. */
      mutex->__m_count = 0;
      mutex->__m_owner = VG_INVALID_THREADID;
   } else {
      /* Notionally transfer the hold to thread i, whose
         pthread_mutex_lock() call now returns with 0 (success). */
      /* The __m_count is already == 1. */
      vg_assert(vg_threads[i].associated_mx == mutex);
      mutex->__m_owner = (_pthread_descr)i;
      vg_threads[i].status        = VgTs_Runnable;
      vg_threads[i].associated_mx = NULL;
      /* m_edx already holds pth_mx_lock() success (0) */

      if (VG_(clo_trace_pthread_level) >= 1) {
         VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
                               caller, mutex );
         print_pthread_event(i, msg_buf);
      }
   }
}


static
void do_pthread_mutex_lock( ThreadId tid,
                            Bool is_trylock,
                            pthread_mutex_t *mutex )
{
   Char  msg_buf[100];
   Char* caller
      = is_trylock ? "pthread_mutex_trylock"
                   : "pthread_mutex_lock   ";

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* More paranoia ... */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         vg_threads[tid].m_edx = EINVAL;
         return;
   }

   if (mutex->__m_count > 0) {

      vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));

      /* Someone has it already. */
      if ((ThreadId)mutex->__m_owner == tid) {
         /* It's locked -- by me! */
         if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
            /* return 0 (success). */
            mutex->__m_count++;
            vg_threads[tid].m_edx = 0;
            VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
                        tid, mutex, mutex->__m_count);
            return;
         } else {
            if (is_trylock)
               vg_threads[tid].m_edx = EBUSY;
            else
               vg_threads[tid].m_edx = EDEADLK;
            return;
         }
      } else {
         /* Someone else has it; we have to wait.  Mark ourselves
            thusly. */
         /* GUARD: __m_count > 0 && __m_owner is valid */
         if (is_trylock) {
            /* caller is polling; so return immediately. */
            vg_threads[tid].m_edx = EBUSY;
         } else {
            vg_threads[tid].status        = VgTs_WaitMX;
            vg_threads[tid].associated_mx = mutex;
            vg_threads[tid].m_edx         = 0; /* pth_mx_lock success value */
            if (VG_(clo_trace_pthread_level) >= 1) {
               VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
                                     caller, mutex );
               print_pthread_event(tid, msg_buf);
            }
         }
         return;
      }

   } else {
      /* Nobody owns it.  Sanity check ... */
      vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
      /* We get it! [for the first time]. */
      mutex->__m_count = 1;
      mutex->__m_owner = (_pthread_descr)tid;
      vg_assert(vg_threads[tid].associated_mx == NULL);
      /* return 0 (success). */
      vg_threads[tid].m_edx = 0;
   }

}


static
void do_pthread_mutex_unlock ( ThreadId tid,
                               pthread_mutex_t *mutex )
{
   Char msg_buf[100];

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (mutex == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* More paranoia ... */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         vg_threads[tid].m_edx = EINVAL;
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      vg_threads[tid].m_edx = EPERM;
      return;
   }

   /* If it's a multiply-locked recursive mutex, just decrement the
      lock count and return. */
   if (mutex->__m_count > 1) {
      vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
      mutex->__m_count --;
      vg_threads[tid].m_edx = 0; /* success */
      return;
   }

   /* Now we're sure it is locked exactly once, and by the thread who
      is now doing an unlock on it. */
   vg_assert(mutex->__m_count == 1);
   vg_assert((ThreadId)mutex->__m_owner == tid);

   /* Release at max one thread waiting on this mutex. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_unlock" );

   /* Our (tid's) pth_unlock() returns with 0 (success). */
   vg_threads[tid].m_edx = 0; /* Success. */
}


/* -----------------------------------------------------------
   CONDITION VARIABLES
   -------------------------------------------------------- */

/* The relevant native types are as follows:
   (copied from /usr/include/bits/pthreadtypes.h)

   -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER)
   typedef struct
   {
     struct _pthread_fastlock __c_lock; -- Protect against concurrent access
     _pthread_descr __c_waiting;        -- Threads waiting on this condition
   } pthread_cond_t;

   -- Attribute for condition variables.
   typedef struct
   {
     int __dummy;
   } pthread_condattr_t;

   #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}

   We don't use any fields of pthread_cond_t for anything at all.
   Only the identity of the CVs is important.

   Linux pthreads supports no attributes on condition variables, so we
   don't need to think too hard there. */
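
/* The canonical client-side pattern these requests exist to support
   (a sketch only, not compiled here; plain <pthread.h> code):

      #include <pthread.h>

      static pthread_mutex_t mx    = PTHREAD_MUTEX_INITIALIZER;
      static pthread_cond_t  cv    = PTHREAD_COND_INITIALIZER;
      static int             ready = 0;

      static void* waiter ( void* v )
      {
         pthread_mutex_lock(&mx);
         while (!ready)
            pthread_cond_wait(&cv, &mx);  -- releases mx and blocks;
                                             owns mx again on return
         pthread_mutex_unlock(&mx);
         return NULL;
      }

      static void signaller ( void )
      {
         pthread_mutex_lock(&mx);
         ready = 1;
         pthread_cond_signal(&cv);
         pthread_mutex_unlock(&mx);
      }

   do_pthread_cond_wait below does the atomic unlock-and-block, and
   release_N_threads_waiting_on_cond hands the mutex back to the
   waiter (or queues the waiter on it) when the signal arrives. */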


static
void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid )
{
   Char             msg_buf[100];
   pthread_mutex_t* mx;
   pthread_cond_t*  cv;

   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_WaitCV
             && vg_threads[tid].awaken_at != 0xFFFFFFFF);
   mx = vg_threads[tid].associated_mx;
   vg_assert(mx != NULL);
   cv = vg_threads[tid].associated_cv;
   vg_assert(cv != NULL);

   if (mx->__m_owner == VG_INVALID_THREADID) {
      /* Currently unheld; hand it out to thread tid. */
      vg_assert(mx->__m_count == 0);
      vg_threads[tid].status        = VgTs_Runnable;
      vg_threads[tid].m_edx         = ETIMEDOUT;
                        /* pthread_cond_timedwait return value */
      vg_threads[tid].associated_cv = NULL;
      vg_threads[tid].associated_mx = NULL;
      mx->__m_owner = (_pthread_descr)tid;
      mx->__m_count = 1;

      if (VG_(clo_trace_pthread_level) >= 1) {
         VG_(sprintf)(msg_buf,
            "pthread_cond_timedwait cv %p: TIMEOUT with mx %p",
            cv, mx );
         print_pthread_event(tid, msg_buf);
      }
   } else {
      /* Currently held.  Make thread tid be blocked on it. */
      vg_assert(mx->__m_count > 0);
      vg_threads[tid].status        = VgTs_WaitMX;
      vg_threads[tid].m_edx         = ETIMEDOUT;
                        /* pthread_cond_timedwait return value */
      vg_threads[tid].associated_cv = NULL;
      vg_threads[tid].associated_mx = mx;
      if (VG_(clo_trace_pthread_level) >= 1) {
         VG_(sprintf)(msg_buf,
            "pthread_cond_timedwait cv %p: TIMEOUT -> BLOCK for mx %p",
            cv, mx );
         print_pthread_event(tid, msg_buf);
      }

   }
}


static
void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
                                         Int n_to_release,
                                         Char* caller )
{
   Int  i;
   Char msg_buf[100];
   pthread_mutex_t* mx;

   while (True) {
      if (n_to_release == 0)
         return;

      /* Find a thread waiting on this CV. */
      for (i = 1; i < VG_N_THREADS; i++) {
         if (vg_threads[i].status == VgTs_Empty)
            continue;
         if (vg_threads[i].status == VgTs_WaitCV
             && vg_threads[i].associated_cv == cond)
            break;
      }
      vg_assert(i <= VG_N_THREADS);

      if (i == VG_N_THREADS) {
         /* Nobody else is waiting on it. */
         return;
      }

      mx = vg_threads[i].associated_mx;
      vg_assert(mx != NULL);

      if (mx->__m_owner == VG_INVALID_THREADID) {
         /* Currently unheld; hand it out to thread i. */
         vg_assert(mx->__m_count == 0);
         vg_threads[i].status        = VgTs_Runnable;
         vg_threads[i].associated_cv = NULL;
         vg_threads[i].associated_mx = NULL;
         mx->__m_owner = (_pthread_descr)i;
         mx->__m_count = 1;
         /* .m_edx already holds pth_cond_wait success value (0) */

         if (VG_(clo_trace_pthread_level) >= 1) {
            VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
                                  caller, cond, mx );
            print_pthread_event(i, msg_buf);
         }

      } else {
         /* Currently held.  Make thread i be blocked on it. */
         vg_assert(mx->__m_count > 0);
         vg_threads[i].status        = VgTs_WaitMX;
         vg_threads[i].associated_cv = NULL;
         vg_threads[i].associated_mx = mx;
         vg_threads[i].m_edx         = 0; /* pth_cond_wait success value */

         if (VG_(clo_trace_pthread_level) >= 1) {
            VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
                                  caller, cond, mx );
            print_pthread_event(i, msg_buf);
         }

      }

      n_to_release--;
   }
}


static
void do_pthread_cond_wait ( ThreadId tid,
                            pthread_cond_t *cond,
                            pthread_mutex_t *mutex,
                            UInt ms_end )
{
   Char msg_buf[100];

   /* If ms_end == 0xFFFFFFFF, wait forever (no timeout).  Otherwise,
      ms_end is the ending millisecond. */

   /* pre: mutex should be a valid mutex and owned by tid. */
   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...",
                            cond, mutex, ms_end );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (mutex == NULL || cond == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* More paranoia ... */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         vg_threads[tid].m_edx = EINVAL;
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   /* Queue ourselves on the condition. */
   vg_threads[tid].status        = VgTs_WaitCV;
   vg_threads[tid].associated_cv = cond;
   vg_threads[tid].associated_mx = mutex;
   vg_threads[tid].awaken_at     = ms_end;

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf,
         "pthread_cond_wait cv %p, mx %p: BLOCK",
         cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Release the mutex. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
}


static
void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
                                           Bool broadcast,
                                           pthread_cond_t *cond )
{
   Char  msg_buf[100];
   Char* caller
      = broadcast ? "pthread_cond_broadcast"
                  : "pthread_cond_signal ";

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "%s cv %p ...",
                            caller, cond );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (cond == NULL) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   release_N_threads_waiting_on_cond (
      cond,
      broadcast ? VG_N_THREADS : 1,
      caller
   );

   vg_threads[tid].m_edx = 0; /* success */
}


/* -----------------------------------------------------------
   THREAD SPECIFIC DATA
   -------------------------------------------------------- */

static __inline__
Bool is_valid_key ( ThreadKey k )
{
   /* k unsigned; hence no < 0 check */
   if (k >= VG_N_THREAD_KEYS) return False;
   if (!vg_thread_keys[k].inuse) return False;
   return True;
}

static
void do_pthread_key_create ( ThreadId tid,
                             pthread_key_t* key,
                             void (*destructor)(void*) )
{
   Int  i;
   Char msg_buf[100];

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p",
                            key, destructor );
      print_pthread_event(tid, msg_buf);
   }

   vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey));
   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      if (!vg_thread_keys[i].inuse)
         break;

   if (i == VG_N_THREAD_KEYS) {
      /* vg_threads[tid].m_edx = EAGAIN;
         return;
      */
      VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
                 " increase and recompile");
   }

   vg_thread_keys[i].inuse = True;
   /* Note: 'destructor' is accepted but not stored or run anywhere;
      key destructors are effectively ignored by this implementation. */
   /* TODO: check key for addressability */
   *key = i;
   vg_threads[tid].m_edx = 0;
}


static
void do_pthread_key_delete ( ThreadId tid, pthread_key_t key )
{
   Char msg_buf[100];
   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf, "pthread_key_delete key %d",
                            key );
      print_pthread_event(tid, msg_buf);
   }

   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (!is_valid_key(key)) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   vg_thread_keys[key].inuse = False;

   /* Optional.  We're not required to do this, although it shouldn't
      make any difference to programs which use the key/specifics
      functions correctly. */
   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status != VgTs_Empty)
         vg_threads[tid].specifics[key] = NULL;
   }
}


static
void do_pthread_getspecific ( ThreadId tid, pthread_key_t key )
{
   Char msg_buf[100];
   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf, "pthread_getspecific key %d",
                            key );
      print_pthread_event(tid, msg_buf);
   }

   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (!is_valid_key(key)) {
      vg_threads[tid].m_edx = (UInt)NULL;
      return;
   }

   vg_threads[tid].m_edx = (UInt)vg_threads[tid].specifics[key];
}


static
void do_pthread_setspecific ( ThreadId tid,
                              pthread_key_t key,
                              void *pointer )
{
   Char msg_buf[100];
   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p",
                            key, pointer );
      print_pthread_event(tid, msg_buf);
   }

   vg_assert(is_valid_tid(tid)
             && vg_threads[tid].status == VgTs_Runnable);

   if (!is_valid_key(key)) {
      vg_threads[tid].m_edx = EINVAL;
      return;
   }

   vg_threads[tid].specifics[key] = pointer;
   vg_threads[tid].m_edx = 0;
}
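
/* Hedged sketch of the client-side lifecycle the four requests above
   implement (illustrative only, not compiled here):

      #include <pthread.h>

      static pthread_key_t key;

      static void setup ( void )
      {
         pthread_key_create(&key, NULL);  -- picks a free slot in
                                             vg_thread_keys[]
      }

      static void* get_mine ( void )
      {
         void* v = pthread_getspecific(key);  -- NULL until set
         if (v == NULL) {
            v = (void*)1;                     -- any per-thread value
            pthread_setspecific(key, v);      -- stored in this
                                                 thread's specifics[]
         }
         return v;
      }

   Each thread sees its own specifics[key] cell, which is what makes
   the data "thread specific". */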


/* ---------------------------------------------------------------------
   Handle non-trivial client requests.
   ------------------------------------------------------------------ */

static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg    = (UInt*)(vg_threads[tid].m_eax);
   UInt  req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_CREATE:
         do_pthread_create( tid,
                            (pthread_t*)arg[1],
                            (pthread_attr_t*)arg[2],
                            (void*(*)(void*))arg[3],
                            (void*)arg[4] );
         break;

      case VG_USERREQ__PTHREAD_RETURNS:
         handle_pthread_return( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_CANCEL:
         do_pthread_cancel( tid, (pthread_t)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_EXIT:
         do_pthread_exit( tid, (void*)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_COND_WAIT:
         do_pthread_cond_wait( tid,
                               (pthread_cond_t *)(arg[1]),
                               (pthread_mutex_t *)(arg[2]),
                               0xFFFFFFFF /* no timeout */ );
         break;

      case VG_USERREQ__PTHREAD_COND_TIMEDWAIT:
         do_pthread_cond_wait( tid,
                               (pthread_cond_t *)(arg[1]),
                               (pthread_mutex_t *)(arg[2]),
                               arg[3] /* timeout millisecond point */ );
         break;

      case VG_USERREQ__PTHREAD_COND_SIGNAL:
         do_pthread_cond_signal_or_broadcast(
            tid,
            False, /* signal, not broadcast */
            (pthread_cond_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_COND_BROADCAST:
         do_pthread_cond_signal_or_broadcast(
            tid,
            True, /* broadcast, not signal */
            (pthread_cond_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_KEY_CREATE:
         do_pthread_key_create ( tid,
                                 (pthread_key_t*)(arg[1]),
                                 (void(*)(void*))(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_KEY_DELETE:
         do_pthread_key_delete ( tid,
                                 (pthread_key_t)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_GETSPECIFIC:
         do_pthread_getspecific ( tid,
                                  (pthread_key_t)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_SETSPECIFIC:
         do_pthread_setspecific ( tid,
                                  (pthread_key_t)(arg[1]),
                                  (void*)(arg[2]) );
         break;

      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
         vg_threads[tid].m_edx
            = VG_(handle_client_request) ( &vg_threads[tid], arg );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("do_nontrivial_clientreq: unknown request");
         /*NOTREACHED*/
         break;
   }
}
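
/* The marshalling convention assumed throughout: the client-side
   stubs leave in %EAX a pointer to an arg block, where arg[0] is the
   VG_USERREQ__* request code and arg[1..4] are the parameters; the
   result, if any, is passed back in %EDX.  As a sketch (the real
   stubs live in the client-side library, not here), a stub for
   pthread_mutex_lock would in effect build

      UInt arg[5];
      arg[0] = VG_USERREQ__PTHREAD_MUTEX_LOCK;
      arg[1] = (UInt)mutex;

   and then execute the magic client-request sequence from valgrind.h,
   which traps into the scheduler and lands in the dispatcher above. */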


/* ---------------------------------------------------------------------
   Sanity checking.
   ------------------------------------------------------------------ */

/* Internal consistency checks on the sched/pthread structures. */
static
void scheduler_sanity ( void )
{
   pthread_mutex_t* mx;
   pthread_cond_t*  cv;
   Int              i;

   /* VG_(printf)("scheduler_sanity\n"); */
   for (i = 1; i < VG_N_THREADS; i++) {
      mx = vg_threads[i].associated_mx;
      cv = vg_threads[i].associated_cv;
      if (vg_threads[i].status == VgTs_WaitMX) {
         /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
            it's actually held by someone, since otherwise this thread
            is deadlocked, (4) the mutex's owner is not us, since
            otherwise this thread is also deadlocked.  The logic in
            do_pthread_mutex_lock rejects attempts by a thread to lock
            a (non-recursive) mutex which it already owns.

            (2) has been seen to fail sometimes.  I don't know why.
            Possibly to do with signals. */
         vg_assert(cv == NULL);
         /* 1 */ vg_assert(mx != NULL);
         /* 2 */ vg_assert(mx->__m_count > 0);
         /* 3 */ vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
         /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
      } else
      if (vg_threads[i].status == VgTs_WaitCV) {
         vg_assert(cv != NULL);
         vg_assert(mx != NULL);
      } else {
         /* Unfortunately these don't hold true when a sighandler is
            running.  To be fixed. */
         /* vg_assert(cv == NULL); */
         /* vg_assert(mx == NULL); */
      }
   }

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      if (!vg_thread_keys[i].inuse)
         vg_assert(vg_thread_keys[i].destructor == NULL);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                           vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/