blob: 57d687d5f806e228d794ecb5443d8b62dfb054f2 [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
sewardje663cb92002-04-12 10:26:32 +000033#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
34 VG_USERREQ__DO_LEAK_CHECK */
35
sewardj77e466c2002-04-14 02:29:29 +000036/* BORKAGE/ISSUES as of 14 Apr 02
sewardje663cb92002-04-12 10:26:32 +000037
sewardj77e466c2002-04-14 02:29:29 +000038Note! This pthreads implementation is so poor as to not be
39suitable for use by anyone at all!
sewardje663cb92002-04-12 10:26:32 +000040
sewardj77e466c2002-04-14 02:29:29 +000041- Currently, when a signal is run, just the ThreadStatus.status fields
42 are saved in the signal frame, along with the CPU state. Question:
43 should I also save and restore:
44 ThreadStatus.joiner
45 ThreadStatus.waited_on_mid
46 ThreadStatus.awaken_at
47 ThreadStatus.retval
48 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000049
sewardj77e466c2002-04-14 02:29:29 +000050- Signals interrupting read/write and nanosleep: SA_RESTART settings.
51 Read/write correctly return with EINTR when SA_RESTART isn't
52 specified and they are interrupted by a signal. nanosleep just
53 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000054
sewardj75fe1892002-04-14 02:46:33 +000055- Read/write syscall starts: don't crap out when the initial
56 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000057
sewardj9a199dc2002-04-14 13:01:38 +000058- Get rid of restrictions re use of sigaltstack; they are no longer
59 needed.
60
sewardj6072c362002-04-19 14:40:57 +000061- Fix signals properly, so that each thread has its own blocking mask.
62 Currently this isn't done, and (worse?) signals are delivered to
63 Thread 1 (the root thread) regardless.
64
65 So, what's the deal with signals and mutexes? If a thread is
66 blocked on a mutex, or for a condition variable for that matter, can
67 signals still be delivered to it? This has serious consequences --
68 deadlocks, etc.
69
sewardje462e202002-04-13 04:09:07 +000070*/
sewardje663cb92002-04-12 10:26:32 +000071
72
73/* ---------------------------------------------------------------------
74 Types and globals for the scheduler.
75 ------------------------------------------------------------------ */
76
77/* type ThreadId is defined in vg_include.h. */
78
79/* struct ThreadState is defined in vg_include.h. */
80
/* Globals.  A statically allocated array of threads.  NOTE: [0] is
   never used, to simplify the simulation of initialisers for
   LinuxThreads. */
ThreadState VG_(threads)[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock).
   VG_INVALID_THREADID when no thread's state is resident there. */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;

/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);
94
95
/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

/* Fixed-size table of pending fd waits; slots with fd == -1 are free. */
static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
119
120
/* Keeping track of keys (pthread_key_t thread-specific-data keys). */
typedef
   struct {
      /* Has this key been allocated ? */
      Bool inuse;
      /* If .inuse==True, records the address of the associated
         destructor, or NULL if none. */
      void (*destructor)(void*);
   }
   ThreadKeyState;

/* And our array of thread keys. */
static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS];

typedef UInt ThreadKey;
136
137
sewardje663cb92002-04-12 10:26:32 +0000138/* Forwards */
sewardj5f07b662002-04-23 16:52:51 +0000139static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );
140
sewardje663cb92002-04-12 10:26:32 +0000141static void do_nontrivial_clientreq ( ThreadId tid );
142
sewardj6072c362002-04-19 14:40:57 +0000143static void scheduler_sanity ( void );
144
sewardjd7fd4d22002-04-24 01:57:27 +0000145static void do_pthread_mutex_unlock ( ThreadId,
sewardj8ccc2be2002-05-10 20:26:37 +0000146 void* /* pthread_mutex_t* */ );
sewardjd7fd4d22002-04-24 01:57:27 +0000147static void do_pthread_mutex_lock ( ThreadId, Bool,
sewardj8ccc2be2002-05-10 20:26:37 +0000148 void* /* pthread_mutex_t* */ );
sewardjd7fd4d22002-04-24 01:57:27 +0000149
sewardj51c0aaf2002-04-25 01:32:10 +0000150static void do_pthread_getspecific ( ThreadId,
151 UInt /* pthread_key_t */ );
152
sewardje663cb92002-04-12 10:26:32 +0000153
154/* ---------------------------------------------------------------------
155 Helper functions for the scheduler.
156 ------------------------------------------------------------------ */
157
sewardjb48e5002002-05-13 00:16:03 +0000158__inline__
159Bool VG_(is_valid_tid) ( ThreadId tid )
sewardj604ec3c2002-04-18 22:38:41 +0000160{
161 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000162 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000163 if (tid >= VG_N_THREADS) return False;
sewardj018f7622002-05-15 21:13:39 +0000164 if (VG_(threads)[tid].status == VgTs_Empty) return False;
165 return True;
166}
167
168
169__inline__
170Bool VG_(is_valid_or_empty_tid) ( ThreadId tid )
171{
172 /* tid is unsigned, hence no < 0 test. */
173 if (tid == 0) return False;
174 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000175 return True;
176}
177
178
/* For constructing error messages only: try and identify a thread
   whose stack this address currently falls within, or return
   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
   with any currently VG_(baseBlock)-resident thread.
*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock).  Its saved m_esp in VG_(threads)[] is stale, so
      we must use the live %esp in VG_(baseBlock) instead, and then
      skip that thread's slot in the scan below. */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= VG_(threads)[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   /* Scan all other (descheduled) threads: their saved m_esp is
      current.  The stack occupies [m_esp, stack_highest_word]. */
   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(threads)[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (VG_(threads)[tid].m_esp <= a
          && a <= VG_(threads)[tid].stack_highest_word)
         return tid;
   }
   return VG_INVALID_THREADID;
}
210
211
/* Print the scheduler status: one entry per live thread, showing its
   status, associated mutex/condvar, and a backtrace.  Debug aid. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   /* Slot 0 is never used; start at 1. */
   for (i = 1; i < VG_N_THREADS; i++) {
      if (VG_(threads)[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (VG_(threads)[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
                                           VG_(threads)[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
         case VgTs_WaitSIG:    VG_(printf)("WaitSIG"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
                  VG_(threads)[i].associated_mx,
                  VG_(threads)[i].associated_cv );
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, VG_(threads)[i].m_eip,
                              VG_(threads)[i].m_ebp ));
   }
   VG_(printf)("\n");
}
241
/* Record that thread tid is blocked in syscall syscall_no, waiting on
   file descriptor fd.  Takes the first free slot in vg_waiting_fds[];
   panics if the table is full. */
static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   /* fd == -1 marks a free slot. */
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}
264
265
266
/* Emit a scheduler-trace debug message on behalf of thread tid. */
static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what );
}
272
273
/* Emit a pthread-operation-trace debug message on behalf of thread
   tid. */
static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}
279
280
281static
282Char* name_of_sched_event ( UInt event )
283{
284 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000285 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
286 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
287 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
288 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
289 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
290 default: return "??UNKNOWN??";
291 }
292}
293
294
/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &VG_(threads)[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
}
334
335
336/* Allocate a completely empty ThreadState record. */
337static
338ThreadId vg_alloc_ThreadState ( void )
339{
340 Int i;
sewardj6072c362002-04-19 14:40:57 +0000341 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +0000342 if (VG_(threads)[i].status == VgTs_Empty)
sewardje663cb92002-04-12 10:26:32 +0000343 return i;
344 }
345 VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
346 VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
347 VG_(panic)("VG_N_THREADS is too low");
348 /*NOTREACHED*/
349}
350
351
/* Return a pointer to the ThreadState of the thread whose state is
   currently loaded into VG_(baseBlock); asserts that some thread is
   actually resident there. */
ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
   return & VG_(threads)[vg_tid_currently_in_baseBlock];
}
357
358
/* Return the ThreadId of the thread currently loaded into
   VG_(baseBlock); asserts that some thread is resident there. */
ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
   return vg_tid_currently_in_baseBlock;
}
364
365
/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run.  Requires that no other thread is currently resident
   there.  Copies integer registers, FPU state and the shadow
   (validity) registers. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)]    = VG_(threads)[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)]    = VG_(threads)[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)]    = VG_(threads)[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)]    = VG_(threads)[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)]    = VG_(threads)[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)]    = VG_(threads)[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)]    = VG_(threads)[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)]    = VG_(threads)[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(threads)[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)]    = VG_(threads)[tid].m_eip;

   /* FPU state, word at a time. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];

   /* Shadow (validity-bit) registers. */
   VG_(baseBlock)[VGOFF_(sh_eax)]    = VG_(threads)[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)]    = VG_(threads)[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)]    = VG_(threads)[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)]    = VG_(threads)[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)]    = VG_(threads)[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)]    = VG_(threads)[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)]    = VG_(threads)[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)]    = VG_(threads)[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;

   vg_tid_currently_in_baseBlock = tid;
}
400
401
/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(threads)[tid].m_eax    = VG_(baseBlock)[VGOFF_(m_eax)];
   VG_(threads)[tid].m_ebx    = VG_(baseBlock)[VGOFF_(m_ebx)];
   VG_(threads)[tid].m_ecx    = VG_(baseBlock)[VGOFF_(m_ecx)];
   VG_(threads)[tid].m_edx    = VG_(baseBlock)[VGOFF_(m_edx)];
   VG_(threads)[tid].m_esi    = VG_(baseBlock)[VGOFF_(m_esi)];
   VG_(threads)[tid].m_edi    = VG_(baseBlock)[VGOFF_(m_edi)];
   VG_(threads)[tid].m_ebp    = VG_(baseBlock)[VGOFF_(m_ebp)];
   VG_(threads)[tid].m_esp    = VG_(baseBlock)[VGOFF_(m_esp)];
   VG_(threads)[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   VG_(threads)[tid].m_eip    = VG_(baseBlock)[VGOFF_(m_eip)];

   /* FPU state, word at a time. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   /* Shadow (validity-bit) registers. */
   VG_(threads)[tid].sh_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
   VG_(threads)[tid].sh_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
   VG_(threads)[tid].sh_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
   VG_(threads)[tid].sh_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
   VG_(threads)[tid].sh_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
   VG_(threads)[tid].sh_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
   VG_(threads)[tid].sh_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
   VG_(threads)[tid].sh_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
   VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)]    = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)]    = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)]    = junk;
   VG_(baseBlock)[VGOFF_(m_edx)]    = junk;
   VG_(baseBlock)[VGOFF_(m_esi)]    = junk;
   VG_(baseBlock)[VGOFF_(m_edi)]    = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)]    = junk;
   VG_(baseBlock)[VGOFF_(m_esp)]    = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)]    = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}
457
458
/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   /* trc must be volatile: it is written between the setjmp below and
      a possible longjmp from the signal handler, and is read after
      the longjmp returns. */
   volatile UInt trc = 0;
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VGP_PUSHCC(VgpRun);
   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   VGP_POPCC;
   return trc;
}
485
486
/* Increment the LRU epoch counter, optionally report per-epoch
   translation statistics, and reset the per-epoch counters. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
          VG_(bbs_done),
          VG_(this_epoch_in_count),
          VG_(this_epoch_in_osize),
          VG_(this_epoch_in_tsize),
          VG_(this_epoch_out_count),
          VG_(this_epoch_out_osize),
          VG_(this_epoch_out_tsize),
          tt_used, tc_used
       );
   }
   /* Reset the per-epoch counters for the next epoch. */
   VG_(this_epoch_in_count)  = 0;
   VG_(this_epoch_in_osize)  = 0;
   VG_(this_epoch_in_tsize)  = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}
514
515
/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care that the client's state is parked in
   VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];

   /* Sanity-check that the client's %esp is near one of the two
      possible startup stack bases. */
   if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1)
       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2)) {
      /* Jolly good! */
   } else {
      VG_(printf)("%%esp at startup = %p is not near %p or %p; aborting\n",
                  (void*)startup_esp,
                  (void*)VG_STARTUP_STACK_BASE_1,
                  (void*)VG_STARTUP_STACK_BASE_2 );
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      VG_(threads)[i].status     = VgTs_Empty;
      VG_(threads)[i].stack_size = 0;
      VG_(threads)[i].stack_base = (Addr)NULL;
      VG_(threads)[i].tid        = i;
      VG_(ksigemptyset)(&VG_(threads)[i].sig_mask);
      VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      vg_thread_keys[i].inuse      = False;
      vg_thread_keys[i].destructor = NULL;
   }

   /* Assert this is thread one, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   VG_(threads)[tid_main].status        = VgTs_Runnable;
   VG_(threads)[tid_main].joiner        = VG_INVALID_THREADID;
   VG_(threads)[tid_main].associated_mx = NULL;
   VG_(threads)[tid_main].associated_cv = NULL;
   VG_(threads)[tid_main].retval        = NULL; /* not important */
   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      VG_(threads)[tid_main].specifics[i] = NULL;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   VG_(threads)[tid_main].stack_highest_word
      = VG_(threads)[tid_main].m_esp /* -4 ??? */;

   /* So now ... baseBlock is vacated again. */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}
580
581
582/* What if fd isn't a valid fd? */
583static
584void set_fd_nonblocking ( Int fd )
585{
586 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
587 vg_assert(!VG_(is_kerror)(res));
588 res |= VKI_O_NONBLOCK;
589 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
590 vg_assert(!VG_(is_kerror)(res));
591}
592
593static
594void set_fd_blocking ( Int fd )
595{
596 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
597 vg_assert(!VG_(is_kerror)(res));
598 res &= ~VKI_O_NONBLOCK;
599 res = VG_(fcntl)( fd, VKI_F_SETFL, res );
600 vg_assert(!VG_(is_kerror)(res));
601}
602
603static
604Bool fd_is_blockful ( Int fd )
605{
606 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
607 vg_assert(!VG_(is_kerror)(res));
608 return (res & VKI_O_NONBLOCK) ? False : True;
609}
610
611
612
/* Possibly do a trivial (fast-path) client request for tid.  Return
   values are:

   True = request done.  Thread may or may not be still runnable;
   caller must check.  If it is still runnable, the result will be in
   the thread's %EDX as expected.

   False = request not done.  A more capable but slower mechanism will
   deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
   /* Deliver result vvv in the client's %EDX, marking the shadow
      %EDX as fully valid, and report the request as handled. */
#  define SIMPLE_RETURN(vvv)                      \
       { tst->m_edx = (vvv);                      \
         tst->sh_edx = VGM_WORD_VALID;            \
         return True;                             \
       }

   ThreadState* tst    = &VG_(threads)[tid];
   /* The request block's address is passed in the client's %EAX;
      arg[0] is the request code, arg[1..] its arguments. */
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   /* VG_(printf)("req no = 0x%x\n", req_no); */
   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used -- or at least we want them to be
         cheap. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));
      case VG_USERREQ__READ_MILLISECOND_TIMER:
         SIMPLE_RETURN(VG_(read_millisecond_timer)());

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (void *)(arg[1]) );
         return True;

      /* This may make thread tid non-runnable, but the scheduler
         checks for that on return from this function. */
      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_GETSPECIFIC:
         do_pthread_getspecific ( tid, (UInt)(arg[1]) );
         return True;

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
706
707
sewardj6072c362002-04-19 14:40:57 +0000708/* vthread tid is returning from a signal handler; modify its
709 stack/regs accordingly. */
sewardj1ffa8da2002-04-26 22:47:57 +0000710
/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD
   for read or write, has been interrupted by a signal.  Find and
   clear the relevant vg_waiting_fd[] entry.  Most of the code in this
   procedure is total paranoia, if you look closely. */
static
void cleanup_waiting_fd_table ( ThreadId tid )
{
   Int i, waiters;

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_WaitFD);
   vg_assert(VG_(threads)[tid].m_eax == __NR_read
             || VG_(threads)[tid].m_eax == __NR_write);

   /* Excessively paranoidly ... find the fd this op was waiting
      for, and mark it as not being waited on.  First verify there is
      exactly one entry for this tid, and it matches the syscall. */
   waiters = 0;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].tid == tid) {
         waiters++;
         vg_assert(vg_waiting_fds[i].syscall_no == VG_(threads)[tid].m_eax);
      }
   }
   vg_assert(waiters == 1);
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].tid == tid)
         break;
   vg_assert(i < VG_N_WAITING_FDS);
   vg_assert(vg_waiting_fds[i].fd != -1);
   vg_waiting_fds[i].fd = -1; /* not in use */
}
742
743
/* Thread tid is returning from a signal handler; modify its
   stack/regs accordingly, deciding whether an interrupted blocking
   syscall should be restarted or forced to fail with EINTR. */
static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls;

   vg_assert(VG_(is_valid_tid)(tid));

   restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (VG_(threads)[tid].status == VgTs_WaitFD
       && (VG_(threads)[tid].m_eax == __NR_read
           || VG_(threads)[tid].m_eax == __NR_write)) {
      /* read() or write() interrupted.  Force a return with EINTR. */
      cleanup_waiting_fd_table(tid);
      VG_(threads)[tid].m_eax = -VKI_EINTR;
      VG_(threads)[tid].status = VgTs_Runnable;

      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (VG_(threads)[tid].status == VgTs_WaitFD
       && VG_(threads)[tid].m_eax == __NR_nanosleep) {
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   if (VG_(threads)[tid].status == VgTs_WaitFD) {
      VG_(panic)("handle_signal_return: unknown interrupted syscall");
   }

   /* All other cases?  Just return. */
}
788
789
/* Execute the syscall that runnable thread tid has requested (its
   number is in the thread's %eax).  Three cases:
   - nanosleep: never actually issued to the kernel; the thread is
     moved to VgTs_Sleeping and poll_for_ready_fds() wakes it when
     its deadline passes.
   - read/write: issued speculatively with the fd forced into
     non-blocking mode; if the kernel reports EWOULDBLOCK, the thread
     is descheduled into VgTs_WaitFD and the fd is registered for
     later select() polling.
   - anything else: assumed non-blocking and performed synchronously,
     leaving the thread runnable. */
static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);

   syscall_no = VG_(threads)[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      UInt t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)VG_(threads)[tid].m_ebx; /* arg1 */
      t_now = VG_(read_millisecond_timer)();
      /* Convert the requested interval to milliseconds and compute
         the absolute wakeup time.  NOTE(review): this arithmetic is
         done in 32-bit UInt, so very large tv_sec values wrap. */
      t_awaken
         = t_now
           + (UInt)1000ULL * (UInt)(req->tv_sec)
           + (UInt)(req->tv_nsec) / 1000000;
      VG_(threads)[tid].status    = VgTs_Sleeping;
      VG_(threads)[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %d: nanosleep for %d",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   if (syscall_no != __NR_read && syscall_no != __NR_write) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   fd = VG_(threads)[tid].m_ebx /* arg1 */;
   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = VG_(threads)[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   /* res is UInt, so the comparison below is against the unsigned
      bit-pattern of -VKI_EWOULDBLOCK; kernel error returns are small
      negative values, which is what this relies on. */
   if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) {
      /* Finish off in the normal way.  Don't restore %EAX, since that
         now (correctly) holds the result of the call.  We get here if either:
         1.  The call didn't block, or
         2.  The fd was already in nonblocking mode before we started to
             mess with it.  In this case, we're not expecting to handle
             the I/O completion -- the client is.  So don't file a
             completion-wait entry.
      */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(VG_(threads)[tid].status == VgTs_Runnable);

   } else {

      vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness);

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      VG_(threads)[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion. The arguments for select() later are constructed
         from this table.  */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      VG_(threads)[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}
884
885
886/* Find out which of the fds in vg_waiting_fds are now ready to go, by
887 making enquiries with select(), and mark them as ready. We have to
888 wait for the requesting threads to fall into the the WaitFD state
889 before we can actually finally deliver the results, so this
890 procedure doesn't do that; complete_blocked_syscalls() does it.
891
892 It might seem odd that a thread which has done a blocking syscall
893 is not in WaitFD state; the way this can happen is if it initially
894 becomes WaitFD, but then a signal is delivered to it, so it becomes
895 Runnable for a while. In this case we have to wait for the
896 sighandler to return, whereupon the WaitFD state is resumed, and
897 only at that point can the I/O result be delivered to it. However,
898 this point may be long after the fd is actually ready.
899
900 So, poll_for_ready_fds() merely detects fds which are ready.
901 complete_blocked_syscalls() does the second half of the trick,
902 possibly much later: it delivers the results from ready fds to
903 threads in WaitFD state.
904*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   UInt t_now;

   /* Awaken any sleeping threads whose sleep has expired.  First do
      a cheap scan to see whether ANY thread is sleeping, so that the
      millisecond timer is only read when it is actually needed. */
   for (tid = 1; tid < VG_N_THREADS; tid++)
      if (VG_(threads)[tid].status == VgTs_Sleeping)
         break;

   /* Avoid pointless calls to VG_(read_millisecond_timer). */
   if (tid < VG_N_THREADS) {
      t_now = VG_(read_millisecond_timer)();
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         if (VG_(threads)[tid].status != VgTs_Sleeping)
            continue;
         if (t_now >= VG_(threads)[tid].awaken_at) {
            /* Resume this thread.  Set to zero the remaining-time
               (second) arg of nanosleep, since it's used up all its
               time. */
            vg_assert(VG_(threads)[tid].m_eax == __NR_nanosleep);
            rem = (struct vki_timespec *)VG_(threads)[tid].m_ecx; /* arg2 */
            if (rem != NULL) {
               rem->tv_sec = 0;
               rem->tv_nsec = 0;
            }
            /* Make the syscall return 0 (success). */
            VG_(threads)[tid].m_eax = 0;
            /* Reschedule this thread. */
            VG_(threads)[tid].status = VgTs_Runnable;
            if (VG_(clo_trace_sched)) {
               VG_(sprintf)(msg_buf, "at %d: nanosleep done",
                                     t_now);
               print_sched_event(tid, msg_buf);
            }
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O.  A zero timeout makes the select() below a pure
      non-blocking poll. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   /* Build the three fd sets from the waiting-fd table, skipping
      slots which are unused or whose fd is already marked ready. */
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(VG_(is_valid_tid)(tid));
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            /* In order to catch timeout events on fds which are
               readable and which have been ioctl(TCSETA)'d with a
               VTIMEout, we appear to need to ask if the fd is
               writable, for some reason.  Ask me not why.  Since this
               is strange and potentially troublesome we only do it if
               the user asks specially. */
            if (VG_(strstr)(VG_(clo_weird_hacks), "ioctl-VTIME") != NULL)
               VKI_FD_SET(fd, &writefds);
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_all_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
       VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      /* Each fd is expected to fire for exactly one kind of event. */
      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this.  NOTE(review): this takes the first matching
         slot; presumably at most one thread waits on a given fd --
         confirm against add_waiting_fd(). */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready; complete_blocked_syscalls() delivers
         the result to the owning thread later. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}
1061
1062
1063/* See comment attached to poll_for_ready_fds() for explaination. */
sewardj9a199dc2002-04-14 13:01:38 +00001064static
sewardje663cb92002-04-12 10:26:32 +00001065void complete_blocked_syscalls ( void )
1066{
1067 Int fd, i, res, syscall_no;
1068 ThreadId tid;
1069 Char msg_buf[100];
1070
1071 /* Inspect all the outstanding fds we know about. */
1072
1073 for (i = 0; i < VG_N_WAITING_FDS; i++) {
1074 if (vg_waiting_fds[i].fd == -1 /* not in use */)
1075 continue;
1076 if (! vg_waiting_fds[i].ready)
1077 continue;
1078
1079 fd = vg_waiting_fds[i].fd;
1080 tid = vg_waiting_fds[i].tid;
sewardjb48e5002002-05-13 00:16:03 +00001081 vg_assert(VG_(is_valid_tid)(tid));
sewardje663cb92002-04-12 10:26:32 +00001082
1083 /* The thread actually has to be waiting for the I/O event it
1084 requested before we can deliver the result! */
sewardj018f7622002-05-15 21:13:39 +00001085 if (VG_(threads)[tid].status != VgTs_WaitFD)
sewardje663cb92002-04-12 10:26:32 +00001086 continue;
1087
1088 /* Ok, actually do it! We can safely use %EAX as the syscall
1089 number, because the speculative call made by
1090 sched_do_syscall() doesn't change %EAX in the case where the
1091 call would have blocked. */
1092
1093 syscall_no = vg_waiting_fds[i].syscall_no;
sewardj018f7622002-05-15 21:13:39 +00001094 vg_assert(syscall_no == VG_(threads)[tid].m_eax);
sewardje663cb92002-04-12 10:26:32 +00001095 KERNEL_DO_SYSCALL(tid,res);
1096 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
1097
1098 /* Reschedule. */
sewardj018f7622002-05-15 21:13:39 +00001099 VG_(threads)[tid].status = VgTs_Runnable;
sewardje663cb92002-04-12 10:26:32 +00001100 /* Mark slot as no longer in use. */
1101 vg_waiting_fds[i].fd = -1;
1102 /* pp_sched_status(); */
sewardj8937c812002-04-12 20:12:20 +00001103 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001104 VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
1105 print_sched_event(tid, msg_buf);
1106 }
1107 }
1108}
1109
1110
1111static
sewardj5f07b662002-04-23 16:52:51 +00001112void check_for_pthread_cond_timedwait ( void )
1113{
sewardj51c0aaf2002-04-25 01:32:10 +00001114 Int i, now;
sewardj5f07b662002-04-23 16:52:51 +00001115 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00001116 if (VG_(threads)[i].status != VgTs_WaitCV)
sewardj5f07b662002-04-23 16:52:51 +00001117 continue;
sewardj018f7622002-05-15 21:13:39 +00001118 if (VG_(threads)[i].awaken_at == 0xFFFFFFFF /* no timeout */)
sewardj5f07b662002-04-23 16:52:51 +00001119 continue;
sewardj51c0aaf2002-04-25 01:32:10 +00001120 now = VG_(read_millisecond_timer)();
sewardj018f7622002-05-15 21:13:39 +00001121 if (now >= VG_(threads)[i].awaken_at) {
sewardj5f07b662002-04-23 16:52:51 +00001122 do_pthread_cond_timedwait_TIMEOUT(i);
sewardj51c0aaf2002-04-25 01:32:10 +00001123 }
sewardj5f07b662002-04-23 16:52:51 +00001124 }
1125}
1126
1127
1128static
sewardje663cb92002-04-12 10:26:32 +00001129void nanosleep_for_a_while ( void )
1130{
1131 Int res;
1132 struct vki_timespec req;
1133 struct vki_timespec rem;
1134 req.tv_sec = 0;
sewardj51c0aaf2002-04-25 01:32:10 +00001135 req.tv_nsec = 20 * 1000 * 1000;
sewardje663cb92002-04-12 10:26:32 +00001136 res = VG_(nanosleep)( &req, &rem );
sewardj5f07b662002-04-23 16:52:51 +00001137 vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
sewardje663cb92002-04-12 10:26:32 +00001138}
1139
1140
1141/* ---------------------------------------------------------------------
1142 The scheduler proper.
1143 ------------------------------------------------------------------ */
1144
/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.

   The loop has three phases per iteration:
     Phase 1: handle I/O completions and signals; pick a runnable tid
              (or detect deadlock, or sleep and retry).
     Phase 2: run tid for up to a quantum of basic blocks, servicing
              trivial events (fast-cache misses, trivial client
              requests, syscalls) inline.
     Phase 3: handle the non-trivial event that ended the quantum. */
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_bounded_wait;
   Char     msg_buf[100];
   Addr     trans_addr;
   Bool     sigs_delivered;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   VG_(last_run_tid) = tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
    stage1:
      scheduler_sanity();
      VG_(do_sanity_checks)( False );

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O.  */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();
         check_for_pthread_cond_timedwait();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour.  */
         sigs_delivered = VG_(deliver_signals)();
         if (sigs_delivered)
            VG_(do_sanity_checks)( False );

         /* Try and find a thread (tid) to run, round-robin starting
            just after the previously-run thread.  While scanning,
            count threads in a "bounded wait" -- states that are
            guaranteed to end eventually (fd I/O, sleep, sigwait,
            timed CV wait) -- to distinguish deadlock from mere
            waiting. */
         tid_next = tid;
         n_in_bounded_wait = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (VG_(threads)[tid_next].status == VgTs_WaitFD
                || VG_(threads)[tid_next].status == VgTs_Sleeping
                || VG_(threads)[tid_next].status == VgTs_WaitSIG
                || (VG_(threads)[tid_next].status == VgTs_WaitCV
                    && VG_(threads)[tid_next].awaken_at != 0xFFFFFFFF))
               n_in_bounded_wait ++;
            if (VG_(threads)[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (VG_(threads)[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_bounded_wait == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         /* pp_sched_status(); */
         /* VG_(printf)("."); */
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quanta as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive.  */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(VG_(threads)[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         VG_(last_run_tid) = tid;

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );
#        if 0
         if (VG_(bbs_done) > 31700000 + 0) {
            dispatch_ctr_SAVED = VG_(dispatch_ctr) = 2;
            VG_(translate)(&VG_(threads)[tid], VG_(threads)[tid].m_eip,
                           NULL,NULL,NULL);
         }
         vg_assert(VG_(threads)[tid].m_eip != 0);
#        endif

         trc = run_thread_for_a_while ( tid );

#        if 0
         if (0 == VG_(threads)[tid].m_eip) {
            VG_(printf)("tid = %d, dc = %llu\n", tid, VG_(bbs_done));
            vg_assert(0 != VG_(threads)[tid].m_eip);
         }
#        endif

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, VG_(threads)[tid].m_eip );
               trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool done;
            /* VG_(printf)("request 0x%x\n",
                           *(UInt*)(VG_(threads)[tid].m_eax)); */
            done = maybe_do_trivial_clientreq(tid);
            if (done) {
               /* The request is done.  We try and continue with the
                  same thread if still runnable.  If not, go back to
                  Stage 1 to select a new thread to run. */
               if (VG_(threads)[tid].status == VgTs_Runnable)
                  continue; /* with this thread */
               else
                  goto stage1;
            }
            /* Not a trivial request: fall through to the break below
               and handle it the expensive way in Phase 3. */
         }

         if (trc == VG_TRC_EBP_JMP_SYSCALL) {
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable.  One special case: spot the
               client doing calls to exit() and take this as the cue
               to exit. */
#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)VG_(threads)[tid].m_esp;
              VG_(printf)("\nBEFORE\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            if (VG_(threads)[tid].m_eax == __NR_exit)
               return VgSrc_ExitSyscall;

            sched_do_syscall(tid);

#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)VG_(threads)[tid].m_esp;
              VG_(printf)("AFTER\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            if (VG_(threads)[tid].status == VgTs_Runnable)
               continue; /* with this thread */
            else
               goto stage1;
         }

         /* It's an event we can't quickly deal with.  Give up running
            this thread and handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason. First, update basic-block
         counters.  The extra -1 accounts for the unconditional
         decrement the dispatcher makes before running each bb (see
         the comment where VG_(dispatch_ctr) is set above). */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go)   -= (ULong)done_this_time;
      VG_(bbs_done)    += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            /* Fast-cache misses are fully handled in the Phase 2
               loop above, so seeing one here is a logic error. */
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(VG_(threads)[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            do_nontrivial_clientreq(tid);
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

 debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &VG_(threads)[tid],
                   VG_(threads)[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}
1504
1505
1506/* ---------------------------------------------------------------------
1507 The pthread implementation.
1508 ------------------------------------------------------------------ */
1509
1510#include <pthread.h>
1511#include <errno.h>
1512
sewardjbf290b92002-05-01 02:28:01 +00001513#define VG_PTHREAD_STACK_MIN \
sewardjc3bd5f52002-05-01 03:24:23 +00001514 (VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
sewardje663cb92002-04-12 10:26:32 +00001515
1516/* /usr/include/bits/pthreadtypes.h:
1517 typedef unsigned long int pthread_t;
1518*/
1519
sewardje663cb92002-04-12 10:26:32 +00001520
sewardj604ec3c2002-04-18 22:38:41 +00001521/* -----------------------------------------------------------
1522 Thread CREATION, JOINAGE and CANCELLATION.
1523 -------------------------------------------------------- */
1524
sewardjb48e5002002-05-13 00:16:03 +00001525/* Release resources and generally clean up once a thread has finally
1526 disappeared. */
1527static
1528void cleanup_after_thread_exited ( ThreadId tid )
1529{
sewardj3a951cf2002-05-15 22:25:47 +00001530 vki_ksigset_t irrelevant_sigmask;
sewardj018f7622002-05-15 21:13:39 +00001531 vg_assert(VG_(is_valid_or_empty_tid)(tid));
1532 vg_assert(VG_(threads)[tid].status == VgTs_Empty);
sewardjb48e5002002-05-13 00:16:03 +00001533 /* Mark its stack no-access */
1534 if (VG_(clo_instrument) && tid != 1)
sewardj018f7622002-05-15 21:13:39 +00001535 VGM_(make_noaccess)( VG_(threads)[tid].stack_base,
1536 VG_(threads)[tid].stack_size );
sewardjb48e5002002-05-13 00:16:03 +00001537 /* Forget about any pending signals directed specifically at this
sewardj018f7622002-05-15 21:13:39 +00001538 thread, and get rid of signal handlers specifically arranged for
1539 this thread. */
sewardj3a951cf2002-05-15 22:25:47 +00001540 VG_(block_all_host_signals)( &irrelevant_sigmask );
sewardj018f7622002-05-15 21:13:39 +00001541 VG_(handle_SCSS_change)( False /* lazy update */ );
sewardjb48e5002002-05-13 00:16:03 +00001542}
1543
1544
/* Cancel thread tid_cancellee on behalf of requesting thread tid.
   Writes the result of tid's pthread_cancel() call into tid's %EDX:
   ESRCH if tid_cancellee does not denote a live thread, else 0.
   NOTE(review): cancellation is delivered immediately; the
   cancellee's cancel state/type is not consulted here -- confirm
   whether deferred cancellation is handled elsewhere. */
static
void do_pthread_cancel ( ThreadId  tid,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];

   /* The canceller must itself be a live thread. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);

   /* Fail with ESRCH if the cancellee is not a live thread. */
   if (!VG_(is_valid_tid)(tid_cancellee)
       || VG_(threads)[tid_cancellee].status == VgTs_Empty) {
      SET_EDX(tid, ESRCH);
      return;
   }

   /* Make it appear that the cancellee has returned from its root
      function with PTHREAD_CANCELED as the return value: put
      PTHREAD_CANCELED into its %EAX, point its %EIP at the magic
      return address VG_(pthreadreturn_bogusRA), and mark it
      runnable so it resumes from there. */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "cancelled by %d", tid);
      print_sched_event(tid_cancellee, msg_buf);
   }
   VG_(threads)[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
   VG_(threads)[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
   VG_(threads)[tid_cancellee].status = VgTs_Runnable;

   /* The canceller's pthread_cancel() returns with success (0). */
   SET_EDX(tid, 0);
}
1575
1576
/* Implement pthread_exit for thread tid, exiting with value retval. */
static
void do_pthread_exit ( ThreadId tid, void* retval )
{
   Char msg_buf[100];
   /* Make it appear that this thread has returned from its root
      function with retval as the return value: put retval into its
      %EAX, point its %EIP at the magic return address
      VG_(pthreadreturn_bogusRA), and mark it runnable so execution
      continues from there. */
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf, "exiting with %p", retval);
      print_sched_event(tid, msg_buf);
   }
   VG_(threads)[tid].m_eax = (UInt)retval;
   VG_(threads)[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
   VG_(threads)[tid].status = VgTs_Runnable;
}
1593
sewardje663cb92002-04-12 10:26:32 +00001594
/* Thread tid is exiting, by returning from the function it was
   created with, or via pthread_exit or cancellation.  Record its
   exit value, then either (a) park it in VgTs_WaitJoiner until
   someone joins it, or (b) if a joiner is already waiting, complete
   that thread's pthread_join and retire tid's slot. */
static
void handle_pthread_return ( ThreadId tid, void* retval )
{
   ThreadId jnr; /* joiner, the thread calling pthread_join. */
   UInt* jnr_args;
   void** jnr_thread_return;
   Char msg_buf[100];

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);

   /* Stash the exit value so a later pthread_join can collect it. */
   VG_(threads)[tid].retval = retval;

   if (VG_(threads)[tid].joiner == VG_INVALID_THREADID) {
      /* No one has yet done a join on me.  Park in VgTs_WaitJoiner;
         the slot and its stack stay in place so the next user of
         this slot doesn't reallocate it. */
      VG_(threads)[tid].status = VgTs_WaitJoiner;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, waiting for a call pthread_join(%d)",
            tid);
         print_sched_event(tid, msg_buf);
      }
   } else {
      /* Someone is already waiting; make their join call return with
         success, putting my exit code in the place specified by the
         caller's thread_return param.  This is all very horrible,
         since we need to consult the joiner's arg block -- pointed
         to by its %EAX -- in order to extract the 2nd param of its
         pthread_join call.  TODO: free the slot properly (also
         below).
         NOTE(review): unlike do_pthread_join's immediate-return
         path, the *jnr_thread_return store here is not followed by
         VGM_(make_readable) -- confirm whether that's intentional. */
      jnr = VG_(threads)[tid].joiner;
      vg_assert(VG_(is_valid_tid)(jnr));
      vg_assert(VG_(threads)[jnr].status == VgTs_WaitJoinee);
      jnr_args = (UInt*)VG_(threads)[jnr].m_eax;
      jnr_thread_return = (void**)(jnr_args[2]);
      if (jnr_thread_return != NULL)
         *jnr_thread_return = VG_(threads)[tid].retval;
      SET_EDX(jnr, 0); /* success */
      VG_(threads)[jnr].status = VgTs_Runnable;
      VG_(threads)[tid].status = VgTs_Empty; /* bye! */
      cleanup_after_thread_exited ( tid );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "root fn returns, to find a waiting pthread_join(%d)", tid);
         print_sched_event(tid, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned; resuming", tid);
         print_sched_event(jnr, msg_buf);
      }
   }

   /* Return value is irrelevant; this thread will not get
      rescheduled. */
}
1655
1656
/* Implement pthread_join: thread tid waits for thread jee to
   terminate.  tid's %EDX receives EDEADLK for a self-join, EINVAL
   for a bad or already-joined joinee, else 0; on success
   *thread_return (if non-NULL) receives the joinee's exit value. */
static
void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
{
   Char msg_buf[100];

   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);

   /* A thread cannot join with itself. */
   if (jee == tid) {
      SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
      VG_(threads)[tid].status = VgTs_Runnable;
      return;
   }

   /* Range-check the joinee.  NOTE(review): if ThreadId is an
      unsigned type, the jee < 0 test is vacuous -- confirm. */
   if (jee < 0
       || jee >= VG_N_THREADS
       || VG_(threads)[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      SET_EDX(tid, EINVAL);
      VG_(threads)[tid].status = VgTs_Runnable;
      return;
   }

   /* At most one thread may wait to join any given thread. */
   if (VG_(threads)[jee].joiner != VG_INVALID_THREADID) {
      /* Someone already did join on this thread */
      SET_EDX(tid, EINVAL);
      VG_(threads)[tid].status = VgTs_Runnable;
      return;
   }

   /* if (VG_(threads)[jee].detached) ... */

   /* Perhaps the joinee has already finished?  If so return
      immediately with its return code, and free up the slot.  TODO:
      free it properly (also above). */
   if (VG_(threads)[jee].status == VgTs_WaitJoiner) {
      vg_assert(VG_(threads)[jee].joiner == VG_INVALID_THREADID);
      SET_EDX(tid, 0); /* success */
      if (thread_return != NULL) {
         *thread_return = VG_(threads)[jee].retval;
         /* Not really right, since it makes the thread's return value
            appear to be defined even if it isn't. */
         if (VG_(clo_instrument))
            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
      }
      VG_(threads)[tid].status = VgTs_Runnable;
      VG_(threads)[jee].status = VgTs_Empty; /* bye! */
      cleanup_after_thread_exited ( jee );
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "someone called pthread_join() on me; bye!");
         print_sched_event(jee, msg_buf);
         VG_(sprintf)(msg_buf,
            "my pthread_join(%d) returned immediately",
            jee );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   /* Ok, so we'll have to wait on jee.  Record tid as jee's joiner
      and deschedule tid; handle_pthread_return will resume tid when
      jee terminates. */
   VG_(threads)[jee].joiner = tid;
   VG_(threads)[tid].status = VgTs_WaitJoinee;
   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "blocking on call of pthread_join(%d)", jee );
      print_sched_event(tid, msg_buf);
   }
   /* So tid's join call does not return just now. */
}
1729
1730
1731static
1732void do_pthread_create ( ThreadId parent_tid,
1733 pthread_t* thread,
1734 pthread_attr_t* attr,
1735 void* (*start_routine)(void *),
1736 void* arg )
1737{
sewardj5f07b662002-04-23 16:52:51 +00001738 Int i;
sewardje663cb92002-04-12 10:26:32 +00001739 Addr new_stack;
1740 UInt new_stk_szb;
1741 ThreadId tid;
1742 Char msg_buf[100];
1743
1744 /* Paranoia ... */
1745 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1746
sewardj018f7622002-05-15 21:13:39 +00001747 vg_assert(VG_(threads)[parent_tid].status != VgTs_Empty);
sewardje663cb92002-04-12 10:26:32 +00001748
sewardj1e8cdc92002-04-18 11:37:52 +00001749 tid = vg_alloc_ThreadState();
sewardje663cb92002-04-12 10:26:32 +00001750
1751 /* If we've created the main thread's tid, we're in deep trouble :) */
sewardj6072c362002-04-19 14:40:57 +00001752 vg_assert(tid != 1);
sewardj018f7622002-05-15 21:13:39 +00001753 vg_assert(VG_(is_valid_or_empty_tid)(tid));
sewardje663cb92002-04-12 10:26:32 +00001754
1755 /* Copy the parent's CPU state into the child's, in a roundabout
1756 way (via baseBlock). */
1757 VG_(load_thread_state)(parent_tid);
1758 VG_(save_thread_state)(tid);
1759
1760 /* Consider allocating the child a stack, if the one it already has
1761 is inadequate. */
sewardjbf290b92002-05-01 02:28:01 +00001762 new_stk_szb = VG_PTHREAD_STACK_MIN;
sewardje663cb92002-04-12 10:26:32 +00001763
sewardj018f7622002-05-15 21:13:39 +00001764 if (new_stk_szb > VG_(threads)[tid].stack_size) {
sewardje663cb92002-04-12 10:26:32 +00001765 /* Again, for good measure :) We definitely don't want to be
1766 allocating a stack for the main thread. */
sewardj6072c362002-04-19 14:40:57 +00001767 vg_assert(tid != 1);
sewardje663cb92002-04-12 10:26:32 +00001768 /* for now, we don't handle the case of anything other than
1769 assigning it for the first time. */
sewardj018f7622002-05-15 21:13:39 +00001770 vg_assert(VG_(threads)[tid].stack_size == 0);
1771 vg_assert(VG_(threads)[tid].stack_base == (Addr)NULL);
sewardje663cb92002-04-12 10:26:32 +00001772 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
sewardj018f7622002-05-15 21:13:39 +00001773 VG_(threads)[tid].stack_base = new_stack;
1774 VG_(threads)[tid].stack_size = new_stk_szb;
1775 VG_(threads)[tid].stack_highest_word
sewardje663cb92002-04-12 10:26:32 +00001776 = new_stack + new_stk_szb
sewardj1e8cdc92002-04-18 11:37:52 +00001777 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */;
sewardje663cb92002-04-12 10:26:32 +00001778 }
sewardj1e8cdc92002-04-18 11:37:52 +00001779
sewardj018f7622002-05-15 21:13:39 +00001780 VG_(threads)[tid].m_esp
1781 = VG_(threads)[tid].stack_base
1782 + VG_(threads)[tid].stack_size
sewardj1e8cdc92002-04-18 11:37:52 +00001783 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1784
sewardje663cb92002-04-12 10:26:32 +00001785 if (VG_(clo_instrument))
sewardj018f7622002-05-15 21:13:39 +00001786 VGM_(make_noaccess)( VG_(threads)[tid].m_esp,
sewardje663cb92002-04-12 10:26:32 +00001787 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1788
1789 /* push arg */
sewardj018f7622002-05-15 21:13:39 +00001790 VG_(threads)[tid].m_esp -= 4;
1791 * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
sewardje663cb92002-04-12 10:26:32 +00001792
1793 /* push (magical) return address */
sewardj018f7622002-05-15 21:13:39 +00001794 VG_(threads)[tid].m_esp -= 4;
1795 * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001796
1797 if (VG_(clo_instrument))
sewardj018f7622002-05-15 21:13:39 +00001798 VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
sewardje663cb92002-04-12 10:26:32 +00001799
1800 /* this is where we start */
sewardj018f7622002-05-15 21:13:39 +00001801 VG_(threads)[tid].m_eip = (UInt)start_routine;
sewardje663cb92002-04-12 10:26:32 +00001802
sewardj8937c812002-04-12 20:12:20 +00001803 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001804 VG_(sprintf)(msg_buf,
1805 "new thread, created by %d", parent_tid );
1806 print_sched_event(tid, msg_buf);
1807 }
1808
1809 /* store the thread id in *thread. */
1810 // if (VG_(clo_instrument))
1811 // ***** CHECK *thread is writable
1812 *thread = (pthread_t)tid;
sewardjc3bd5f52002-05-01 03:24:23 +00001813 if (VG_(clo_instrument))
1814 VGM_(make_readable)( (Addr)thread, sizeof(pthread_t) );
sewardje663cb92002-04-12 10:26:32 +00001815
sewardj018f7622002-05-15 21:13:39 +00001816 VG_(threads)[tid].associated_mx = NULL;
1817 VG_(threads)[tid].associated_cv = NULL;
1818 VG_(threads)[tid].joiner = VG_INVALID_THREADID;
1819 VG_(threads)[tid].status = VgTs_Runnable;
sewardj604ec3c2002-04-18 22:38:41 +00001820
sewardj5f07b662002-04-23 16:52:51 +00001821 for (i = 0; i < VG_N_THREAD_KEYS; i++)
sewardj018f7622002-05-15 21:13:39 +00001822 VG_(threads)[tid].specifics[i] = NULL;
sewardj5f07b662002-04-23 16:52:51 +00001823
sewardj018f7622002-05-15 21:13:39 +00001824 /* We inherit our parent's signal mask. */
1825 VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
1826 VG_(ksigemptyset)(&VG_(threads)[i].sigs_waited_for);
sewardjb48e5002002-05-13 00:16:03 +00001827
sewardj604ec3c2002-04-18 22:38:41 +00001828 /* return zero */
sewardj1de04f12002-05-10 02:16:19 +00001829 SET_EDX(parent_tid, 0); /* success */
sewardje663cb92002-04-12 10:26:32 +00001830}
1831
1832
sewardj604ec3c2002-04-18 22:38:41 +00001833/* -----------------------------------------------------------
1834 MUTEXes
1835 -------------------------------------------------------- */
1836
sewardj604ec3c2002-04-18 22:38:41 +00001837/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00001838 typedef struct
1839 {
1840 int __m_reserved; -- Reserved for future use
1841 int __m_count; -- Depth of recursive locking
1842 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1843 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1844 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1845 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001846
sewardj6072c362002-04-19 14:40:57 +00001847 #define PTHREAD_MUTEX_INITIALIZER \
1848 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1849 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1850 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1851 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1852 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1853 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1854 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001855
sewardj6072c362002-04-19 14:40:57 +00001856 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001857
sewardj6072c362002-04-19 14:40:57 +00001858 __m_kind never changes and indicates whether or not it is recursive.
1859
1860 __m_count indicates the lock count; if 0, the mutex is not owned by
1861 anybody.
1862
1863 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1864 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1865 statically initialised mutexes correctly appear
1866 to belong to nobody.
1867
   In summary, a not-in-use mutex is distinguished by having __m_owner
   == 0 (VG_INVALID_THREADID) and __m_count == 0 too.  If one of those
   conditions holds, the other should too.
1871
   There is no linked list of threads waiting for this mutex.  Instead
   a thread in WaitMX state points at the mutex with its associated_mx
   field.  This makes _unlock() inefficient, but simple to implement the
   right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001876
sewardj604ec3c2002-04-18 22:38:41 +00001877 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001878 deals with that for us.
1879*/
sewardje663cb92002-04-12 10:26:32 +00001880
sewardj3b5d8862002-04-20 13:53:23 +00001881/* Helper fns ... */
1882static
1883void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1884 Char* caller )
1885{
1886 Int i;
1887 Char msg_buf[100];
1888
1889 /* Find some arbitrary thread waiting on this mutex, and make it
1890 runnable. If none are waiting, mark the mutex as not held. */
1891 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00001892 if (VG_(threads)[i].status == VgTs_Empty)
sewardj3b5d8862002-04-20 13:53:23 +00001893 continue;
sewardj018f7622002-05-15 21:13:39 +00001894 if (VG_(threads)[i].status == VgTs_WaitMX
1895 && VG_(threads)[i].associated_mx == mutex)
sewardj3b5d8862002-04-20 13:53:23 +00001896 break;
1897 }
1898
1899 vg_assert(i <= VG_N_THREADS);
1900 if (i == VG_N_THREADS) {
1901 /* Nobody else is waiting on it. */
1902 mutex->__m_count = 0;
1903 mutex->__m_owner = VG_INVALID_THREADID;
1904 } else {
1905 /* Notionally transfer the hold to thread i, whose
1906 pthread_mutex_lock() call now returns with 0 (success). */
1907 /* The .count is already == 1. */
sewardj018f7622002-05-15 21:13:39 +00001908 vg_assert(VG_(threads)[i].associated_mx == mutex);
sewardj3b5d8862002-04-20 13:53:23 +00001909 mutex->__m_owner = (_pthread_descr)i;
sewardj018f7622002-05-15 21:13:39 +00001910 VG_(threads)[i].status = VgTs_Runnable;
1911 VG_(threads)[i].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00001912 /* m_edx already holds pth_mx_lock() success (0) */
sewardj3b5d8862002-04-20 13:53:23 +00001913
1914 if (VG_(clo_trace_pthread_level) >= 1) {
1915 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1916 caller, mutex );
1917 print_pthread_event(i, msg_buf);
1918 }
1919 }
1920}
1921
sewardje663cb92002-04-12 10:26:32 +00001922
/* Implement pthread_mutex_lock (is_trylock == False) and
   pthread_mutex_trylock (is_trylock == True) for thread tid.  The
   call's result is written to tid's %EDX: EINVAL for a bad mutex,
   EDEADLK (lock) / EBUSY (trylock) on a self-relock of a
   non-recursive mutex, EBUSY when trylock finds the mutex held, else
   0 -- possibly delivered later, when the mutex is handed to us by
   release_one_thread_waiting_on_mutex(). */
static
void do_pthread_mutex_lock( ThreadId tid,
                            Bool is_trylock,
                            void* /* pthread_mutex_t* */ mutexV )
{
   Char  msg_buf[100];
   Char* caller
     = is_trylock ? "pthread_mutex_trylock"
                  : "pthread_mutex_lock ";

   pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      /* VG_(printf)("NULL mutex\n"); */
      SET_EDX(tid, EINVAL);
      return;
   }

   /* More paranoia: reject mutexes whose __m_kind is not one we
      recognise, or whose lock count has gone negative. */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
#     ifdef GLIBC_2_1
      case PTHREAD_MUTEX_FAST_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         /* VG_(printf)("unknown __m_kind %d in mutex\n", mutex->__m_kind); */
         SET_EDX(tid, EINVAL);
         return;
   }

   if (mutex->__m_count > 0) {

      vg_assert(VG_(is_valid_tid)((ThreadId)mutex->__m_owner));

      /* Someone has it already. */
      if ((ThreadId)mutex->__m_owner == tid) {
         /* It's locked -- by me!  Recursive mutexes just bump the
            count; otherwise this is a self-deadlock. */
         if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
            /* return 0 (success). */
            mutex->__m_count++;
            SET_EDX(tid, 0);
            if (0)
               VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
                           tid, mutex, mutex->__m_count);
            return;
         } else {
            if (is_trylock)
               SET_EDX(tid, EBUSY);
            else
               SET_EDX(tid, EDEADLK);
            return;
         }
      } else {
         /* Someone else has it; we have to wait.  Mark ourselves
            thusly. */
         /* GUARD: __m_count > 0 && __m_owner is valid */
         if (is_trylock) {
            /* caller is polling; so return immediately. */
            SET_EDX(tid, EBUSY);
         } else {
            /* Block in VgTs_WaitMX.  %EDX is set to 0 NOW, so when
               release_one_thread_waiting_on_mutex() later makes us
               runnable, the lock call's success value is already in
               place. */
            VG_(threads)[tid].status = VgTs_WaitMX;
            VG_(threads)[tid].associated_mx = mutex;
            SET_EDX(tid, 0); /* pth_mx_lock success value */
            if (VG_(clo_trace_pthread_level) >= 1) {
               VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
                            caller, mutex );
               print_pthread_event(tid, msg_buf);
            }
         }
         return;
      }

   } else {
      /* Nobody owns it.  Sanity check ... */
      vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
      /* We get it! [for the first time]. */
      mutex->__m_count = 1;
      mutex->__m_owner = (_pthread_descr)tid;
      vg_assert(VG_(threads)[tid].associated_mx == NULL);
      /* return 0 (success). */
      SET_EDX(tid, 0);
   }

}
2024
2025
/* Implement pthread_mutex_unlock for thread tid.  tid's %EDX gets
   EINVAL for a bad mutex, EPERM if tid does not hold it, else 0.  On
   a final unlock, ownership is handed to at most one blocked
   waiter. */
static
void do_pthread_mutex_unlock ( ThreadId tid,
                               void* /* pthread_mutex_t* */ mutexV )
{
   Char msg_buf[100];
   pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   if (mutex == NULL) {
      SET_EDX(tid, EINVAL);
      return;
   }

   /* More paranoia: reject mutexes whose __m_kind is not one we
      recognise, or whose lock count has gone negative. */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
#     ifdef GLIBC_2_1
      case PTHREAD_MUTEX_FAST_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         SET_EDX(tid, EINVAL);
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      SET_EDX(tid, EPERM);
      return;
   }

   /* If it's a multiply-locked recursive mutex, just decrement the
      lock count and return. */
   if (mutex->__m_count > 1) {
      vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
      mutex->__m_count --;
      SET_EDX(tid, 0); /* success */
      return;
   }

   /* Now we're sure it is locked exactly once, and by the thread who
      is now doing an unlock on it. */
   vg_assert(mutex->__m_count == 1);
   vg_assert((ThreadId)mutex->__m_owner == tid);

   /* Release at max one thread waiting on this mutex.
      NOTE(review): the caller string reads "pthread_mutex_lock"
      because the RESUME event it labels is printed for the released
      thread -- whose blocked lock call now returns -- not for this
      unlock; confirm this is the intended trace wording. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );

   /* Our (tid's) pth_unlock() returns with 0 (success). */
   SET_EDX(tid, 0); /* Success. */
}
2092
2093
sewardj6072c362002-04-19 14:40:57 +00002094/* -----------------------------------------------------------
2095 CONDITION VARIABLES
2096 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00002097
sewardj6072c362002-04-19 14:40:57 +00002098/* The relevant native types are as follows:
2099 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00002100
   -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER)
2102 typedef struct
2103 {
2104 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
2105 _pthread_descr __c_waiting; -- Threads waiting on this condition
2106 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00002107
   -- Attribute for condition variables.
2109 typedef struct
2110 {
2111 int __dummy;
2112 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00002113
sewardj6072c362002-04-19 14:40:57 +00002114 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00002115
sewardj3b5d8862002-04-20 13:53:23 +00002116 We don't use any fields of pthread_cond_t for anything at all.
2117 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00002118
2119 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00002120 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00002121
sewardj77e466c2002-04-14 02:29:29 +00002122
sewardj5f07b662002-04-23 16:52:51 +00002123static
2124void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid )
2125{
2126 Char msg_buf[100];
2127 pthread_mutex_t* mx;
2128 pthread_cond_t* cv;
2129
sewardjb48e5002002-05-13 00:16:03 +00002130 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002131 && VG_(threads)[tid].status == VgTs_WaitCV
2132 && VG_(threads)[tid].awaken_at != 0xFFFFFFFF);
2133 mx = VG_(threads)[tid].associated_mx;
sewardj5f07b662002-04-23 16:52:51 +00002134 vg_assert(mx != NULL);
sewardj018f7622002-05-15 21:13:39 +00002135 cv = VG_(threads)[tid].associated_cv;
sewardj5f07b662002-04-23 16:52:51 +00002136 vg_assert(cv != NULL);
2137
2138 if (mx->__m_owner == VG_INVALID_THREADID) {
2139 /* Currently unheld; hand it out to thread tid. */
2140 vg_assert(mx->__m_count == 0);
sewardj018f7622002-05-15 21:13:39 +00002141 VG_(threads)[tid].status = VgTs_Runnable;
sewardjc3bd5f52002-05-01 03:24:23 +00002142 SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */
sewardj018f7622002-05-15 21:13:39 +00002143 VG_(threads)[tid].associated_cv = NULL;
2144 VG_(threads)[tid].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00002145 mx->__m_owner = (_pthread_descr)tid;
2146 mx->__m_count = 1;
2147
2148 if (VG_(clo_trace_pthread_level) >= 1) {
sewardjc3bd5f52002-05-01 03:24:23 +00002149 VG_(sprintf)(msg_buf,
2150 "pthread_cond_timedwai cv %p: TIMEOUT with mx %p",
2151 cv, mx );
sewardj5f07b662002-04-23 16:52:51 +00002152 print_pthread_event(tid, msg_buf);
2153 }
2154 } else {
2155 /* Currently held. Make thread tid be blocked on it. */
2156 vg_assert(mx->__m_count > 0);
sewardj018f7622002-05-15 21:13:39 +00002157 VG_(threads)[tid].status = VgTs_WaitMX;
sewardjc3bd5f52002-05-01 03:24:23 +00002158 SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */
sewardj018f7622002-05-15 21:13:39 +00002159 VG_(threads)[tid].associated_cv = NULL;
2160 VG_(threads)[tid].associated_mx = mx;
sewardj5f07b662002-04-23 16:52:51 +00002161 if (VG_(clo_trace_pthread_level) >= 1) {
2162 VG_(sprintf)(msg_buf,
2163 "pthread_cond_timedwai cv %p: TIMEOUT -> BLOCK for mx %p",
2164 cv, mx );
2165 print_pthread_event(tid, msg_buf);
2166 }
2167
2168 }
2169}
2170
2171
sewardj3b5d8862002-04-20 13:53:23 +00002172static
2173void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
2174 Int n_to_release,
2175 Char* caller )
2176{
2177 Int i;
2178 Char msg_buf[100];
2179 pthread_mutex_t* mx;
2180
2181 while (True) {
2182 if (n_to_release == 0)
2183 return;
2184
2185 /* Find a thread waiting on this CV. */
2186 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00002187 if (VG_(threads)[i].status == VgTs_Empty)
sewardj3b5d8862002-04-20 13:53:23 +00002188 continue;
sewardj018f7622002-05-15 21:13:39 +00002189 if (VG_(threads)[i].status == VgTs_WaitCV
2190 && VG_(threads)[i].associated_cv == cond)
sewardj3b5d8862002-04-20 13:53:23 +00002191 break;
2192 }
2193 vg_assert(i <= VG_N_THREADS);
2194
2195 if (i == VG_N_THREADS) {
2196 /* Nobody else is waiting on it. */
2197 return;
2198 }
2199
sewardj018f7622002-05-15 21:13:39 +00002200 mx = VG_(threads)[i].associated_mx;
sewardj3b5d8862002-04-20 13:53:23 +00002201 vg_assert(mx != NULL);
2202
2203 if (mx->__m_owner == VG_INVALID_THREADID) {
2204 /* Currently unheld; hand it out to thread i. */
2205 vg_assert(mx->__m_count == 0);
sewardj018f7622002-05-15 21:13:39 +00002206 VG_(threads)[i].status = VgTs_Runnable;
2207 VG_(threads)[i].associated_cv = NULL;
2208 VG_(threads)[i].associated_mx = NULL;
sewardj3b5d8862002-04-20 13:53:23 +00002209 mx->__m_owner = (_pthread_descr)i;
2210 mx->__m_count = 1;
sewardj5f07b662002-04-23 16:52:51 +00002211 /* .m_edx already holds pth_cond_wait success value (0) */
sewardj3b5d8862002-04-20 13:53:23 +00002212
2213 if (VG_(clo_trace_pthread_level) >= 1) {
2214 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
2215 caller, cond, mx );
2216 print_pthread_event(i, msg_buf);
2217 }
2218
2219 } else {
2220 /* Currently held. Make thread i be blocked on it. */
sewardj5f07b662002-04-23 16:52:51 +00002221 vg_assert(mx->__m_count > 0);
sewardj018f7622002-05-15 21:13:39 +00002222 VG_(threads)[i].status = VgTs_WaitMX;
2223 VG_(threads)[i].associated_cv = NULL;
2224 VG_(threads)[i].associated_mx = mx;
sewardjc3bd5f52002-05-01 03:24:23 +00002225 SET_EDX(i, 0); /* pth_cond_wait success value */
sewardj3b5d8862002-04-20 13:53:23 +00002226
2227 if (VG_(clo_trace_pthread_level) >= 1) {
2228 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
2229 caller, cond, mx );
2230 print_pthread_event(i, msg_buf);
2231 }
2232
2233 }
2234
2235 n_to_release--;
2236 }
2237}
2238
2239
2240static
2241void do_pthread_cond_wait ( ThreadId tid,
2242 pthread_cond_t *cond,
sewardj5f07b662002-04-23 16:52:51 +00002243 pthread_mutex_t *mutex,
2244 UInt ms_end )
sewardj3b5d8862002-04-20 13:53:23 +00002245{
2246 Char msg_buf[100];
2247
sewardj5f07b662002-04-23 16:52:51 +00002248 /* If ms_end == 0xFFFFFFFF, wait forever (no timeout). Otherwise,
2249 ms_end is the ending millisecond. */
2250
sewardj3b5d8862002-04-20 13:53:23 +00002251 /* pre: mutex should be a valid mutex and owned by tid. */
2252 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj5f07b662002-04-23 16:52:51 +00002253 VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...",
2254 cond, mutex, ms_end );
sewardj3b5d8862002-04-20 13:53:23 +00002255 print_pthread_event(tid, msg_buf);
2256 }
2257
2258 /* Paranoia ... */
sewardjb48e5002002-05-13 00:16:03 +00002259 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002260 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj3b5d8862002-04-20 13:53:23 +00002261
2262 if (mutex == NULL || cond == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002263 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002264 return;
2265 }
2266
2267 /* More paranoia ... */
2268 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00002269# ifndef GLIBC_2_1
sewardj3b5d8862002-04-20 13:53:23 +00002270 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00002271 case PTHREAD_MUTEX_ADAPTIVE_NP:
2272# endif
sewardja1679dd2002-05-10 22:31:40 +00002273# ifdef GLIBC_2_1
sewardj8e651d72002-05-10 21:00:19 +00002274 case PTHREAD_MUTEX_FAST_NP:
sewardja1679dd2002-05-10 22:31:40 +00002275# endif
sewardj3b5d8862002-04-20 13:53:23 +00002276 case PTHREAD_MUTEX_RECURSIVE_NP:
2277 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj3b5d8862002-04-20 13:53:23 +00002278 if (mutex->__m_count >= 0) break;
2279 /* else fall thru */
2280 default:
sewardjc3bd5f52002-05-01 03:24:23 +00002281 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002282 return;
2283 }
2284
2285 /* Barf if we don't currently hold the mutex. */
2286 if (mutex->__m_count == 0 /* nobody holds it */
2287 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardjc3bd5f52002-05-01 03:24:23 +00002288 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002289 return;
2290 }
2291
2292 /* Queue ourselves on the condition. */
sewardj018f7622002-05-15 21:13:39 +00002293 VG_(threads)[tid].status = VgTs_WaitCV;
2294 VG_(threads)[tid].associated_cv = cond;
2295 VG_(threads)[tid].associated_mx = mutex;
2296 VG_(threads)[tid].awaken_at = ms_end;
sewardj3b5d8862002-04-20 13:53:23 +00002297
2298 if (VG_(clo_trace_pthread_level) >= 1) {
2299 VG_(sprintf)(msg_buf,
2300 "pthread_cond_wait cv %p, mx %p: BLOCK",
2301 cond, mutex );
2302 print_pthread_event(tid, msg_buf);
2303 }
2304
2305 /* Release the mutex. */
2306 release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
2307}
2308
2309
2310static
2311void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2312 Bool broadcast,
2313 pthread_cond_t *cond )
2314{
2315 Char msg_buf[100];
2316 Char* caller
2317 = broadcast ? "pthread_cond_broadcast"
2318 : "pthread_cond_signal ";
2319
2320 if (VG_(clo_trace_pthread_level) >= 2) {
2321 VG_(sprintf)(msg_buf, "%s cv %p ...",
2322 caller, cond );
2323 print_pthread_event(tid, msg_buf);
2324 }
2325
2326 /* Paranoia ... */
sewardjb48e5002002-05-13 00:16:03 +00002327 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002328 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj3b5d8862002-04-20 13:53:23 +00002329
2330 if (cond == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002331 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002332 return;
2333 }
2334
2335 release_N_threads_waiting_on_cond (
2336 cond,
2337 broadcast ? VG_N_THREADS : 1,
2338 caller
2339 );
2340
sewardjc3bd5f52002-05-01 03:24:23 +00002341 SET_EDX(tid, 0); /* success */
sewardj3b5d8862002-04-20 13:53:23 +00002342}
2343
sewardj77e466c2002-04-14 02:29:29 +00002344
sewardj5f07b662002-04-23 16:52:51 +00002345/* -----------------------------------------------------------
2346 THREAD SPECIFIC DATA
2347 -------------------------------------------------------- */
2348
2349static __inline__
2350Bool is_valid_key ( ThreadKey k )
2351{
2352 /* k unsigned; hence no < 0 check */
2353 if (k >= VG_N_THREAD_KEYS) return False;
2354 if (!vg_thread_keys[k].inuse) return False;
2355 return True;
2356}
2357
2358static
2359void do_pthread_key_create ( ThreadId tid,
2360 pthread_key_t* key,
2361 void (*destructor)(void*) )
2362{
2363 Int i;
2364 Char msg_buf[100];
2365
2366 if (VG_(clo_trace_pthread_level) >= 1) {
2367 VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p",
2368 key, destructor );
2369 print_pthread_event(tid, msg_buf);
2370 }
2371
2372 vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey));
sewardjb48e5002002-05-13 00:16:03 +00002373 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002374 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002375
2376 for (i = 0; i < VG_N_THREAD_KEYS; i++)
2377 if (!vg_thread_keys[i].inuse)
2378 break;
2379
2380 if (i == VG_N_THREAD_KEYS) {
sewardjc3bd5f52002-05-01 03:24:23 +00002381 /* SET_EDX(tid, EAGAIN);
sewardj5f07b662002-04-23 16:52:51 +00002382 return;
2383 */
2384 VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
2385 " increase and recompile");
2386 }
2387
2388 vg_thread_keys[i].inuse = True;
sewardjc3bd5f52002-05-01 03:24:23 +00002389
sewardj5f07b662002-04-23 16:52:51 +00002390 /* TODO: check key for addressibility */
2391 *key = i;
sewardjc3bd5f52002-05-01 03:24:23 +00002392 if (VG_(clo_instrument))
2393 VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );
2394
2395 SET_EDX(tid, 0);
sewardj5f07b662002-04-23 16:52:51 +00002396}
2397
2398
2399static
2400void do_pthread_key_delete ( ThreadId tid, pthread_key_t key )
2401{
2402 Char msg_buf[100];
2403 if (VG_(clo_trace_pthread_level) >= 1) {
2404 VG_(sprintf)(msg_buf, "pthread_key_delete key %d",
2405 key );
2406 print_pthread_event(tid, msg_buf);
2407 }
2408
sewardjb48e5002002-05-13 00:16:03 +00002409 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002410 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002411
2412 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002413 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002414 return;
2415 }
2416
2417 vg_thread_keys[key].inuse = False;
2418
2419 /* Optional. We're not required to do this, although it shouldn't
2420 make any difference to programs which use the key/specifics
2421 functions correctly. */
sewardj3b13f0e2002-04-25 20:17:29 +00002422# if 1
sewardj5f07b662002-04-23 16:52:51 +00002423 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj018f7622002-05-15 21:13:39 +00002424 if (VG_(threads)[tid].status != VgTs_Empty)
2425 VG_(threads)[tid].specifics[key] = NULL;
sewardj5f07b662002-04-23 16:52:51 +00002426 }
sewardj3b13f0e2002-04-25 20:17:29 +00002427# endif
sewardj5f07b662002-04-23 16:52:51 +00002428}
2429
2430
2431static
2432void do_pthread_getspecific ( ThreadId tid, pthread_key_t key )
2433{
2434 Char msg_buf[100];
2435 if (VG_(clo_trace_pthread_level) >= 1) {
2436 VG_(sprintf)(msg_buf, "pthread_getspecific key %d",
2437 key );
2438 print_pthread_event(tid, msg_buf);
2439 }
2440
sewardjb48e5002002-05-13 00:16:03 +00002441 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002442 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002443
2444 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002445 SET_EDX(tid, (UInt)NULL);
sewardj5f07b662002-04-23 16:52:51 +00002446 return;
2447 }
2448
sewardj018f7622002-05-15 21:13:39 +00002449 SET_EDX(tid, (UInt)VG_(threads)[tid].specifics[key]);
sewardj5f07b662002-04-23 16:52:51 +00002450}
2451
2452
2453static
2454void do_pthread_setspecific ( ThreadId tid,
2455 pthread_key_t key,
2456 void *pointer )
2457{
2458 Char msg_buf[100];
2459 if (VG_(clo_trace_pthread_level) >= 1) {
2460 VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p",
2461 key, pointer );
2462 print_pthread_event(tid, msg_buf);
2463 }
2464
sewardjb48e5002002-05-13 00:16:03 +00002465 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002466 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002467
2468 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002469 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002470 return;
2471 }
2472
sewardj018f7622002-05-15 21:13:39 +00002473 VG_(threads)[tid].specifics[key] = pointer;
sewardjc3bd5f52002-05-01 03:24:23 +00002474 SET_EDX(tid, 0);
sewardj5f07b662002-04-23 16:52:51 +00002475}
2476
2477
sewardjb48e5002002-05-13 00:16:03 +00002478/* ---------------------------------------------------
2479 SIGNALS
2480 ------------------------------------------------ */
2481
2482/* See comment in vg_libthread.c:pthread_sigmask() regarding
sewardj018f7622002-05-15 21:13:39 +00002483 deliberate confusion of types sigset_t and vki_sigset_t. Return 0
2484 for OK and 1 for some kind of addressing error, which the
2485 vg_libpthread.c routine turns into return values 0 and EFAULT
2486 respectively. */
sewardjb48e5002002-05-13 00:16:03 +00002487static
2488void do_pthread_sigmask ( ThreadId tid,
sewardj018f7622002-05-15 21:13:39 +00002489 Int vki_how,
sewardjb48e5002002-05-13 00:16:03 +00002490 vki_ksigset_t* newmask,
2491 vki_ksigset_t* oldmask )
2492{
2493 Char msg_buf[100];
2494 if (VG_(clo_trace_pthread_level) >= 1) {
2495 VG_(sprintf)(msg_buf,
sewardj018f7622002-05-15 21:13:39 +00002496 "pthread_sigmask vki_how %d, newmask %p, oldmask %p",
2497 vki_how, newmask, oldmask );
sewardjb48e5002002-05-13 00:16:03 +00002498 print_pthread_event(tid, msg_buf);
2499 }
2500
2501 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002502 && VG_(threads)[tid].status == VgTs_Runnable);
sewardjb48e5002002-05-13 00:16:03 +00002503
2504 if (VG_(clo_instrument)) {
2505 /* TODO check newmask/oldmask are addressible/defined */
2506 }
2507
sewardj018f7622002-05-15 21:13:39 +00002508 VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask );
sewardjb48e5002002-05-13 00:16:03 +00002509
sewardj3a951cf2002-05-15 22:25:47 +00002510 if (newmask && VG_(clo_instrument)) {
2511 VGM_(make_readable)( (Addr)newmask, sizeof(vki_ksigset_t) );
2512 }
2513
sewardj018f7622002-05-15 21:13:39 +00002514 /* Success. */
sewardjb48e5002002-05-13 00:16:03 +00002515 SET_EDX(tid, 0);
2516}
2517
2518
2519static
2520void do_sigwait ( ThreadId tid,
2521 vki_ksigset_t* set,
2522 Int* sig )
2523{
sewardj018f7622002-05-15 21:13:39 +00002524 vki_ksigset_t irrelevant_sigmask;
2525 Char msg_buf[100];
2526
sewardjb48e5002002-05-13 00:16:03 +00002527 if (VG_(clo_trace_signals) || VG_(clo_trace_sched)) {
2528 VG_(sprintf)(msg_buf,
2529 "suspend due to sigwait(): set %p, sig %p",
2530 set, sig );
2531 print_pthread_event(tid, msg_buf);
2532 }
2533
2534 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002535 && VG_(threads)[tid].status == VgTs_Runnable);
sewardjb48e5002002-05-13 00:16:03 +00002536
sewardj018f7622002-05-15 21:13:39 +00002537 /* Change SCSS */
2538 VG_(threads)[tid].sigs_waited_for = *set;
2539 VG_(threads)[tid].status = VgTs_WaitSIG;
2540
2541 VG_(block_all_host_signals)( &irrelevant_sigmask );
2542 VG_(handle_SCSS_change)( False /* lazy update */ );
2543}
2544
2545
2546static
2547void do_pthread_kill ( ThreadId tid, /* me */
2548 ThreadId thread, /* thread to signal */
2549 Int sig )
2550{
2551 Char msg_buf[100];
2552
2553 if (VG_(clo_trace_signals) || VG_(clo_trace_pthread_level) >= 1) {
2554 VG_(sprintf)(msg_buf,
2555 "pthread_kill thread %d, signo %d",
2556 thread, sig );
2557 print_pthread_event(tid, msg_buf);
2558 }
2559
2560 vg_assert(VG_(is_valid_tid)(tid)
2561 && VG_(threads)[tid].status == VgTs_Runnable);
2562
2563 if (!VG_(is_valid_tid)(tid)) {
2564 SET_EDX(tid, -VKI_ESRCH);
2565 return;
2566 }
2567
2568 if (sig < 1 || sig > VKI_KNSIG) {
2569 SET_EDX(tid, -VKI_EINVAL);
2570 return;
2571 }
2572
2573 VG_(send_signal_to_thread)( thread, sig );
2574 SET_EDX(tid, 0);
sewardjb48e5002002-05-13 00:16:03 +00002575}
2576
2577
sewardje663cb92002-04-12 10:26:32 +00002578/* ---------------------------------------------------------------------
2579 Handle non-trivial client requests.
2580 ------------------------------------------------------------------ */
2581
2582static
2583void do_nontrivial_clientreq ( ThreadId tid )
2584{
sewardj018f7622002-05-15 21:13:39 +00002585 UInt* arg = (UInt*)(VG_(threads)[tid].m_eax);
sewardje663cb92002-04-12 10:26:32 +00002586 UInt req_no = arg[0];
2587 switch (req_no) {
2588
2589 case VG_USERREQ__PTHREAD_CREATE:
2590 do_pthread_create( tid,
2591 (pthread_t*)arg[1],
2592 (pthread_attr_t*)arg[2],
2593 (void*(*)(void*))arg[3],
2594 (void*)arg[4] );
2595 break;
2596
sewardjbc5b99f2002-04-13 00:08:51 +00002597 case VG_USERREQ__PTHREAD_RETURNS:
2598 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00002599 break;
2600
2601 case VG_USERREQ__PTHREAD_JOIN:
2602 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
2603 break;
2604
sewardje663cb92002-04-12 10:26:32 +00002605 case VG_USERREQ__PTHREAD_CANCEL:
2606 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
2607 break;
2608
sewardj3b5d8862002-04-20 13:53:23 +00002609 case VG_USERREQ__PTHREAD_EXIT:
2610 do_pthread_exit( tid, (void*)(arg[1]) );
2611 break;
2612
2613 case VG_USERREQ__PTHREAD_COND_WAIT:
2614 do_pthread_cond_wait( tid,
2615 (pthread_cond_t *)(arg[1]),
sewardj5f07b662002-04-23 16:52:51 +00002616 (pthread_mutex_t *)(arg[2]),
2617 0xFFFFFFFF /* no timeout */ );
2618 break;
2619
2620 case VG_USERREQ__PTHREAD_COND_TIMEDWAIT:
2621 do_pthread_cond_wait( tid,
2622 (pthread_cond_t *)(arg[1]),
2623 (pthread_mutex_t *)(arg[2]),
2624 arg[3] /* timeout millisecond point */ );
sewardj3b5d8862002-04-20 13:53:23 +00002625 break;
2626
2627 case VG_USERREQ__PTHREAD_COND_SIGNAL:
2628 do_pthread_cond_signal_or_broadcast(
2629 tid,
2630 False, /* signal, not broadcast */
2631 (pthread_cond_t *)(arg[1]) );
2632 break;
2633
2634 case VG_USERREQ__PTHREAD_COND_BROADCAST:
2635 do_pthread_cond_signal_or_broadcast(
2636 tid,
2637 True, /* broadcast, not signal */
2638 (pthread_cond_t *)(arg[1]) );
2639 break;
2640
sewardj5f07b662002-04-23 16:52:51 +00002641 case VG_USERREQ__PTHREAD_KEY_CREATE:
2642 do_pthread_key_create ( tid,
2643 (pthread_key_t*)(arg[1]),
2644 (void(*)(void*))(arg[2]) );
2645 break;
2646
2647 case VG_USERREQ__PTHREAD_KEY_DELETE:
2648 do_pthread_key_delete ( tid,
2649 (pthread_key_t)(arg[1]) );
2650 break;
2651
sewardj5f07b662002-04-23 16:52:51 +00002652 case VG_USERREQ__PTHREAD_SETSPECIFIC:
2653 do_pthread_setspecific ( tid,
2654 (pthread_key_t)(arg[1]),
2655 (void*)(arg[2]) );
2656 break;
2657
sewardjb48e5002002-05-13 00:16:03 +00002658 case VG_USERREQ__PTHREAD_SIGMASK:
2659 do_pthread_sigmask ( tid,
2660 arg[1],
2661 (vki_ksigset_t*)(arg[2]),
2662 (vki_ksigset_t*)(arg[3]) );
2663 break;
2664
2665 case VG_USERREQ__SIGWAIT:
2666 do_sigwait ( tid,
2667 (vki_ksigset_t*)(arg[1]),
2668 (Int*)(arg[2]) );
2669 break;
2670
sewardj018f7622002-05-15 21:13:39 +00002671 case VG_USERREQ__PTHREAD_KILL:
2672 do_pthread_kill ( tid, arg[1], arg[2] );
2673 break;
2674
2675
sewardje663cb92002-04-12 10:26:32 +00002676 case VG_USERREQ__MAKE_NOACCESS:
2677 case VG_USERREQ__MAKE_WRITABLE:
2678 case VG_USERREQ__MAKE_READABLE:
2679 case VG_USERREQ__DISCARD:
2680 case VG_USERREQ__CHECK_WRITABLE:
2681 case VG_USERREQ__CHECK_READABLE:
2682 case VG_USERREQ__MAKE_NOACCESS_STACK:
2683 case VG_USERREQ__RUNNING_ON_VALGRIND:
2684 case VG_USERREQ__DO_LEAK_CHECK:
sewardj18d75132002-05-16 11:06:21 +00002685 case VG_USERREQ__DISCARD_TRANSLATIONS:
sewardjc3bd5f52002-05-01 03:24:23 +00002686 SET_EDX(
2687 tid,
sewardj018f7622002-05-15 21:13:39 +00002688 VG_(handle_client_request) ( &VG_(threads)[tid], arg )
sewardjc3bd5f52002-05-01 03:24:23 +00002689 );
sewardje663cb92002-04-12 10:26:32 +00002690 break;
2691
sewardj77e466c2002-04-14 02:29:29 +00002692 case VG_USERREQ__SIGNAL_RETURNS:
2693 handle_signal_return(tid);
2694 break;
sewardj54cacf02002-04-12 23:24:59 +00002695
sewardje663cb92002-04-12 10:26:32 +00002696 default:
2697 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
2698 VG_(panic)("handle_private_client_pthread_request: "
2699 "unknown request");
2700 /*NOTREACHED*/
2701 break;
2702 }
2703}
2704
2705
sewardj6072c362002-04-19 14:40:57 +00002706/* ---------------------------------------------------------------------
2707 Sanity checking.
2708 ------------------------------------------------------------------ */
2709
2710/* Internal consistency checks on the sched/pthread structures. */
2711static
2712void scheduler_sanity ( void )
2713{
sewardj3b5d8862002-04-20 13:53:23 +00002714 pthread_mutex_t* mx;
2715 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002716 Int i;
sewardj5f07b662002-04-23 16:52:51 +00002717
sewardj6072c362002-04-19 14:40:57 +00002718 /* VG_(printf)("scheduler_sanity\n"); */
2719 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00002720 mx = VG_(threads)[i].associated_mx;
2721 cv = VG_(threads)[i].associated_cv;
2722 if (VG_(threads)[i].status == VgTs_WaitMX) {
sewardjbf290b92002-05-01 02:28:01 +00002723 /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
2724 it's actually held by someone, since otherwise this thread
2725 is deadlocked, (4) the mutex's owner is not us, since
2726 otherwise this thread is also deadlocked. The logic in
2727 do_pthread_mutex_lock rejects attempts by a thread to lock
2728 a (non-recursive) mutex which it already owns.
sewardj05553872002-04-20 20:53:17 +00002729
sewardjbf290b92002-05-01 02:28:01 +00002730 (2) has been seen to fail sometimes. I don't know why.
2731 Possibly to do with signals. */
sewardj3b5d8862002-04-20 13:53:23 +00002732 vg_assert(cv == NULL);
sewardj05553872002-04-20 20:53:17 +00002733 /* 1 */ vg_assert(mx != NULL);
2734 /* 2 */ vg_assert(mx->__m_count > 0);
sewardjb48e5002002-05-13 00:16:03 +00002735 /* 3 */ vg_assert(VG_(is_valid_tid)((ThreadId)mx->__m_owner));
sewardj05553872002-04-20 20:53:17 +00002736 /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
sewardj3b5d8862002-04-20 13:53:23 +00002737 } else
sewardj018f7622002-05-15 21:13:39 +00002738 if (VG_(threads)[i].status == VgTs_WaitCV) {
sewardj3b5d8862002-04-20 13:53:23 +00002739 vg_assert(cv != NULL);
2740 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002741 } else {
sewardj05553872002-04-20 20:53:17 +00002742 /* Unfortunately these don't hold true when a sighandler is
2743 running. To be fixed. */
2744 /* vg_assert(cv == NULL); */
2745 /* vg_assert(mx == NULL); */
sewardj6072c362002-04-19 14:40:57 +00002746 }
sewardjbf290b92002-05-01 02:28:01 +00002747
sewardj018f7622002-05-15 21:13:39 +00002748 if (VG_(threads)[i].status != VgTs_Empty) {
sewardjbf290b92002-05-01 02:28:01 +00002749 Int
sewardj018f7622002-05-15 21:13:39 +00002750 stack_used = (Addr)VG_(threads)[i].stack_highest_word
2751 - (Addr)VG_(threads)[i].m_esp;
sewardjbf290b92002-05-01 02:28:01 +00002752 if (i > 1 /* not the root thread */
2753 && stack_used
2754 >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
2755 VG_(message)(Vg_UserMsg,
2756 "Warning: STACK OVERFLOW: "
2757 "thread %d: stack used %d, available %d",
2758 i, stack_used, VG_PTHREAD_STACK_MIN );
2759 VG_(message)(Vg_UserMsg,
2760 "Terminating Valgrind. If thread(s) "
2761 "really need more stack, increase");
2762 VG_(message)(Vg_UserMsg,
2763 "VG_PTHREAD_STACK_SIZE in vg_include.h and recompile.");
2764 VG_(exit)(1);
2765 }
sewardjb48e5002002-05-13 00:16:03 +00002766
sewardj018f7622002-05-15 21:13:39 +00002767 if (VG_(threads)[i].status == VgTs_WaitSIG) {
sewardjb48e5002002-05-13 00:16:03 +00002768 vg_assert( ! VG_(kisemptysigset)(
sewardj018f7622002-05-15 21:13:39 +00002769 & VG_(threads)[i].sigs_waited_for) );
sewardjb48e5002002-05-13 00:16:03 +00002770 } else {
2771 vg_assert( VG_(kisemptysigset)(
sewardj018f7622002-05-15 21:13:39 +00002772 & VG_(threads)[i].sigs_waited_for) );
sewardjb48e5002002-05-13 00:16:03 +00002773 }
2774
sewardjbf290b92002-05-01 02:28:01 +00002775 }
sewardj6072c362002-04-19 14:40:57 +00002776 }
sewardj5f07b662002-04-23 16:52:51 +00002777
2778 for (i = 0; i < VG_N_THREAD_KEYS; i++) {
2779 if (!vg_thread_keys[i].inuse)
2780 vg_assert(vg_thread_keys[i].destructor == NULL);
2781 }
sewardj6072c362002-04-19 14:40:57 +00002782}
2783
2784
sewardje663cb92002-04-12 10:26:32 +00002785/*--------------------------------------------------------------------*/
2786/*--- end vg_scheduler.c ---*/
2787/*--------------------------------------------------------------------*/