
/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.        vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */

/* BORKAGE/ISSUES as of 14 Apr 02

Note!  This pthreads implementation is so poor as to not be
suitable for use by anyone at all!

- Currently, when a signal is run, just the ThreadStatus.status fields
  are saved in the signal frame, along with the CPU state.  Question:
  should I also save and restore:
     ThreadStatus.joiner
     ThreadStatus.waited_on_mid
     ThreadStatus.awaken_at
     ThreadStatus.retval
  Currently unsure, and so am not doing so.

- Signals interrupting read/write and nanosleep: SA_RESTART settings.
  Read/write correctly return with EINTR when SA_RESTART isn't
  specified and they are interrupted by a signal.  nanosleep just
  pretends signals don't exist -- should be fixed.

- Read/write syscall starts: don't crap out when the initial
  nonblocking read/write returns an error.

- Get rid of restrictions re use of sigaltstack; they are no longer
  needed.

- Fix signals properly, so that each thread has its own blocking mask.
  Currently this isn't done, and (worse?) signals are delivered to
  Thread 1 (the root thread) regardless.

  So, what's the deal with signals and mutexes?  If a thread is
  blocked on a mutex, or for a condition variable for that matter, can
  signals still be delivered to it?  This has serious consequences --
  deadlocks, etc.

*/
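
/* Illustrative sketch, not from the original file: the read()/EINTR
   semantics described in the note above, from the client's point of
   view.  Plain POSIX, no Valgrind internals assumed.  When a handler
   is installed without SA_RESTART, a signal arriving mid-read makes
   read() fail with errno == EINTR, so a robust client retries:

      #include <errno.h>
      #include <unistd.h>

      ssize_t read_retrying_on_eintr ( int fd, void* buf, size_t n )
      {
         ssize_t res;
         do {
            res = read(fd, buf, n);  // fails with EINTR if interrupted
         } while (res == -1 && errno == EINTR);
         return res;
      }
*/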


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock). */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int      fd;
      /* The syscall number the fd is used in. */
      Int      syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool     ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];


/* Keeping track of keys. */
typedef
   struct {
      /* Has this key been allocated ? */
      Bool inuse;
      /* If .inuse==True, records the address of the associated
         destructor, or NULL if none. */
      void (*destructor)(void*);
   }
   ThreadKeyState;

/* And our array of thread keys. */
static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS];

typedef UInt ThreadKey;

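/* Illustrative sketch, not from the original file: the client-side
   POSIX API which vg_thread_keys[] models.  Standard pthreads calls
   only; the names `key' and `free_value' are invented for the example.

      #include <pthread.h>
      #include <stdlib.h>

      static pthread_key_t key;

      static void free_value ( void* p )    // becomes the .destructor slot
         { free(p); }

      void example ( void )
      {
         pthread_key_create(&key, free_value); // allocates a slot
                                               // (.inuse = True)
         pthread_setspecific(key, malloc(64)); // per-thread value
         void* v = pthread_getspecific(key);   // serviced below by
                                               // do_pthread_getspecific()
         (void)v;
      }
*/
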
/* Forwards */
static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );

static void do_nontrivial_clientreq ( ThreadId tid );

static void scheduler_sanity ( void );

static void do_pthread_mutex_unlock ( ThreadId,
                                      void* /* pthread_mutex_t* */ );
static void do_pthread_mutex_lock ( ThreadId, Bool,
                                    void* /* pthread_mutex_t* */ );

static void do_pthread_getspecific ( ThreadId,
                                     UInt /* pthread_key_t */ );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static __inline__
Bool is_valid_tid ( ThreadId tid )
{
   /* tid is unsigned, hence no < 0 test. */
   if (tid == 0) return False;
   if (tid >= VG_N_THREADS) return False;
   return True;
}


/* For constructing error messages only: try and identify a thread
   whose stack this address currently falls within, or return
   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
   with any currently VG_(baseBlock)-resident thread.
*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock). */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (vg_threads[tid].m_esp <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
   }
   return VG_INVALID_THREADID;
}


/* Print the scheduler status. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
                  vg_threads[i].associated_mx,
                  vg_threads[i].associated_cv );
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, vg_threads[i].m_eip,
                              vg_threads[i].m_ebp ));
   }
   VG_(printf)("\n");
}

static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0;  i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}



static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int     orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}


ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   vg_tid_currently_in_baseBlock = tid;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   volatile UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}

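/* Illustrative sketch, not from the original file: the setjmp/longjmp
   fault-recovery pattern used above, in standalone form.  A handler
   for a synchronous fault longjmps back to the setjmp point instead
   of letting the process die.  Plain POSIX (sigsetjmp/siglongjmp, so
   the signal mask is restored too):

      #include <setjmp.h>
      #include <signal.h>
      #include <stdio.h>

      static sigjmp_buf env;

      static void on_segv ( int sig ) { siglongjmp(env, sig); }

      int main ( void )
      {
         signal(SIGSEGV, on_segv);
         if (sigsetjmp(env, 1) == 0) {
            *(volatile int*)0 = 42;            // faults ...
         } else {
            printf("recovered from fault\n");  // ... and resumes here
         }
         return 0;
      }
*/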

/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      vg_threads[i].status     = VgTs_Empty;
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid        = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      vg_thread_keys[i].inuse      = False;
      vg_thread_keys[i].destructor = NULL;
   }

   /* Assert this is thread one, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   vg_threads[tid_main].status        = VgTs_Runnable;
   vg_threads[tid_main].joiner        = VG_INVALID_THREADID;
   vg_threads[tid_main].associated_mx = NULL;
   vg_threads[tid_main].associated_cv = NULL;
   vg_threads[tid_main].retval        = NULL; /* not important */
   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4  ??? */;
   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      vg_threads[tid_main].specifics[i] = NULL;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* So now ... */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}



/* Possibly do a trivial client request for tid.  Return values are:

   True = request done.  Thread may or may not be still runnable;
   caller must check.  If it is still runnable, the result will be in
   the thread's %EDX as expected.

   False = request not done.  A more capable but slower mechanism will
   deal with it.
*/
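
/* Illustrative sketch, not from the original file: the shape of the
   argument block a client builds for such a request.  The magic
   instruction sequence which actually transfers control to Valgrind
   is defined in valgrind.h and is not reproduced here.

      UInt arg[4];
      arg[0] = VG_USERREQ__PTHREAD_GET_THREADID;  // request code
      arg[1] = arg[2] = arg[3] = 0;               // request-specific args
      // ... execute the valgrind.h magic sequence with %EAX == (UInt)arg;
      // on return this thread's %EDX holds the result (here, its tid).
*/
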
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)              \
      { tst->m_edx = (vvv);               \
        return True;                      \
      }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used -- or at least we want them to be
         cheap. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));
      case VG_USERREQ__READ_MILLISECOND_TIMER:
         SIMPLE_RETURN(VG_(read_millisecond_timer)());

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (void *)(arg[1]) );
         return True;

      /* This may make thread tid non-runnable, but the scheduler
         checks for that on return from this function. */
      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_GETSPECIFIC:
         do_pthread_getspecific ( tid, (UInt)(arg[1]) );
         return True;

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */

/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD
   for read or write, has been interrupted by a signal.  Find and
   clear the relevant vg_waiting_fds[] entry.  Most of the code in this
   procedure is total paranoia, if you look closely. */
static
void cleanup_waiting_fd_table ( ThreadId tid )
{
   Int i, waiters;

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_WaitFD);
   vg_assert(vg_threads[tid].m_eax == __NR_read
             || vg_threads[tid].m_eax == __NR_write);

   /* Excessively paranoidly ... find the fd this op was waiting
      for, and mark it as not being waited on. */
   waiters = 0;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].tid == tid) {
         waiters++;
         vg_assert(vg_waiting_fds[i].syscall_no == vg_threads[tid].m_eax);
      }
   }
   vg_assert(waiters == 1);
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].tid == tid)
         break;
   vg_assert(i < VG_N_WAITING_FDS);
   vg_assert(vg_waiting_fds[i].fd != -1);
   vg_waiting_fds[i].fd = -1; /* not in use */
}


static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls;

   vg_assert(is_valid_tid(tid));

   restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD
       && (vg_threads[tid].m_eax == __NR_read
           || vg_threads[tid].m_eax == __NR_write)) {
      /* read() or write() interrupted.  Force a return with EINTR. */
      cleanup_waiting_fd_table(tid);
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;

      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_WaitFD
       && vg_threads[tid].m_eax == __NR_nanosleep) {
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   if (vg_threads[tid].status == VgTs_WaitFD) {
      VG_(panic)("handle_signal_return: unknown interrupted syscall");
   }

   /* All other cases?  Just return. */
}


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      UInt t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_millisecond_timer)();
      t_awaken
         = t_now
           + (UInt)1000ULL * (UInt)(req->tv_sec)
           + (UInt)(req->tv_nsec) / 1000000;
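      /* Worked example (illustrative): req = { tv_sec = 1,
         tv_nsec = 500000000 } gives
         t_awaken = t_now + 1000*1 + 500000000/1000000
                  = t_now + 1500 milliseconds. */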
      vg_threads[tid].status = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %d: nanosleep for %d",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   if (syscall_no != __NR_read && syscall_no != __NR_write) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select().  */

   fd = vg_threads[tid].m_ebx /* arg1 */;
   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) {
      /* Finish off in the normal way.  Don't restore %EAX, since that
         now (correctly) holds the result of the call.  We get here if either:
         1.  The call didn't block, or
         2.  The fd was already in nonblocking mode before we started to
             mess with it.  In this case, we're not expecting to handle
             the I/O completion -- the client is.  So don't file a
             completion-wait entry.
      */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness);

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table. */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   UInt                 t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   for (tid = 1; tid < VG_N_THREADS; tid++)
      if (vg_threads[tid].status == VgTs_Sleeping)
         break;

   /* Avoid pointless calls to VG_(read_millisecond_timer). */
   if (tid < VG_N_THREADS) {
      t_now = VG_(read_millisecond_timer)();
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         if (vg_threads[tid].status != VgTs_Sleeping)
            continue;
         if (t_now >= vg_threads[tid].awaken_at) {
            /* Resume this thread.  Set to zero the remaining-time
               (second) arg of nanosleep, since it's used up all its
               time. */
            vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
            rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
            if (rem != NULL) {
               rem->tv_sec = 0;
               rem->tv_nsec = 0;
            }
            /* Make the syscall return 0 (success). */
            vg_threads[tid].m_eax = 0;
            /* Reschedule this thread. */
            vg_threads[tid].status = VgTs_Runnable;
            if (VG_(clo_trace_sched)) {
               VG_(sprintf)(msg_buf, "at %d: nanosleep done",
                                     t_now);
               print_sched_event(tid, msg_buf);
            }
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
      if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
         VG_(printf)("X"); } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}

/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void check_for_pthread_cond_timedwait ( void )
{
   Int i, now;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status != VgTs_WaitCV)
         continue;
      if (vg_threads[i].awaken_at == 0xFFFFFFFF /* no timeout */)
         continue;
      now = VG_(read_millisecond_timer)();
      if (now >= vg_threads[i].awaken_at) {
         do_pthread_cond_timedwait_TIMEOUT(i);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec = 0;
   req.tv_nsec = 20 * 1000 * 1000;
   res = VG_(nanosleep)( &req, &rem );
   vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shutdown Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_bounded_wait;
   Char     msg_buf[100];
   Addr     trans_addr;
   Bool     sigs_delivered;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
     stage1:
      scheduler_sanity();
      VG_(do_sanity_checks)( False );

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();
         check_for_pthread_cond_timedwait();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour. */
         sigs_delivered = VG_(deliver_signals)( 1 /*HACK*/ );
         if (sigs_delivered)
            VG_(do_sanity_checks)( False );

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_bounded_wait = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping
                || (vg_threads[tid_next].status == VgTs_WaitCV
                    && vg_threads[tid_next].awaken_at != 0xFFFFFFFF))
               n_in_bounded_wait ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_bounded_wait == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         // pp_sched_status();
         // VG_(printf)(".\n");
      }


      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quantum as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive. */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
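      /* Worked example (illustrative): if VG_SCHEDULING_QUANTUM is
         50000 and plenty of bbs remain, VG_(dispatch_ctr) is set to
         50001, so the innerloop runs at most 50000 basic blocks
         before returning VG_TRC_INNER_COUNTERZERO. */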
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );
#        if 0
         if (VG_(bbs_done) > 31700000 + 0) {
            dispatch_ctr_SAVED = VG_(dispatch_ctr) = 2;
            VG_(translate)(&vg_threads[tid], vg_threads[tid].m_eip,
                           NULL,NULL,NULL);
         }
         vg_assert(vg_threads[tid].m_eip != 0);
#        endif

         trc = run_thread_for_a_while ( tid );

#        if 0
         if (0 == vg_threads[tid].m_eip) {
            VG_(printf)("tid = %d,  dc = %llu\n", tid, VG_(bbs_done));
            vg_assert(0 != vg_threads[tid].m_eip);
         }
#        endif

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool done = maybe_do_trivial_clientreq(tid);
            if (done) {
               /* The request is done.  We try and continue with the
                  same thread if still runnable.  If not, go back to
                  Stage 1 to select a new thread to run. */
               if (vg_threads[tid].status == VgTs_Runnable)
                  continue; /* with this thread */
               else
                  goto stage1;
            }
         }

         if (trc == VG_TRC_EBP_JMP_SYSCALL) {
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)vg_threads[tid].m_esp;
              VG_(printf)("\nBEFORE\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d  %p  =  0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            sched_do_syscall(tid);

#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)vg_threads[tid].m_esp;
              VG_(printf)("AFTER\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d  %p  =  0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            if (vg_threads[tid].status == VgTs_Runnable)
               continue; /* with this thread */
            else
               goto stage1;
         }

         /* It's an event we can't quickly deal with.  Give up running
            this thread and handle things the expensive way. */
         break;
      }

      /* ======================= Phase 3 of 3 =======================
         Handle non-trivial thread requests, mostly pthread stuff. */

      /* Ok, we've fallen out of the dispatcher for a
         non-completely-trivial reason.  First, update basic-block
         counters. */

      done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
      vg_assert(done_this_time >= 0);
      VG_(bbs_to_go) -= (ULong)done_this_time;
      VG_(bbs_done) += (ULong)done_this_time;

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
                                   tid, done_this_time, (Int)trc );

      if (0 && trc != VG_TRC_INNER_FASTMISS)
         VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
                                   tid, VG_(bbs_done),
                                   name_of_sched_event(trc) );

      /* Examine the thread's return code to figure out why it
         stopped, and handle requests. */

      switch (trc) {

         case VG_TRC_INNER_FASTMISS:
            VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
            /*NOTREACHED*/
            break;

         case VG_TRC_INNER_COUNTERZERO:
            /* Timeslice is out.  Let a new thread be scheduled,
               simply by doing nothing, causing us to arrive back at
               Phase 1. */
            if (VG_(bbs_to_go) == 0) {
               goto debug_stop;
            }
            vg_assert(VG_(dispatch_ctr) == 0);
            break;

         case VG_TRC_UNRESUMABLE_SIGNAL:
            /* It got a SIGSEGV/SIGBUS, which we need to deliver right
               away.  Again, do nothing, so we wind up back at Phase
               1, whereupon the signal will be "delivered". */
            break;

#if 0
         case VG_TRC_EBP_JMP_SYSCALL:
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable. */
            sched_do_syscall(tid);
            break;
#endif

         case VG_TRC_EBP_JMP_CLIENTREQ:
            /* Do a client request for the vthread tid.  Note that
               some requests will have been handled by
               maybe_do_trivial_clientreq(), so we don't expect to see
               those here.
            */
            /* The thread's %EAX points at an arg block, the first
               word of which is the request code. */
            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
            if (0) {
               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
               print_sched_event(tid, msg_buf);
            }
            /* Do a non-trivial client request for thread tid.  tid's
               %EAX points to a short vector of argument words, the
               first of which is the request code.  The result of the
               request is put in tid's %EDX.  Alternatively, perhaps
               the request causes tid to become non-runnable and/or
               other blocked threads become runnable.  In general we
               can and often do mess with the state of arbitrary
               threads at this point. */
            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
               return VgSrc_Shutdown;
            } else {
               do_nontrivial_clientreq(tid);
            }
            break;

         default:
            VG_(printf)("\ntrc = %d\n", trc);
            VG_(panic)("VG_(scheduler), phase 3: "
                       "unexpected thread return code");
            /* NOTREACHED */
            break;

      } /* switch (trc) */

      /* That completes Phase 3 of 3.  Return now to the top of the
         main scheduler loop, to Phase 1 of 3. */

   } /* top-level scheduler loop */


   /* NOTREACHED */
   VG_(panic)("scheduler: post-main-loop ?!");
   /* NOTREACHED */

  debug_stop:
   /* If we exited because of a debug stop, print the translation
      of the last block executed -- by translating it again, and
      throwing away the result. */
   VG_(printf)(
      "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
   VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
   VG_(printf)("\n");
   VG_(printf)(
      "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");

   return VgSrc_BbsDone;
}


/* ---------------------------------------------------------------------
   The pthread implementation.
   ------------------------------------------------------------------ */

#include <pthread.h>
#include <errno.h>

#if !defined(PTHREAD_STACK_MIN)
#  define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
#endif

/* /usr/include/bits/pthreadtypes.h:
   typedef unsigned long int pthread_t;
*/


/* -----------------------------------------------------------
   Thread CREATION, JOINAGE and CANCELLATION.
   -------------------------------------------------------- */

static
void do_pthread_cancel ( ThreadId  tid,
                         pthread_t tid_cancellee )
{
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);

   if (!is_valid_tid(tid_cancellee)
1514 || vg_threads[tid_cancellee].status == VgTs_Empty) {
1515 vg_threads[tid].m_edx = ESRCH;
1516 return;
1517 }
1518
sewardje663cb92002-04-12 10:26:32 +00001519   /* We want to make it appear that this thread has returned to
1520      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
1521      return value.  So, simply: put PTHREAD_CANCELED into %EAX
1522      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001523 if (VG_(clo_trace_sched)) {
sewardj853f55d2002-04-26 00:27:53 +00001524 VG_(sprintf)(msg_buf, "cancelled by %d", tid);
sewardje663cb92002-04-12 10:26:32 +00001525 print_sched_event(tid_cancellee, msg_buf);
1526 }
1527 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001528 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001529 vg_threads[tid_cancellee].status = VgTs_Runnable;
sewardj853f55d2002-04-26 00:27:53 +00001530
1531 /* We return with success (0). */
1532 vg_threads[tid].m_edx = 0;
sewardje663cb92002-04-12 10:26:32 +00001533}
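
/* Client-visible effect of the above (a sketch in plain pthreads,
   not scheduler code): after cancellation, a joiner observes
   PTHREAD_CANCELED as the cancellee's exit value.

      void* res;
      pthread_cancel(th);
      pthread_join(th, &res);
      assert(res == PTHREAD_CANCELED);
*/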
1534
1535
sewardj3b5d8862002-04-20 13:53:23 +00001536static
1537void do_pthread_exit ( ThreadId tid, void* retval )
1538{
1539 Char msg_buf[100];
1540   /* We want to make it appear that this thread has returned to
1541      VG_(pthreadreturn_bogusRA) with retval as the
1542      return value.  So, simply: put retval into %EAX
1543      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
1544 if (VG_(clo_trace_sched)) {
1545 VG_(sprintf)(msg_buf, "exiting with %p", retval);
1546 print_sched_event(tid, msg_buf);
1547 }
1548 vg_threads[tid].m_eax = (UInt)retval;
1549 vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1550 vg_threads[tid].status = VgTs_Runnable;
1551}
1552
sewardje663cb92002-04-12 10:26:32 +00001553
1554/* Thread tid is exiting, by returning from the function it was
sewardjbc5b99f2002-04-13 00:08:51 +00001555 created with. Or possibly due to pthread_exit or cancellation.
1556 The main complication here is to resume any thread waiting to join
1557 with this one. */
sewardje663cb92002-04-12 10:26:32 +00001558static
sewardjbc5b99f2002-04-13 00:08:51 +00001559void handle_pthread_return ( ThreadId tid, void* retval )
sewardje663cb92002-04-12 10:26:32 +00001560{
1561 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1562 UInt* jnr_args;
1563 void** jnr_thread_return;
1564 Char msg_buf[100];
1565
1566 /* Mark it as not in use. Leave the stack in place so the next
1567 user of this slot doesn't reallocate it. */
sewardj6072c362002-04-19 14:40:57 +00001568 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001569 vg_assert(vg_threads[tid].status != VgTs_Empty);
1570
sewardjbc5b99f2002-04-13 00:08:51 +00001571 vg_threads[tid].retval = retval;
sewardje663cb92002-04-12 10:26:32 +00001572
1573 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1574 /* No one has yet done a join on me */
1575 vg_threads[tid].status = VgTs_WaitJoiner;
sewardj8937c812002-04-12 20:12:20 +00001576 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001577 VG_(sprintf)(msg_buf,
1578 "root fn returns, waiting for a call pthread_join(%d)",
1579 tid);
1580 print_sched_event(tid, msg_buf);
1581 }
1582 } else {
1583      /* Someone is waiting; make their join call return with success,
1584 putting my exit code in the place specified by the caller's
1585 thread_return param. This is all very horrible, since we
1586 need to consult the joiner's arg block -- pointed to by its
1587 %EAX -- in order to extract the 2nd param of its pthread_join
1588 call. TODO: free properly the slot (also below).
1589 */
1590 jnr = vg_threads[tid].joiner;
sewardj6072c362002-04-19 14:40:57 +00001591 vg_assert(is_valid_tid(jnr));
sewardje663cb92002-04-12 10:26:32 +00001592 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1593 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1594 jnr_thread_return = (void**)(jnr_args[2]);
1595 if (jnr_thread_return != NULL)
1596 *jnr_thread_return = vg_threads[tid].retval;
1597 vg_threads[jnr].m_edx = 0; /* success */
1598 vg_threads[jnr].status = VgTs_Runnable;
1599 vg_threads[tid].status = VgTs_Empty; /* bye! */
sewardj75fe1892002-04-14 02:46:33 +00001600 if (VG_(clo_instrument) && tid != 0)
1601 VGM_(make_noaccess)( vg_threads[tid].stack_base,
1602 vg_threads[tid].stack_size );
sewardj8937c812002-04-12 20:12:20 +00001603 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001604 VG_(sprintf)(msg_buf,
1605 "root fn returns, to find a waiting pthread_join(%d)", tid);
1606 print_sched_event(tid, msg_buf);
1607 VG_(sprintf)(msg_buf,
1608 "my pthread_join(%d) returned; resuming", tid);
1609 print_sched_event(jnr, msg_buf);
1610 }
1611 }
1612
1613 /* Return value is irrelevant; this thread will not get
1614 rescheduled. */
1615}
1616
1617
1618static
1619void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
1620{
1621 Char msg_buf[100];
1622
1623 /* jee, the joinee, is the thread specified as an arg in thread
1624 tid's call to pthread_join. So tid is the join-er. */
sewardj6072c362002-04-19 14:40:57 +00001625 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001626 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1627
1628 if (jee == tid) {
1629 vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */
1630 vg_threads[tid].status = VgTs_Runnable;
1631 return;
1632 }
1633
1634 if (jee < 0
1635 || jee >= VG_N_THREADS
1636 || vg_threads[jee].status == VgTs_Empty) {
1637 /* Invalid thread to join to. */
1638 vg_threads[tid].m_edx = EINVAL;
1639 vg_threads[tid].status = VgTs_Runnable;
1640 return;
1641 }
1642
1643 if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
1644 /* Someone already did join on this thread */
1645 vg_threads[tid].m_edx = EINVAL;
1646 vg_threads[tid].status = VgTs_Runnable;
1647 return;
1648 }
1649
1650 /* if (vg_threads[jee].detached) ... */
1651
1652 /* Perhaps the joinee has already finished? If so return
1653 immediately with its return code, and free up the slot. TODO:
1654 free it properly (also above). */
1655 if (vg_threads[jee].status == VgTs_WaitJoiner) {
1656 vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
1657 vg_threads[tid].m_edx = 0; /* success */
1658 if (thread_return != NULL)
1659 *thread_return = vg_threads[jee].retval;
1660 vg_threads[tid].status = VgTs_Runnable;
1661 vg_threads[jee].status = VgTs_Empty; /* bye! */
sewardj75fe1892002-04-14 02:46:33 +00001662 if (VG_(clo_instrument) && jee != 0)
1663 VGM_(make_noaccess)( vg_threads[jee].stack_base,
1664 vg_threads[jee].stack_size );
sewardj8937c812002-04-12 20:12:20 +00001665 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001666 VG_(sprintf)(msg_buf,
1667 "someone called pthread_join() on me; bye!");
1668 print_sched_event(jee, msg_buf);
1669 VG_(sprintf)(msg_buf,
1670 "my pthread_join(%d) returned immediately",
1671 jee );
1672 print_sched_event(tid, msg_buf);
1673 }
1674 return;
1675 }
1676
1677 /* Ok, so we'll have to wait on jee. */
1678 vg_threads[jee].joiner = tid;
1679 vg_threads[tid].status = VgTs_WaitJoinee;
sewardj8937c812002-04-12 20:12:20 +00001680 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001681 VG_(sprintf)(msg_buf,
1682 "blocking on call of pthread_join(%d)", jee );
1683 print_sched_event(tid, msg_buf);
1684 }
1685 /* So tid's join call does not return just now. */
1686}
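
/* Error behaviour implied by the checks above, as seen from the
   client (a sketch; note this implementation returns EINVAL where
   POSIX suggests ESRCH for a nonexistent thread):

      pthread_join(pthread_self(), NULL);     => EDEADLK
      pthread_join(never_created_tid, NULL);  => EINVAL
      two threads joining the same target     => second gets EINVAL
*/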
1687
1688
1689static
1690void do_pthread_create ( ThreadId parent_tid,
1691 pthread_t* thread,
1692 pthread_attr_t* attr,
1693 void* (*start_routine)(void *),
1694 void* arg )
1695{
sewardj5f07b662002-04-23 16:52:51 +00001696 Int i;
sewardje663cb92002-04-12 10:26:32 +00001697 Addr new_stack;
1698 UInt new_stk_szb;
1699 ThreadId tid;
1700 Char msg_buf[100];
1701
1702 /* Paranoia ... */
1703 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1704
1705 vg_assert(vg_threads[parent_tid].status != VgTs_Empty);
1706
sewardj1e8cdc92002-04-18 11:37:52 +00001707 tid = vg_alloc_ThreadState();
sewardje663cb92002-04-12 10:26:32 +00001708
1709 /* If we've created the main thread's tid, we're in deep trouble :) */
sewardj6072c362002-04-19 14:40:57 +00001710 vg_assert(tid != 1);
1711 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001712
1713 /* Copy the parent's CPU state into the child's, in a roundabout
1714 way (via baseBlock). */
1715 VG_(load_thread_state)(parent_tid);
1716 VG_(save_thread_state)(tid);
1717
1718 /* Consider allocating the child a stack, if the one it already has
1719 is inadequate. */
1720 new_stk_szb = PTHREAD_STACK_MIN;
1721
1722 if (new_stk_szb > vg_threads[tid].stack_size) {
1723 /* Again, for good measure :) We definitely don't want to be
1724 allocating a stack for the main thread. */
sewardj6072c362002-04-19 14:40:57 +00001725 vg_assert(tid != 1);
sewardje663cb92002-04-12 10:26:32 +00001726 /* for now, we don't handle the case of anything other than
1727 assigning it for the first time. */
1728 vg_assert(vg_threads[tid].stack_size == 0);
1729 vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
1730 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
1731 vg_threads[tid].stack_base = new_stack;
1732 vg_threads[tid].stack_size = new_stk_szb;
sewardj1e8cdc92002-04-18 11:37:52 +00001733 vg_threads[tid].stack_highest_word
sewardje663cb92002-04-12 10:26:32 +00001734 = new_stack + new_stk_szb
sewardj1e8cdc92002-04-18 11:37:52 +00001735                  - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */
sewardje663cb92002-04-12 10:26:32 +00001736 }
sewardj1e8cdc92002-04-18 11:37:52 +00001737
1738 vg_threads[tid].m_esp
1739 = vg_threads[tid].stack_base
1740 + vg_threads[tid].stack_size
1741 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1742
sewardje663cb92002-04-12 10:26:32 +00001743 if (VG_(clo_instrument))
1744 VGM_(make_noaccess)( vg_threads[tid].m_esp,
1745 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1746
1747 /* push arg */
1748 vg_threads[tid].m_esp -= 4;
1749 * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;
1750
1751 /* push (magical) return address */
1752 vg_threads[tid].m_esp -= 4;
sewardjbc5b99f2002-04-13 00:08:51 +00001753 * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001754
1755 if (VG_(clo_instrument))
1756 VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );
1757
1758 /* this is where we start */
1759 vg_threads[tid].m_eip = (UInt)start_routine;
1760
sewardj8937c812002-04-12 20:12:20 +00001761 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001762 VG_(sprintf)(msg_buf,
1763 "new thread, created by %d", parent_tid );
1764 print_sched_event(tid, msg_buf);
1765 }
1766
1767 /* store the thread id in *thread. */
1768 // if (VG_(clo_instrument))
1769 // ***** CHECK *thread is writable
1770 *thread = (pthread_t)tid;
1771
sewardj3b5d8862002-04-20 13:53:23 +00001772 vg_threads[tid].associated_mx = NULL;
1773 vg_threads[tid].associated_cv = NULL;
1774 vg_threads[tid].joiner = VG_INVALID_THREADID;
1775 vg_threads[tid].status = VgTs_Runnable;
sewardj604ec3c2002-04-18 22:38:41 +00001776
sewardj5f07b662002-04-23 16:52:51 +00001777 for (i = 0; i < VG_N_THREAD_KEYS; i++)
1778 vg_threads[tid].specifics[i] = NULL;
1779
sewardj604ec3c2002-04-18 22:38:41 +00001780 /* return zero */
sewardje663cb92002-04-12 10:26:32 +00001781 vg_threads[tid].m_edx = 0; /* success */
1782}
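
/* For reference, the child's initial stack as built above, highest
   address first (derived from the code, not from any spec):

      <redzone: VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes, noaccess>
      arg                          <- m_esp + 4
      VG_(pthreadreturn_bogusRA)   <- m_esp

   so a plain 'ret' from start_routine transfers to the bogus return
   address, which presumably reports the return value back via
   VG_USERREQ__PTHREAD_RETURNS (see handle_pthread_return). */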
1783
1784
sewardj604ec3c2002-04-18 22:38:41 +00001785/* -----------------------------------------------------------
1786 MUTEXes
1787 -------------------------------------------------------- */
1788
sewardj604ec3c2002-04-18 22:38:41 +00001789/* pthread_mutex_t is a struct with at least 5 words:
sewardje663cb92002-04-12 10:26:32 +00001790 typedef struct
1791 {
1792 int __m_reserved; -- Reserved for future use
1793 int __m_count; -- Depth of recursive locking
1794 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1795 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1796 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1797 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001798
sewardj6072c362002-04-19 14:40:57 +00001799 #define PTHREAD_MUTEX_INITIALIZER \
1800 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1801 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1802 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1803 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1804 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1805 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1806 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001807
sewardj6072c362002-04-19 14:40:57 +00001808 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001809
sewardj6072c362002-04-19 14:40:57 +00001810 __m_kind never changes and indicates whether or not it is recursive.
1811
1812 __m_count indicates the lock count; if 0, the mutex is not owned by
1813 anybody.
1814
1815 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1816 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1817 statically initialised mutexes correctly appear
1818 to belong to nobody.
1819
1820   In summary, a not-in-use mutex is distinguished by having __m_owner
1821 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1822 conditions holds, the other should too.
1823
1824   There is no linked list of threads waiting for this mutex.  Instead
1825   a thread in WaitMX state points at the mutex with its associated_mx
1826   field.  This makes _unlock() inefficient, but makes it simple to
1827   implement the right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001828
sewardj604ec3c2002-04-18 22:38:41 +00001829 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001830 deals with that for us.
1831*/
sewardje663cb92002-04-12 10:26:32 +00001832
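/* A tiny helper capturing the not-in-use invariant just described.
   This is an illustrative sketch -- the name is hypothetical and
   nothing below calls it: */
static __inline__
Bool mutex_is_unheld ( pthread_mutex_t* mx )
{
   /* not-in-use <=> owner is VG_INVALID_THREADID (0) and count is 0 */
   return (ThreadId)mx->__m_owner == VG_INVALID_THREADID
          && mx->__m_count == 0;
}
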
sewardj3b5d8862002-04-20 13:53:23 +00001833/* Helper fns ... */
1834static
1835void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1836 Char* caller )
1837{
1838 Int i;
1839 Char msg_buf[100];
1840
1841 /* Find some arbitrary thread waiting on this mutex, and make it
1842 runnable. If none are waiting, mark the mutex as not held. */
1843 for (i = 1; i < VG_N_THREADS; i++) {
1844 if (vg_threads[i].status == VgTs_Empty)
1845 continue;
1846 if (vg_threads[i].status == VgTs_WaitMX
1847 && vg_threads[i].associated_mx == mutex)
1848 break;
1849 }
1850
1851 vg_assert(i <= VG_N_THREADS);
1852 if (i == VG_N_THREADS) {
1853 /* Nobody else is waiting on it. */
1854 mutex->__m_count = 0;
1855 mutex->__m_owner = VG_INVALID_THREADID;
1856 } else {
1857 /* Notionally transfer the hold to thread i, whose
1858 pthread_mutex_lock() call now returns with 0 (success). */
1859 /* The .count is already == 1. */
1860 vg_assert(vg_threads[i].associated_mx == mutex);
1861 mutex->__m_owner = (_pthread_descr)i;
1862 vg_threads[i].status = VgTs_Runnable;
1863 vg_threads[i].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00001864 /* m_edx already holds pth_mx_lock() success (0) */
sewardj3b5d8862002-04-20 13:53:23 +00001865
1866 if (VG_(clo_trace_pthread_level) >= 1) {
1867 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1868 caller, mutex );
1869 print_pthread_event(i, msg_buf);
1870 }
1871 }
1872}
1873
sewardje663cb92002-04-12 10:26:32 +00001874
1875static
sewardj30671ff2002-04-21 00:13:57 +00001876void do_pthread_mutex_lock( ThreadId tid,
1877 Bool is_trylock,
sewardjd7fd4d22002-04-24 01:57:27 +00001878 void* /* pthread_mutex_t* */ mutexV )
sewardje663cb92002-04-12 10:26:32 +00001879{
sewardj30671ff2002-04-21 00:13:57 +00001880 Char msg_buf[100];
1881 Char* caller
1882      = is_trylock ? "pthread_mutex_trylock"
1883                   : "pthread_mutex_lock   ";
sewardje663cb92002-04-12 10:26:32 +00001884
sewardjd7fd4d22002-04-24 01:57:27 +00001885 pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;
1886
sewardj604ec3c2002-04-18 22:38:41 +00001887 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj30671ff2002-04-21 00:13:57 +00001888 VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
sewardj604ec3c2002-04-18 22:38:41 +00001889 print_pthread_event(tid, msg_buf);
1890 }
1891
1892 /* Paranoia ... */
1893 vg_assert(is_valid_tid(tid)
1894 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001895
1896 /* POSIX doesn't mandate this, but for sanity ... */
1897 if (mutex == NULL) {
1898 vg_threads[tid].m_edx = EINVAL;
1899 return;
1900 }
1901
sewardj604ec3c2002-04-18 22:38:41 +00001902 /* More paranoia ... */
1903 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001904# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00001905 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001906 case PTHREAD_MUTEX_ADAPTIVE_NP:
1907# endif
sewardj604ec3c2002-04-18 22:38:41 +00001908 case PTHREAD_MUTEX_RECURSIVE_NP:
1909 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00001910 if (mutex->__m_count >= 0) break;
1911 /* else fall thru */
1912 default:
1913 vg_threads[tid].m_edx = EINVAL;
1914 return;
sewardje663cb92002-04-12 10:26:32 +00001915 }
1916
sewardj604ec3c2002-04-18 22:38:41 +00001917 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001918
sewardj604ec3c2002-04-18 22:38:41 +00001919 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001920
1921 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001922 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001923 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001924 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001925 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001926 mutex->__m_count++;
sewardjf8f819e2002-04-17 23:21:37 +00001927 vg_threads[tid].m_edx = 0;
sewardj853f55d2002-04-26 00:27:53 +00001928 if (0)
1929 VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
1930 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001931 return;
1932 } else {
sewardj30671ff2002-04-21 00:13:57 +00001933 if (is_trylock)
1934 vg_threads[tid].m_edx = EBUSY;
1935 else
1936 vg_threads[tid].m_edx = EDEADLK;
sewardjf8f819e2002-04-17 23:21:37 +00001937 return;
1938 }
1939 } else {
sewardj6072c362002-04-19 14:40:57 +00001940 /* Someone else has it; we have to wait. Mark ourselves
1941 thusly. */
sewardj05553872002-04-20 20:53:17 +00001942 /* GUARD: __m_count > 0 && __m_owner is valid */
sewardj30671ff2002-04-21 00:13:57 +00001943 if (is_trylock) {
1944 /* caller is polling; so return immediately. */
1945 vg_threads[tid].m_edx = EBUSY;
1946 } else {
1947 vg_threads[tid].status = VgTs_WaitMX;
1948 vg_threads[tid].associated_mx = mutex;
sewardj5f07b662002-04-23 16:52:51 +00001949 vg_threads[tid].m_edx = 0; /* pth_mx_lock success value */
sewardj30671ff2002-04-21 00:13:57 +00001950 if (VG_(clo_trace_pthread_level) >= 1) {
1951 VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
1952 caller, mutex );
1953 print_pthread_event(tid, msg_buf);
1954 }
1955 }
sewardje663cb92002-04-12 10:26:32 +00001956 return;
1957 }
sewardjf8f819e2002-04-17 23:21:37 +00001958
sewardje663cb92002-04-12 10:26:32 +00001959 } else {
sewardj6072c362002-04-19 14:40:57 +00001960 /* Nobody owns it. Sanity check ... */
1961 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001962 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001963 mutex->__m_count = 1;
1964 mutex->__m_owner = (_pthread_descr)tid;
sewardj3b5d8862002-04-20 13:53:23 +00001965 vg_assert(vg_threads[tid].associated_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001966 /* return 0 (success). */
1967 vg_threads[tid].m_edx = 0;
1968 }
sewardjf8f819e2002-04-17 23:21:37 +00001969
sewardje663cb92002-04-12 10:26:32 +00001970}
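
/* Lock-count behaviour above, from the client's side (a sketch in
   ordinary pthreads, illustrative only):

      pthread_mutex_t mx = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
      pthread_mutex_lock(&mx);      __m_count: 0 -> 1
      pthread_mutex_lock(&mx);      same owner, recursive: 1 -> 2
      pthread_mutex_trylock(&mx);   also succeeds: 2 -> 3

   On an ERRORCHECK mutex the second lock would instead return
   EDEADLK, and the trylock EBUSY. */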
1971
1972
1973static
1974void do_pthread_mutex_unlock ( ThreadId tid,
sewardjd7fd4d22002-04-24 01:57:27 +00001975 void* /* pthread_mutex_t* */ mutexV )
sewardje663cb92002-04-12 10:26:32 +00001976{
sewardj3b5d8862002-04-20 13:53:23 +00001977 Char msg_buf[100];
sewardjd7fd4d22002-04-24 01:57:27 +00001978 pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;
sewardje663cb92002-04-12 10:26:32 +00001979
sewardj45b4b372002-04-16 22:50:32 +00001980 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001981 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00001982 print_pthread_event(tid, msg_buf);
1983 }
1984
sewardj604ec3c2002-04-18 22:38:41 +00001985 /* Paranoia ... */
1986 vg_assert(is_valid_tid(tid)
1987 && vg_threads[tid].status == VgTs_Runnable);
1988
1989 if (mutex == NULL) {
1990 vg_threads[tid].m_edx = EINVAL;
1991 return;
1992 }
1993
1994 /* More paranoia ... */
1995 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001996# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00001997 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001998 case PTHREAD_MUTEX_ADAPTIVE_NP:
1999# endif
sewardj604ec3c2002-04-18 22:38:41 +00002000 case PTHREAD_MUTEX_RECURSIVE_NP:
2001 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00002002 if (mutex->__m_count >= 0) break;
2003 /* else fall thru */
2004 default:
2005 vg_threads[tid].m_edx = EINVAL;
2006 return;
2007 }
sewardje663cb92002-04-12 10:26:32 +00002008
2009 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00002010 if (mutex->__m_count == 0 /* nobody holds it */
2011 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardje663cb92002-04-12 10:26:32 +00002012 vg_threads[tid].m_edx = EPERM;
2013 return;
2014 }
2015
sewardjf8f819e2002-04-17 23:21:37 +00002016 /* If it's a multiply-locked recursive mutex, just decrement the
2017 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00002018 if (mutex->__m_count > 1) {
2019 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
2020 mutex->__m_count --;
sewardjf8f819e2002-04-17 23:21:37 +00002021 vg_threads[tid].m_edx = 0; /* success */
2022 return;
2023 }
2024
sewardj604ec3c2002-04-18 22:38:41 +00002025 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00002026 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00002027 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00002028 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00002029
sewardj3b5d8862002-04-20 13:53:23 +00002030   /* Release at most one thread waiting on this mutex. */
2031   release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_unlock" );
sewardje663cb92002-04-12 10:26:32 +00002032
sewardj3b5d8862002-04-20 13:53:23 +00002033 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardje663cb92002-04-12 10:26:32 +00002034 vg_threads[tid].m_edx = 0; /* Success. */
2035}
2036
2037
sewardj6072c362002-04-19 14:40:57 +00002038/* -----------------------------------------------------------
2039 CONDITION VARIABLES
2040 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00002041
sewardj6072c362002-04-19 14:40:57 +00002042/* The relevant native types are as follows:
2043 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00002044
sewardj6072c362002-04-19 14:40:57 +00002045   -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER)
2046 typedef struct
2047 {
2048 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
2049 _pthread_descr __c_waiting; -- Threads waiting on this condition
2050 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00002051
sewardj6072c362002-04-19 14:40:57 +00002052   -- Attribute for condition variables.
2053 typedef struct
2054 {
2055 int __dummy;
2056 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00002057
sewardj6072c362002-04-19 14:40:57 +00002058 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00002059
sewardj3b5d8862002-04-20 13:53:23 +00002060 We don't use any fields of pthread_cond_t for anything at all.
2061 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00002062
2063 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00002064 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00002065
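/* The canonical client pattern the code below supports (a sketch in
   ordinary pthreads; 'ready', 'mx' and 'cv' are stand-in names):

      pthread_mutex_lock(&mx);
      while (!ready)
         pthread_cond_wait(&cv, &mx);   <- atomically releases mx,
                                           blocks on cv, re-acquires
                                           mx before returning
      pthread_mutex_unlock(&mx);

   Here the release happens in do_pthread_cond_wait (via
   release_one_thread_waiting_on_mutex) and the re-acquire in
   release_N_threads_waiting_on_cond or, on timeout, in
   do_pthread_cond_timedwait_TIMEOUT. */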
sewardj77e466c2002-04-14 02:29:29 +00002066
sewardj5f07b662002-04-23 16:52:51 +00002067static
2068void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid )
2069{
2070 Char msg_buf[100];
2071 pthread_mutex_t* mx;
2072 pthread_cond_t* cv;
2073
2074 vg_assert(is_valid_tid(tid)
2075 && vg_threads[tid].status == VgTs_WaitCV
2076 && vg_threads[tid].awaken_at != 0xFFFFFFFF);
2077 mx = vg_threads[tid].associated_mx;
2078 vg_assert(mx != NULL);
2079 cv = vg_threads[tid].associated_cv;
2080 vg_assert(cv != NULL);
2081
2082 if (mx->__m_owner == VG_INVALID_THREADID) {
2083 /* Currently unheld; hand it out to thread tid. */
2084 vg_assert(mx->__m_count == 0);
2085 vg_threads[tid].status = VgTs_Runnable;
2086 vg_threads[tid].m_edx = ETIMEDOUT;
2087      /* pthread_cond_timedwait return value */
2088 vg_threads[tid].associated_cv = NULL;
2089 vg_threads[tid].associated_mx = NULL;
2090 mx->__m_owner = (_pthread_descr)tid;
2091 mx->__m_count = 1;
2092
2093 if (VG_(clo_trace_pthread_level) >= 1) {
2094         VG_(sprintf)(msg_buf, "pthread_cond_timedwait cv %p: TIMEOUT with mx %p",
2095 cv, mx );
2096 print_pthread_event(tid, msg_buf);
2097 }
2098 } else {
2099 /* Currently held. Make thread tid be blocked on it. */
2100 vg_assert(mx->__m_count > 0);
2101 vg_threads[tid].status = VgTs_WaitMX;
2102 vg_threads[tid].m_edx = ETIMEDOUT;
2103      /* pthread_cond_timedwait return value */
2104 vg_threads[tid].associated_cv = NULL;
2105 vg_threads[tid].associated_mx = mx;
2106 if (VG_(clo_trace_pthread_level) >= 1) {
2107 VG_(sprintf)(msg_buf,
2108 "pthread_cond_timedwai cv %p: TIMEOUT -> BLOCK for mx %p",
2109 cv, mx );
2110 print_pthread_event(tid, msg_buf);
2111 }
2112
2113 }
2114}
2115
2116
sewardj3b5d8862002-04-20 13:53:23 +00002117static
2118void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
2119 Int n_to_release,
2120 Char* caller )
2121{
2122 Int i;
2123 Char msg_buf[100];
2124 pthread_mutex_t* mx;
2125
2126 while (True) {
2127 if (n_to_release == 0)
2128 return;
2129
2130 /* Find a thread waiting on this CV. */
2131 for (i = 1; i < VG_N_THREADS; i++) {
2132 if (vg_threads[i].status == VgTs_Empty)
2133 continue;
2134 if (vg_threads[i].status == VgTs_WaitCV
2135 && vg_threads[i].associated_cv == cond)
2136 break;
2137 }
2138 vg_assert(i <= VG_N_THREADS);
2139
2140 if (i == VG_N_THREADS) {
2141 /* Nobody else is waiting on it. */
2142 return;
2143 }
2144
2145 mx = vg_threads[i].associated_mx;
2146 vg_assert(mx != NULL);
2147
2148 if (mx->__m_owner == VG_INVALID_THREADID) {
2149 /* Currently unheld; hand it out to thread i. */
2150 vg_assert(mx->__m_count == 0);
2151 vg_threads[i].status = VgTs_Runnable;
2152 vg_threads[i].associated_cv = NULL;
2153 vg_threads[i].associated_mx = NULL;
2154 mx->__m_owner = (_pthread_descr)i;
2155 mx->__m_count = 1;
sewardj5f07b662002-04-23 16:52:51 +00002156 /* .m_edx already holds pth_cond_wait success value (0) */
sewardj3b5d8862002-04-20 13:53:23 +00002157
2158 if (VG_(clo_trace_pthread_level) >= 1) {
2159 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
2160 caller, cond, mx );
2161 print_pthread_event(i, msg_buf);
2162 }
2163
2164 } else {
2165 /* Currently held. Make thread i be blocked on it. */
sewardj5f07b662002-04-23 16:52:51 +00002166 vg_assert(mx->__m_count > 0);
sewardj3b5d8862002-04-20 13:53:23 +00002167 vg_threads[i].status = VgTs_WaitMX;
2168 vg_threads[i].associated_cv = NULL;
2169 vg_threads[i].associated_mx = mx;
sewardj5f07b662002-04-23 16:52:51 +00002170 vg_threads[i].m_edx = 0; /* pth_cond_wait success value */
sewardj3b5d8862002-04-20 13:53:23 +00002171
2172 if (VG_(clo_trace_pthread_level) >= 1) {
2173 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
2174 caller, cond, mx );
2175 print_pthread_event(i, msg_buf);
2176 }
2177
2178 }
2179
2180 n_to_release--;
2181 }
2182}
2183
2184
2185static
2186void do_pthread_cond_wait ( ThreadId tid,
2187 pthread_cond_t *cond,
sewardj5f07b662002-04-23 16:52:51 +00002188 pthread_mutex_t *mutex,
2189 UInt ms_end )
sewardj3b5d8862002-04-20 13:53:23 +00002190{
2191 Char msg_buf[100];
2192
sewardj5f07b662002-04-23 16:52:51 +00002193 /* If ms_end == 0xFFFFFFFF, wait forever (no timeout). Otherwise,
2194 ms_end is the ending millisecond. */
2195
sewardj3b5d8862002-04-20 13:53:23 +00002196 /* pre: mutex should be a valid mutex and owned by tid. */
2197 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj5f07b662002-04-23 16:52:51 +00002198 VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...",
2199 cond, mutex, ms_end );
sewardj3b5d8862002-04-20 13:53:23 +00002200 print_pthread_event(tid, msg_buf);
2201 }
2202
2203 /* Paranoia ... */
2204 vg_assert(is_valid_tid(tid)
2205 && vg_threads[tid].status == VgTs_Runnable);
2206
2207 if (mutex == NULL || cond == NULL) {
2208 vg_threads[tid].m_edx = EINVAL;
2209 return;
2210 }
2211
2212 /* More paranoia ... */
2213 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00002214# ifndef GLIBC_2_1
sewardj3b5d8862002-04-20 13:53:23 +00002215 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00002216 case PTHREAD_MUTEX_ADAPTIVE_NP:
2217# endif
sewardj3b5d8862002-04-20 13:53:23 +00002218 case PTHREAD_MUTEX_RECURSIVE_NP:
2219 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj3b5d8862002-04-20 13:53:23 +00002220 if (mutex->__m_count >= 0) break;
2221 /* else fall thru */
2222 default:
2223 vg_threads[tid].m_edx = EINVAL;
2224 return;
2225 }
2226
2227 /* Barf if we don't currently hold the mutex. */
2228 if (mutex->__m_count == 0 /* nobody holds it */
2229 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
2230 vg_threads[tid].m_edx = EINVAL;
2231 return;
2232 }
2233
2234 /* Queue ourselves on the condition. */
2235 vg_threads[tid].status = VgTs_WaitCV;
2236 vg_threads[tid].associated_cv = cond;
2237 vg_threads[tid].associated_mx = mutex;
sewardj5f07b662002-04-23 16:52:51 +00002238 vg_threads[tid].awaken_at = ms_end;
sewardj3b5d8862002-04-20 13:53:23 +00002239
2240 if (VG_(clo_trace_pthread_level) >= 1) {
2241 VG_(sprintf)(msg_buf,
2242 "pthread_cond_wait cv %p, mx %p: BLOCK",
2243 cond, mutex );
2244 print_pthread_event(tid, msg_buf);
2245 }
2246
2247 /* Release the mutex. */
2248 release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
2249}
2250
2251
2252static
2253void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2254 Bool broadcast,
2255 pthread_cond_t *cond )
2256{
2257 Char msg_buf[100];
2258 Char* caller
2259 = broadcast ? "pthread_cond_broadcast"
2260 : "pthread_cond_signal ";
2261
2262 if (VG_(clo_trace_pthread_level) >= 2) {
2263 VG_(sprintf)(msg_buf, "%s cv %p ...",
2264 caller, cond );
2265 print_pthread_event(tid, msg_buf);
2266 }
2267
2268 /* Paranoia ... */
2269 vg_assert(is_valid_tid(tid)
2270 && vg_threads[tid].status == VgTs_Runnable);
2271
2272 if (cond == NULL) {
2273 vg_threads[tid].m_edx = EINVAL;
2274 return;
2275 }
2276
2277 release_N_threads_waiting_on_cond (
2278 cond,
2279 broadcast ? VG_N_THREADS : 1,
2280 caller
2281 );
2282
2283 vg_threads[tid].m_edx = 0; /* success */
2284}
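
/* As in POSIX, signalling or broadcasting with no waiters is a
   harmless no-op that still returns 0:
   release_N_threads_waiting_on_cond simply finds nobody to wake. */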
2285
sewardj77e466c2002-04-14 02:29:29 +00002286
sewardj5f07b662002-04-23 16:52:51 +00002287/* -----------------------------------------------------------
2288 THREAD SPECIFIC DATA
2289 -------------------------------------------------------- */
2290
2291static __inline__
2292Bool is_valid_key ( ThreadKey k )
2293{
2294 /* k unsigned; hence no < 0 check */
2295 if (k >= VG_N_THREAD_KEYS) return False;
2296 if (!vg_thread_keys[k].inuse) return False;
2297 return True;
2298}
2299
2300static
2301void do_pthread_key_create ( ThreadId tid,
2302 pthread_key_t* key,
2303 void (*destructor)(void*) )
2304{
2305 Int i;
2306 Char msg_buf[100];
2307
2308 if (VG_(clo_trace_pthread_level) >= 1) {
2309 VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p",
2310 key, destructor );
2311 print_pthread_event(tid, msg_buf);
2312 }
2313
2314 vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey));
2315 vg_assert(is_valid_tid(tid)
2316 && vg_threads[tid].status == VgTs_Runnable);
2317
2318 for (i = 0; i < VG_N_THREAD_KEYS; i++)
2319 if (!vg_thread_keys[i].inuse)
2320 break;
2321
2322 if (i == VG_N_THREAD_KEYS) {
2323 /* vg_threads[tid].m_edx = EAGAIN;
2324 return;
2325 */
2326 VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
2327 " increase and recompile");
2328 }
2329
2330 vg_thread_keys[i].inuse = True;
2331   /* TODO: check key for addressability */
2332 *key = i;
2333 vg_threads[tid].m_edx = 0;
2334}
2335
2336
2337static
2338void do_pthread_key_delete ( ThreadId tid, pthread_key_t key )
2339{
2340 Char msg_buf[100];
2341 if (VG_(clo_trace_pthread_level) >= 1) {
2342 VG_(sprintf)(msg_buf, "pthread_key_delete key %d",
2343 key );
2344 print_pthread_event(tid, msg_buf);
2345 }
2346
2347 vg_assert(is_valid_tid(tid)
2348 && vg_threads[tid].status == VgTs_Runnable);
2349
2350 if (!is_valid_key(key)) {
2351 vg_threads[tid].m_edx = EINVAL;
2352 return;
2353 }
2354
2355 vg_thread_keys[key].inuse = False;
2356
2357 /* Optional. We're not required to do this, although it shouldn't
2358 make any difference to programs which use the key/specifics
2359 functions correctly. */
sewardj3b13f0e2002-04-25 20:17:29 +00002360# if 1
sewardj5f07b662002-04-23 16:52:51 +00002361 for (tid = 1; tid < VG_N_THREADS; tid++) {
2362 if (vg_threads[tid].status != VgTs_Empty)
2363 vg_threads[tid].specifics[key] = NULL;
2364 }
sewardj3b13f0e2002-04-25 20:17:29 +00002365# endif
sewardj5f07b662002-04-23 16:52:51 +00002366}
2367
2368
2369static
2370void do_pthread_getspecific ( ThreadId tid, pthread_key_t key )
2371{
2372 Char msg_buf[100];
2373 if (VG_(clo_trace_pthread_level) >= 1) {
2374 VG_(sprintf)(msg_buf, "pthread_getspecific key %d",
2375 key );
2376 print_pthread_event(tid, msg_buf);
2377 }
2378
2379 vg_assert(is_valid_tid(tid)
2380 && vg_threads[tid].status == VgTs_Runnable);
2381
2382 if (!is_valid_key(key)) {
2383 vg_threads[tid].m_edx = (UInt)NULL;
2384 return;
2385 }
2386
2387 vg_threads[tid].m_edx = (UInt)vg_threads[tid].specifics[key];
2388}
2389
2390
2391static
2392void do_pthread_setspecific ( ThreadId tid,
2393 pthread_key_t key,
2394 void *pointer )
2395{
2396 Char msg_buf[100];
2397 if (VG_(clo_trace_pthread_level) >= 1) {
2398 VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p",
2399 key, pointer );
2400 print_pthread_event(tid, msg_buf);
2401 }
2402
2403 vg_assert(is_valid_tid(tid)
2404 && vg_threads[tid].status == VgTs_Runnable);
2405
2406 if (!is_valid_key(key)) {
2407 vg_threads[tid].m_edx = EINVAL;
2408 return;
2409 }
2410
2411 vg_threads[tid].specifics[key] = pointer;
2412 vg_threads[tid].m_edx = 0;
2413}
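
/* End-to-end client view of the four TSD entry points above (a
   sketch in plain pthreads; 'k' and 'obj' are illustrative):

      pthread_key_t k;
      pthread_key_create(&k, NULL);      allocates a slot; k < VG_N_THREAD_KEYS
      pthread_setspecific(k, &obj);      stores into this thread's specifics[k]
      void* p = pthread_getspecific(k);  == &obj here, NULL in other threads
      pthread_key_delete(k);             frees the slot; specifics cleared above
*/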
2414
2415
sewardje663cb92002-04-12 10:26:32 +00002416/* ---------------------------------------------------------------------
2417 Handle non-trivial client requests.
2418 ------------------------------------------------------------------ */
2419
2420static
2421void do_nontrivial_clientreq ( ThreadId tid )
2422{
2423 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
2424 UInt req_no = arg[0];
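   /* The rest of the arg block is request-specific; for example, for
      VG_USERREQ__PTHREAD_CREATE (first case below):
         arg[1] = pthread_t* thread
         arg[2] = pthread_attr_t* attr
         arg[3] = void* (*start_routine)(void*)
         arg[4] = void* arg */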
2425 switch (req_no) {
2426
2427 case VG_USERREQ__PTHREAD_CREATE:
2428 do_pthread_create( tid,
2429 (pthread_t*)arg[1],
2430 (pthread_attr_t*)arg[2],
2431 (void*(*)(void*))arg[3],
2432 (void*)arg[4] );
2433 break;
2434
sewardjbc5b99f2002-04-13 00:08:51 +00002435 case VG_USERREQ__PTHREAD_RETURNS:
2436 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00002437 break;
2438
2439 case VG_USERREQ__PTHREAD_JOIN:
2440 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
2441 break;
2442
sewardje663cb92002-04-12 10:26:32 +00002443 case VG_USERREQ__PTHREAD_CANCEL:
2444 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
2445 break;
2446
sewardj3b5d8862002-04-20 13:53:23 +00002447 case VG_USERREQ__PTHREAD_EXIT:
2448 do_pthread_exit( tid, (void*)(arg[1]) );
2449 break;
2450
2451 case VG_USERREQ__PTHREAD_COND_WAIT:
2452 do_pthread_cond_wait( tid,
2453 (pthread_cond_t *)(arg[1]),
sewardj5f07b662002-04-23 16:52:51 +00002454 (pthread_mutex_t *)(arg[2]),
2455 0xFFFFFFFF /* no timeout */ );
2456 break;
2457
2458 case VG_USERREQ__PTHREAD_COND_TIMEDWAIT:
2459 do_pthread_cond_wait( tid,
2460 (pthread_cond_t *)(arg[1]),
2461 (pthread_mutex_t *)(arg[2]),
2462 arg[3] /* timeout millisecond point */ );
sewardj3b5d8862002-04-20 13:53:23 +00002463 break;
2464
2465 case VG_USERREQ__PTHREAD_COND_SIGNAL:
2466 do_pthread_cond_signal_or_broadcast(
2467 tid,
2468 False, /* signal, not broadcast */
2469 (pthread_cond_t *)(arg[1]) );
2470 break;
2471
2472 case VG_USERREQ__PTHREAD_COND_BROADCAST:
2473 do_pthread_cond_signal_or_broadcast(
2474 tid,
2475 True, /* broadcast, not signal */
2476 (pthread_cond_t *)(arg[1]) );
2477 break;
2478
sewardj5f07b662002-04-23 16:52:51 +00002479 case VG_USERREQ__PTHREAD_KEY_CREATE:
2480 do_pthread_key_create ( tid,
2481 (pthread_key_t*)(arg[1]),
2482 (void(*)(void*))(arg[2]) );
2483 break;
2484
2485 case VG_USERREQ__PTHREAD_KEY_DELETE:
2486 do_pthread_key_delete ( tid,
2487 (pthread_key_t)(arg[1]) );
2488 break;
2489
sewardj5f07b662002-04-23 16:52:51 +00002490 case VG_USERREQ__PTHREAD_SETSPECIFIC:
2491 do_pthread_setspecific ( tid,
2492 (pthread_key_t)(arg[1]),
2493 (void*)(arg[2]) );
2494 break;
2495
sewardje663cb92002-04-12 10:26:32 +00002496 case VG_USERREQ__MAKE_NOACCESS:
2497 case VG_USERREQ__MAKE_WRITABLE:
2498 case VG_USERREQ__MAKE_READABLE:
2499 case VG_USERREQ__DISCARD:
2500 case VG_USERREQ__CHECK_WRITABLE:
2501 case VG_USERREQ__CHECK_READABLE:
2502 case VG_USERREQ__MAKE_NOACCESS_STACK:
2503 case VG_USERREQ__RUNNING_ON_VALGRIND:
2504 case VG_USERREQ__DO_LEAK_CHECK:
sewardj8c824512002-04-14 04:16:48 +00002505 vg_threads[tid].m_edx
2506 = VG_(handle_client_request) ( &vg_threads[tid], arg );
sewardje663cb92002-04-12 10:26:32 +00002507 break;
2508
sewardj77e466c2002-04-14 02:29:29 +00002509 case VG_USERREQ__SIGNAL_RETURNS:
2510 handle_signal_return(tid);
2511 break;
sewardj54cacf02002-04-12 23:24:59 +00002512
sewardje663cb92002-04-12 10:26:32 +00002513 default:
2514 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
2515 VG_(panic)("handle_private_client_pthread_request: "
2516 "unknown request");
2517 /*NOTREACHED*/
2518 break;
2519 }
2520}
2521
2522
sewardj6072c362002-04-19 14:40:57 +00002523/* ---------------------------------------------------------------------
2524 Sanity checking.
2525 ------------------------------------------------------------------ */
2526
2527/* Internal consistency checks on the sched/pthread structures. */
2528static
2529void scheduler_sanity ( void )
2530{
sewardj3b5d8862002-04-20 13:53:23 +00002531 pthread_mutex_t* mx;
2532 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002533 Int i;
sewardj5f07b662002-04-23 16:52:51 +00002534
sewardj6072c362002-04-19 14:40:57 +00002535 /* VG_(printf)("scheduler_sanity\n"); */
2536 for (i = 1; i < VG_N_THREADS; i++) {
sewardj3b5d8862002-04-20 13:53:23 +00002537 mx = vg_threads[i].associated_mx;
2538 cv = vg_threads[i].associated_cv;
sewardj6072c362002-04-19 14:40:57 +00002539 if (vg_threads[i].status == VgTs_WaitMX) {
sewardj05553872002-04-20 20:53:17 +00002540 /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
2541 it's actually held by someone, since otherwise this thread
2542 is deadlocked, (4) the mutex's owner is not us, since
2543 otherwise this thread is also deadlocked. The logic in
2544 do_pthread_mutex_lock rejects attempts by a thread to lock
2545 a (non-recursive) mutex which it already owns.
2546
2547 (2) has been seen to fail sometimes. I don't know why.
2548 Possibly to do with signals. */
sewardj3b5d8862002-04-20 13:53:23 +00002549 vg_assert(cv == NULL);
sewardj05553872002-04-20 20:53:17 +00002550 /* 1 */ vg_assert(mx != NULL);
2551 /* 2 */ vg_assert(mx->__m_count > 0);
2552 /* 3 */ vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
2553 /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
sewardj3b5d8862002-04-20 13:53:23 +00002554 } else
2555 if (vg_threads[i].status == VgTs_WaitCV) {
2556 vg_assert(cv != NULL);
2557 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002558 } else {
sewardj05553872002-04-20 20:53:17 +00002559 /* Unfortunately these don't hold true when a sighandler is
2560 running. To be fixed. */
2561 /* vg_assert(cv == NULL); */
2562 /* vg_assert(mx == NULL); */
sewardj6072c362002-04-19 14:40:57 +00002563 }
2564 }
sewardj5f07b662002-04-23 16:52:51 +00002565
2566 for (i = 0; i < VG_N_THREAD_KEYS; i++) {
2567 if (!vg_thread_keys[i].inuse)
2568 vg_assert(vg_thread_keys[i].destructor == NULL);
2569 }
sewardj6072c362002-04-19 14:40:57 +00002570}
2571
2572
sewardje663cb92002-04-12 10:26:32 +00002573/*--------------------------------------------------------------------*/
2574/*--- end vg_scheduler.c ---*/
2575/*--------------------------------------------------------------------*/