/*--------------------------------------------------------------------*/
/*--- A user-space pthreads implementation.         vg_scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file LICENSE.
*/

#include "vg_include.h"
#include "vg_constants.h"

#include "valgrind.h"   /* for VG_USERREQ__MAKE_NOACCESS and
                           VG_USERREQ__DO_LEAK_CHECK */
/* BORKAGE/ISSUES as of 14 Apr 02

Note!  This pthreads implementation is so poor as to not be
suitable for use by anyone at all!

- Currently, when a signal is run, just the ThreadStatus.status fields
  are saved in the signal frame, along with the CPU state.  Question:
  should I also save and restore:
     ThreadStatus.joiner
     ThreadStatus.waited_on_mid
     ThreadStatus.awaken_at
     ThreadStatus.retval
  Currently unsure, and so am not doing so.

- Signals interrupting read/write and nanosleep: SA_RESTART settings.
  Read/write correctly return with EINTR when SA_RESTART isn't
  specified and they are interrupted by a signal.  nanosleep just
  pretends signals don't exist -- should be fixed.

- Read/write syscall starts: don't crap out when the initial
  nonblocking read/write returns an error.

- Get rid of restrictions re use of sigaltstack; they are no longer
  needed.

- Fix signals properly, so that each thread has its own blocking mask.
  Currently this isn't done, and (worse?) signals are delivered to
  Thread 1 (the root thread) regardless.

  So, what's the deal with signals and mutexes?  If a thread is
  blocked on a mutex, or for a condition variable for that matter, can
  signals still be delivered to it?  This has serious consequences --
  deadlocks, etc.

*/

/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* type ThreadId is defined in vg_include.h. */

/* struct ThreadState is defined in vg_include.h. */

/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock). */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;

/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int VG_(longjmpd_on_signal);
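
/* For orientation, a minimal sketch (assumed; the real code lives in
   the signal machinery, not in this file) of how the handler side
   would use the pair above: note which signal fired, then longjmp
   back to the setjmp in run_thread_for_a_while() below. */
#if 0
static void example_escape_from_client_code ( Int sigNo )
{
   VG_(longjmpd_on_signal) = sigNo;
   __builtin_longjmp(VG_(scheduler_jmpbuf), 1);
}
#endif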


/* Machinery to keep track of which threads are waiting on which
   fds. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int fd;
      /* The syscall number the fd is used in. */
      Int syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1. */
      Bool ready;
   }
   VgWaitedOnFd;

static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
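
/* Slot lifecycle, as implemented by the functions further below:
   add_waiting_fd() claims a free slot (.fd set, .ready == False);
   poll_for_ready_fds() sets .ready once select() reports the fd
   usable; complete_blocked_syscalls() then delivers the result to
   the waiting thread and frees the slot again (.fd = -1). */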


/* Keeping track of keys. */
typedef
   struct {
      /* Has this key been allocated ? */
      Bool inuse;
      /* If .inuse==True, records the address of the associated
         destructor, or NULL if none. */
      void (*destructor)(void*);
   }
   ThreadKeyState;

/* And our array of thread keys. */
static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS];

typedef UInt ThreadKey;
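
/* Purely illustrative (a hypothetical helper, not used anywhere in
   this file): given the table above, allocating a fresh key
   presumably amounts to a linear scan for a free slot, like so. */
#if 0
static Int example_alloc_thread_key ( void (*dtor)(void*) )
{
   Int i;
   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      if (!vg_thread_keys[i].inuse) {
         vg_thread_keys[i].inuse      = True;
         vg_thread_keys[i].destructor = dtor;
         return i;   /* the new key */
      }
   }
   return -1;        /* out of keys */
}
#endif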


/* Forwards */
static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );

static void do_nontrivial_clientreq ( ThreadId tid );

static void scheduler_sanity ( void );

static void do_pthread_mutex_unlock ( ThreadId,
                                      void* /* pthread_mutex_t* */ );
static void do_pthread_mutex_lock ( ThreadId, Bool,
                                    void* /* pthread_mutex_t* */ );

static void do_pthread_getspecific ( ThreadId,
                                     UInt /* pthread_key_t */ );


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static __inline__
Bool is_valid_tid ( ThreadId tid )
{
   /* tid is unsigned, hence no < 0 test. */
   if (tid == 0) return False;
   if (tid >= VG_N_THREADS) return False;
   return True;
}


/* For constructing error messages only: try and identify a thread
   whose stack this address currently falls within, or return
   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
   with any currently VG_(baseBlock)-resident thread.
*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock). */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (vg_threads[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (vg_threads[tid].m_esp <= a
          && a <= vg_threads[tid].stack_highest_word)
         return tid;
   }
   return VG_INVALID_THREADID;
}


/* Print the scheduler status. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (vg_threads[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)",
                                           vg_threads[i].joiner); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
                  vg_threads[i].associated_mx,
                  vg_threads[i].associated_cv );
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, vg_threads[i].m_eip,
                              vg_threads[i].m_ebp ));
   }
   VG_(printf)("\n");
}


static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}


static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s", tid, what );
}


static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}


static
Char* name_of_sched_event ( UInt event )
{
   switch (event) {
      case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
      case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
      case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
      case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
      case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
      default:                        return "??UNKNOWN??";
   }
}


/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   This probably doesn't really belong here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}


/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status == VgTs_Empty)
         return i;
   }
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}


ThreadState* VG_(get_thread_state) ( ThreadId tid )
{
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status != VgTs_Empty);
   return & vg_threads[tid];
}


ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock );
}


ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);
   return vg_tid_currently_in_baseBlock;
}


/* Copy the saved state of a thread into VG_(baseBlock), ready for it
   to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i];

   VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags;

   vg_tid_currently_in_baseBlock = tid;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}


/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   volatile UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}


/* Increment the LRU epoch counter. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}


/* Initialise the scheduler.  Create a single "main" thread ready to
   run, with special ThreadId of one.  This is called at startup; the
   caller takes care to park the client's state in VG_(baseBlock).
*/
void VG_(scheduler_init) ( void )
{
   Int      i;
   Addr     startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) {
      VG_(printf)("%%esp at startup = %p is not near %p; aborting\n",
                  (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK);
      VG_(panic)("unexpected %esp at startup");
   }

   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      vg_threads[i].status     = VgTs_Empty;
      vg_threads[i].stack_size = 0;
      vg_threads[i].stack_base = (Addr)NULL;
      vg_threads[i].tid        = i;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      vg_thread_keys[i].inuse      = False;
      vg_thread_keys[i].destructor = NULL;
   }

   /* Assert this is thread one, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);

   vg_threads[tid_main].status        = VgTs_Runnable;
   vg_threads[tid_main].joiner        = VG_INVALID_THREADID;
   vg_threads[tid_main].associated_mx = NULL;
   vg_threads[tid_main].associated_cv = NULL;
   vg_threads[tid_main].retval        = NULL; /* not important */
   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      vg_threads[tid_main].specifics[i] = NULL;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   vg_threads[tid_main].stack_highest_word
      = vg_threads[tid_main].m_esp /* -4  ??? */;

   /* So now ... */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}


/* What if fd isn't a valid fd? */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}

static
Bool fd_is_blockful ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   return (res & VKI_O_NONBLOCK) ? False : True;
}


/* Possibly do a trivial client request for tid.  Return values are:

   True = request done.  Thread may or may not be still runnable;
   caller must check.  If it is still runnable, the result will be in
   the thread's %EDX as expected.

   False = request not done.  A more capable but slower mechanism will
   deal with it.
*/
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
#  define SIMPLE_RETURN(vvv)              \
   { tst->m_edx = (vvv);                  \
     tst->sh_edx = VGM_WORD_VALID;        \
     return True;                         \
   }

   ThreadState* tst    = &vg_threads[tid];
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used -- or at least we want them to be
         cheap. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));
      case VG_USERREQ__READ_MILLISECOND_TIMER:
         SIMPLE_RETURN(VG_(read_millisecond_timer)());

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (void *)(arg[1]) );
         return True;

      /* This may make thread tid non-runnable, but the scheduler
         checks for that on return from this function. */
      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_GETSPECIFIC:
         do_pthread_getspecific ( tid, (UInt)(arg[1]) );
         return True;

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}


/* vthread tid is returning from a signal handler; modify its
   stack/regs accordingly. */

/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD
   for read or write, has been interrupted by a signal.  Find and
   clear the relevant vg_waiting_fd[] entry.  Most of the code in this
   procedure is total paranoia, if you look closely. */
static
void cleanup_waiting_fd_table ( ThreadId tid )
{
   Int i, waiters;

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_WaitFD);
   vg_assert(vg_threads[tid].m_eax == __NR_read
             || vg_threads[tid].m_eax == __NR_write);

   /* Excessively paranoidly ... find the fd this op was waiting
      for, and mark it as not being waited on. */
   waiters = 0;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].tid == tid) {
         waiters++;
         vg_assert(vg_waiting_fds[i].syscall_no == vg_threads[tid].m_eax);
      }
   }
   vg_assert(waiters == 1);
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].tid == tid)
         break;
   vg_assert(i < VG_N_WAITING_FDS);
   vg_assert(vg_waiting_fds[i].fd != -1);
   vg_waiting_fds[i].fd = -1; /* not in use */
}


static
void handle_signal_return ( ThreadId tid )
{
   Char msg_buf[100];
   Bool restart_blocked_syscalls;

   vg_assert(is_valid_tid(tid));

   restart_blocked_syscalls = VG_(signal_returns)(tid);

   if (restart_blocked_syscalls)
      /* Easy; we don't have to do anything. */
      return;

   if (vg_threads[tid].status == VgTs_WaitFD
       && (vg_threads[tid].m_eax == __NR_read
           || vg_threads[tid].m_eax == __NR_write)) {
      /* read() or write() interrupted.  Force a return with EINTR. */
      cleanup_waiting_fd_table(tid);
      vg_threads[tid].m_eax = -VKI_EINTR;
      vg_threads[tid].status = VgTs_Runnable;

      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,
            "read() / write() interrupted by signal; return EINTR" );
         print_sched_event(tid, msg_buf);
      }
      return;
   }

   if (vg_threads[tid].status == VgTs_WaitFD
       && vg_threads[tid].m_eax == __NR_nanosleep) {
      /* We interrupted a nanosleep().  The right thing to do is to
         write the unused time to nanosleep's second param and return
         EINTR, but I'm too lazy for that. */
      return;
   }

   if (vg_threads[tid].status == VgTs_WaitFD) {
      VG_(panic)("handle_signal_return: unknown interrupted syscall");
   }

   /* All other cases?  Just return. */
}


static
void sched_do_syscall ( ThreadId tid )
{
   UInt saved_eax;
   UInt res, syscall_no;
   UInt fd;
   Bool orig_fd_blockness;
   Char msg_buf[100];

   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);

   syscall_no = vg_threads[tid].m_eax; /* syscall number */

   if (syscall_no == __NR_nanosleep) {
      UInt t_now, t_awaken;
      struct vki_timespec* req;
      req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */
      t_now = VG_(read_millisecond_timer)();
      t_awaken
         = t_now
           + (UInt)1000ULL * (UInt)(req->tv_sec)
           + (UInt)(req->tv_nsec) / 1000000;
      vg_threads[tid].status    = VgTs_Sleeping;
      vg_threads[tid].awaken_at = t_awaken;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf, "at %d: nanosleep for %d",
                               t_now, t_awaken-t_now);
         print_sched_event(tid, msg_buf);
      }
      /* Force the scheduler to run something else for a while. */
      return;
   }

   if (syscall_no != __NR_read && syscall_no != __NR_write) {
      /* We think it's non-blocking.  Just do it in the normal way. */
      VG_(perform_assumed_nonblocking_syscall)(tid);
      /* The thread is still runnable. */
      return;
   }

   /* Set the fd to nonblocking, and do the syscall, which will return
      immediately, in order to lodge a request with the Linux kernel.
      We later poll for I/O completion using select(). */

   fd = vg_threads[tid].m_ebx /* arg1 */;
   orig_fd_blockness = fd_is_blockful(fd);
   set_fd_nonblocking(fd);
   vg_assert(!fd_is_blockful(fd));
   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);

   /* This trashes the thread's %eax; we have to preserve it. */
   saved_eax = vg_threads[tid].m_eax;
   KERNEL_DO_SYSCALL(tid,res);

   /* Restore original blockfulness of the fd. */
   if (orig_fd_blockness)
      set_fd_blocking(fd);
   else
      set_fd_nonblocking(fd);

   if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) {
      /* Finish off in the normal way.  Don't restore %EAX, since that
         now (correctly) holds the result of the call.  We get here if
         either:
         1.  The call didn't block, or
         2.  The fd was already in nonblocking mode before we started
             to mess with it.  In this case, we're not expecting to
             handle the I/O completion -- the client is.  So don't
             file a completion-wait entry.
      */
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
      /* We're still runnable. */
      vg_assert(vg_threads[tid].status == VgTs_Runnable);

   } else {

      vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness);

      /* It would have blocked.  First, restore %EAX to what it was
         before our speculative call. */
      vg_threads[tid].m_eax = saved_eax;
      /* Put this fd in a table of fds on which we are waiting for
         completion.  The arguments for select() later are constructed
         from this table. */
      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
      /* Deschedule thread until an I/O completion happens. */
      vg_threads[tid].status = VgTs_WaitFD;
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }

   }
}


/* Find out which of the fds in vg_waiting_fds are now ready to go, by
   making enquiries with select(), and mark them as ready.  We have to
   wait for the requesting threads to fall into the WaitFD state
   before we can actually finally deliver the results, so this
   procedure doesn't do that; complete_blocked_syscalls() does it.

   It might seem odd that a thread which has done a blocking syscall
   is not in WaitFD state; the way this can happen is if it initially
   becomes WaitFD, but then a signal is delivered to it, so it becomes
   Runnable for a while.  In this case we have to wait for the
   sighandler to return, whereupon the WaitFD state is resumed, and
   only at that point can the I/O result be delivered to it.  However,
   this point may be long after the fd is actually ready.

   So, poll_for_ready_fds() merely detects fds which are ready.
   complete_blocked_syscalls() does the second half of the trick,
   possibly much later: it delivers the results from ready fds to
   threads in WaitFD state.
*/
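
/* A timeline sketch of the awkward case described above:
      t0  thread blocks in read()           -> WaitFD, slot filed
      t1  signal delivered                  -> Runnable, handler runs
      t2  fd becomes ready                  -> poll_for_ready_fds()
                                               marks the slot .ready
      t3  handler returns                   -> WaitFD again
      t4  complete_blocked_syscalls() runs  -> result delivered,
                                               slot freed
*/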
static
void poll_for_ready_fds ( void )
{
   vki_ksigset_t      saved_procmask;
   vki_fd_set         readfds;
   vki_fd_set         writefds;
   vki_fd_set         exceptfds;
   struct vki_timeval timeout;
   Int                fd, fd_max, i, n_ready, syscall_no, n_ok;
   ThreadId           tid;
   Bool               rd_ok, wr_ok, ex_ok;
   Char               msg_buf[100];

   struct vki_timespec* rem;
   UInt                 t_now;

   /* Awaken any sleeping threads whose sleep has expired. */
   for (tid = 1; tid < VG_N_THREADS; tid++)
      if (vg_threads[tid].status == VgTs_Sleeping)
         break;

   /* Avoid pointless calls to VG_(read_millisecond_timer). */
   if (tid < VG_N_THREADS) {
      t_now = VG_(read_millisecond_timer)();
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         if (vg_threads[tid].status != VgTs_Sleeping)
            continue;
         if (t_now >= vg_threads[tid].awaken_at) {
            /* Resume this thread.  Set to zero the remaining-time
               (second) arg of nanosleep, since it's used up all its
               time. */
            vg_assert(vg_threads[tid].m_eax == __NR_nanosleep);
            rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */
            if (rem != NULL) {
               rem->tv_sec = 0;
               rem->tv_nsec = 0;
            }
            /* Make the syscall return 0 (success). */
            vg_threads[tid].m_eax = 0;
            /* Reschedule this thread. */
            vg_threads[tid].status = VgTs_Runnable;
            if (VG_(clo_trace_sched)) {
               VG_(sprintf)(msg_buf, "at %d: nanosleep done",
                                     t_now);
               print_sched_event(tid, msg_buf);
            }
         }
      }
   }

   /* And look for threads waiting on file descriptors which are now
      ready for I/O. */
   timeout.tv_sec = 0;
   timeout.tv_usec = 0;

   VKI_FD_ZERO(&readfds);
   VKI_FD_ZERO(&writefds);
   VKI_FD_ZERO(&exceptfds);
   fd_max = -1;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (vg_waiting_fds[i].ready /* already ready? */)
         continue;
      fd = vg_waiting_fds[i].fd;
      /* VG_(printf)("adding QUERY for fd %d\n", fd); */
      vg_assert(fd >= 0);
      if (fd > fd_max)
         fd_max = fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));
      syscall_no = vg_waiting_fds[i].syscall_no;
      switch (syscall_no) {
         case __NR_read:
            VKI_FD_SET(fd, &readfds); break;
         case __NR_write:
            VKI_FD_SET(fd, &writefds); break;
         default:
            VG_(panic)("poll_for_ready_fds: unexpected syscall");
            /*NOTREACHED*/
            break;
      }
   }

   /* Short cut: if no fds are waiting, give up now. */
   if (fd_max == -1)
      return;

   /* BLOCK ALL SIGNALS.  We don't want the complication of select()
      getting interrupted. */
   VG_(block_all_host_signals)( &saved_procmask );

   n_ready = VG_(select)
                ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
   if (VG_(is_kerror)(n_ready)) {
      VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
      VG_(panic)("poll_for_ready_fds: select failed?!");
      /*NOTREACHED*/
   }

   /* UNBLOCK ALL SIGNALS */
   VG_(restore_host_signals)( &saved_procmask );

   /* VG_(printf)("poll_for_io_completions: %d fds ready\n", n_ready); */

   if (n_ready == 0)
      return;

   /* Inspect all the fds we know about, and handle any completions that
      have happened. */
   /*
   VG_(printf)("\n\n");
   for (fd = 0; fd < 100; fd++)
     if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
       VG_(printf)("X");  } else { VG_(printf)("."); };
   VG_(printf)("\n\nfd_max = %d\n", fd_max);
   */

   for (fd = 0; fd <= fd_max; fd++) {
      rd_ok = VKI_FD_ISSET(fd, &readfds);
      wr_ok = VKI_FD_ISSET(fd, &writefds);
      ex_ok = VKI_FD_ISSET(fd, &exceptfds);

      n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
      if (n_ok == 0)
         continue;
      if (n_ok > 1) {
         VG_(printf)("offending fd = %d\n", fd);
         VG_(panic)("poll_for_ready_fds: multiple events on fd");
      }

      /* An I/O event completed for fd.  Find the thread which
         requested this. */
      for (i = 0; i < VG_N_WAITING_FDS; i++) {
         if (vg_waiting_fds[i].fd == -1 /* not in use */)
            continue;
         if (vg_waiting_fds[i].fd == fd)
            break;
      }

      /* And a bit more paranoia ... */
      vg_assert(i >= 0 && i < VG_N_WAITING_FDS);

      /* Mark the fd as ready. */
      vg_assert(! vg_waiting_fds[i].ready);
      vg_waiting_fds[i].ready = True;
   }
}


/* See comment attached to poll_for_ready_fds() for explanation. */
static
void complete_blocked_syscalls ( void )
{
   Int      fd, i, res, syscall_no;
   ThreadId tid;
   Char     msg_buf[100];

   /* Inspect all the outstanding fds we know about. */

   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].fd == -1 /* not in use */)
         continue;
      if (! vg_waiting_fds[i].ready)
         continue;

      fd  = vg_waiting_fds[i].fd;
      tid = vg_waiting_fds[i].tid;
      vg_assert(is_valid_tid(tid));

      /* The thread actually has to be waiting for the I/O event it
         requested before we can deliver the result! */
      if (vg_threads[tid].status != VgTs_WaitFD)
         continue;

      /* Ok, actually do it!  We can safely use %EAX as the syscall
         number, because the speculative call made by
         sched_do_syscall() doesn't change %EAX in the case where the
         call would have blocked. */

      syscall_no = vg_waiting_fds[i].syscall_no;
      vg_assert(syscall_no == vg_threads[tid].m_eax);
      KERNEL_DO_SYSCALL(tid,res);
      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);

      /* Reschedule. */
      vg_threads[tid].status = VgTs_Runnable;
      /* Mark slot as no longer in use. */
      vg_waiting_fds[i].fd = -1;
      /* pp_sched_status(); */
      if (VG_(clo_trace_sched)) {
         VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
         print_sched_event(tid, msg_buf);
      }
   }
}


static
void check_for_pthread_cond_timedwait ( void )
{
   Int i, now;
   for (i = 1; i < VG_N_THREADS; i++) {
      if (vg_threads[i].status != VgTs_WaitCV)
         continue;
      if (vg_threads[i].awaken_at == 0xFFFFFFFF /* no timeout */)
         continue;
      now = VG_(read_millisecond_timer)();
      if (now >= vg_threads[i].awaken_at) {
         do_pthread_cond_timedwait_TIMEOUT(i);
      }
   }
}


static
void nanosleep_for_a_while ( void )
{
   Int res;
   struct vki_timespec req;
   struct vki_timespec rem;
   req.tv_sec = 0;
   req.tv_nsec = 20 * 1000 * 1000; /* 20 ms between polls */
   res = VG_(nanosleep)( &req, &rem );
   vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

/* Run user-space threads until either
   * Deadlock occurs
   * One thread asks to shut down Valgrind
   * The specified number of basic blocks has gone by.
*/
VgSchedReturnCode VG_(scheduler) ( void )
{
   ThreadId tid, tid_next;
   UInt     trc;
   UInt     dispatch_ctr_SAVED;
   Int      request_code, done_this_time, n_in_bounded_wait;
   Char     msg_buf[100];
   Addr     trans_addr;
   Bool     sigs_delivered;

   /* For the LRU structures, records when the epoch began. */
   ULong lru_epoch_started_at = 0;

   /* Start with the root thread.  tid in general indicates the
      currently runnable/just-finished-running thread. */
   VG_(last_run_tid) = tid = 1;

   /* This is the top level scheduler loop.  It falls into three
      phases. */
   while (True) {

      /* ======================= Phase 0 of 3 =======================
         Be paranoid.  Always a good idea. */
     stage1:
      scheduler_sanity();
      VG_(do_sanity_checks)( False );

      /* ======================= Phase 1 of 3 =======================
         Handle I/O completions and signals.  This may change the
         status of various threads.  Then select a new thread to run,
         or declare deadlock, or sleep if there are no runnable
         threads but some are blocked on I/O. */

      /* Age the LRU structures if an epoch has been completed. */
      if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
         lru_epoch_started_at = VG_(bbs_done);
         increment_epoch();
      }

      /* Was a debug-stop requested? */
      if (VG_(bbs_to_go) == 0)
         goto debug_stop;

      /* Do the following loop until a runnable thread is found, or
         deadlock is detected. */
      while (True) {

         /* For stats purposes only. */
         VG_(num_scheduling_events_MAJOR) ++;

         /* See if any I/O operations which we were waiting for have
            completed, and, if so, make runnable the relevant waiting
            threads. */
         poll_for_ready_fds();
         complete_blocked_syscalls();
         check_for_pthread_cond_timedwait();

         /* See if there are any signals which need to be delivered.  If
            so, choose thread(s) to deliver them to, and build signal
            delivery frames on those thread(s) stacks. */

         /* Be careful about delivering signals to a thread waiting
            for a mutex.  In particular, when the handler is running,
            that thread is temporarily apparently-not-waiting for the
            mutex, so if it is unlocked by another thread whilst the
            handler is running, this thread is not informed.  When the
            handler returns, the thread resumes waiting on the mutex,
            even if, as a result, it has missed the unlocking of it.
            Potential deadlock.  This sounds all very strange, but the
            POSIX standard appears to require this behaviour. */
         sigs_delivered = VG_(deliver_signals)( 1 /*HACK*/ );
         if (sigs_delivered)
            VG_(do_sanity_checks)( False );

         /* Try and find a thread (tid) to run. */
         tid_next = tid;
         n_in_bounded_wait = 0;
         while (True) {
            tid_next++;
            if (tid_next >= VG_N_THREADS) tid_next = 1;
            if (vg_threads[tid_next].status == VgTs_WaitFD
                || vg_threads[tid_next].status == VgTs_Sleeping
                || (vg_threads[tid_next].status == VgTs_WaitCV
                    && vg_threads[tid_next].awaken_at != 0xFFFFFFFF))
               n_in_bounded_wait ++;
            if (vg_threads[tid_next].status == VgTs_Runnable)
               break; /* We can run this one. */
            if (tid_next == tid)
               break; /* been all the way round */
         }
         tid = tid_next;

         if (vg_threads[tid].status == VgTs_Runnable) {
            /* Found a suitable candidate.  Fall out of this loop, so
               we can advance to stage 2 of the scheduler: actually
               running the thread. */
            break;
         }

         /* We didn't find a runnable thread.  Now what? */
         if (n_in_bounded_wait == 0) {
            /* No runnable threads and no prospect of any appearing
               even if we wait for an arbitrary length of time.  In
               short, we have a deadlock. */
            VG_(pp_sched_status)();
            return VgSrc_Deadlock;
         }

         /* At least one thread is in a fd-wait state.  Delay for a
            while, and go round again, in the hope that eventually a
            thread becomes runnable. */
         nanosleep_for_a_while();
         /* pp_sched_status(); */
         /* VG_(printf)(".\n"); */
      }

      /* ======================= Phase 2 of 3 =======================
         Wahey!  We've finally decided that thread tid is runnable, so
         we now do that.  Run it for as much of a quantum as possible.
         Trivial requests are handled and the thread continues.  The
         aim is not to do too many of Phase 1 since it is expensive. */

      if (0)
         VG_(printf)("SCHED: tid %d\n", tid);

      /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
         that it decrements the counter before testing it for zero, so
         that if VG_(dispatch_ctr) is set to N you get at most N-1
         iterations.  Also this means that VG_(dispatch_ctr) must
         exceed zero before entering the innerloop.  Also also, the
         decrement is done before the bb is actually run, so you
         always get at least one decrement even if nothing happens.
      */
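      /* (Worked example of the note above: to allow a quantum of N
         bbs, the counter must be set to N+1 -- hence the "+ 1"s
         immediately below.) */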
      if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
         VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
      else
         VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;

      /* ... and remember what we asked for. */
      dispatch_ctr_SAVED = VG_(dispatch_ctr);

      /* paranoia ... */
      vg_assert(vg_threads[tid].tid == tid);

      /* Actually run thread tid. */
      while (True) {

         VG_(last_run_tid) = tid;

         /* For stats purposes only. */
         VG_(num_scheduling_events_MINOR) ++;

         if (0)
            VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
                                      tid, VG_(dispatch_ctr) - 1 );
#        if 0
         if (VG_(bbs_done) > 31700000 + 0) {
            dispatch_ctr_SAVED = VG_(dispatch_ctr) = 2;
            VG_(translate)(&vg_threads[tid], vg_threads[tid].m_eip,
                           NULL,NULL,NULL);
         }
         vg_assert(vg_threads[tid].m_eip != 0);
#        endif

         trc = run_thread_for_a_while ( tid );

#        if 0
         if (0 == vg_threads[tid].m_eip) {
            VG_(printf)("tid = %d,  dc = %llu\n", tid, VG_(bbs_done));
            vg_assert(0 != vg_threads[tid].m_eip);
         }
#        endif

         /* Deal quickly with trivial scheduling events, and resume the
            thread. */

         if (trc == VG_TRC_INNER_FASTMISS) {
            vg_assert(VG_(dispatch_ctr) > 0);

            /* Trivial event.  Miss in the fast-cache.  Do a full
               lookup for it. */
            trans_addr
               = VG_(search_transtab) ( vg_threads[tid].m_eip );
            if (trans_addr == (Addr)0) {
               /* Not found; we need to request a translation. */
               create_translation_for( tid, vg_threads[tid].m_eip );
               trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip );
               if (trans_addr == (Addr)0)
                  VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
            }
            continue; /* with this thread */
         }

         if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
            Bool done = maybe_do_trivial_clientreq(tid);
            if (done) {
               /* The request is done.  We try and continue with the
                  same thread if still runnable.  If not, go back to
                  Stage 1 to select a new thread to run. */
               if (vg_threads[tid].status == VgTs_Runnable)
                  continue; /* with this thread */
               else
                  goto stage1;
            }
         }

         if (trc == VG_TRC_EBP_JMP_SYSCALL) {
            /* Do a syscall for the vthread tid.  This could cause it
               to become non-runnable.  One special case: spot the
               client doing calls to exit() and take this as the cue
               to exit. */
#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)vg_threads[tid].m_esp;
              VG_(printf)("\nBEFORE\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d  %p  =  0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            if (vg_threads[tid].m_eax == __NR_exit)
               return VgSrc_ExitSyscall;

            sched_do_syscall(tid);

#           if 0
            { UInt* esp; Int i;
              esp=(UInt*)vg_threads[tid].m_esp;
              VG_(printf)("AFTER\n");
              for (i = 10; i >= -10; i--)
                 VG_(printf)("%2d  %p  =  0x%x\n", i, &esp[i], esp[i]);
            }
#           endif

            if (vg_threads[tid].status == VgTs_Runnable)
               continue; /* with this thread */
            else
               goto stage1;
         }

         /* It's an event we can't quickly deal with.  Give up running
            this thread and handle things the expensive way. */
         break;
      }
1375
1376 /* ======================= Phase 3 of 3 =======================
1377 Handle non-trivial thread requests, mostly pthread stuff. */
1378
1379 /* Ok, we've fallen out of the dispatcher for a
1380 non-completely-trivial reason. First, update basic-block
1381 counters. */
1382
1383 done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
1384 vg_assert(done_this_time >= 0);
1385 VG_(bbs_to_go) -= (ULong)done_this_time;
1386 VG_(bbs_done) += (ULong)done_this_time;
1387
1388 if (0 && trc != VG_TRC_INNER_FASTMISS)
1389 VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
1390 tid, done_this_time, (Int)trc );
1391
1392 if (0 && trc != VG_TRC_INNER_FASTMISS)
1393 VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
1394 tid, VG_(bbs_done),
1395 name_of_sched_event(trc) );
sewardj9d1b5d32002-04-17 19:40:49 +00001396
sewardje663cb92002-04-12 10:26:32 +00001397 /* Examine the thread's return code to figure out why it
1398 stopped, and handle requests. */
1399
1400 switch (trc) {
1401
1402 case VG_TRC_INNER_FASTMISS:
1403 VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
1404 /*NOTREACHED*/
1405 break;
1406
1407 case VG_TRC_INNER_COUNTERZERO:
1408 /* Timeslice is out. Let a new thread be scheduled,
1409 simply by doing nothing, causing us to arrive back at
1410 Phase 1. */
1411 if (VG_(bbs_to_go) == 0) {
1412 goto debug_stop;
1413 }
1414 vg_assert(VG_(dispatch_ctr) == 0);
1415 break;
1416
1417 case VG_TRC_UNRESUMABLE_SIGNAL:
1418 /* It got a SIGSEGV/SIGBUS, which we need to deliver right
1419 away. Again, do nothing, so we wind up back at Phase
1420 1, whereupon the signal will be "delivered". */
1421 break;
1422
sewardje663cb92002-04-12 10:26:32 +00001423 case VG_TRC_EBP_JMP_CLIENTREQ:
1424 /* Do a client request for the vthread tid. Note that
1425 some requests will have been handled by
1426 maybe_do_trivial_clientreq(), so we don't expect to see
1427 those here.
1428 */
sewardj54cacf02002-04-12 23:24:59 +00001429 /* The thread's %EAX points at an arg block, the first
1430 word of which is the request code. */
1431 request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
sewardje663cb92002-04-12 10:26:32 +00001432 if (0) {
sewardj54cacf02002-04-12 23:24:59 +00001433 VG_(sprintf)(msg_buf, "request 0x%x", request_code );
sewardje663cb92002-04-12 10:26:32 +00001434 print_sched_event(tid, msg_buf);
1435 }
1436 /* Do a non-trivial client request for thread tid. tid's
1437 %EAX points to a short vector of argument words, the
1438 first of which is the request code. The result of the
1439 request is put in tid's %EDX. Alternatively, perhaps
1440 the request causes tid to become non-runnable and/or
1441 other blocked threads become runnable. In general we
1442 can and often do mess with the state of arbitrary
1443 threads at this point. */
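         /* Example (a sketch): for pthread_join(jee, &ret), the
            client-side stub leaves %EAX pointing at a block laid
            out as
               arg[0] = VG_USERREQ__PTHREAD_JOIN
               arg[1] = jee
               arg[2] = &ret
            which do_nontrivial_clientreq() unpacks below. */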
sewardj7e87e382002-05-03 19:09:05 +00001444 do_nontrivial_clientreq(tid);
sewardje663cb92002-04-12 10:26:32 +00001445 break;
1446
1447 default:
1448 VG_(printf)("\ntrc = %d\n", trc);
1449 VG_(panic)("VG_(scheduler), phase 3: "
1450 "unexpected thread return code");
1451 /* NOTREACHED */
1452 break;
1453
1454 } /* switch (trc) */
1455
1456 /* That completes Phase 3 of 3. Return now to the top of the
1457 main scheduler loop, to Phase 1 of 3. */
1458
1459 } /* top-level scheduler loop */
1460
1461
1462 /* NOTREACHED */
1463 VG_(panic)("scheduler: post-main-loop ?!");
1464 /* NOTREACHED */
1465
1466 debug_stop:
1467 /* If we exited because of a debug stop, print the translation
1468 of the last block executed -- by translating it again, and
1469 throwing away the result. */
1470 VG_(printf)(
1471 "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
sewardj1e8cdc92002-04-18 11:37:52 +00001472 VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL );
sewardje663cb92002-04-12 10:26:32 +00001473 VG_(printf)("\n");
1474 VG_(printf)(
1475 "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
1476
1477 return VgSrc_BbsDone;
1478}
1479
1480
1481/* ---------------------------------------------------------------------
1482 The pthread implementation.
1483 ------------------------------------------------------------------ */
1484
1485#include <pthread.h>
1486#include <errno.h>
1487
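/* Stack size handed to each new thread by do_pthread_create below:
   the configured stack size less the base redzone. */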
sewardjbf290b92002-05-01 02:28:01 +00001488#define VG_PTHREAD_STACK_MIN \
sewardjc3bd5f52002-05-01 03:24:23 +00001489 (VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
sewardje663cb92002-04-12 10:26:32 +00001490
1491/* /usr/include/bits/pthreadtypes.h:
1492 typedef unsigned long int pthread_t;
1493*/
1494
sewardjc3bd5f52002-05-01 03:24:23 +00001495/* Write a value to the client's %EDX (request return value register)
1496 and set the shadow to indicate it is defined. */
1497#define SET_EDX(zztid, zzval) \
1498 do { vg_threads[zztid].m_edx = (zzval); \
1499 vg_threads[zztid].sh_edx = VGM_WORD_VALID; \
1500 } while (0)
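/* Example: a handler which fails with EINVAL does
      SET_EDX(tid, EINVAL);
   thereby both writing the client's %EDX and marking its shadow as
   valid, so that the client's inspection of the return value is not
   itself reported as a use of an undefined value. */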
1501
sewardje663cb92002-04-12 10:26:32 +00001502
sewardj604ec3c2002-04-18 22:38:41 +00001503/* -----------------------------------------------------------
1504 Thread CREATION, JOINAGE and CANCELLATION.
1505 -------------------------------------------------------- */
1506
sewardje663cb92002-04-12 10:26:32 +00001507static
sewardj853f55d2002-04-26 00:27:53 +00001508void do_pthread_cancel ( ThreadId tid,
sewardje663cb92002-04-12 10:26:32 +00001509 pthread_t tid_cancellee )
1510{
1511 Char msg_buf[100];
sewardj853f55d2002-04-26 00:27:53 +00001512
1513 vg_assert(is_valid_tid(tid));
1514 vg_assert(vg_threads[tid].status != VgTs_Empty);
1515
1516 if (!is_valid_tid(tid_cancellee)
1517 || vg_threads[tid_cancellee].status == VgTs_Empty) {
sewardjc3bd5f52002-05-01 03:24:23 +00001518 SET_EDX(tid, ESRCH);
sewardj853f55d2002-04-26 00:27:53 +00001519 return;
1520 }
1521
sewardje663cb92002-04-12 10:26:32 +00001522   /* We want to make it appear that this thread has returned to
1523      VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the
1524      return value.  So: simple: put PTHREAD_CANCELED into %EAX
1525      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
sewardj8937c812002-04-12 20:12:20 +00001526 if (VG_(clo_trace_sched)) {
sewardj853f55d2002-04-26 00:27:53 +00001527 VG_(sprintf)(msg_buf, "cancelled by %d", tid);
sewardje663cb92002-04-12 10:26:32 +00001528 print_sched_event(tid_cancellee, msg_buf);
1529 }
1530 vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED;
sewardjbc5b99f2002-04-13 00:08:51 +00001531 vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001532 vg_threads[tid_cancellee].status = VgTs_Runnable;
sewardj853f55d2002-04-26 00:27:53 +00001533
1534 /* We return with success (0). */
sewardjc3bd5f52002-05-01 03:24:23 +00001535 SET_EDX(tid, 0);
sewardje663cb92002-04-12 10:26:32 +00001536}
1537
1538
sewardj3b5d8862002-04-20 13:53:23 +00001539static
1540void do_pthread_exit ( ThreadId tid, void* retval )
1541{
1542 Char msg_buf[100];
1543   /* We want to make it appear that this thread has returned to
1544      VG_(pthreadreturn_bogusRA) with retval as the
1545      return value.  So: simple: put retval into %EAX
1546      and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */
1547 if (VG_(clo_trace_sched)) {
1548 VG_(sprintf)(msg_buf, "exiting with %p", retval);
1549 print_sched_event(tid, msg_buf);
1550 }
1551 vg_threads[tid].m_eax = (UInt)retval;
1552 vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA);
1553 vg_threads[tid].status = VgTs_Runnable;
1554}
1555
sewardje663cb92002-04-12 10:26:32 +00001556
1557/* Thread tid is exiting, by returning from the function it was
sewardjbc5b99f2002-04-13 00:08:51 +00001558 created with. Or possibly due to pthread_exit or cancellation.
1559 The main complication here is to resume any thread waiting to join
1560 with this one. */
sewardje663cb92002-04-12 10:26:32 +00001561static
sewardjbc5b99f2002-04-13 00:08:51 +00001562void handle_pthread_return ( ThreadId tid, void* retval )
sewardje663cb92002-04-12 10:26:32 +00001563{
1564 ThreadId jnr; /* joiner, the thread calling pthread_join. */
1565 UInt* jnr_args;
1566 void** jnr_thread_return;
1567 Char msg_buf[100];
1568
1569   /* Mark it as not in use.  Leave the stack in place so the next
1570      user of this slot need not reallocate it. */
sewardj6072c362002-04-19 14:40:57 +00001571 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001572 vg_assert(vg_threads[tid].status != VgTs_Empty);
1573
sewardjbc5b99f2002-04-13 00:08:51 +00001574 vg_threads[tid].retval = retval;
sewardje663cb92002-04-12 10:26:32 +00001575
1576 if (vg_threads[tid].joiner == VG_INVALID_THREADID) {
1577 /* No one has yet done a join on me */
1578 vg_threads[tid].status = VgTs_WaitJoiner;
sewardj8937c812002-04-12 20:12:20 +00001579 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001580 VG_(sprintf)(msg_buf,
1581         "root fn returns, waiting for a call to pthread_join(%d)",
1582 tid);
1583 print_sched_event(tid, msg_buf);
1584 }
1585 } else {
1586      /* Someone is waiting; make their join call return with success,
1587 putting my exit code in the place specified by the caller's
1588 thread_return param. This is all very horrible, since we
1589 need to consult the joiner's arg block -- pointed to by its
1590 %EAX -- in order to extract the 2nd param of its pthread_join
1591         call.  TODO: free the slot properly (also below).
1592 */
1593 jnr = vg_threads[tid].joiner;
sewardj6072c362002-04-19 14:40:57 +00001594 vg_assert(is_valid_tid(jnr));
sewardje663cb92002-04-12 10:26:32 +00001595 vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee);
1596 jnr_args = (UInt*)vg_threads[jnr].m_eax;
1597 jnr_thread_return = (void**)(jnr_args[2]);
1598 if (jnr_thread_return != NULL)
1599 *jnr_thread_return = vg_threads[tid].retval;
sewardjc3bd5f52002-05-01 03:24:23 +00001600 SET_EDX(jnr, 0); /* success */
sewardje663cb92002-04-12 10:26:32 +00001601 vg_threads[jnr].status = VgTs_Runnable;
1602 vg_threads[tid].status = VgTs_Empty; /* bye! */
sewardj75fe1892002-04-14 02:46:33 +00001603 if (VG_(clo_instrument) && tid != 0)
1604 VGM_(make_noaccess)( vg_threads[tid].stack_base,
1605 vg_threads[tid].stack_size );
sewardj8937c812002-04-12 20:12:20 +00001606 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001607 VG_(sprintf)(msg_buf,
1608 "root fn returns, to find a waiting pthread_join(%d)", tid);
1609 print_sched_event(tid, msg_buf);
1610 VG_(sprintf)(msg_buf,
1611 "my pthread_join(%d) returned; resuming", tid);
1612 print_sched_event(jnr, msg_buf);
1613 }
1614 }
1615
1616 /* Return value is irrelevant; this thread will not get
1617 rescheduled. */
1618}
1619
1620
1621static
1622void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return )
1623{
1624 Char msg_buf[100];
1625
1626 /* jee, the joinee, is the thread specified as an arg in thread
1627 tid's call to pthread_join. So tid is the join-er. */
sewardj6072c362002-04-19 14:40:57 +00001628 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001629 vg_assert(vg_threads[tid].status == VgTs_Runnable);
1630
1631 if (jee == tid) {
sewardjc3bd5f52002-05-01 03:24:23 +00001632 SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
sewardje663cb92002-04-12 10:26:32 +00001633 vg_threads[tid].status = VgTs_Runnable;
1634 return;
1635 }
1636
1637 if (jee < 0
1638 || jee >= VG_N_THREADS
1639 || vg_threads[jee].status == VgTs_Empty) {
1640 /* Invalid thread to join to. */
sewardjc3bd5f52002-05-01 03:24:23 +00001641 SET_EDX(tid, EINVAL);
sewardje663cb92002-04-12 10:26:32 +00001642 vg_threads[tid].status = VgTs_Runnable;
1643 return;
1644 }
1645
1646 if (vg_threads[jee].joiner != VG_INVALID_THREADID) {
1647 /* Someone already did join on this thread */
sewardjc3bd5f52002-05-01 03:24:23 +00001648 SET_EDX(tid, EINVAL);
sewardje663cb92002-04-12 10:26:32 +00001649 vg_threads[tid].status = VgTs_Runnable;
1650 return;
1651 }
1652
1653 /* if (vg_threads[jee].detached) ... */
1654
1655 /* Perhaps the joinee has already finished? If so return
1656 immediately with its return code, and free up the slot. TODO:
1657 free it properly (also above). */
1658 if (vg_threads[jee].status == VgTs_WaitJoiner) {
1659 vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID);
sewardjc3bd5f52002-05-01 03:24:23 +00001660 SET_EDX(tid, 0); /* success */
1661 if (thread_return != NULL) {
sewardje663cb92002-04-12 10:26:32 +00001662 *thread_return = vg_threads[jee].retval;
sewardjc3bd5f52002-05-01 03:24:23 +00001663 /* Not really right, since it makes the thread's return value
1664 appear to be defined even if it isn't. */
1665 if (VG_(clo_instrument))
1666 VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
1667 }
sewardje663cb92002-04-12 10:26:32 +00001668 vg_threads[tid].status = VgTs_Runnable;
1669 vg_threads[jee].status = VgTs_Empty; /* bye! */
sewardj75fe1892002-04-14 02:46:33 +00001670 if (VG_(clo_instrument) && jee != 0)
1671 VGM_(make_noaccess)( vg_threads[jee].stack_base,
1672 vg_threads[jee].stack_size );
sewardj8937c812002-04-12 20:12:20 +00001673 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001674 VG_(sprintf)(msg_buf,
1675 "someone called pthread_join() on me; bye!");
1676 print_sched_event(jee, msg_buf);
1677 VG_(sprintf)(msg_buf,
1678 "my pthread_join(%d) returned immediately",
1679 jee );
1680 print_sched_event(tid, msg_buf);
1681 }
1682 return;
1683 }
1684
1685 /* Ok, so we'll have to wait on jee. */
1686 vg_threads[jee].joiner = tid;
1687 vg_threads[tid].status = VgTs_WaitJoinee;
sewardj8937c812002-04-12 20:12:20 +00001688 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001689 VG_(sprintf)(msg_buf,
1690 "blocking on call of pthread_join(%d)", jee );
1691 print_sched_event(tid, msg_buf);
1692 }
1693 /* So tid's join call does not return just now. */
1694}
1695
1696
1697static
1698void do_pthread_create ( ThreadId parent_tid,
1699 pthread_t* thread,
1700 pthread_attr_t* attr,
1701 void* (*start_routine)(void *),
1702 void* arg )
1703{
sewardj5f07b662002-04-23 16:52:51 +00001704 Int i;
sewardje663cb92002-04-12 10:26:32 +00001705 Addr new_stack;
1706 UInt new_stk_szb;
1707 ThreadId tid;
1708 Char msg_buf[100];
1709
1710 /* Paranoia ... */
1711 vg_assert(sizeof(pthread_t) == sizeof(UInt));
1712
1713 vg_assert(vg_threads[parent_tid].status != VgTs_Empty);
1714
sewardj1e8cdc92002-04-18 11:37:52 +00001715 tid = vg_alloc_ThreadState();
sewardje663cb92002-04-12 10:26:32 +00001716
1717 /* If we've created the main thread's tid, we're in deep trouble :) */
sewardj6072c362002-04-19 14:40:57 +00001718 vg_assert(tid != 1);
1719 vg_assert(is_valid_tid(tid));
sewardje663cb92002-04-12 10:26:32 +00001720
1721 /* Copy the parent's CPU state into the child's, in a roundabout
1722 way (via baseBlock). */
1723 VG_(load_thread_state)(parent_tid);
1724 VG_(save_thread_state)(tid);
1725
1726 /* Consider allocating the child a stack, if the one it already has
1727 is inadequate. */
sewardjbf290b92002-05-01 02:28:01 +00001728 new_stk_szb = VG_PTHREAD_STACK_MIN;
sewardje663cb92002-04-12 10:26:32 +00001729
1730 if (new_stk_szb > vg_threads[tid].stack_size) {
1731 /* Again, for good measure :) We definitely don't want to be
1732 allocating a stack for the main thread. */
sewardj6072c362002-04-19 14:40:57 +00001733 vg_assert(tid != 1);
sewardje663cb92002-04-12 10:26:32 +00001734 /* for now, we don't handle the case of anything other than
1735 assigning it for the first time. */
1736 vg_assert(vg_threads[tid].stack_size == 0);
1737 vg_assert(vg_threads[tid].stack_base == (Addr)NULL);
1738 new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
1739 vg_threads[tid].stack_base = new_stack;
1740 vg_threads[tid].stack_size = new_stk_szb;
sewardj1e8cdc92002-04-18 11:37:52 +00001741 vg_threads[tid].stack_highest_word
sewardje663cb92002-04-12 10:26:32 +00001742 = new_stack + new_stk_szb
sewardj1e8cdc92002-04-18 11:37:52 +00001743           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */
sewardje663cb92002-04-12 10:26:32 +00001744 }
sewardj1e8cdc92002-04-18 11:37:52 +00001745
1746 vg_threads[tid].m_esp
1747 = vg_threads[tid].stack_base
1748 + vg_threads[tid].stack_size
1749 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
1750
sewardje663cb92002-04-12 10:26:32 +00001751 if (VG_(clo_instrument))
1752 VGM_(make_noaccess)( vg_threads[tid].m_esp,
1753 VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
1754
1755 /* push arg */
1756 vg_threads[tid].m_esp -= 4;
1757 * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg;
1758
1759 /* push (magical) return address */
1760 vg_threads[tid].m_esp -= 4;
sewardjbc5b99f2002-04-13 00:08:51 +00001761 * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA);
sewardje663cb92002-04-12 10:26:32 +00001762
1763 if (VG_(clo_instrument))
1764 VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 );
1765
1766 /* this is where we start */
1767 vg_threads[tid].m_eip = (UInt)start_routine;
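   /* The child's stack now looks like this (higher addresses first;
      an illustrative sketch):

         [ redzone: VG_AR_CLIENT_STACKBASE_REDZONE_SZB bytes ]
         [ arg                                               ]
         [ &VG_(pthreadreturn_bogusRA)                       ]  <-- m_esp

      so that when start_routine() returns, it "returns" to
      VG_(pthreadreturn_bogusRA). */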
1768
sewardj8937c812002-04-12 20:12:20 +00001769 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001770 VG_(sprintf)(msg_buf,
1771 "new thread, created by %d", parent_tid );
1772 print_sched_event(tid, msg_buf);
1773 }
1774
1775 /* store the thread id in *thread. */
1776 // if (VG_(clo_instrument))
1777 // ***** CHECK *thread is writable
1778 *thread = (pthread_t)tid;
sewardjc3bd5f52002-05-01 03:24:23 +00001779 if (VG_(clo_instrument))
1780 VGM_(make_readable)( (Addr)thread, sizeof(pthread_t) );
sewardje663cb92002-04-12 10:26:32 +00001781
sewardj3b5d8862002-04-20 13:53:23 +00001782 vg_threads[tid].associated_mx = NULL;
1783 vg_threads[tid].associated_cv = NULL;
1784 vg_threads[tid].joiner = VG_INVALID_THREADID;
1785 vg_threads[tid].status = VgTs_Runnable;
sewardj604ec3c2002-04-18 22:38:41 +00001786
sewardj5f07b662002-04-23 16:52:51 +00001787 for (i = 0; i < VG_N_THREAD_KEYS; i++)
1788 vg_threads[tid].specifics[i] = NULL;
1789
sewardj604ec3c2002-04-18 22:38:41 +00001790 /* return zero */
sewardjc3bd5f52002-05-01 03:24:23 +00001791 SET_EDX(tid, 0); /* success */
sewardje663cb92002-04-12 10:26:32 +00001792}
1793
1794
sewardj604ec3c2002-04-18 22:38:41 +00001795/* -----------------------------------------------------------
1796 MUTEXes
1797 -------------------------------------------------------- */
1798
sewardj604ec3c2002-04-18 22:38:41 +00001799/* pthread_mutex_t is a struct with 5 words:
sewardje663cb92002-04-12 10:26:32 +00001800 typedef struct
1801 {
1802 int __m_reserved; -- Reserved for future use
1803 int __m_count; -- Depth of recursive locking
1804 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
1805 int __m_kind; -- Mutex kind: fast, recursive or errcheck
1806 struct _pthread_fastlock __m_lock; -- Underlying fast lock
1807 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00001808
sewardj6072c362002-04-19 14:40:57 +00001809 #define PTHREAD_MUTEX_INITIALIZER \
1810 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
1811 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
1812 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
1813 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
1814 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
1815 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
1816 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00001817
sewardj6072c362002-04-19 14:40:57 +00001818 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00001819
sewardj6072c362002-04-19 14:40:57 +00001820 __m_kind never changes and indicates whether or not it is recursive.
1821
1822 __m_count indicates the lock count; if 0, the mutex is not owned by
1823 anybody.
1824
1825 __m_owner has a ThreadId value stuffed into it. We carefully arrange
1826 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
1827 statically initialised mutexes correctly appear
1828 to belong to nobody.
1829
1830   In summary, a not-in-use mutex is distinguished by having __m_owner
1831 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
1832 conditions holds, the other should too.
1833
1834 There is no linked list of threads waiting for this mutex. Instead
1835   a thread in WaitMX state points at the mutex with its
1836   associated_mx field.  This makes _unlock() inefficient, but makes
1837   it simple to implement the right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00001838
sewardj604ec3c2002-04-18 22:38:41 +00001839 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00001840 deals with that for us.
1841*/
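/* Worked example (a sketch, for a recursive mutex and thread 3):

      initial state       __m_count == 0, __m_owner == 0 (nobody)
      tid 3 locks it      __m_count == 1, __m_owner == 3
      tid 3 locks again   __m_count == 2, __m_owner == 3
      tid 3 unlocks       __m_count == 1, __m_owner == 3
      tid 3 unlocks       __m_count == 0, __m_owner == 0, unless
                          another thread is blocked in WaitMX on it,
                          in which case the hold transfers directly
                          to that thread (see
                          release_one_thread_waiting_on_mutex). */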
sewardje663cb92002-04-12 10:26:32 +00001842
sewardj3b5d8862002-04-20 13:53:23 +00001843/* Helper fns ... */
1844static
1845void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
1846 Char* caller )
1847{
1848 Int i;
1849 Char msg_buf[100];
1850
1851 /* Find some arbitrary thread waiting on this mutex, and make it
1852 runnable. If none are waiting, mark the mutex as not held. */
1853 for (i = 1; i < VG_N_THREADS; i++) {
1854 if (vg_threads[i].status == VgTs_Empty)
1855 continue;
1856 if (vg_threads[i].status == VgTs_WaitMX
1857 && vg_threads[i].associated_mx == mutex)
1858 break;
1859 }
1860
1861 vg_assert(i <= VG_N_THREADS);
1862 if (i == VG_N_THREADS) {
1863 /* Nobody else is waiting on it. */
1864 mutex->__m_count = 0;
1865 mutex->__m_owner = VG_INVALID_THREADID;
1866 } else {
1867 /* Notionally transfer the hold to thread i, whose
1868 pthread_mutex_lock() call now returns with 0 (success). */
1869 /* The .count is already == 1. */
1870 vg_assert(vg_threads[i].associated_mx == mutex);
1871 mutex->__m_owner = (_pthread_descr)i;
1872 vg_threads[i].status = VgTs_Runnable;
1873 vg_threads[i].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00001874 /* m_edx already holds pth_mx_lock() success (0) */
sewardj3b5d8862002-04-20 13:53:23 +00001875
1876 if (VG_(clo_trace_pthread_level) >= 1) {
1877 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
1878 caller, mutex );
1879 print_pthread_event(i, msg_buf);
1880 }
1881 }
1882}
1883
sewardje663cb92002-04-12 10:26:32 +00001884
1885static
sewardj30671ff2002-04-21 00:13:57 +00001886void do_pthread_mutex_lock( ThreadId tid,
1887 Bool is_trylock,
sewardjd7fd4d22002-04-24 01:57:27 +00001888 void* /* pthread_mutex_t* */ mutexV )
sewardje663cb92002-04-12 10:26:32 +00001889{
sewardj30671ff2002-04-21 00:13:57 +00001890 Char msg_buf[100];
1891 Char* caller
1892      = is_trylock ? "pthread_mutex_trylock"
1893                   : "pthread_mutex_lock   ";
sewardje663cb92002-04-12 10:26:32 +00001894
sewardjd7fd4d22002-04-24 01:57:27 +00001895 pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;
1896
sewardj604ec3c2002-04-18 22:38:41 +00001897 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj30671ff2002-04-21 00:13:57 +00001898 VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
sewardj604ec3c2002-04-18 22:38:41 +00001899 print_pthread_event(tid, msg_buf);
1900 }
1901
1902 /* Paranoia ... */
1903 vg_assert(is_valid_tid(tid)
1904 && vg_threads[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +00001905
1906 /* POSIX doesn't mandate this, but for sanity ... */
1907 if (mutex == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00001908 SET_EDX(tid, EINVAL);
sewardje663cb92002-04-12 10:26:32 +00001909 return;
1910 }
1911
sewardj604ec3c2002-04-18 22:38:41 +00001912 /* More paranoia ... */
1913 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00001914# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00001915 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00001916 case PTHREAD_MUTEX_ADAPTIVE_NP:
1917# endif
sewardj604ec3c2002-04-18 22:38:41 +00001918 case PTHREAD_MUTEX_RECURSIVE_NP:
1919 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00001920 if (mutex->__m_count >= 0) break;
1921 /* else fall thru */
1922 default:
sewardjc3bd5f52002-05-01 03:24:23 +00001923 SET_EDX(tid, EINVAL);
sewardj604ec3c2002-04-18 22:38:41 +00001924 return;
sewardje663cb92002-04-12 10:26:32 +00001925 }
1926
sewardj604ec3c2002-04-18 22:38:41 +00001927 if (mutex->__m_count > 0) {
sewardje663cb92002-04-12 10:26:32 +00001928
sewardj604ec3c2002-04-18 22:38:41 +00001929 vg_assert(is_valid_tid((ThreadId)mutex->__m_owner));
sewardjf8f819e2002-04-17 23:21:37 +00001930
1931 /* Someone has it already. */
sewardj604ec3c2002-04-18 22:38:41 +00001932 if ((ThreadId)mutex->__m_owner == tid) {
sewardjf8f819e2002-04-17 23:21:37 +00001933 /* It's locked -- by me! */
sewardj604ec3c2002-04-18 22:38:41 +00001934 if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
sewardjf8f819e2002-04-17 23:21:37 +00001935 /* return 0 (success). */
sewardj604ec3c2002-04-18 22:38:41 +00001936 mutex->__m_count++;
sewardjc3bd5f52002-05-01 03:24:23 +00001937 SET_EDX(tid, 0);
sewardj853f55d2002-04-26 00:27:53 +00001938 if (0)
1939 VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
1940 tid, mutex, mutex->__m_count);
sewardjf8f819e2002-04-17 23:21:37 +00001941 return;
1942 } else {
sewardj30671ff2002-04-21 00:13:57 +00001943 if (is_trylock)
sewardjc3bd5f52002-05-01 03:24:23 +00001944 SET_EDX(tid, EBUSY);
sewardj30671ff2002-04-21 00:13:57 +00001945 else
sewardjc3bd5f52002-05-01 03:24:23 +00001946 SET_EDX(tid, EDEADLK);
sewardjf8f819e2002-04-17 23:21:37 +00001947 return;
1948 }
1949 } else {
sewardj6072c362002-04-19 14:40:57 +00001950 /* Someone else has it; we have to wait. Mark ourselves
1951 thusly. */
sewardj05553872002-04-20 20:53:17 +00001952 /* GUARD: __m_count > 0 && __m_owner is valid */
sewardj30671ff2002-04-21 00:13:57 +00001953 if (is_trylock) {
1954 /* caller is polling; so return immediately. */
sewardjc3bd5f52002-05-01 03:24:23 +00001955 SET_EDX(tid, EBUSY);
sewardj30671ff2002-04-21 00:13:57 +00001956 } else {
1957 vg_threads[tid].status = VgTs_WaitMX;
1958 vg_threads[tid].associated_mx = mutex;
sewardjc3bd5f52002-05-01 03:24:23 +00001959 SET_EDX(tid, 0); /* pth_mx_lock success value */
sewardj30671ff2002-04-21 00:13:57 +00001960 if (VG_(clo_trace_pthread_level) >= 1) {
1961 VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
1962 caller, mutex );
1963 print_pthread_event(tid, msg_buf);
1964 }
1965 }
sewardje663cb92002-04-12 10:26:32 +00001966 return;
1967 }
sewardjf8f819e2002-04-17 23:21:37 +00001968
sewardje663cb92002-04-12 10:26:32 +00001969 } else {
sewardj6072c362002-04-19 14:40:57 +00001970 /* Nobody owns it. Sanity check ... */
1971 vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
sewardjf8f819e2002-04-17 23:21:37 +00001972 /* We get it! [for the first time]. */
sewardj604ec3c2002-04-18 22:38:41 +00001973 mutex->__m_count = 1;
1974 mutex->__m_owner = (_pthread_descr)tid;
sewardj3b5d8862002-04-20 13:53:23 +00001975 vg_assert(vg_threads[tid].associated_mx == NULL);
sewardje663cb92002-04-12 10:26:32 +00001976 /* return 0 (success). */
sewardjc3bd5f52002-05-01 03:24:23 +00001977 SET_EDX(tid, 0);
sewardje663cb92002-04-12 10:26:32 +00001978 }
sewardjf8f819e2002-04-17 23:21:37 +00001979
sewardje663cb92002-04-12 10:26:32 +00001980}
1981
1982
1983static
1984void do_pthread_mutex_unlock ( ThreadId tid,
sewardjd7fd4d22002-04-24 01:57:27 +00001985 void* /* pthread_mutex_t* */ mutexV )
sewardje663cb92002-04-12 10:26:32 +00001986{
sewardj3b5d8862002-04-20 13:53:23 +00001987 Char msg_buf[100];
sewardjd7fd4d22002-04-24 01:57:27 +00001988 pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;
sewardje663cb92002-04-12 10:26:32 +00001989
sewardj45b4b372002-04-16 22:50:32 +00001990 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00001991 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00001992 print_pthread_event(tid, msg_buf);
1993 }
1994
sewardj604ec3c2002-04-18 22:38:41 +00001995 /* Paranoia ... */
1996 vg_assert(is_valid_tid(tid)
1997 && vg_threads[tid].status == VgTs_Runnable);
1998
1999 if (mutex == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002000 SET_EDX(tid, EINVAL);
sewardj604ec3c2002-04-18 22:38:41 +00002001 return;
2002 }
2003
2004 /* More paranoia ... */
2005 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00002006# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00002007 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00002008 case PTHREAD_MUTEX_ADAPTIVE_NP:
2009# endif
sewardj604ec3c2002-04-18 22:38:41 +00002010 case PTHREAD_MUTEX_RECURSIVE_NP:
2011 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00002012 if (mutex->__m_count >= 0) break;
2013 /* else fall thru */
2014 default:
sewardjc3bd5f52002-05-01 03:24:23 +00002015 SET_EDX(tid, EINVAL);
sewardj604ec3c2002-04-18 22:38:41 +00002016 return;
2017 }
sewardje663cb92002-04-12 10:26:32 +00002018
2019 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00002020 if (mutex->__m_count == 0 /* nobody holds it */
2021 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardjc3bd5f52002-05-01 03:24:23 +00002022 SET_EDX(tid, EPERM);
sewardje663cb92002-04-12 10:26:32 +00002023 return;
2024 }
2025
sewardjf8f819e2002-04-17 23:21:37 +00002026 /* If it's a multiply-locked recursive mutex, just decrement the
2027 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00002028 if (mutex->__m_count > 1) {
2029 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
2030 mutex->__m_count --;
sewardjc3bd5f52002-05-01 03:24:23 +00002031 SET_EDX(tid, 0); /* success */
sewardjf8f819e2002-04-17 23:21:37 +00002032 return;
2033 }
2034
sewardj604ec3c2002-04-18 22:38:41 +00002035 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00002036 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00002037 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00002038 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00002039
sewardj3b5d8862002-04-20 13:53:23 +00002040 /* Release at max one thread waiting on this mutex. */
2041   release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_unlock" );
sewardje663cb92002-04-12 10:26:32 +00002042
sewardj3b5d8862002-04-20 13:53:23 +00002043 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardjc3bd5f52002-05-01 03:24:23 +00002044 SET_EDX(tid, 0); /* Success. */
sewardje663cb92002-04-12 10:26:32 +00002045}
2046
2047
sewardj6072c362002-04-19 14:40:57 +00002048/* -----------------------------------------------------------
2049 CONDITION VARIABLES
2050 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00002051
sewardj6072c362002-04-19 14:40:57 +00002052/* The relevant native types are as follows:
2053 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00002054
sewardj6072c362002-04-19 14:40:57 +00002055 -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER
2056 typedef struct
2057 {
2058 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
2059 _pthread_descr __c_waiting; -- Threads waiting on this condition
2060 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00002061
sewardj6072c362002-04-19 14:40:57 +00002062 -- Attribute for conditionally variables.
2063 typedef struct
2064 {
2065 int __dummy;
2066 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00002067
sewardj6072c362002-04-19 14:40:57 +00002068 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00002069
sewardj3b5d8862002-04-20 13:53:23 +00002070 We don't use any fields of pthread_cond_t for anything at all.
2071 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00002072
2073 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00002074 don't need to think too hard there. */
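/* The protocol implemented below is the usual one (a sketch; error
   paths omitted):

      pthread_cond_wait(cv, mx):
         caller must hold mx; release mx and block on cv
         (status = VgTs_WaitCV, associated_cv/associated_mx recorded)
      pthread_cond_signal(cv) / pthread_cond_broadcast(cv):
         for one (resp. all) threads blocked on cv: if mx is unheld,
         hand it over and make the thread Runnable; otherwise move
         the thread to VgTs_WaitMX, blocked on mx. */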
sewardj6072c362002-04-19 14:40:57 +00002075
sewardj77e466c2002-04-14 02:29:29 +00002076
sewardj5f07b662002-04-23 16:52:51 +00002077static
2078void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid )
2079{
2080 Char msg_buf[100];
2081 pthread_mutex_t* mx;
2082 pthread_cond_t* cv;
2083
2084 vg_assert(is_valid_tid(tid)
2085 && vg_threads[tid].status == VgTs_WaitCV
2086 && vg_threads[tid].awaken_at != 0xFFFFFFFF);
2087 mx = vg_threads[tid].associated_mx;
2088 vg_assert(mx != NULL);
2089 cv = vg_threads[tid].associated_cv;
2090 vg_assert(cv != NULL);
2091
2092 if (mx->__m_owner == VG_INVALID_THREADID) {
2093 /* Currently unheld; hand it out to thread tid. */
2094 vg_assert(mx->__m_count == 0);
2095 vg_threads[tid].status = VgTs_Runnable;
sewardjc3bd5f52002-05-01 03:24:23 +00002096      SET_EDX(tid, ETIMEDOUT);  /* pthread_cond_timedwait return value */
sewardj5f07b662002-04-23 16:52:51 +00002097 vg_threads[tid].associated_cv = NULL;
2098 vg_threads[tid].associated_mx = NULL;
2099 mx->__m_owner = (_pthread_descr)tid;
2100 mx->__m_count = 1;
2101
2102 if (VG_(clo_trace_pthread_level) >= 1) {
sewardjc3bd5f52002-05-01 03:24:23 +00002103 VG_(sprintf)(msg_buf,
2104         "pthread_cond_timedwait cv %p: TIMEOUT with mx %p",
2105 cv, mx );
sewardj5f07b662002-04-23 16:52:51 +00002106 print_pthread_event(tid, msg_buf);
2107 }
2108 } else {
2109 /* Currently held. Make thread tid be blocked on it. */
2110 vg_assert(mx->__m_count > 0);
2111 vg_threads[tid].status = VgTs_WaitMX;
sewardjc3bd5f52002-05-01 03:24:23 +00002112      SET_EDX(tid, ETIMEDOUT);  /* pthread_cond_timedwait return value */
sewardj5f07b662002-04-23 16:52:51 +00002113 vg_threads[tid].associated_cv = NULL;
2114 vg_threads[tid].associated_mx = mx;
2115 if (VG_(clo_trace_pthread_level) >= 1) {
2116 VG_(sprintf)(msg_buf,
2117         "pthread_cond_timedwait cv %p: TIMEOUT -> BLOCK for mx %p",
2118 cv, mx );
2119 print_pthread_event(tid, msg_buf);
2120 }
2121
2122 }
2123}
2124
2125
sewardj3b5d8862002-04-20 13:53:23 +00002126static
2127void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
2128 Int n_to_release,
2129 Char* caller )
2130{
2131 Int i;
2132 Char msg_buf[100];
2133 pthread_mutex_t* mx;
2134
2135 while (True) {
2136 if (n_to_release == 0)
2137 return;
2138
2139 /* Find a thread waiting on this CV. */
2140 for (i = 1; i < VG_N_THREADS; i++) {
2141 if (vg_threads[i].status == VgTs_Empty)
2142 continue;
2143 if (vg_threads[i].status == VgTs_WaitCV
2144 && vg_threads[i].associated_cv == cond)
2145 break;
2146 }
2147 vg_assert(i <= VG_N_THREADS);
2148
2149 if (i == VG_N_THREADS) {
2150 /* Nobody else is waiting on it. */
2151 return;
2152 }
2153
2154 mx = vg_threads[i].associated_mx;
2155 vg_assert(mx != NULL);
2156
2157 if (mx->__m_owner == VG_INVALID_THREADID) {
2158 /* Currently unheld; hand it out to thread i. */
2159 vg_assert(mx->__m_count == 0);
2160 vg_threads[i].status = VgTs_Runnable;
2161 vg_threads[i].associated_cv = NULL;
2162 vg_threads[i].associated_mx = NULL;
2163 mx->__m_owner = (_pthread_descr)i;
2164 mx->__m_count = 1;
sewardj5f07b662002-04-23 16:52:51 +00002165 /* .m_edx already holds pth_cond_wait success value (0) */
sewardj3b5d8862002-04-20 13:53:23 +00002166
2167 if (VG_(clo_trace_pthread_level) >= 1) {
2168 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
2169 caller, cond, mx );
2170 print_pthread_event(i, msg_buf);
2171 }
2172
2173 } else {
2174 /* Currently held. Make thread i be blocked on it. */
sewardj5f07b662002-04-23 16:52:51 +00002175 vg_assert(mx->__m_count > 0);
sewardj3b5d8862002-04-20 13:53:23 +00002176 vg_threads[i].status = VgTs_WaitMX;
2177 vg_threads[i].associated_cv = NULL;
2178 vg_threads[i].associated_mx = mx;
sewardjc3bd5f52002-05-01 03:24:23 +00002179 SET_EDX(i, 0); /* pth_cond_wait success value */
sewardj3b5d8862002-04-20 13:53:23 +00002180
2181 if (VG_(clo_trace_pthread_level) >= 1) {
2182 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
2183 caller, cond, mx );
2184 print_pthread_event(i, msg_buf);
2185 }
2186
2187 }
2188
2189 n_to_release--;
2190 }
2191}
2192
2193
2194static
2195void do_pthread_cond_wait ( ThreadId tid,
2196 pthread_cond_t *cond,
sewardj5f07b662002-04-23 16:52:51 +00002197 pthread_mutex_t *mutex,
2198 UInt ms_end )
sewardj3b5d8862002-04-20 13:53:23 +00002199{
2200 Char msg_buf[100];
2201
sewardj5f07b662002-04-23 16:52:51 +00002202 /* If ms_end == 0xFFFFFFFF, wait forever (no timeout). Otherwise,
2203 ms_end is the ending millisecond. */
2204
sewardj3b5d8862002-04-20 13:53:23 +00002205 /* pre: mutex should be a valid mutex and owned by tid. */
2206 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj5f07b662002-04-23 16:52:51 +00002207 VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...",
2208 cond, mutex, ms_end );
sewardj3b5d8862002-04-20 13:53:23 +00002209 print_pthread_event(tid, msg_buf);
2210 }
2211
2212 /* Paranoia ... */
2213 vg_assert(is_valid_tid(tid)
2214 && vg_threads[tid].status == VgTs_Runnable);
2215
2216 if (mutex == NULL || cond == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002217 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002218 return;
2219 }
2220
2221 /* More paranoia ... */
2222 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00002223# ifndef GLIBC_2_1
sewardj3b5d8862002-04-20 13:53:23 +00002224 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00002225 case PTHREAD_MUTEX_ADAPTIVE_NP:
2226# endif
sewardj3b5d8862002-04-20 13:53:23 +00002227 case PTHREAD_MUTEX_RECURSIVE_NP:
2228 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj3b5d8862002-04-20 13:53:23 +00002229 if (mutex->__m_count >= 0) break;
2230 /* else fall thru */
2231 default:
sewardjc3bd5f52002-05-01 03:24:23 +00002232 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002233 return;
2234 }
2235
2236 /* Barf if we don't currently hold the mutex. */
2237 if (mutex->__m_count == 0 /* nobody holds it */
2238 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardjc3bd5f52002-05-01 03:24:23 +00002239 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002240 return;
2241 }
2242
2243 /* Queue ourselves on the condition. */
2244 vg_threads[tid].status = VgTs_WaitCV;
2245 vg_threads[tid].associated_cv = cond;
2246 vg_threads[tid].associated_mx = mutex;
sewardj5f07b662002-04-23 16:52:51 +00002247 vg_threads[tid].awaken_at = ms_end;
sewardj3b5d8862002-04-20 13:53:23 +00002248
2249 if (VG_(clo_trace_pthread_level) >= 1) {
2250 VG_(sprintf)(msg_buf,
2251 "pthread_cond_wait cv %p, mx %p: BLOCK",
2252 cond, mutex );
2253 print_pthread_event(tid, msg_buf);
2254 }
2255
2256 /* Release the mutex. */
2257 release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
2258}
2259
2260
2261static
2262void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2263 Bool broadcast,
2264 pthread_cond_t *cond )
2265{
2266 Char msg_buf[100];
2267 Char* caller
2268 = broadcast ? "pthread_cond_broadcast"
2269 : "pthread_cond_signal ";
2270
2271 if (VG_(clo_trace_pthread_level) >= 2) {
2272 VG_(sprintf)(msg_buf, "%s cv %p ...",
2273 caller, cond );
2274 print_pthread_event(tid, msg_buf);
2275 }
2276
2277 /* Paranoia ... */
2278 vg_assert(is_valid_tid(tid)
2279 && vg_threads[tid].status == VgTs_Runnable);
2280
2281 if (cond == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002282 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002283 return;
2284 }
2285
2286 release_N_threads_waiting_on_cond (
2287 cond,
2288 broadcast ? VG_N_THREADS : 1,
2289 caller
2290 );
2291
sewardjc3bd5f52002-05-01 03:24:23 +00002292 SET_EDX(tid, 0); /* success */
sewardj3b5d8862002-04-20 13:53:23 +00002293}
2294
sewardj77e466c2002-04-14 02:29:29 +00002295
sewardj5f07b662002-04-23 16:52:51 +00002296/* -----------------------------------------------------------
2297 THREAD SPECIFIC DATA
2298 -------------------------------------------------------- */
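/* Illustrative client-side usage (standard pthreads API; not part of
   the scheduler).  key_create and setspecific are routed to the
   do_pthread_* handlers below via VG_USERREQ__ codes dispatched in
   do_nontrivial_clientreq; getspecific has no case there, so it is
   presumably handled on the fast path by maybe_do_trivial_clientreq. */
#if 0
   pthread_key_t k;
   void* p;
   pthread_key_create(&k, NULL);        /* -> do_pthread_key_create */
   pthread_setspecific(k, (void*)1);    /* -> do_pthread_setspecific */
   p = pthread_getspecific(k);          /* result comes back in %EDX */
#endif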
2299
2300static __inline__
2301Bool is_valid_key ( ThreadKey k )
2302{
2303 /* k unsigned; hence no < 0 check */
2304 if (k >= VG_N_THREAD_KEYS) return False;
2305 if (!vg_thread_keys[k].inuse) return False;
2306 return True;
2307}
2308
2309static
2310void do_pthread_key_create ( ThreadId tid,
2311 pthread_key_t* key,
2312 void (*destructor)(void*) )
2313{
2314 Int i;
2315 Char msg_buf[100];
2316
2317 if (VG_(clo_trace_pthread_level) >= 1) {
2318 VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p",
2319 key, destructor );
2320 print_pthread_event(tid, msg_buf);
2321 }
2322
2323 vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey));
2324 vg_assert(is_valid_tid(tid)
2325 && vg_threads[tid].status == VgTs_Runnable);
2326
2327 for (i = 0; i < VG_N_THREAD_KEYS; i++)
2328 if (!vg_thread_keys[i].inuse)
2329 break;
2330
2331 if (i == VG_N_THREAD_KEYS) {
sewardjc3bd5f52002-05-01 03:24:23 +00002332 /* SET_EDX(tid, EAGAIN);
sewardj5f07b662002-04-23 16:52:51 +00002333 return;
2334 */
2335 VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
2336 " increase and recompile");
2337 }
2338
2339 vg_thread_keys[i].inuse = True;
sewardjc3bd5f52002-05-01 03:24:23 +00002340
sewardj5f07b662002-04-23 16:52:51 +00002341 /* TODO: check key for addressibility */
2342 *key = i;
sewardjc3bd5f52002-05-01 03:24:23 +00002343 if (VG_(clo_instrument))
2344 VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );
2345
2346 SET_EDX(tid, 0);
sewardj5f07b662002-04-23 16:52:51 +00002347}
2348
2349
2350static
2351void do_pthread_key_delete ( ThreadId tid, pthread_key_t key )
2352{
2353 Char msg_buf[100];
2354 if (VG_(clo_trace_pthread_level) >= 1) {
2355 VG_(sprintf)(msg_buf, "pthread_key_delete key %d",
2356 key );
2357 print_pthread_event(tid, msg_buf);
2358 }
2359
2360 vg_assert(is_valid_tid(tid)
2361 && vg_threads[tid].status == VgTs_Runnable);
2362
2363 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002364 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002365 return;
2366 }
2367
2368 vg_thread_keys[key].inuse = False;
2369
2370 /* Optional. We're not required to do this, although it shouldn't
2371 make any difference to programs which use the key/specifics
2372 functions correctly. */
sewardj3b13f0e2002-04-25 20:17:29 +00002373# if 1
sewardj5f07b662002-04-23 16:52:51 +00002374 for (tid = 1; tid < VG_N_THREADS; tid++) {
2375 if (vg_threads[tid].status != VgTs_Empty)
2376 vg_threads[tid].specifics[key] = NULL;
2377 }
sewardj3b13f0e2002-04-25 20:17:29 +00002378# endif
sewardj5f07b662002-04-23 16:52:51 +00002379}
2380
2381
2382static
2383void do_pthread_getspecific ( ThreadId tid, pthread_key_t key )
2384{
2385 Char msg_buf[100];
2386 if (VG_(clo_trace_pthread_level) >= 1) {
2387 VG_(sprintf)(msg_buf, "pthread_getspecific key %d",
2388 key );
2389 print_pthread_event(tid, msg_buf);
2390 }
2391
2392 vg_assert(is_valid_tid(tid)
2393 && vg_threads[tid].status == VgTs_Runnable);
2394
2395 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002396 SET_EDX(tid, (UInt)NULL);
sewardj5f07b662002-04-23 16:52:51 +00002397 return;
2398 }
2399
sewardjc3bd5f52002-05-01 03:24:23 +00002400 SET_EDX(tid, (UInt)vg_threads[tid].specifics[key]);
sewardj5f07b662002-04-23 16:52:51 +00002401}
2402
2403
2404static
2405void do_pthread_setspecific ( ThreadId tid,
2406 pthread_key_t key,
2407 void *pointer )
2408{
2409 Char msg_buf[100];
2410 if (VG_(clo_trace_pthread_level) >= 1) {
2411 VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p",
2412 key, pointer );
2413 print_pthread_event(tid, msg_buf);
2414 }
2415
2416 vg_assert(is_valid_tid(tid)
2417 && vg_threads[tid].status == VgTs_Runnable);
2418
2419 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002420 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002421 return;
2422 }
2423
2424 vg_threads[tid].specifics[key] = pointer;
sewardjc3bd5f52002-05-01 03:24:23 +00002425 SET_EDX(tid, 0);
sewardj5f07b662002-04-23 16:52:51 +00002426}
2427
2428
sewardje663cb92002-04-12 10:26:32 +00002429/* ---------------------------------------------------------------------
2430 Handle non-trivial client requests.
2431 ------------------------------------------------------------------ */
2432
2433static
2434void do_nontrivial_clientreq ( ThreadId tid )
2435{
2436 UInt* arg = (UInt*)(vg_threads[tid].m_eax);
2437 UInt req_no = arg[0];
2438 switch (req_no) {
2439
2440 case VG_USERREQ__PTHREAD_CREATE:
2441 do_pthread_create( tid,
2442 (pthread_t*)arg[1],
2443 (pthread_attr_t*)arg[2],
2444 (void*(*)(void*))arg[3],
2445 (void*)arg[4] );
2446 break;
2447
sewardjbc5b99f2002-04-13 00:08:51 +00002448 case VG_USERREQ__PTHREAD_RETURNS:
2449 handle_pthread_return( tid, (void*)arg[1] );
sewardje663cb92002-04-12 10:26:32 +00002450 break;
2451
2452 case VG_USERREQ__PTHREAD_JOIN:
2453 do_pthread_join( tid, arg[1], (void**)(arg[2]) );
2454 break;
2455
sewardje663cb92002-04-12 10:26:32 +00002456 case VG_USERREQ__PTHREAD_CANCEL:
2457 do_pthread_cancel( tid, (pthread_t)(arg[1]) );
2458 break;
2459
sewardj3b5d8862002-04-20 13:53:23 +00002460 case VG_USERREQ__PTHREAD_EXIT:
2461 do_pthread_exit( tid, (void*)(arg[1]) );
2462 break;
2463
2464 case VG_USERREQ__PTHREAD_COND_WAIT:
2465 do_pthread_cond_wait( tid,
2466 (pthread_cond_t *)(arg[1]),
sewardj5f07b662002-04-23 16:52:51 +00002467 (pthread_mutex_t *)(arg[2]),
2468 0xFFFFFFFF /* no timeout */ );
2469 break;
2470
2471 case VG_USERREQ__PTHREAD_COND_TIMEDWAIT:
2472 do_pthread_cond_wait( tid,
2473 (pthread_cond_t *)(arg[1]),
2474 (pthread_mutex_t *)(arg[2]),
2475 arg[3] /* timeout millisecond point */ );
sewardj3b5d8862002-04-20 13:53:23 +00002476 break;
2477
2478 case VG_USERREQ__PTHREAD_COND_SIGNAL:
2479 do_pthread_cond_signal_or_broadcast(
2480 tid,
2481 False, /* signal, not broadcast */
2482 (pthread_cond_t *)(arg[1]) );
2483 break;
2484
2485 case VG_USERREQ__PTHREAD_COND_BROADCAST:
2486 do_pthread_cond_signal_or_broadcast(
2487 tid,
2488 True, /* broadcast, not signal */
2489 (pthread_cond_t *)(arg[1]) );
2490 break;
2491
sewardj5f07b662002-04-23 16:52:51 +00002492 case VG_USERREQ__PTHREAD_KEY_CREATE:
2493 do_pthread_key_create ( tid,
2494 (pthread_key_t*)(arg[1]),
2495 (void(*)(void*))(arg[2]) );
2496 break;
2497
2498 case VG_USERREQ__PTHREAD_KEY_DELETE:
2499 do_pthread_key_delete ( tid,
2500 (pthread_key_t)(arg[1]) );
2501 break;
2502
sewardj5f07b662002-04-23 16:52:51 +00002503 case VG_USERREQ__PTHREAD_SETSPECIFIC:
2504 do_pthread_setspecific ( tid,
2505 (pthread_key_t)(arg[1]),
2506 (void*)(arg[2]) );
2507 break;
2508
sewardje663cb92002-04-12 10:26:32 +00002509 case VG_USERREQ__MAKE_NOACCESS:
2510 case VG_USERREQ__MAKE_WRITABLE:
2511 case VG_USERREQ__MAKE_READABLE:
2512 case VG_USERREQ__DISCARD:
2513 case VG_USERREQ__CHECK_WRITABLE:
2514 case VG_USERREQ__CHECK_READABLE:
2515 case VG_USERREQ__MAKE_NOACCESS_STACK:
2516 case VG_USERREQ__RUNNING_ON_VALGRIND:
2517 case VG_USERREQ__DO_LEAK_CHECK:
sewardjc3bd5f52002-05-01 03:24:23 +00002518 SET_EDX(
2519 tid,
2520 VG_(handle_client_request) ( &vg_threads[tid], arg )
2521 );
sewardje663cb92002-04-12 10:26:32 +00002522 break;
2523
sewardj77e466c2002-04-14 02:29:29 +00002524 case VG_USERREQ__SIGNAL_RETURNS:
2525 handle_signal_return(tid);
2526 break;
sewardj54cacf02002-04-12 23:24:59 +00002527
sewardje663cb92002-04-12 10:26:32 +00002528 default:
2529 VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
2530 VG_(panic)("handle_private_client_pthread_request: "
2531 "unknown request");
2532 /*NOTREACHED*/
2533 break;
2534 }
2535}
2536
2537
sewardj6072c362002-04-19 14:40:57 +00002538/* ---------------------------------------------------------------------
2539 Sanity checking.
2540 ------------------------------------------------------------------ */
2541
2542/* Internal consistency checks on the sched/pthread structures. */
2543static
2544void scheduler_sanity ( void )
2545{
sewardj3b5d8862002-04-20 13:53:23 +00002546 pthread_mutex_t* mx;
2547 pthread_cond_t* cv;
sewardj6072c362002-04-19 14:40:57 +00002548 Int i;
sewardj5f07b662002-04-23 16:52:51 +00002549
sewardj6072c362002-04-19 14:40:57 +00002550 /* VG_(printf)("scheduler_sanity\n"); */
2551 for (i = 1; i < VG_N_THREADS; i++) {
sewardj3b5d8862002-04-20 13:53:23 +00002552 mx = vg_threads[i].associated_mx;
2553 cv = vg_threads[i].associated_cv;
sewardj6072c362002-04-19 14:40:57 +00002554 if (vg_threads[i].status == VgTs_WaitMX) {
sewardjbf290b92002-05-01 02:28:01 +00002555 /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
2556 it's actually held by someone, since otherwise this thread
2557 is deadlocked, (4) the mutex's owner is not us, since
2558 otherwise this thread is also deadlocked. The logic in
2559 do_pthread_mutex_lock rejects attempts by a thread to lock
2560 a (non-recursive) mutex which it already owns.
sewardj05553872002-04-20 20:53:17 +00002561
sewardjbf290b92002-05-01 02:28:01 +00002562 (2) has been seen to fail sometimes. I don't know why.
2563 Possibly to do with signals. */
sewardj3b5d8862002-04-20 13:53:23 +00002564 vg_assert(cv == NULL);
sewardj05553872002-04-20 20:53:17 +00002565 /* 1 */ vg_assert(mx != NULL);
2566 /* 2 */ vg_assert(mx->__m_count > 0);
2567 /* 3 */ vg_assert(is_valid_tid((ThreadId)mx->__m_owner));
2568 /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
sewardj3b5d8862002-04-20 13:53:23 +00002569 } else
2570 if (vg_threads[i].status == VgTs_WaitCV) {
2571 vg_assert(cv != NULL);
2572 vg_assert(mx != NULL);
sewardj6072c362002-04-19 14:40:57 +00002573 } else {
sewardj05553872002-04-20 20:53:17 +00002574 /* Unfortunately these don't hold true when a sighandler is
2575 running. To be fixed. */
2576 /* vg_assert(cv == NULL); */
2577 /* vg_assert(mx == NULL); */
sewardj6072c362002-04-19 14:40:57 +00002578 }
sewardjbf290b92002-05-01 02:28:01 +00002579
2580 if (vg_threads[i].status != VgTs_Empty) {
2581 Int
2582 stack_used = (Addr)vg_threads[i].stack_highest_word
2583 - (Addr)vg_threads[i].m_esp;
2584 if (i > 1 /* not the root thread */
2585 && stack_used
2586 >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
2587 VG_(message)(Vg_UserMsg,
2588 "Warning: STACK OVERFLOW: "
2589 "thread %d: stack used %d, available %d",
2590 i, stack_used, VG_PTHREAD_STACK_MIN );
2591 VG_(message)(Vg_UserMsg,
2592 "Terminating Valgrind. If thread(s) "
2593 "really need more stack, increase");
2594 VG_(message)(Vg_UserMsg,
2595 "VG_PTHREAD_STACK_SIZE in vg_include.h and recompile.");
2596 VG_(exit)(1);
2597 }
2598 }
sewardj6072c362002-04-19 14:40:57 +00002599 }
sewardj5f07b662002-04-23 16:52:51 +00002600
2601 for (i = 0; i < VG_N_THREAD_KEYS; i++) {
2602 if (!vg_thread_keys[i].inuse)
2603 vg_assert(vg_thread_keys[i].destructor == NULL);
2604 }
sewardj6072c362002-04-19 14:40:57 +00002605}
2606
2607
sewardje663cb92002-04-12 10:26:32 +00002608/*--------------------------------------------------------------------*/
2609/*--- end vg_scheduler.c ---*/
2610/*--------------------------------------------------------------------*/