blob: a2a5573bd44531c8b34eedf4a89a349132dd1067 [file] [log] [blame]
sewardje663cb92002-04-12 10:26:32 +00001
2/*--------------------------------------------------------------------*/
3/*--- A user-space pthreads implementation. vg_scheduler.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an x86 protected-mode emulator
8 designed for debugging and profiling binaries on x86-Unixes.
9
10 Copyright (C) 2000-2002 Julian Seward
11 jseward@acm.org
sewardje663cb92002-04-12 10:26:32 +000012
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file LICENSE.
29*/
30
31#include "vg_include.h"
32#include "vg_constants.h"
sewardje663cb92002-04-12 10:26:32 +000033#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
34 VG_USERREQ__DO_LEAK_CHECK */
35
sewardj705d3cb2002-05-23 13:13:12 +000036/* BORKAGE/ISSUES as of 23 May 02
sewardje663cb92002-04-12 10:26:32 +000037
sewardj77e466c2002-04-14 02:29:29 +000038- Currently, when a signal is run, just the ThreadStatus.status fields
39 are saved in the signal frame, along with the CPU state. Question:
40 should I also save and restore:
41 ThreadStatus.joiner
42 ThreadStatus.waited_on_mid
43 ThreadStatus.awaken_at
44 ThreadStatus.retval
45 Currently unsure, and so am not doing so.
sewardje663cb92002-04-12 10:26:32 +000046
sewardj77e466c2002-04-14 02:29:29 +000047- Signals interrupting read/write and nanosleep: SA_RESTART settings.
48 Read/write correctly return with EINTR when SA_RESTART isn't
49 specified and they are interrupted by a signal. nanosleep just
50 pretends signals don't exist -- should be fixed.
sewardje663cb92002-04-12 10:26:32 +000051
sewardj75fe1892002-04-14 02:46:33 +000052- Read/write syscall starts: don't crap out when the initial
53 nonblocking read/write returns an error.
sewardj8937c812002-04-12 20:12:20 +000054
sewardj705d3cb2002-05-23 13:13:12 +000055- So, what's the deal with signals and mutexes? If a thread is
sewardj6072c362002-04-19 14:40:57 +000056 blocked on a mutex, or for a condition variable for that matter, can
57 signals still be delivered to it? This has serious consequences --
58 deadlocks, etc.
59
sewardj705d3cb2002-05-23 13:13:12 +000060- Signals still not really right. Each thread should have its
61 own pending-set, but there is just one process-wide pending set.
62
sewardje462e202002-04-13 04:09:07 +000063*/
sewardje663cb92002-04-12 10:26:32 +000064
65
66/* ---------------------------------------------------------------------
67 Types and globals for the scheduler.
68 ------------------------------------------------------------------ */
69
70/* type ThreadId is defined in vg_include.h. */
71
72/* struct ThreadState is defined in vg_include.h. */
73
sewardj018f7622002-05-15 21:13:39 +000074/* Globals. A statically allocated array of threads. NOTE: [0] is
75 never used, to simplify the simulation of initialisers for
sewardj6072c362002-04-19 14:40:57 +000076 LinuxThreads. */
sewardj018f7622002-05-15 21:13:39 +000077ThreadState VG_(threads)[VG_N_THREADS];
sewardje663cb92002-04-12 10:26:32 +000078
sewardj1e8cdc92002-04-18 11:37:52 +000079/* The tid of the thread currently in VG_(baseBlock). */
80static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
81
sewardje663cb92002-04-12 10:26:32 +000082
83/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */
84jmp_buf VG_(scheduler_jmpbuf);
85/* ... and if so, here's the signal which caused it to do so. */
86Int VG_(longjmpd_on_signal);
87
88
89/* Machinery to keep track of which threads are waiting on which
90 fds. */
91typedef
92 struct {
93 /* The thread which made the request. */
94 ThreadId tid;
95
96 /* The next two fields describe the request. */
97 /* File descriptor waited for. -1 means this slot is not in use */
98 Int fd;
99 /* The syscall number the fd is used in. */
100 Int syscall_no;
101
102 /* False => still waiting for select to tell us the fd is ready
103 to go. True => the fd is ready, but the results have not yet
104 been delivered back to the calling thread. Once the latter
105 happens, this entire record is marked as no longer in use, by
106 making the fd field be -1. */
107 Bool ready;
108 }
109 VgWaitedOnFd;
110
111static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
112
113
sewardj5f07b662002-04-23 16:52:51 +0000114/* Keeping track of keys. */
115typedef
116 struct {
117 /* Has this key been allocated ? */
118 Bool inuse;
119 /* If .inuse==True, records the address of the associated
120 destructor, or NULL if none. */
121 void (*destructor)(void*);
122 }
123 ThreadKeyState;
124
125/* And our array of thread keys. */
126static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS];
127
128typedef UInt ThreadKey;
129
130
sewardje663cb92002-04-12 10:26:32 +0000131/* Forwards */
sewardj5f07b662002-04-23 16:52:51 +0000132static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );
133
sewardje663cb92002-04-12 10:26:32 +0000134static void do_nontrivial_clientreq ( ThreadId tid );
135
sewardj6072c362002-04-19 14:40:57 +0000136static void scheduler_sanity ( void );
137
sewardjd7fd4d22002-04-24 01:57:27 +0000138static void do_pthread_mutex_unlock ( ThreadId,
sewardj8ccc2be2002-05-10 20:26:37 +0000139 void* /* pthread_mutex_t* */ );
sewardjd7fd4d22002-04-24 01:57:27 +0000140static void do_pthread_mutex_lock ( ThreadId, Bool,
sewardj8ccc2be2002-05-10 20:26:37 +0000141 void* /* pthread_mutex_t* */ );
sewardjd7fd4d22002-04-24 01:57:27 +0000142
sewardj51c0aaf2002-04-25 01:32:10 +0000143static void do_pthread_getspecific ( ThreadId,
144 UInt /* pthread_key_t */ );
145
sewardje663cb92002-04-12 10:26:32 +0000146
147/* ---------------------------------------------------------------------
148 Helper functions for the scheduler.
149 ------------------------------------------------------------------ */
150
sewardjb48e5002002-05-13 00:16:03 +0000151__inline__
152Bool VG_(is_valid_tid) ( ThreadId tid )
sewardj604ec3c2002-04-18 22:38:41 +0000153{
154 /* tid is unsigned, hence no < 0 test. */
sewardj6072c362002-04-19 14:40:57 +0000155 if (tid == 0) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000156 if (tid >= VG_N_THREADS) return False;
sewardj018f7622002-05-15 21:13:39 +0000157 if (VG_(threads)[tid].status == VgTs_Empty) return False;
158 return True;
159}
160
161
162__inline__
163Bool VG_(is_valid_or_empty_tid) ( ThreadId tid )
164{
165 /* tid is unsigned, hence no < 0 test. */
166 if (tid == 0) return False;
167 if (tid >= VG_N_THREADS) return False;
sewardj604ec3c2002-04-18 22:38:41 +0000168 return True;
169}
170
171
sewardj1e8cdc92002-04-18 11:37:52 +0000172/* For constructing error messages only: try and identify a thread
173 whose stack this address currently falls within, or return
174 VG_INVALID_THREADID if it doesn't. A small complication is dealing
175 with any currently VG_(baseBlock)-resident thread.
176*/
ThreadId VG_(identify_stack_addr)( Addr a )
{
   ThreadId tid, tid_to_skip;

   tid_to_skip = VG_INVALID_THREADID;

   /* First check to see if there's a currently-loaded thread in
      VG_(baseBlock). */
   if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
      tid = vg_tid_currently_in_baseBlock;
      /* The resident thread's live %esp is in VG_(baseBlock), not in
         its (stale) ThreadState slot, so read it from there. */
      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
          && a <= VG_(threads)[tid].stack_highest_word)
         return tid;
      else
         tid_to_skip = tid;
   }

   /* Scan the parked threads, using each slot's saved %esp; skip the
      baseBlock-resident thread, which was already checked above. */
   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(threads)[tid].status == VgTs_Empty) continue;
      if (tid == tid_to_skip) continue;
      if (VG_(threads)[tid].m_esp <= a
          && a <= VG_(threads)[tid].stack_highest_word)
         return tid;
   }
   /* Address is not within any thread's [%esp, stack_highest_word]. */
   return VG_INVALID_THREADID;
}
203
204
sewardj15a43e12002-04-17 19:35:12 +0000205/* Print the scheduler status. */
void VG_(pp_sched_status) ( void )
{
   Int i;
   VG_(printf)("\nsched status:\n");
   /* Slot 0 is never used, so start at 1. */
   for (i = 1; i < VG_N_THREADS; i++) {
      if (VG_(threads)[i].status == VgTs_Empty) continue;
      VG_(printf)("\nThread %d: status = ", i);
      switch (VG_(threads)[i].status) {
         case VgTs_Runnable:   VG_(printf)("Runnable"); break;
         case VgTs_WaitFD:     VG_(printf)("WaitFD"); break;
         case VgTs_WaitJoinee: VG_(printf)("WaitJoinee(%d)",
                                           VG_(threads)[i].joiner_jee_tid);
                               break;
         case VgTs_WaitJoiner: VG_(printf)("WaitJoiner"); break;
         case VgTs_Sleeping:   VG_(printf)("Sleeping"); break;
         case VgTs_WaitMX:     VG_(printf)("WaitMX"); break;
         case VgTs_WaitCV:     VG_(printf)("WaitCV"); break;
         case VgTs_WaitSIG:    VG_(printf)("WaitSIG"); break;
         default: VG_(printf)("???"); break;
      }
      VG_(printf)(", associated_mx = %p, associated_cv = %p\n",
                  VG_(threads)[i].associated_mx,
                  VG_(threads)[i].associated_cv );
      /* Show where the thread currently is (backtrace from its saved
         %eip/%ebp). */
      VG_(pp_ExeContext)(
         VG_(get_ExeContext)( False, VG_(threads)[i].m_eip,
                              VG_(threads)[i].m_ebp ));
   }
   VG_(printf)("\n");
}
235
/* Record that thread tid is blocked in syscall syscall_no waiting on
   file descriptor fd.  Panics if vg_waiting_fds[] is full. */
static
void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
{
   Int i;

   vg_assert(fd != -1); /* avoid total chaos */

   /* Find a free slot; fd == -1 marks a slot as unused. */
   for (i = 0;  i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].fd == -1)
         break;

   if (i == VG_N_WAITING_FDS)
      VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low");
   /*
   VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n",
               tid, fd, i);
   */
   vg_waiting_fds[i].fd         = fd;
   vg_waiting_fds[i].tid        = tid;
   vg_waiting_fds[i].ready      = False;
   vg_waiting_fds[i].syscall_no = syscall_no;
}
258
259
260
/* Trace helper: log a scheduler event for thread tid. */
static
void print_sched_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what );
}
266
267
/* Trace helper: log a pthread-operation event for thread tid. */
static
void print_pthread_event ( ThreadId tid, Char* what )
{
   VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what );
}
273
274
275static
276Char* name_of_sched_event ( UInt event )
277{
278 switch (event) {
sewardje663cb92002-04-12 10:26:32 +0000279 case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL";
280 case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ";
281 case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
282 case VG_TRC_INNER_FASTMISS: return "FASTMISS";
283 case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
284 default: return "??UNKNOWN??";
285 }
286}
287
288
289/* Create a translation of the client basic block beginning at
290 orig_addr, and add it to the translation cache & translation table.
291 This probably doesn't really belong here, but, hey ...
292*/
/* JIT-translate the client basic block at orig_addr on behalf of
   thread tid and install the result in the translation cache and
   translation table. */
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   VG_(translate)( &VG_(threads)[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
}
328
329
330/* Allocate a completely empty ThreadState record. */
static
ThreadId vg_alloc_ThreadState ( void )
{
   Int i;
   /* Slot 0 is deliberately skipped; it is never used (see the
      VG_(threads) declaration comment). */
   for (i = 1; i < VG_N_THREADS; i++) {
      if (VG_(threads)[i].status == VgTs_Empty)
         return i;
   }
   /* No empty slot: fatal.  The caller cannot recover from this. */
   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
   VG_(panic)("VG_N_THREADS is too low");
   /*NOTREACHED*/
}
344
345
/* Return the ThreadState of the thread currently resident in
   VG_(baseBlock).  Asserts that some valid thread is resident. */
ThreadState* VG_(get_current_thread_state) ( void )
{
   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
   return & VG_(threads)[vg_tid_currently_in_baseBlock];
}
351
352
/* Return the tid of the thread currently resident in VG_(baseBlock).
   Asserts that some valid thread is resident. */
ThreadId VG_(get_current_tid) ( void )
{
   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
   return vg_tid_currently_in_baseBlock;
}
358
359
sewardje663cb92002-04-12 10:26:32 +0000360/* Copy the saved state of a thread into VG_(baseBlock), ready for it
361 to be run. */
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int i;
   /* baseBlock must be vacant before a thread can be loaded into it. */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)] = VG_(threads)[tid].m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(threads)[tid].m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(threads)[tid].m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)] = VG_(threads)[tid].m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)] = VG_(threads)[tid].m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)] = VG_(threads)[tid].m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(threads)[tid].m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)] = VG_(threads)[tid].m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(threads)[tid].m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)] = VG_(threads)[tid].m_eip;

   /* FPU state, word by word. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];

   /* sh_* ("shadow") register copies. */
   VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;

   /* Mark tid as the resident thread. */
   vg_tid_currently_in_baseBlock = tid;
}
394
395
396/* Copy the state of a thread from VG_(baseBlock), presumably after it
397 has been descheduled. For sanity-check purposes, fill the vacated
398 VG_(baseBlock) with garbage so as to make the system more likely to
399 fail quickly if we erroneously continue to poke around inside
400 VG_(baseBlock) without first doing a load_thread_state().
401*/
__inline__
void VG_(save_thread_state) ( ThreadId tid )
{
   Int i;
   const UInt junk = 0xDEADBEEF;

   /* A thread must actually be resident for there to be state to save. */
   vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID);

   /* Integer registers, flags and %eip. */
   VG_(threads)[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)];
   VG_(threads)[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)];
   VG_(threads)[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)];
   VG_(threads)[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)];
   VG_(threads)[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)];
   VG_(threads)[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)];
   VG_(threads)[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
   VG_(threads)[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
   VG_(threads)[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)];
   VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)];

   /* FPU state. */
   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];

   /* sh_* ("shadow") register copies. */
   VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
   VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
   VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
   VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
   VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
   VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
   VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
   VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
   VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];

   /* Fill it up with junk.  Note that only the m_* slots and the FPU
      area are junked; the sh_* slots are left as-is. */
   VG_(baseBlock)[VGOFF_(m_eax)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebx)] = junk;
   VG_(baseBlock)[VGOFF_(m_ecx)] = junk;
   VG_(baseBlock)[VGOFF_(m_edx)] = junk;
   VG_(baseBlock)[VGOFF_(m_esi)] = junk;
   VG_(baseBlock)[VGOFF_(m_edi)] = junk;
   VG_(baseBlock)[VGOFF_(m_ebp)] = junk;
   VG_(baseBlock)[VGOFF_(m_esp)] = junk;
   VG_(baseBlock)[VGOFF_(m_eflags)] = junk;
   VG_(baseBlock)[VGOFF_(m_eip)] = junk;

   for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk;

   /* baseBlock is now vacant. */
   vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;
}
451
452
453/* Run the thread tid for a while, and return a VG_TRC_* value to the
454 scheduler indicating what happened. */
/* Run thread tid until the translated-code inner loop stops, and
   return a VG_TRC_* code saying why; VG_TRC_UNRESUMABLE_SIGNAL if the
   signal handler longjmp'd back here. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   /* volatile: trc is written between setjmp and the longjmp return
      path, so it must not live (only) in a register. */
   volatile UInt trc = 0;
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VGP_PUSHCC(VgpRun);
   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   VGP_POPCC;
   return trc;
}
479
480
481/* Increment the LRU epoch counter. */
/* Advance the LRU epoch counter, optionally reporting per-epoch
   translation statistics, then reset those statistics. */
static
void increment_epoch ( void )
{
   VG_(current_epoch)++;
   if (VG_(clo_verbosity) > 2) {
      UInt tt_used, tc_used;
      VG_(get_tt_tc_used) ( &tt_used, &tc_used );
      VG_(message)(Vg_UserMsg,
         "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d",
         VG_(bbs_done),
         VG_(this_epoch_in_count),
         VG_(this_epoch_in_osize),
         VG_(this_epoch_in_tsize),
         VG_(this_epoch_out_count),
         VG_(this_epoch_out_osize),
         VG_(this_epoch_out_tsize),
         tt_used, tc_used
      );
   }
   /* Start the new epoch with clean statistics. */
   VG_(this_epoch_in_count) = 0;
   VG_(this_epoch_in_osize) = 0;
   VG_(this_epoch_in_tsize) = 0;
   VG_(this_epoch_out_count) = 0;
   VG_(this_epoch_out_osize) = 0;
   VG_(this_epoch_out_tsize) = 0;
}
508
509
sewardj20917d82002-05-28 01:36:45 +0000510static
511void mostly_clear_thread_record ( ThreadId tid )
512{
513 Int j;
514 vg_assert(tid >= 0 && tid < VG_N_THREADS);
515 VG_(threads)[tid].tid = tid;
516 VG_(threads)[tid].status = VgTs_Empty;
517 VG_(threads)[tid].associated_mx = NULL;
518 VG_(threads)[tid].associated_cv = NULL;
519 VG_(threads)[tid].awaken_at = 0;
520 VG_(threads)[tid].joinee_retval = NULL;
521 VG_(threads)[tid].joiner_thread_return = NULL;
522 VG_(threads)[tid].joiner_jee_tid = VG_INVALID_THREADID;
523 VG_(threads)[tid].cancel_st = True; /* PTHREAD_CANCEL_ENABLE */
524 VG_(threads)[tid].cancel_ty = True; /* PTHREAD_CANCEL_DEFERRED */
525 VG_(threads)[tid].cancel_pend = NULL; /* not pending */
526 VG_(threads)[tid].detached = False;
527 VG_(ksigemptyset)(&VG_(threads)[tid].sig_mask);
528 VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);
529 for (j = 0; j < VG_N_THREAD_KEYS; j++)
530 VG_(threads)[tid].specifics[j] = NULL;
531}
532
533
sewardje663cb92002-04-12 10:26:32 +0000534/* Initialise the scheduler. Create a single "main" thread ready to
sewardj6072c362002-04-19 14:40:57 +0000535 run, with special ThreadId of one. This is called at startup; the
   caller takes care to park the client's state in
537 VG_(baseBlock).
538*/
void VG_(scheduler_init) ( void )
{
   Int i;
   Addr startup_esp;
   ThreadId tid_main;

   startup_esp = VG_(baseBlock)[VGOFF_(m_esp)];

   /* Sanity-check that the client's startup %esp is near one of the
      two known startup stack bases. */
   if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1)
       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2)) {
      /* Jolly good! */
   } else {
      VG_(printf)("%%esp at startup = %p is not near %p or %p; aborting\n",
                  (void*)startup_esp,
                  (void*)VG_STARTUP_STACK_BASE_1,
                  (void*)VG_STARTUP_STACK_BASE_2 );
      VG_(panic)("unexpected %esp at startup");
   }

   /* Blank out every thread slot, including the never-used slot 0. */
   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      mostly_clear_thread_record(i);
      VG_(threads)[i].stack_size           = 0;
      VG_(threads)[i].stack_base           = (Addr)NULL;
      VG_(threads)[i].stack_highest_word   = (Addr)NULL;
   }

   for (i = 0; i < VG_N_WAITING_FDS; i++)
      vg_waiting_fds[i].fd = -1; /* not in use */

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      vg_thread_keys[i].inuse      = False;
      vg_thread_keys[i].destructor = NULL;
   }

   /* Assert this is thread zero, which has certain magic
      properties. */
   tid_main = vg_alloc_ThreadState();
   vg_assert(tid_main == 1);
   VG_(threads)[tid_main].status = VgTs_Runnable;

   /* Copy VG_(baseBlock) state to tid_main's slot. */
   vg_tid_currently_in_baseBlock = tid_main;
   VG_(save_thread_state) ( tid_main );

   /* Record the top of the main thread's stack, taken from the %esp
      just saved above. */
   VG_(threads)[tid_main].stack_highest_word
      = VG_(threads)[tid_main].m_esp /* -4 ??? */;

   /* So now ... save_thread_state must have vacated baseBlock. */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
}
589
590
591/* What if fd isn't a valid fd? */
/* Set O_NONBLOCK on fd via fcntl.  Both fcntl calls are asserted to
   succeed, so an invalid fd is a hard assertion failure. */
static
void set_fd_nonblocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res |= VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}
601
/* Clear O_NONBLOCK on fd via fcntl.  Both fcntl calls are asserted to
   succeed, so an invalid fd is a hard assertion failure. */
static
void set_fd_blocking ( Int fd )
{
   Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
   vg_assert(!VG_(is_kerror)(res));
   res &= ~VKI_O_NONBLOCK;
   res = VG_(fcntl)( fd, VKI_F_SETFL, res );
   vg_assert(!VG_(is_kerror)(res));
}
611
612static
613Bool fd_is_blockful ( Int fd )
614{
615 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
616 vg_assert(!VG_(is_kerror)(res));
617 return (res & VKI_O_NONBLOCK) ? False : True;
618}
619
sewardj3947e622002-05-23 16:52:11 +0000620static
621Bool fd_is_valid ( Int fd )
622{
623 Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 );
624 return VG_(is_kerror)(res) ? False : True;
625}
626
sewardje663cb92002-04-12 10:26:32 +0000627
628
/* Possibly do a trivial client request for tid.  Return values are:
sewardje663cb92002-04-12 10:26:32 +0000630
sewardjd7fd4d22002-04-24 01:57:27 +0000631 True = request done. Thread may or may not be still runnable;
632 caller must check. If it is still runnable, the result will be in
633 the thread's %EDX as expected.
634
635 False = request not done. A more capable but slower mechanism will
636 deal with it.
sewardje663cb92002-04-12 10:26:32 +0000637*/
/* Try to handle thread tid's client request cheaply, without a full
   reschedule.  Returns True if handled (result, if any, is in the
   thread's %EDX); False means the slower general mechanism must deal
   with it.  May leave the thread non-runnable (e.g. mutex lock);
   the scheduler checks for that on return. */
static
Bool maybe_do_trivial_clientreq ( ThreadId tid )
{
   /* Place vvv in the thread's %EDX (the client-request result
      register), mark the shadow as valid, and report "handled". */
#  define SIMPLE_RETURN(vvv)                      \
       { tst->m_edx = (vvv);                      \
         tst->sh_edx = VGM_WORD_VALID;            \
         return True;                             \
       }

   ThreadState* tst    = &VG_(threads)[tid];
   /* The request block's address is passed in the thread's %EAX;
      arg[0] is the request code, arg[1..] its arguments. */
   UInt*        arg    = (UInt*)(tst->m_eax);
   UInt         req_no = arg[0];

   /* VG_(printf)("req no = 0x%x\n", req_no); */
   switch (req_no) {
      case VG_USERREQ__MALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc )
         );
      case VG_USERREQ__BUILTIN_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew )
         );
      case VG_USERREQ__BUILTIN_VEC_NEW:
         SIMPLE_RETURN(
            (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec )
         );
      case VG_USERREQ__FREE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__BUILTIN_VEC_DELETE:
         VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec );
         SIMPLE_RETURN(0); /* irrelevant */
      case VG_USERREQ__CALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] )
         );
      case VG_USERREQ__REALLOC:
         SIMPLE_RETURN(
            (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] )
         );
      case VG_USERREQ__MEMALIGN:
         SIMPLE_RETURN(
            (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] )
         );

      /* These are heavily used -- or at least we want them to be
         cheap. */
      case VG_USERREQ__PTHREAD_GET_THREADID:
         SIMPLE_RETURN(tid);
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SIMPLE_RETURN(1);
      case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL:
         SIMPLE_RETURN(VG_(clo_trace_pthread_level));
      case VG_USERREQ__READ_MILLISECOND_TIMER:
         SIMPLE_RETURN(VG_(read_millisecond_timer)());

      case VG_USERREQ__PTHREAD_MUTEX_UNLOCK:
         do_pthread_mutex_unlock( tid, (void *)(arg[1]) );
         return True;

      /* This may make thread tid non-runnable, but the scheduler
         checks for that on return from this function. */
      case VG_USERREQ__PTHREAD_MUTEX_LOCK:
         do_pthread_mutex_lock( tid, False, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK:
         do_pthread_mutex_lock( tid, True, (void *)(arg[1]) );
         return True;

      case VG_USERREQ__PTHREAD_GETSPECIFIC:
         do_pthread_getspecific ( tid, (UInt)(arg[1]) );
         return True;

      default:
         /* Too hard; wimp out. */
         return False;
   }
#  undef SIMPLE_RETURN
}
722
723
sewardj6072c362002-04-19 14:40:57 +0000724/* vthread tid is returning from a signal handler; modify its
725 stack/regs accordingly. */
sewardj1ffa8da2002-04-26 22:47:57 +0000726
727/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD
728 for read or write, has been interrupted by a signal. Find and
729 clear the relevant vg_waiting_fd[] entry. Most of the code in this
730 procedure is total paranoia, if you look closely. */
static
void cleanup_waiting_fd_table ( ThreadId tid )
{
   Int i, waiters;

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_WaitFD);
   /* WaitFD only ever arises from a blocked read() or write(). */
   vg_assert(VG_(threads)[tid].m_eax == __NR_read
             || VG_(threads)[tid].m_eax == __NR_write);

   /* Excessively paranoidly ... find the fd this op was waiting
      for, and mark it as not being waited on. */
   waiters = 0;
   for (i = 0; i < VG_N_WAITING_FDS; i++) {
      if (vg_waiting_fds[i].tid == tid) {
         waiters++;
         vg_assert(vg_waiting_fds[i].syscall_no == VG_(threads)[tid].m_eax);
      }
   }
   /* There must be exactly one entry for this thread. */
   vg_assert(waiters == 1);
   for (i = 0; i < VG_N_WAITING_FDS; i++)
      if (vg_waiting_fds[i].tid == tid)
         break;
   vg_assert(i < VG_N_WAITING_FDS);
   vg_assert(vg_waiting_fds[i].fd != -1);
   vg_waiting_fds[i].fd = -1; /* not in use */
}
758
759
sewardj6072c362002-04-19 14:40:57 +0000760static
761void handle_signal_return ( ThreadId tid )
762{
763 Char msg_buf[100];
764 Bool restart_blocked_syscalls;
765
sewardjb48e5002002-05-13 00:16:03 +0000766 vg_assert(VG_(is_valid_tid)(tid));
sewardj6072c362002-04-19 14:40:57 +0000767
768 restart_blocked_syscalls = VG_(signal_returns)(tid);
769
770 if (restart_blocked_syscalls)
771 /* Easy; we don't have to do anything. */
772 return;
773
sewardj018f7622002-05-15 21:13:39 +0000774 if (VG_(threads)[tid].status == VgTs_WaitFD
775 && (VG_(threads)[tid].m_eax == __NR_read
776 || VG_(threads)[tid].m_eax == __NR_write)) {
sewardj6072c362002-04-19 14:40:57 +0000777 /* read() or write() interrupted. Force a return with EINTR. */
sewardj1ffa8da2002-04-26 22:47:57 +0000778 cleanup_waiting_fd_table(tid);
sewardj018f7622002-05-15 21:13:39 +0000779 VG_(threads)[tid].m_eax = -VKI_EINTR;
780 VG_(threads)[tid].status = VgTs_Runnable;
sewardj1ffa8da2002-04-26 22:47:57 +0000781
sewardj6072c362002-04-19 14:40:57 +0000782 if (VG_(clo_trace_sched)) {
783 VG_(sprintf)(msg_buf,
784 "read() / write() interrupted by signal; return EINTR" );
785 print_sched_event(tid, msg_buf);
786 }
787 return;
788 }
789
sewardj018f7622002-05-15 21:13:39 +0000790 if (VG_(threads)[tid].status == VgTs_WaitFD
791 && VG_(threads)[tid].m_eax == __NR_nanosleep) {
sewardj6072c362002-04-19 14:40:57 +0000792 /* We interrupted a nanosleep(). The right thing to do is to
793 write the unused time to nanosleep's second param and return
794 EINTR, but I'm too lazy for that. */
795 return;
796 }
797
sewardj018f7622002-05-15 21:13:39 +0000798 if (VG_(threads)[tid].status == VgTs_WaitFD) {
sewardj1ffa8da2002-04-26 22:47:57 +0000799 VG_(panic)("handle_signal_return: unknown interrupted syscall");
800 }
801
sewardj6072c362002-04-19 14:40:57 +0000802 /* All other cases? Just return. */
803}
804
805
sewardje663cb92002-04-12 10:26:32 +0000806static
807void sched_do_syscall ( ThreadId tid )
808{
809 UInt saved_eax;
810 UInt res, syscall_no;
811 UInt fd;
sewardje663cb92002-04-12 10:26:32 +0000812 Bool orig_fd_blockness;
813 Char msg_buf[100];
814
sewardjb48e5002002-05-13 00:16:03 +0000815 vg_assert(VG_(is_valid_tid)(tid));
sewardj018f7622002-05-15 21:13:39 +0000816 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +0000817
sewardj018f7622002-05-15 21:13:39 +0000818 syscall_no = VG_(threads)[tid].m_eax; /* syscall number */
sewardje663cb92002-04-12 10:26:32 +0000819
820 if (syscall_no == __NR_nanosleep) {
sewardj5f07b662002-04-23 16:52:51 +0000821 UInt t_now, t_awaken;
sewardje663cb92002-04-12 10:26:32 +0000822 struct vki_timespec* req;
sewardj018f7622002-05-15 21:13:39 +0000823 req = (struct vki_timespec*)VG_(threads)[tid].m_ebx; /* arg1 */
sewardj5f07b662002-04-23 16:52:51 +0000824 t_now = VG_(read_millisecond_timer)();
sewardje663cb92002-04-12 10:26:32 +0000825 t_awaken
826 = t_now
sewardj5f07b662002-04-23 16:52:51 +0000827 + (UInt)1000ULL * (UInt)(req->tv_sec)
828 + (UInt)(req->tv_nsec) / 1000000;
sewardj018f7622002-05-15 21:13:39 +0000829 VG_(threads)[tid].status = VgTs_Sleeping;
830 VG_(threads)[tid].awaken_at = t_awaken;
sewardj8937c812002-04-12 20:12:20 +0000831 if (VG_(clo_trace_sched)) {
sewardj5f07b662002-04-23 16:52:51 +0000832 VG_(sprintf)(msg_buf, "at %d: nanosleep for %d",
sewardje663cb92002-04-12 10:26:32 +0000833 t_now, t_awaken-t_now);
834 print_sched_event(tid, msg_buf);
835 }
836 /* Force the scheduler to run something else for a while. */
837 return;
838 }
839
sewardjaec22c02002-04-29 01:58:08 +0000840 if (syscall_no != __NR_read && syscall_no != __NR_write) {
sewardje663cb92002-04-12 10:26:32 +0000841 /* We think it's non-blocking. Just do it in the normal way. */
842 VG_(perform_assumed_nonblocking_syscall)(tid);
843 /* The thread is still runnable. */
844 return;
845 }
846
sewardje663cb92002-04-12 10:26:32 +0000847 /* Set the fd to nonblocking, and do the syscall, which will return
848 immediately, in order to lodge a request with the Linux kernel.
849 We later poll for I/O completion using select(). */
850
sewardj018f7622002-05-15 21:13:39 +0000851 fd = VG_(threads)[tid].m_ebx /* arg1 */;
sewardj3947e622002-05-23 16:52:11 +0000852
853 /* Deal with error case immediately. */
854 if (!fd_is_valid(fd)) {
855 VG_(message)(Vg_UserMsg,
856 "Warning: invalid file descriptor %d in syscall %s",
857 fd, syscall_no == __NR_read ? "read()" : "write()" );
858 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
859 KERNEL_DO_SYSCALL(tid, res);
860 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
861 /* We're still runnable. */
862 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
863 return;
864 }
865
866 /* From here onwards we know that fd is valid. */
867
sewardje663cb92002-04-12 10:26:32 +0000868 orig_fd_blockness = fd_is_blockful(fd);
869 set_fd_nonblocking(fd);
870 vg_assert(!fd_is_blockful(fd));
871 VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
872
873 /* This trashes the thread's %eax; we have to preserve it. */
sewardj018f7622002-05-15 21:13:39 +0000874 saved_eax = VG_(threads)[tid].m_eax;
sewardje663cb92002-04-12 10:26:32 +0000875 KERNEL_DO_SYSCALL(tid,res);
876
877 /* Restore original blockfulness of the fd. */
878 if (orig_fd_blockness)
879 set_fd_blocking(fd);
880 else
881 set_fd_nonblocking(fd);
882
sewardjaec22c02002-04-29 01:58:08 +0000883 if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) {
884 /* Finish off in the normal way. Don't restore %EAX, since that
885 now (correctly) holds the result of the call. We get here if either:
886 1. The call didn't block, or
887 2. The fd was already in nonblocking mode before we started to
888 mess with it. In this case, we're not expecting to handle
889 the I/O completion -- the client is. So don't file a
890 completion-wait entry.
891 */
sewardje663cb92002-04-12 10:26:32 +0000892 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
893 /* We're still runnable. */
sewardj018f7622002-05-15 21:13:39 +0000894 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
sewardje663cb92002-04-12 10:26:32 +0000895
896 } else {
897
sewardjaec22c02002-04-29 01:58:08 +0000898 vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness);
899
sewardje663cb92002-04-12 10:26:32 +0000900 /* It would have blocked. First, restore %EAX to what it was
901 before our speculative call. */
sewardj018f7622002-05-15 21:13:39 +0000902 VG_(threads)[tid].m_eax = saved_eax;
sewardje663cb92002-04-12 10:26:32 +0000903 /* Put this fd in a table of fds on which we are waiting for
904 completion. The arguments for select() later are constructed
905 from this table. */
906 add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
907 /* Deschedule thread until an I/O completion happens. */
sewardj018f7622002-05-15 21:13:39 +0000908 VG_(threads)[tid].status = VgTs_WaitFD;
sewardj8937c812002-04-12 20:12:20 +0000909 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +0000910 VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd);
911 print_sched_event(tid, msg_buf);
912 }
913
914 }
915}
916
917
918/* Find out which of the fds in vg_waiting_fds are now ready to go, by
919 making enquiries with select(), and mark them as ready. We have to
920 wait for the requesting threads to fall into the the WaitFD state
921 before we can actually finally deliver the results, so this
922 procedure doesn't do that; complete_blocked_syscalls() does it.
923
924 It might seem odd that a thread which has done a blocking syscall
925 is not in WaitFD state; the way this can happen is if it initially
926 becomes WaitFD, but then a signal is delivered to it, so it becomes
927 Runnable for a while. In this case we have to wait for the
928 sighandler to return, whereupon the WaitFD state is resumed, and
929 only at that point can the I/O result be delivered to it. However,
930 this point may be long after the fd is actually ready.
931
932 So, poll_for_ready_fds() merely detects fds which are ready.
933 complete_blocked_syscalls() does the second half of the trick,
934 possibly much later: it delivers the results from ready fds to
935 threads in WaitFD state.
936*/
sewardj9a199dc2002-04-14 13:01:38 +0000937static
sewardje663cb92002-04-12 10:26:32 +0000938void poll_for_ready_fds ( void )
939{
940 vki_ksigset_t saved_procmask;
941 vki_fd_set readfds;
942 vki_fd_set writefds;
943 vki_fd_set exceptfds;
944 struct vki_timeval timeout;
945 Int fd, fd_max, i, n_ready, syscall_no, n_ok;
946 ThreadId tid;
947 Bool rd_ok, wr_ok, ex_ok;
948 Char msg_buf[100];
949
sewardje462e202002-04-13 04:09:07 +0000950 struct vki_timespec* rem;
sewardj5f07b662002-04-23 16:52:51 +0000951 UInt t_now;
sewardje462e202002-04-13 04:09:07 +0000952
sewardje663cb92002-04-12 10:26:32 +0000953 /* Awaken any sleeping threads whose sleep has expired. */
sewardj6072c362002-04-19 14:40:57 +0000954 for (tid = 1; tid < VG_N_THREADS; tid++)
sewardj018f7622002-05-15 21:13:39 +0000955 if (VG_(threads)[tid].status == VgTs_Sleeping)
sewardj853f55d2002-04-26 00:27:53 +0000956 break;
sewardj6072c362002-04-19 14:40:57 +0000957
sewardj5f07b662002-04-23 16:52:51 +0000958 /* Avoid pointless calls to VG_(read_millisecond_timer). */
sewardj6072c362002-04-19 14:40:57 +0000959 if (tid < VG_N_THREADS) {
sewardj5f07b662002-04-23 16:52:51 +0000960 t_now = VG_(read_millisecond_timer)();
sewardj6072c362002-04-19 14:40:57 +0000961 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj018f7622002-05-15 21:13:39 +0000962 if (VG_(threads)[tid].status != VgTs_Sleeping)
sewardj6072c362002-04-19 14:40:57 +0000963 continue;
sewardj018f7622002-05-15 21:13:39 +0000964 if (t_now >= VG_(threads)[tid].awaken_at) {
sewardj6072c362002-04-19 14:40:57 +0000965 /* Resume this thread. Set to zero the remaining-time
966 (second) arg of nanosleep, since it's used up all its
967 time. */
sewardj018f7622002-05-15 21:13:39 +0000968 vg_assert(VG_(threads)[tid].m_eax == __NR_nanosleep);
969 rem = (struct vki_timespec *)VG_(threads)[tid].m_ecx; /* arg2 */
sewardj6072c362002-04-19 14:40:57 +0000970 if (rem != NULL) {
971 rem->tv_sec = 0;
972 rem->tv_nsec = 0;
973 }
974 /* Make the syscall return 0 (success). */
sewardj018f7622002-05-15 21:13:39 +0000975 VG_(threads)[tid].m_eax = 0;
sewardj6072c362002-04-19 14:40:57 +0000976 /* Reschedule this thread. */
sewardj018f7622002-05-15 21:13:39 +0000977 VG_(threads)[tid].status = VgTs_Runnable;
sewardj6072c362002-04-19 14:40:57 +0000978 if (VG_(clo_trace_sched)) {
sewardj5f07b662002-04-23 16:52:51 +0000979 VG_(sprintf)(msg_buf, "at %d: nanosleep done",
sewardj6072c362002-04-19 14:40:57 +0000980 t_now);
981 print_sched_event(tid, msg_buf);
982 }
sewardje663cb92002-04-12 10:26:32 +0000983 }
984 }
985 }
sewardje663cb92002-04-12 10:26:32 +0000986
sewardje462e202002-04-13 04:09:07 +0000987 /* And look for threads waiting on file descriptors which are now
988 ready for I/O.*/
sewardje663cb92002-04-12 10:26:32 +0000989 timeout.tv_sec = 0;
990 timeout.tv_usec = 0;
991
992 VKI_FD_ZERO(&readfds);
993 VKI_FD_ZERO(&writefds);
994 VKI_FD_ZERO(&exceptfds);
995 fd_max = -1;
996 for (i = 0; i < VG_N_WAITING_FDS; i++) {
997 if (vg_waiting_fds[i].fd == -1 /* not in use */)
998 continue;
999 if (vg_waiting_fds[i].ready /* already ready? */)
1000 continue;
1001 fd = vg_waiting_fds[i].fd;
1002 /* VG_(printf)("adding QUERY for fd %d\n", fd); */
sewardje462e202002-04-13 04:09:07 +00001003 vg_assert(fd >= 0);
sewardje663cb92002-04-12 10:26:32 +00001004 if (fd > fd_max)
1005 fd_max = fd;
1006 tid = vg_waiting_fds[i].tid;
sewardjb48e5002002-05-13 00:16:03 +00001007 vg_assert(VG_(is_valid_tid)(tid));
sewardje663cb92002-04-12 10:26:32 +00001008 syscall_no = vg_waiting_fds[i].syscall_no;
1009 switch (syscall_no) {
sewardj3984b852002-05-12 03:00:17 +00001010 case __NR_read:
1011 /* In order to catch timeout events on fds which are
1012 readable and which have been ioctl(TCSETA)'d with a
1013 VTIMEout, we appear to need to ask if the fd is
1014 writable, for some reason. Ask me not why. Since this
1015 is strange and potentially troublesome we only do it if
1016 the user asks specially. */
sewardj8d365b52002-05-12 10:52:16 +00001017 if (VG_(strstr)(VG_(clo_weird_hacks), "ioctl-VTIME") != NULL)
sewardj3984b852002-05-12 03:00:17 +00001018 VKI_FD_SET(fd, &writefds);
sewardje663cb92002-04-12 10:26:32 +00001019 VKI_FD_SET(fd, &readfds); break;
1020 case __NR_write:
1021 VKI_FD_SET(fd, &writefds); break;
1022 default:
1023 VG_(panic)("poll_for_ready_fds: unexpected syscall");
1024 /*NOTREACHED*/
1025 break;
1026 }
1027 }
1028
sewardje462e202002-04-13 04:09:07 +00001029 /* Short cut: if no fds are waiting, give up now. */
1030 if (fd_max == -1)
1031 return;
1032
sewardje663cb92002-04-12 10:26:32 +00001033 /* BLOCK ALL SIGNALS. We don't want the complication of select()
1034 getting interrupted. */
1035 VG_(block_all_host_signals)( &saved_procmask );
1036
1037 n_ready = VG_(select)
1038 ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout);
1039 if (VG_(is_kerror)(n_ready)) {
1040 VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready);
1041 VG_(panic)("poll_for_ready_fds: select failed?!");
1042 /*NOTREACHED*/
1043 }
1044
1045 /* UNBLOCK ALL SIGNALS */
sewardj018f7622002-05-15 21:13:39 +00001046 VG_(restore_all_host_signals)( &saved_procmask );
sewardje663cb92002-04-12 10:26:32 +00001047
1048 /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */
1049
1050 if (n_ready == 0)
1051 return;
1052
1053 /* Inspect all the fds we know about, and handle any completions that
1054 have happened. */
1055 /*
1056 VG_(printf)("\n\n");
1057 for (fd = 0; fd < 100; fd++)
1058 if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) {
1059 VG_(printf)("X"); } else { VG_(printf)("."); };
1060 VG_(printf)("\n\nfd_max = %d\n", fd_max);
1061 */
1062
1063 for (fd = 0; fd <= fd_max; fd++) {
1064 rd_ok = VKI_FD_ISSET(fd, &readfds);
1065 wr_ok = VKI_FD_ISSET(fd, &writefds);
1066 ex_ok = VKI_FD_ISSET(fd, &exceptfds);
1067
1068 n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0);
1069 if (n_ok == 0)
1070 continue;
1071 if (n_ok > 1) {
1072 VG_(printf)("offending fd = %d\n", fd);
1073 VG_(panic)("poll_for_ready_fds: multiple events on fd");
1074 }
1075
1076 /* An I/O event completed for fd. Find the thread which
1077 requested this. */
1078 for (i = 0; i < VG_N_WAITING_FDS; i++) {
1079 if (vg_waiting_fds[i].fd == -1 /* not in use */)
1080 continue;
1081 if (vg_waiting_fds[i].fd == fd)
1082 break;
1083 }
1084
1085 /* And a bit more paranoia ... */
1086 vg_assert(i >= 0 && i < VG_N_WAITING_FDS);
1087
1088 /* Mark the fd as ready. */
1089 vg_assert(! vg_waiting_fds[i].ready);
1090 vg_waiting_fds[i].ready = True;
1091 }
1092}
1093
1094
1095/* See comment attached to poll_for_ready_fds() for explaination. */
sewardj9a199dc2002-04-14 13:01:38 +00001096static
sewardje663cb92002-04-12 10:26:32 +00001097void complete_blocked_syscalls ( void )
1098{
1099 Int fd, i, res, syscall_no;
1100 ThreadId tid;
1101 Char msg_buf[100];
1102
1103 /* Inspect all the outstanding fds we know about. */
1104
1105 for (i = 0; i < VG_N_WAITING_FDS; i++) {
1106 if (vg_waiting_fds[i].fd == -1 /* not in use */)
1107 continue;
1108 if (! vg_waiting_fds[i].ready)
1109 continue;
1110
1111 fd = vg_waiting_fds[i].fd;
1112 tid = vg_waiting_fds[i].tid;
sewardjb48e5002002-05-13 00:16:03 +00001113 vg_assert(VG_(is_valid_tid)(tid));
sewardje663cb92002-04-12 10:26:32 +00001114
1115 /* The thread actually has to be waiting for the I/O event it
1116 requested before we can deliver the result! */
sewardj018f7622002-05-15 21:13:39 +00001117 if (VG_(threads)[tid].status != VgTs_WaitFD)
sewardje663cb92002-04-12 10:26:32 +00001118 continue;
1119
1120 /* Ok, actually do it! We can safely use %EAX as the syscall
1121 number, because the speculative call made by
1122 sched_do_syscall() doesn't change %EAX in the case where the
1123 call would have blocked. */
1124
1125 syscall_no = vg_waiting_fds[i].syscall_no;
sewardj018f7622002-05-15 21:13:39 +00001126 vg_assert(syscall_no == VG_(threads)[tid].m_eax);
sewardje663cb92002-04-12 10:26:32 +00001127 KERNEL_DO_SYSCALL(tid,res);
1128 VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
1129
1130 /* Reschedule. */
sewardj018f7622002-05-15 21:13:39 +00001131 VG_(threads)[tid].status = VgTs_Runnable;
sewardje663cb92002-04-12 10:26:32 +00001132 /* Mark slot as no longer in use. */
1133 vg_waiting_fds[i].fd = -1;
1134 /* pp_sched_status(); */
sewardj8937c812002-04-12 20:12:20 +00001135 if (VG_(clo_trace_sched)) {
sewardje663cb92002-04-12 10:26:32 +00001136 VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd);
1137 print_sched_event(tid, msg_buf);
1138 }
1139 }
1140}
1141
1142
1143static
sewardj5f07b662002-04-23 16:52:51 +00001144void check_for_pthread_cond_timedwait ( void )
1145{
sewardj51c0aaf2002-04-25 01:32:10 +00001146 Int i, now;
sewardj5f07b662002-04-23 16:52:51 +00001147 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00001148 if (VG_(threads)[i].status != VgTs_WaitCV)
sewardj5f07b662002-04-23 16:52:51 +00001149 continue;
sewardj018f7622002-05-15 21:13:39 +00001150 if (VG_(threads)[i].awaken_at == 0xFFFFFFFF /* no timeout */)
sewardj5f07b662002-04-23 16:52:51 +00001151 continue;
sewardj51c0aaf2002-04-25 01:32:10 +00001152 now = VG_(read_millisecond_timer)();
sewardj018f7622002-05-15 21:13:39 +00001153 if (now >= VG_(threads)[i].awaken_at) {
sewardj5f07b662002-04-23 16:52:51 +00001154 do_pthread_cond_timedwait_TIMEOUT(i);
sewardj51c0aaf2002-04-25 01:32:10 +00001155 }
sewardj5f07b662002-04-23 16:52:51 +00001156 }
1157}
1158
1159
1160static
sewardje663cb92002-04-12 10:26:32 +00001161void nanosleep_for_a_while ( void )
1162{
1163 Int res;
1164 struct vki_timespec req;
1165 struct vki_timespec rem;
1166 req.tv_sec = 0;
sewardj51c0aaf2002-04-25 01:32:10 +00001167 req.tv_nsec = 20 * 1000 * 1000;
sewardje663cb92002-04-12 10:26:32 +00001168 res = VG_(nanosleep)( &req, &rem );
sewardj5f07b662002-04-23 16:52:51 +00001169 vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
sewardje663cb92002-04-12 10:26:32 +00001170}
1171
1172
1173/* ---------------------------------------------------------------------
1174 The scheduler proper.
1175 ------------------------------------------------------------------ */
1176
1177/* Run user-space threads until either
1178 * Deadlock occurs
1179 * One thread asks to shutdown Valgrind
1180 * The specified number of basic blocks has gone by.
1181*/
1182VgSchedReturnCode VG_(scheduler) ( void )
1183{
1184 ThreadId tid, tid_next;
1185 UInt trc;
1186 UInt dispatch_ctr_SAVED;
sewardj51c0aaf2002-04-25 01:32:10 +00001187 Int request_code, done_this_time, n_in_bounded_wait;
sewardje663cb92002-04-12 10:26:32 +00001188 Char msg_buf[100];
1189 Addr trans_addr;
sewardj14e03422002-04-24 19:51:31 +00001190 Bool sigs_delivered;
sewardje663cb92002-04-12 10:26:32 +00001191
1192 /* For the LRU structures, records when the epoch began. */
1193 ULong lru_epoch_started_at = 0;
1194
1195 /* Start with the root thread. tid in general indicates the
1196 currently runnable/just-finished-running thread. */
sewardj7e87e382002-05-03 19:09:05 +00001197 VG_(last_run_tid) = tid = 1;
sewardje663cb92002-04-12 10:26:32 +00001198
1199 /* This is the top level scheduler loop. It falls into three
1200 phases. */
1201 while (True) {
1202
sewardj6072c362002-04-19 14:40:57 +00001203 /* ======================= Phase 0 of 3 =======================
1204 Be paranoid. Always a good idea. */
sewardjd7fd4d22002-04-24 01:57:27 +00001205 stage1:
sewardj6072c362002-04-19 14:40:57 +00001206 scheduler_sanity();
sewardj0c3b53f2002-05-01 01:58:35 +00001207 VG_(do_sanity_checks)( False );
sewardj6072c362002-04-19 14:40:57 +00001208
sewardje663cb92002-04-12 10:26:32 +00001209 /* ======================= Phase 1 of 3 =======================
1210 Handle I/O completions and signals. This may change the
1211 status of various threads. Then select a new thread to run,
1212 or declare deadlock, or sleep if there are no runnable
1213 threads but some are blocked on I/O. */
1214
1215 /* Age the LRU structures if an epoch has been completed. */
1216 if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) {
1217 lru_epoch_started_at = VG_(bbs_done);
1218 increment_epoch();
1219 }
1220
1221 /* Was a debug-stop requested? */
1222 if (VG_(bbs_to_go) == 0)
1223 goto debug_stop;
1224
1225 /* Do the following loop until a runnable thread is found, or
1226 deadlock is detected. */
1227 while (True) {
1228
1229 /* For stats purposes only. */
1230 VG_(num_scheduling_events_MAJOR) ++;
1231
1232 /* See if any I/O operations which we were waiting for have
1233 completed, and, if so, make runnable the relevant waiting
1234 threads. */
1235 poll_for_ready_fds();
1236 complete_blocked_syscalls();
sewardj5f07b662002-04-23 16:52:51 +00001237 check_for_pthread_cond_timedwait();
sewardje663cb92002-04-12 10:26:32 +00001238
1239 /* See if there are any signals which need to be delivered. If
1240 so, choose thread(s) to deliver them to, and build signal
1241 delivery frames on those thread(s) stacks. */
sewardj6072c362002-04-19 14:40:57 +00001242
1243 /* Be careful about delivering signals to a thread waiting
1244 for a mutex. In particular, when the handler is running,
1245 that thread is temporarily apparently-not-waiting for the
1246 mutex, so if it is unlocked by another thread whilst the
1247 handler is running, this thread is not informed. When the
1248 handler returns, the thread resumes waiting on the mutex,
1249 even if, as a result, it has missed the unlocking of it.
1250 Potential deadlock. This sounds all very strange, but the
1251 POSIX standard appears to require this behaviour. */
sewardjb48e5002002-05-13 00:16:03 +00001252 sigs_delivered = VG_(deliver_signals)();
sewardj14e03422002-04-24 19:51:31 +00001253 if (sigs_delivered)
sewardj0c3b53f2002-05-01 01:58:35 +00001254 VG_(do_sanity_checks)( False );
sewardje663cb92002-04-12 10:26:32 +00001255
1256 /* Try and find a thread (tid) to run. */
1257 tid_next = tid;
sewardj51c0aaf2002-04-25 01:32:10 +00001258 n_in_bounded_wait = 0;
sewardje663cb92002-04-12 10:26:32 +00001259 while (True) {
1260 tid_next++;
sewardj6072c362002-04-19 14:40:57 +00001261 if (tid_next >= VG_N_THREADS) tid_next = 1;
sewardj018f7622002-05-15 21:13:39 +00001262 if (VG_(threads)[tid_next].status == VgTs_WaitFD
1263 || VG_(threads)[tid_next].status == VgTs_Sleeping
1264 || VG_(threads)[tid_next].status == VgTs_WaitSIG
1265 || (VG_(threads)[tid_next].status == VgTs_WaitCV
1266 && VG_(threads)[tid_next].awaken_at != 0xFFFFFFFF))
sewardj51c0aaf2002-04-25 01:32:10 +00001267 n_in_bounded_wait ++;
sewardj018f7622002-05-15 21:13:39 +00001268 if (VG_(threads)[tid_next].status == VgTs_Runnable)
sewardje663cb92002-04-12 10:26:32 +00001269 break; /* We can run this one. */
1270 if (tid_next == tid)
1271 break; /* been all the way round */
1272 }
1273 tid = tid_next;
1274
sewardj018f7622002-05-15 21:13:39 +00001275 if (VG_(threads)[tid].status == VgTs_Runnable) {
sewardje663cb92002-04-12 10:26:32 +00001276 /* Found a suitable candidate. Fall out of this loop, so
1277 we can advance to stage 2 of the scheduler: actually
1278 running the thread. */
1279 break;
1280 }
1281
1282 /* We didn't find a runnable thread. Now what? */
sewardj51c0aaf2002-04-25 01:32:10 +00001283 if (n_in_bounded_wait == 0) {
sewardj54cacf02002-04-12 23:24:59 +00001284 /* No runnable threads and no prospect of any appearing
1285 even if we wait for an arbitrary length of time. In
1286 short, we have a deadlock. */
sewardj15a43e12002-04-17 19:35:12 +00001287 VG_(pp_sched_status)();
sewardje663cb92002-04-12 10:26:32 +00001288 return VgSrc_Deadlock;
1289 }
1290
1291 /* At least one thread is in a fd-wait state. Delay for a
1292 while, and go round again, in the hope that eventually a
1293 thread becomes runnable. */
1294 nanosleep_for_a_while();
sewardj7e87e382002-05-03 19:09:05 +00001295 /* pp_sched_status(); */
sewardjb48e5002002-05-13 00:16:03 +00001296 /* VG_(printf)("."); */
sewardje663cb92002-04-12 10:26:32 +00001297 }
1298
1299
1300 /* ======================= Phase 2 of 3 =======================
1301 Wahey! We've finally decided that thread tid is runnable, so
1302 we now do that. Run it for as much of a quanta as possible.
1303 Trivial requests are handled and the thread continues. The
1304 aim is not to do too many of Phase 1 since it is expensive. */
1305
1306 if (0)
sewardj3b5d8862002-04-20 13:53:23 +00001307 VG_(printf)("SCHED: tid %d\n", tid);
sewardje663cb92002-04-12 10:26:32 +00001308
1309 /* Figure out how many bbs to ask vg_run_innerloop to do. Note
1310 that it decrements the counter before testing it for zero, so
1311 that if VG_(dispatch_ctr) is set to N you get at most N-1
1312 iterations. Also this means that VG_(dispatch_ctr) must
1313 exceed zero before entering the innerloop. Also also, the
1314 decrement is done before the bb is actually run, so you
1315 always get at least one decrement even if nothing happens.
1316 */
1317 if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM)
1318 VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
1319 else
1320 VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1;
1321
1322 /* ... and remember what we asked for. */
1323 dispatch_ctr_SAVED = VG_(dispatch_ctr);
1324
sewardj1e8cdc92002-04-18 11:37:52 +00001325 /* paranoia ... */
sewardj018f7622002-05-15 21:13:39 +00001326 vg_assert(VG_(threads)[tid].tid == tid);
sewardj1e8cdc92002-04-18 11:37:52 +00001327
sewardje663cb92002-04-12 10:26:32 +00001328 /* Actually run thread tid. */
1329 while (True) {
1330
sewardj7e87e382002-05-03 19:09:05 +00001331 VG_(last_run_tid) = tid;
1332
sewardje663cb92002-04-12 10:26:32 +00001333 /* For stats purposes only. */
1334 VG_(num_scheduling_events_MINOR) ++;
1335
1336 if (0)
1337 VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
1338 tid, VG_(dispatch_ctr) - 1 );
sewardjb3eef6b2002-05-01 00:05:27 +00001339# if 0
1340 if (VG_(bbs_done) > 31700000 + 0) {
1341 dispatch_ctr_SAVED = VG_(dispatch_ctr) = 2;
sewardj018f7622002-05-15 21:13:39 +00001342 VG_(translate)(&VG_(threads)[tid], VG_(threads)[tid].m_eip,
sewardjb3eef6b2002-05-01 00:05:27 +00001343 NULL,NULL,NULL);
1344 }
sewardj018f7622002-05-15 21:13:39 +00001345 vg_assert(VG_(threads)[tid].m_eip != 0);
sewardjb3eef6b2002-05-01 00:05:27 +00001346# endif
sewardje663cb92002-04-12 10:26:32 +00001347
1348 trc = run_thread_for_a_while ( tid );
1349
sewardjb3eef6b2002-05-01 00:05:27 +00001350# if 0
sewardj018f7622002-05-15 21:13:39 +00001351 if (0 == VG_(threads)[tid].m_eip) {
sewardjb3eef6b2002-05-01 00:05:27 +00001352 VG_(printf)("tid = %d, dc = %llu\n", tid, VG_(bbs_done));
sewardj018f7622002-05-15 21:13:39 +00001353 vg_assert(0 != VG_(threads)[tid].m_eip);
sewardjb3eef6b2002-05-01 00:05:27 +00001354 }
1355# endif
1356
sewardje663cb92002-04-12 10:26:32 +00001357 /* Deal quickly with trivial scheduling events, and resume the
1358 thread. */
1359
1360 if (trc == VG_TRC_INNER_FASTMISS) {
1361 vg_assert(VG_(dispatch_ctr) > 0);
1362
1363 /* Trivial event. Miss in the fast-cache. Do a full
1364 lookup for it. */
1365 trans_addr
sewardj018f7622002-05-15 21:13:39 +00001366 = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
sewardje663cb92002-04-12 10:26:32 +00001367 if (trans_addr == (Addr)0) {
1368 /* Not found; we need to request a translation. */
sewardj018f7622002-05-15 21:13:39 +00001369 create_translation_for( tid, VG_(threads)[tid].m_eip );
1370 trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
sewardje663cb92002-04-12 10:26:32 +00001371 if (trans_addr == (Addr)0)
1372 VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
1373 }
1374 continue; /* with this thread */
1375 }
1376
1377 if (trc == VG_TRC_EBP_JMP_CLIENTREQ) {
sewardj8ccc2be2002-05-10 20:26:37 +00001378 Bool done;
1379 /* VG_(printf)("request 0x%x\n",
sewardj018f7622002-05-15 21:13:39 +00001380 *(UInt*)(VG_(threads)[tid].m_eax)); */
sewardj8ccc2be2002-05-10 20:26:37 +00001381 done = maybe_do_trivial_clientreq(tid);
sewardjd7fd4d22002-04-24 01:57:27 +00001382 if (done) {
1383 /* The request is done. We try and continue with the
1384 same thread if still runnable. If not, go back to
1385 Stage 1 to select a new thread to run. */
sewardj018f7622002-05-15 21:13:39 +00001386 if (VG_(threads)[tid].status == VgTs_Runnable)
sewardjd7fd4d22002-04-24 01:57:27 +00001387 continue; /* with this thread */
1388 else
1389 goto stage1;
sewardje663cb92002-04-12 10:26:32 +00001390 }
1391 }
1392
sewardj51c0aaf2002-04-25 01:32:10 +00001393 if (trc == VG_TRC_EBP_JMP_SYSCALL) {
1394 /* Do a syscall for the vthread tid. This could cause it
sewardj7e87e382002-05-03 19:09:05 +00001395 to become non-runnable. One special case: spot the
1396 client doing calls to exit() and take this as the cue
1397 to exit. */
sewardjb3eef6b2002-05-01 00:05:27 +00001398# if 0
1399 { UInt* esp; Int i;
sewardj018f7622002-05-15 21:13:39 +00001400 esp=(UInt*)VG_(threads)[tid].m_esp;
sewardjb3eef6b2002-05-01 00:05:27 +00001401 VG_(printf)("\nBEFORE\n");
1402 for (i = 10; i >= -10; i--)
1403 VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]);
1404 }
1405# endif
1406
sewardj83798bf2002-05-24 00:11:16 +00001407 /* Is the client exiting for good? */
sewardj018f7622002-05-15 21:13:39 +00001408 if (VG_(threads)[tid].m_eax == __NR_exit)
sewardj7e87e382002-05-03 19:09:05 +00001409 return VgSrc_ExitSyscall;
1410
sewardj83798bf2002-05-24 00:11:16 +00001411 /* Trap syscalls to __NR_sched_yield and just have this
1412 thread yield instead. Not essential, just an
1413 optimisation. */
1414 if (VG_(threads)[tid].m_eax == __NR_sched_yield) {
1415 SET_EAX(tid, 0); /* syscall returns with success */
1416 goto stage1; /* find a new thread to run */
1417 }
1418
sewardj51c0aaf2002-04-25 01:32:10 +00001419 sched_do_syscall(tid);
sewardjb3eef6b2002-05-01 00:05:27 +00001420
1421# if 0
1422 { UInt* esp; Int i;
sewardj018f7622002-05-15 21:13:39 +00001423 esp=(UInt*)VG_(threads)[tid].m_esp;
sewardjb3eef6b2002-05-01 00:05:27 +00001424 VG_(printf)("AFTER\n");
1425 for (i = 10; i >= -10; i--)
1426 VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]);
1427 }
1428# endif
1429
sewardj018f7622002-05-15 21:13:39 +00001430 if (VG_(threads)[tid].status == VgTs_Runnable)
sewardj51c0aaf2002-04-25 01:32:10 +00001431 continue; /* with this thread */
1432 else
1433 goto stage1;
1434 }
1435
sewardjd7fd4d22002-04-24 01:57:27 +00001436 /* It's an event we can't quickly deal with. Give up running
1437 this thread and handle things the expensive way. */
sewardje663cb92002-04-12 10:26:32 +00001438 break;
1439 }
1440
1441 /* ======================= Phase 3 of 3 =======================
1442 Handle non-trivial thread requests, mostly pthread stuff. */
1443
1444 /* Ok, we've fallen out of the dispatcher for a
1445 non-completely-trivial reason. First, update basic-block
1446 counters. */
1447
1448 done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1;
1449 vg_assert(done_this_time >= 0);
1450 VG_(bbs_to_go) -= (ULong)done_this_time;
1451 VG_(bbs_done) += (ULong)done_this_time;
1452
1453 if (0 && trc != VG_TRC_INNER_FASTMISS)
1454 VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
1455 tid, done_this_time, (Int)trc );
1456
1457 if (0 && trc != VG_TRC_INNER_FASTMISS)
1458 VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s",
1459 tid, VG_(bbs_done),
1460 name_of_sched_event(trc) );
sewardj9d1b5d32002-04-17 19:40:49 +00001461
sewardje663cb92002-04-12 10:26:32 +00001462 /* Examine the thread's return code to figure out why it
1463 stopped, and handle requests. */
1464
1465 switch (trc) {
1466
1467 case VG_TRC_INNER_FASTMISS:
1468 VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS");
1469 /*NOTREACHED*/
1470 break;
1471
1472 case VG_TRC_INNER_COUNTERZERO:
1473 /* Timeslice is out. Let a new thread be scheduled,
1474 simply by doing nothing, causing us to arrive back at
1475 Phase 1. */
1476 if (VG_(bbs_to_go) == 0) {
1477 goto debug_stop;
1478 }
1479 vg_assert(VG_(dispatch_ctr) == 0);
1480 break;
1481
1482 case VG_TRC_UNRESUMABLE_SIGNAL:
1483 /* It got a SIGSEGV/SIGBUS, which we need to deliver right
1484 away. Again, do nothing, so we wind up back at Phase
1485 1, whereupon the signal will be "delivered". */
1486 break;
1487
sewardje663cb92002-04-12 10:26:32 +00001488 case VG_TRC_EBP_JMP_CLIENTREQ:
1489 /* Do a client request for the vthread tid. Note that
1490 some requests will have been handled by
1491 maybe_do_trivial_clientreq(), so we don't expect to see
1492 those here.
1493 */
sewardj54cacf02002-04-12 23:24:59 +00001494 /* The thread's %EAX points at an arg block, the first
1495 word of which is the request code. */
sewardj018f7622002-05-15 21:13:39 +00001496 request_code = ((UInt*)(VG_(threads)[tid].m_eax))[0];
sewardje663cb92002-04-12 10:26:32 +00001497 if (0) {
sewardj54cacf02002-04-12 23:24:59 +00001498 VG_(sprintf)(msg_buf, "request 0x%x", request_code );
sewardje663cb92002-04-12 10:26:32 +00001499 print_sched_event(tid, msg_buf);
1500 }
1501 /* Do a non-trivial client request for thread tid. tid's
1502 %EAX points to a short vector of argument words, the
1503 first of which is the request code. The result of the
1504 request is put in tid's %EDX. Alternatively, perhaps
1505 the request causes tid to become non-runnable and/or
1506 other blocked threads become runnable. In general we
1507 can and often do mess with the state of arbitrary
1508 threads at this point. */
sewardj7e87e382002-05-03 19:09:05 +00001509 do_nontrivial_clientreq(tid);
sewardje663cb92002-04-12 10:26:32 +00001510 break;
1511
1512 default:
1513 VG_(printf)("\ntrc = %d\n", trc);
1514 VG_(panic)("VG_(scheduler), phase 3: "
1515 "unexpected thread return code");
1516 /* NOTREACHED */
1517 break;
1518
1519 } /* switch (trc) */
1520
1521 /* That completes Phase 3 of 3. Return now to the top of the
1522 main scheduler loop, to Phase 1 of 3. */
1523
1524 } /* top-level scheduler loop */
1525
1526
1527 /* NOTREACHED */
1528 VG_(panic)("scheduler: post-main-loop ?!");
1529 /* NOTREACHED */
1530
1531 debug_stop:
1532 /* If we exited because of a debug stop, print the translation
1533 of the last block executed -- by translating it again, and
1534 throwing away the result. */
1535 VG_(printf)(
1536 "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n");
sewardj018f7622002-05-15 21:13:39 +00001537 VG_(translate)( &VG_(threads)[tid],
1538 VG_(threads)[tid].m_eip, NULL, NULL, NULL );
sewardje663cb92002-04-12 10:26:32 +00001539 VG_(printf)("\n");
1540 VG_(printf)(
1541 "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n");
1542
1543 return VgSrc_BbsDone;
1544}
1545
1546
1547/* ---------------------------------------------------------------------
1548 The pthread implementation.
1549 ------------------------------------------------------------------ */
1550
1551#include <pthread.h>
1552#include <errno.h>
1553
sewardjbf290b92002-05-01 02:28:01 +00001554#define VG_PTHREAD_STACK_MIN \
sewardjc3bd5f52002-05-01 03:24:23 +00001555 (VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB)
sewardje663cb92002-04-12 10:26:32 +00001556
1557/* /usr/include/bits/pthreadtypes.h:
1558 typedef unsigned long int pthread_t;
1559*/
1560
sewardje663cb92002-04-12 10:26:32 +00001561
sewardj604ec3c2002-04-18 22:38:41 +00001562/* -----------------------------------------------------------
sewardj20917d82002-05-28 01:36:45 +00001563 Thread CREATION, JOINAGE and CANCELLATION: HELPER FNS
sewardj604ec3c2002-04-18 22:38:41 +00001564 -------------------------------------------------------- */
1565
sewardj20917d82002-05-28 01:36:45 +00001566/* We've decided to action a cancellation on tid. Make it jump to
1567 thread_exit_wrapper() in vg_libpthread.c, passing PTHREAD_CANCELED
1568 as the arg. */
1569static
1570void make_thread_jump_to_cancelhdlr ( ThreadId tid )
1571{
1572 Char msg_buf[100];
1573 vg_assert(VG_(is_valid_tid)(tid));
1574 /* Push PTHREAD_CANCELED on the stack and jump to the cancellation
1575 handler -- which is really thread_exit_wrapper() in
1576 vg_libpthread.c. */
1577 vg_assert(VG_(threads)[tid].cancel_pend != NULL);
1578 VG_(threads)[tid].m_esp -= 4;
1579 * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)PTHREAD_CANCELED;
1580 VG_(threads)[tid].m_eip = (UInt)VG_(threads)[tid].cancel_pend;
1581 VG_(threads)[tid].status = VgTs_Runnable;
1582 /* Make sure we aren't cancelled again whilst handling this
1583 cancellation. */
1584 VG_(threads)[tid].cancel_st = False;
1585 if (VG_(clo_trace_sched)) {
1586 VG_(sprintf)(msg_buf,
1587 "jump to cancellation handler (hdlr = %p)",
1588 VG_(threads)[tid].cancel_pend);
1589 print_sched_event(tid, msg_buf);
1590 }
1591}
1592
1593
1594
sewardjb48e5002002-05-13 00:16:03 +00001595/* Release resources and generally clean up once a thread has finally
1596 disappeared. */
1597static
1598void cleanup_after_thread_exited ( ThreadId tid )
1599{
sewardj3a951cf2002-05-15 22:25:47 +00001600 vki_ksigset_t irrelevant_sigmask;
sewardj018f7622002-05-15 21:13:39 +00001601 vg_assert(VG_(is_valid_or_empty_tid)(tid));
1602 vg_assert(VG_(threads)[tid].status == VgTs_Empty);
sewardjb48e5002002-05-13 00:16:03 +00001603 /* Mark its stack no-access */
1604 if (VG_(clo_instrument) && tid != 1)
sewardj018f7622002-05-15 21:13:39 +00001605 VGM_(make_noaccess)( VG_(threads)[tid].stack_base,
1606 VG_(threads)[tid].stack_size );
sewardjb48e5002002-05-13 00:16:03 +00001607 /* Forget about any pending signals directed specifically at this
sewardj018f7622002-05-15 21:13:39 +00001608 thread, and get rid of signal handlers specifically arranged for
1609 this thread. */
sewardj3a951cf2002-05-15 22:25:47 +00001610 VG_(block_all_host_signals)( &irrelevant_sigmask );
sewardj018f7622002-05-15 21:13:39 +00001611 VG_(handle_SCSS_change)( False /* lazy update */ );
sewardjb48e5002002-05-13 00:16:03 +00001612}
1613
1614
sewardj20917d82002-05-28 01:36:45 +00001615/* Look for matching pairs of threads waiting for joiners and threads
1616 waiting for joinees. For each such pair copy the return value of
1617 the joinee into the joiner, let the joiner resume and discard the
1618 joinee. */
1619static
1620void maybe_rendezvous_joiners_and_joinees ( void )
1621{
1622 Char msg_buf[100];
1623 void** thread_return;
1624 ThreadId jnr, jee;
1625
1626 for (jnr = 1; jnr < VG_N_THREADS; jnr++) {
1627 if (VG_(threads)[jnr].status != VgTs_WaitJoinee)
1628 continue;
1629 jee = VG_(threads)[jnr].joiner_jee_tid;
1630 if (jee == VG_INVALID_THREADID)
1631 continue;
1632 vg_assert(VG_(is_valid_tid)(jee));
1633 if (VG_(threads)[jee].status != VgTs_WaitJoiner)
1634 continue;
1635 /* ok! jnr is waiting to join with jee, and jee is waiting to be
1636 joined by ... well, any thread. So let's do it! */
1637
1638 /* Copy return value to where joiner wants it. */
1639 thread_return = VG_(threads)[jnr].joiner_thread_return;
1640 if (thread_return != NULL) {
1641 /* CHECK thread_return writable */
1642 *thread_return = VG_(threads)[jee].joinee_retval;
1643 /* Not really right, since it makes the thread's return value
1644 appear to be defined even if it isn't. */
1645 if (VG_(clo_instrument))
1646 VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
1647 }
1648
1649 /* Joinee is discarded */
1650 VG_(threads)[jee].status = VgTs_Empty; /* bye! */
1651 cleanup_after_thread_exited ( jee );
1652 if (VG_(clo_trace_sched)) {
1653 VG_(sprintf)(msg_buf,
1654 "rendezvous with joinee %d. %d resumes, %d exits.",
1655 jee, jnr, jee );
1656 print_sched_event(jnr, msg_buf);
1657 }
1658
1659 /* joiner returns with success */
1660 VG_(threads)[jnr].status = VgTs_Runnable;
1661 SET_EDX(jnr, 0);
1662 }
1663}
1664
1665
1666/* -----------------------------------------------------------
1667 Thread CREATION, JOINAGE and CANCELLATION: REQUESTS
1668 -------------------------------------------------------- */
1669
sewardje663cb92002-04-12 10:26:32 +00001670static
sewardjff42d1d2002-05-22 13:17:31 +00001671void do_pthread_yield ( ThreadId tid )
1672{
1673 Char msg_buf[100];
1674 vg_assert(VG_(is_valid_tid)(tid));
sewardjff42d1d2002-05-22 13:17:31 +00001675 if (VG_(clo_trace_sched)) {
1676 VG_(sprintf)(msg_buf, "yield");
1677 print_sched_event(tid, msg_buf);
1678 }
1679 SET_EDX(tid, 0);
1680}
1681
1682
1683static
sewardj20917d82002-05-28 01:36:45 +00001684void do__testcancel ( ThreadId tid )
sewardje663cb92002-04-12 10:26:32 +00001685{
sewardj7989d0c2002-05-28 11:00:01 +00001686 Char msg_buf[100];
sewardjb48e5002002-05-13 00:16:03 +00001687 vg_assert(VG_(is_valid_tid)(tid));
sewardj7989d0c2002-05-28 11:00:01 +00001688 if (VG_(clo_trace_sched)) {
1689 VG_(sprintf)(msg_buf, "testcancel");
1690 print_sched_event(tid, msg_buf);
1691 }
sewardj20917d82002-05-28 01:36:45 +00001692 if (/* is there a cancellation pending on this thread? */
1693 VG_(threads)[tid].cancel_pend != NULL
1694 && /* is this thread accepting cancellations? */
1695 VG_(threads)[tid].cancel_st) {
1696 /* Ok, let's do the cancellation. */
1697 make_thread_jump_to_cancelhdlr ( tid );
sewardje663cb92002-04-12 10:26:32 +00001698 } else {
sewardj20917d82002-05-28 01:36:45 +00001699 /* No, we keep going. */
1700 SET_EDX(tid, 0);
sewardje663cb92002-04-12 10:26:32 +00001701 }
sewardje663cb92002-04-12 10:26:32 +00001702}
1703
1704
1705static
sewardj20917d82002-05-28 01:36:45 +00001706void do__set_cancelstate ( ThreadId tid, Int state )
1707{
1708 Bool old_st;
sewardj7989d0c2002-05-28 11:00:01 +00001709 Char msg_buf[100];
sewardj20917d82002-05-28 01:36:45 +00001710 vg_assert(VG_(is_valid_tid)(tid));
sewardj7989d0c2002-05-28 11:00:01 +00001711 if (VG_(clo_trace_sched)) {
1712 VG_(sprintf)(msg_buf, "set_cancelstate to %d (%s)", state,
1713 state==PTHREAD_CANCEL_ENABLE
1714 ? "ENABLE"
1715 : (state==PTHREAD_CANCEL_DISABLE ? "DISABLE" : "???"));
1716 print_sched_event(tid, msg_buf);
1717 }
sewardj20917d82002-05-28 01:36:45 +00001718 old_st = VG_(threads)[tid].cancel_st;
1719 if (state == PTHREAD_CANCEL_ENABLE) {
1720 VG_(threads)[tid].cancel_st = True;
1721 } else
1722 if (state == PTHREAD_CANCEL_DISABLE) {
1723 VG_(threads)[tid].cancel_st = False;
1724 } else {
1725 VG_(panic)("do__set_cancelstate");
1726 }
1727 SET_EDX(tid, old_st ? PTHREAD_CANCEL_ENABLE
1728 : PTHREAD_CANCEL_DISABLE);
1729}
1730
1731
1732static
1733void do__set_canceltype ( ThreadId tid, Int type )
1734{
1735 Bool old_ty;
sewardj7989d0c2002-05-28 11:00:01 +00001736 Char msg_buf[100];
sewardj20917d82002-05-28 01:36:45 +00001737 vg_assert(VG_(is_valid_tid)(tid));
sewardj7989d0c2002-05-28 11:00:01 +00001738 if (VG_(clo_trace_sched)) {
1739 VG_(sprintf)(msg_buf, "set_canceltype to %d (%s)", type,
1740 type==PTHREAD_CANCEL_ASYNCHRONOUS
1741 ? "ASYNCHRONOUS"
1742 : (type==PTHREAD_CANCEL_DEFERRED ? "DEFERRED" : "???"));
1743 print_sched_event(tid, msg_buf);
1744 }
sewardj20917d82002-05-28 01:36:45 +00001745 old_ty = VG_(threads)[tid].cancel_ty;
1746 if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
1747 VG_(threads)[tid].cancel_ty = False;
1748 } else
1749 if (type == PTHREAD_CANCEL_DEFERRED) {
1750 VG_(threads)[tid].cancel_st = True;
1751 } else {
1752 VG_(panic)("do__set_canceltype");
1753 }
1754 SET_EDX(tid, old_ty ? PTHREAD_CANCEL_DEFERRED
1755 : PTHREAD_CANCEL_ASYNCHRONOUS);
1756}
1757
1758
sewardj7989d0c2002-05-28 11:00:01 +00001759/* Set or get the detach state for thread det. */
sewardj20917d82002-05-28 01:36:45 +00001760static
sewardj7989d0c2002-05-28 11:00:01 +00001761void do__set_or_get_detach ( ThreadId tid,
1762 Int what, ThreadId det )
sewardj20917d82002-05-28 01:36:45 +00001763{
sewardj7989d0c2002-05-28 11:00:01 +00001764 ThreadId i;
1765 Char msg_buf[100];
1766 /* VG_(printf)("do__set_or_get_detach tid %d what %d det %d\n",
1767 tid, what, det); */
sewardj20917d82002-05-28 01:36:45 +00001768 vg_assert(VG_(is_valid_tid)(tid));
sewardj7989d0c2002-05-28 11:00:01 +00001769 if (VG_(clo_trace_sched)) {
1770 VG_(sprintf)(msg_buf, "set_or_get_detach %d (%s) for tid %d", what,
1771 what==0 ? "not-detached" : (
1772 what==1 ? "detached" : (
1773 what==2 ? "fetch old value" : "???")),
1774 det );
1775 print_sched_event(tid, msg_buf);
1776 }
1777
1778 if (!VG_(is_valid_tid)(det)) {
1779 SET_EDX(tid, -1);
1780 return;
1781 }
1782
sewardj20917d82002-05-28 01:36:45 +00001783 switch (what) {
1784 case 2: /* get */
sewardj7989d0c2002-05-28 11:00:01 +00001785 SET_EDX(tid, VG_(threads)[det].detached ? 1 : 0);
sewardj20917d82002-05-28 01:36:45 +00001786 return;
sewardj7989d0c2002-05-28 11:00:01 +00001787 case 1: /* set detached. If someone is in a join-wait for det,
1788 do not detach. */
1789 for (i = 1; i < VG_N_THREADS; i++) {
1790 if (VG_(threads)[i].status == VgTs_WaitJoinee
1791 && VG_(threads)[i].joiner_jee_tid == det) {
1792 SET_EDX(tid, 0);
1793 if (VG_(clo_trace_sched)) {
1794 VG_(sprintf)(msg_buf,
1795 "tid %d not detached because %d in join-wait for it %d",
1796 det, i);
1797 print_sched_event(tid, msg_buf);
1798 }
1799 return;
1800 }
1801 }
1802 VG_(threads)[det].detached = True;
sewardj20917d82002-05-28 01:36:45 +00001803 SET_EDX(tid, 0);
1804 return;
1805 case 0: /* set not detached */
sewardj7989d0c2002-05-28 11:00:01 +00001806 VG_(threads)[det].detached = False;
sewardj20917d82002-05-28 01:36:45 +00001807 SET_EDX(tid, 0);
1808 return;
1809 default:
1810 VG_(panic)("do__set_or_get_detach");
1811 }
1812}
1813
1814
1815static
1816void do__set_cancelpend ( ThreadId tid,
1817 ThreadId cee,
1818 void (*cancelpend_hdlr)(void*) )
sewardje663cb92002-04-12 10:26:32 +00001819{
1820 Char msg_buf[100];
1821
sewardj20917d82002-05-28 01:36:45 +00001822 vg_assert(VG_(is_valid_tid)(tid));
1823 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
1824
sewardj7989d0c2002-05-28 11:00:01 +00001825 if (!VG_(is_valid_tid)(cee)) {
1826 if (VG_(clo_trace_sched)) {
1827 VG_(sprintf)(msg_buf,
1828 "set_cancelpend for invalid tid %d", cee);
1829 print_sched_event(tid, msg_buf);
1830 }
1831 SET_EDX(tid, -VKI_ESRCH);
1832 return;
1833 }
sewardj20917d82002-05-28 01:36:45 +00001834
1835 VG_(threads)[cee].cancel_pend = cancelpend_hdlr;
1836
1837 if (VG_(clo_trace_sched)) {
1838 VG_(sprintf)(msg_buf,
sewardj7989d0c2002-05-28 11:00:01 +00001839 "set_cancelpend (hdlr = %p, set by tid %d)",
sewardj20917d82002-05-28 01:36:45 +00001840 cancelpend_hdlr, tid);
1841 print_sched_event(cee, msg_buf);
1842 }
1843
1844 /* Thread doing the cancelling returns with success. */
1845 SET_EDX(tid, 0);
1846
1847 /* Perhaps we can nuke the cancellee right now? */
1848 do__testcancel(cee);
1849}
1850
1851
/* pthread_join: thread tid waits for thread jee to terminate.  On
   success the joinee's return value is eventually written through
   *thread_return (if non-NULL) by the rendezvous machinery.  Errors
   (EDEADLK for self-join, EINVAL for a bad joinee or a duplicate
   join) are reported immediately in tid's %EDX; otherwise tid blocks
   in VgTs_WaitJoinee until the rendezvous completes. */
static
void do_pthread_join ( ThreadId tid, 
                       ThreadId jee, void** thread_return )
{
   Char     msg_buf[100];
   ThreadId i;
   /* jee, the joinee, is the thread specified as an arg in thread
      tid's call to pthread_join.  So tid is the join-er. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);

   /* A self-join would block forever; fail it up front. */
   if (jee == tid) {
      SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
      VG_(threads)[tid].status = VgTs_Runnable;
      return;
   }

   /* Flush any completed pairs, so as to make sure what we're looking
      at is up-to-date. */
   maybe_rendezvous_joiners_and_joinees();

   /* Is this a sane request?  NOTE(review): if ThreadId is an
      unsigned type, the jee < 0 test is vacuous and the
      jee >= VG_N_THREADS test does the real bounds check -- confirm. */
   if (jee < 0 
       || jee >= VG_N_THREADS
       || VG_(threads)[jee].status == VgTs_Empty) {
      /* Invalid thread to join to. */
      SET_EDX(tid, EINVAL);
      VG_(threads)[tid].status = VgTs_Runnable;
      return;
   }

   /* Is anyone else already in a join-wait for jee?  We allow at
      most one joiner per joinee; a second join attempt fails. */
   for (i = 1; i < VG_N_THREADS; i++) {
      if (i == tid) continue;
      if (VG_(threads)[i].status == VgTs_WaitJoinee
          && VG_(threads)[i].joiner_jee_tid == jee) {
         /* Someone already did join on this thread */
         SET_EDX(tid, EINVAL);
         VG_(threads)[tid].status = VgTs_Runnable;
         return;
      }
   }

   /* Mark this thread as waiting for the joinee. */
   VG_(threads)[tid].status = VgTs_WaitJoinee;
   VG_(threads)[tid].joiner_thread_return = thread_return;
   VG_(threads)[tid].joiner_jee_tid = jee;

   /* Look for matching joiners and joinees and do the right thing. */
   maybe_rendezvous_joiners_and_joinees();

   /* Return value is irrelevant since this thread becomes
      non-runnable.  maybe_rendezvous_joiners_and_joinees() will cause
      it to return the right value when it resumes. */

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "wait for joinee %d (may already be ready)", jee);
      print_sched_event(tid, msg_buf);
   }
}
1913
1914
sewardj20917d82002-05-28 01:36:45 +00001915/* ( void* ): calling thread waits for joiner and returns the void* to
1916 it. This is one of two ways in which a thread can finally exit --
1917 the other is do__quit. */
sewardje663cb92002-04-12 10:26:32 +00001918static
sewardj20917d82002-05-28 01:36:45 +00001919void do__wait_joiner ( ThreadId tid, void* retval )
sewardje663cb92002-04-12 10:26:32 +00001920{
sewardj20917d82002-05-28 01:36:45 +00001921 Char msg_buf[100];
1922 vg_assert(VG_(is_valid_tid)(tid));
1923 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
1924 if (VG_(clo_trace_sched)) {
1925 VG_(sprintf)(msg_buf,
sewardj7989d0c2002-05-28 11:00:01 +00001926 "do__wait_joiner(retval = %p) (non-detached thread exit)", retval);
sewardj20917d82002-05-28 01:36:45 +00001927 print_sched_event(tid, msg_buf);
1928 }
1929 VG_(threads)[tid].status = VgTs_WaitJoiner;
1930 VG_(threads)[tid].joinee_retval = retval;
1931 maybe_rendezvous_joiners_and_joinees();
1932}
1933
1934
1935/* ( no-args ): calling thread disappears from the system forever.
1936 Reclaim resources. */
1937static
1938void do__quit ( ThreadId tid )
1939{
1940 Char msg_buf[100];
1941 vg_assert(VG_(is_valid_tid)(tid));
1942 vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
1943 VG_(threads)[tid].status = VgTs_Empty; /* bye! */
1944 cleanup_after_thread_exited ( tid );
sewardj20917d82002-05-28 01:36:45 +00001945 if (VG_(clo_trace_sched)) {
sewardj7989d0c2002-05-28 11:00:01 +00001946 VG_(sprintf)(msg_buf, "do__quit (detached thread exit)");
sewardj20917d82002-05-28 01:36:45 +00001947 print_sched_event(tid, msg_buf);
1948 }
1949 /* Return value is irrelevant; this thread will not get
1950 rescheduled. */
1951}
1952
1953
/* Should never be entered.  If it is, will be on the simulated CPU:
   this is the bogus return address pushed beneath a new thread's
   root function by do__apply_in_new_thread, so reaching it means the
   root function returned -- which it must never do (threads exit via
   __QUIT or __WAIT_JOINER instead). */
static
void do__apply_in_new_thread_bogusRA ( void )
{
   VG_(panic)("do__apply_in_new_thread_bogusRA");
}
1961
/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it.  Fn
   MUST NOT return -- ever.  Eventually it will do either __QUIT or
   __WAIT_JOINER.  Return the child tid to the parent. */
static
void do__apply_in_new_thread ( ThreadId parent_tid,
                               void* (*fn)(void *), 
                               void* arg )
{
   Addr new_stack;
   UInt new_stk_szb;
   ThreadId tid;
   Char msg_buf[100];

   /* Paranoia ... pthread_t values travel through UInt-sized
      registers, so the sizes must agree. */
   vg_assert(sizeof(pthread_t) == sizeof(UInt));

   vg_assert(VG_(threads)[parent_tid].status != VgTs_Empty);

   /* Grab a free slot in the thread table. */
   tid = vg_alloc_ThreadState();

   /* If we've created the main thread's tid, we're in deep trouble :) */
   vg_assert(tid != 1);
   vg_assert(VG_(is_valid_or_empty_tid)(tid));

   /* Copy the parent's CPU state into the child's, in a roundabout
      way (via baseBlock). */
   VG_(load_thread_state)(parent_tid);
   VG_(save_thread_state)(tid);

   /* Consider allocating the child a stack, if the one it already has
      is inadequate. */
   new_stk_szb = VG_PTHREAD_STACK_MIN;

   if (new_stk_szb > VG_(threads)[tid].stack_size) {
      /* Again, for good measure :) We definitely don't want to be
         allocating a stack for the main thread. */
      vg_assert(tid != 1);
      /* for now, we don't handle the case of anything other than
         assigning it for the first time. */
      vg_assert(VG_(threads)[tid].stack_size == 0);
      vg_assert(VG_(threads)[tid].stack_base == (Addr)NULL);
      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb );
      VG_(threads)[tid].stack_base = new_stack;
      VG_(threads)[tid].stack_size = new_stk_szb;
      VG_(threads)[tid].stack_highest_word
         = new_stack + new_stk_szb
                     - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
   }

   /* Start %esp just below the redzone at the top of the stack. */
   VG_(threads)[tid].m_esp
      = VG_(threads)[tid].stack_base
        + VG_(threads)[tid].stack_size
        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;

   /* Poison the redzone so that running off the stack base is
      reported. */
   if (VG_(clo_instrument))
      VGM_(make_noaccess)( VG_(threads)[tid].m_esp,
                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );

   /* push arg */
   VG_(threads)[tid].m_esp -= 4;
   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;

   /* push (bogus) return address -- it panics if fn ever returns,
      since fn must exit via __QUIT or __WAIT_JOINER instead. */
   VG_(threads)[tid].m_esp -= 4;
   * (UInt*)(VG_(threads)[tid].m_esp)
      = (UInt)&do__apply_in_new_thread_bogusRA;

   /* The two words just pushed (arg + return addr) are legitimately
      addressable. */
   if (VG_(clo_instrument))
      VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );

   /* this is where we start */
   VG_(threads)[tid].m_eip = (UInt)fn;

   if (VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "new thread, created by %d", parent_tid );
      print_sched_event(tid, msg_buf);
   }

   /* Create new thread with default attrs:
      deferred cancellation, not detached 
   */
   mostly_clear_thread_record(tid);
   VG_(threads)[tid].status = VgTs_Runnable;

   /* We inherit our parent's signal mask. */
   VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask;
   VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for);

   /* return child's tid to parent */
   SET_EDX(parent_tid, tid); /* success */
}
2054
2055
sewardj604ec3c2002-04-18 22:38:41 +00002056/* -----------------------------------------------------------
2057 MUTEXes
2058 -------------------------------------------------------- */
2059
sewardj604ec3c2002-04-18 22:38:41 +00002060/* pthread_mutex_t is a struct with at 5 words:
sewardje663cb92002-04-12 10:26:32 +00002061 typedef struct
2062 {
2063 int __m_reserved; -- Reserved for future use
2064 int __m_count; -- Depth of recursive locking
2065 _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck)
2066 int __m_kind; -- Mutex kind: fast, recursive or errcheck
2067 struct _pthread_fastlock __m_lock; -- Underlying fast lock
2068 } pthread_mutex_t;
sewardj604ec3c2002-04-18 22:38:41 +00002069
sewardj6072c362002-04-19 14:40:57 +00002070 #define PTHREAD_MUTEX_INITIALIZER \
2071 {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER}
2072 # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \
2073 {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER}
2074 # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \
2075 {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER}
2076 # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \
2077 {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER}
sewardj604ec3c2002-04-18 22:38:41 +00002078
sewardj6072c362002-04-19 14:40:57 +00002079 How we use it:
sewardj604ec3c2002-04-18 22:38:41 +00002080
sewardj6072c362002-04-19 14:40:57 +00002081 __m_kind never changes and indicates whether or not it is recursive.
2082
2083 __m_count indicates the lock count; if 0, the mutex is not owned by
2084 anybody.
2085
2086 __m_owner has a ThreadId value stuffed into it. We carefully arrange
2087 that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that
2088 statically initialised mutexes correctly appear
2089 to belong to nobody.
2090
   In summary, a not-in-use mutex is distinguished by having __m_owner
2092 == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those
2093 conditions holds, the other should too.
2094
2095 There is no linked list of threads waiting for this mutex. Instead
2096 a thread in WaitMX state points at the mutex with its waited_on_mx
2097 field. This makes _unlock() inefficient, but simple to implement the
   right semantics vis-a-vis signals.
sewardje663cb92002-04-12 10:26:32 +00002099
sewardj604ec3c2002-04-18 22:38:41 +00002100 We don't have to deal with mutex initialisation; the client side
sewardj6072c362002-04-19 14:40:57 +00002101 deals with that for us.
2102*/
sewardje663cb92002-04-12 10:26:32 +00002103
sewardj3b5d8862002-04-20 13:53:23 +00002104/* Helper fns ... */
2105static
2106void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex,
2107 Char* caller )
2108{
2109 Int i;
2110 Char msg_buf[100];
2111
2112 /* Find some arbitrary thread waiting on this mutex, and make it
2113 runnable. If none are waiting, mark the mutex as not held. */
2114 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00002115 if (VG_(threads)[i].status == VgTs_Empty)
sewardj3b5d8862002-04-20 13:53:23 +00002116 continue;
sewardj018f7622002-05-15 21:13:39 +00002117 if (VG_(threads)[i].status == VgTs_WaitMX
2118 && VG_(threads)[i].associated_mx == mutex)
sewardj3b5d8862002-04-20 13:53:23 +00002119 break;
2120 }
2121
2122 vg_assert(i <= VG_N_THREADS);
2123 if (i == VG_N_THREADS) {
2124 /* Nobody else is waiting on it. */
2125 mutex->__m_count = 0;
2126 mutex->__m_owner = VG_INVALID_THREADID;
2127 } else {
2128 /* Notionally transfer the hold to thread i, whose
2129 pthread_mutex_lock() call now returns with 0 (success). */
2130 /* The .count is already == 1. */
sewardj018f7622002-05-15 21:13:39 +00002131 vg_assert(VG_(threads)[i].associated_mx == mutex);
sewardj3b5d8862002-04-20 13:53:23 +00002132 mutex->__m_owner = (_pthread_descr)i;
sewardj018f7622002-05-15 21:13:39 +00002133 VG_(threads)[i].status = VgTs_Runnable;
2134 VG_(threads)[i].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00002135 /* m_edx already holds pth_mx_lock() success (0) */
sewardj3b5d8862002-04-20 13:53:23 +00002136
2137 if (VG_(clo_trace_pthread_level) >= 1) {
2138 VG_(sprintf)(msg_buf, "%s mx %p: RESUME",
2139 caller, mutex );
2140 print_pthread_event(i, msg_buf);
2141 }
2142 }
2143}
2144
sewardje663cb92002-04-12 10:26:32 +00002145
/* Implements both pthread_mutex_lock (is_trylock == False) and
   pthread_mutex_trylock (is_trylock == True) for thread tid.  The
   result -- 0, EINVAL, EBUSY or EDEADLK -- is delivered in tid's
   %EDX; on contention a non-trylock caller instead blocks in
   VgTs_WaitMX with associated_mx recording which mutex it wants,
   until release_one_thread_waiting_on_mutex() resumes it. */
static
void do_pthread_mutex_lock( ThreadId tid,
                            Bool is_trylock,
                            void* /* pthread_mutex_t* */ mutexV )
{
   Char msg_buf[100];
   /* Operation name used as a prefix in trace messages. */
   Char* caller
      = is_trylock ? "pthread_mutex_trylock"
                   : "pthread_mutex_lock ";

   pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;

   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ... */
   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   /* POSIX doesn't mandate this, but for sanity ... */
   if (mutex == NULL) {
      /* VG_(printf)("NULL mutex\n"); */
      SET_EDX(tid, EINVAL);
      return;
   }

   /* More paranoia ... validate __m_kind; the accepted set differs
      between glibc 2.0 and 2.1 headers.  A negative __m_count also
      marks the mutex as bogus. */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
#     ifdef GLIBC_2_1
      case PTHREAD_MUTEX_FAST_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         /* VG_(printf)("unknown __m_kind %d in mutex\n", mutex->__m_kind); */
         SET_EDX(tid, EINVAL);
         return;
   }

   if (mutex->__m_count > 0) {

      vg_assert(VG_(is_valid_tid)((ThreadId)mutex->__m_owner));

      /* Someone has it already. */
      if ((ThreadId)mutex->__m_owner == tid) {
         /* It's locked -- by me! */
         if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
            /* Recursive relock: bump the count and
               return 0 (success). */
            mutex->__m_count++;
            SET_EDX(tid, 0);
            if (0)
               VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n",
                           tid, mutex, mutex->__m_count);
            return;
         } else {
            /* Non-recursive relock: trylock reports EBUSY; a
               blocking lock would self-deadlock, hence EDEADLK. */
            if (is_trylock)
               SET_EDX(tid, EBUSY);
            else
               SET_EDX(tid, EDEADLK);
            return;
         }
      } else {
         /* Someone else has it; we have to wait.  Mark ourselves
            thusly. */
         /* GUARD: __m_count > 0 && __m_owner is valid */
         if (is_trylock) {
            /* caller is polling; so return immediately. */
            SET_EDX(tid, EBUSY);
         } else {
            /* %EDX is preloaded with 0 now so that when ownership is
               later transferred to us, resuming returns success. */
            VG_(threads)[tid].status = VgTs_WaitMX;
            VG_(threads)[tid].associated_mx = mutex;
            SET_EDX(tid, 0); /* pth_mx_lock success value */
            if (VG_(clo_trace_pthread_level) >= 1) {
               VG_(sprintf)(msg_buf, "%s mx %p: BLOCK",
                                     caller, mutex );
               print_pthread_event(tid, msg_buf);
            }
         }
         return;
      }

   } else {
      /* Nobody owns it.  Sanity check ... */
      vg_assert(mutex->__m_owner == VG_INVALID_THREADID);
      /* We get it! [for the first time]. */
      mutex->__m_count = 1;
      mutex->__m_owner = (_pthread_descr)tid;
      vg_assert(VG_(threads)[tid].associated_mx == NULL);
      /* return 0 (success). */
      SET_EDX(tid, 0);
   }

}
2247
2248
2249static
2250void do_pthread_mutex_unlock ( ThreadId tid,
sewardjd7fd4d22002-04-24 01:57:27 +00002251 void* /* pthread_mutex_t* */ mutexV )
sewardje663cb92002-04-12 10:26:32 +00002252{
sewardj3b5d8862002-04-20 13:53:23 +00002253 Char msg_buf[100];
sewardjd7fd4d22002-04-24 01:57:27 +00002254 pthread_mutex_t* mutex = (pthread_mutex_t*)mutexV;
sewardje663cb92002-04-12 10:26:32 +00002255
sewardj45b4b372002-04-16 22:50:32 +00002256 if (VG_(clo_trace_pthread_level) >= 2) {
sewardj3b5d8862002-04-20 13:53:23 +00002257 VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex );
sewardj8937c812002-04-12 20:12:20 +00002258 print_pthread_event(tid, msg_buf);
2259 }
2260
sewardj604ec3c2002-04-18 22:38:41 +00002261 /* Paranoia ... */
sewardjb48e5002002-05-13 00:16:03 +00002262 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002263 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj604ec3c2002-04-18 22:38:41 +00002264
2265 if (mutex == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002266 SET_EDX(tid, EINVAL);
sewardj604ec3c2002-04-18 22:38:41 +00002267 return;
2268 }
2269
2270 /* More paranoia ... */
2271 switch (mutex->__m_kind) {
sewardj2a1dcce2002-04-22 12:45:25 +00002272# ifndef GLIBC_2_1
sewardj604ec3c2002-04-18 22:38:41 +00002273 case PTHREAD_MUTEX_TIMED_NP:
sewardj2a1dcce2002-04-22 12:45:25 +00002274 case PTHREAD_MUTEX_ADAPTIVE_NP:
2275# endif
sewardja1679dd2002-05-10 22:31:40 +00002276# ifdef GLIBC_2_1
sewardj8e651d72002-05-10 21:00:19 +00002277 case PTHREAD_MUTEX_FAST_NP:
sewardja1679dd2002-05-10 22:31:40 +00002278# endif
sewardj604ec3c2002-04-18 22:38:41 +00002279 case PTHREAD_MUTEX_RECURSIVE_NP:
2280 case PTHREAD_MUTEX_ERRORCHECK_NP:
sewardj604ec3c2002-04-18 22:38:41 +00002281 if (mutex->__m_count >= 0) break;
2282 /* else fall thru */
2283 default:
sewardjc3bd5f52002-05-01 03:24:23 +00002284 SET_EDX(tid, EINVAL);
sewardj604ec3c2002-04-18 22:38:41 +00002285 return;
2286 }
sewardje663cb92002-04-12 10:26:32 +00002287
2288 /* Barf if we don't currently hold the mutex. */
sewardj604ec3c2002-04-18 22:38:41 +00002289 if (mutex->__m_count == 0 /* nobody holds it */
2290 || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
sewardjc3bd5f52002-05-01 03:24:23 +00002291 SET_EDX(tid, EPERM);
sewardje663cb92002-04-12 10:26:32 +00002292 return;
2293 }
2294
sewardjf8f819e2002-04-17 23:21:37 +00002295 /* If it's a multiply-locked recursive mutex, just decrement the
2296 lock count and return. */
sewardj604ec3c2002-04-18 22:38:41 +00002297 if (mutex->__m_count > 1) {
2298 vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP);
2299 mutex->__m_count --;
sewardjc3bd5f52002-05-01 03:24:23 +00002300 SET_EDX(tid, 0); /* success */
sewardjf8f819e2002-04-17 23:21:37 +00002301 return;
2302 }
2303
sewardj604ec3c2002-04-18 22:38:41 +00002304 /* Now we're sure it is locked exactly once, and by the thread who
sewardjf8f819e2002-04-17 23:21:37 +00002305 is now doing an unlock on it. */
sewardj604ec3c2002-04-18 22:38:41 +00002306 vg_assert(mutex->__m_count == 1);
sewardj6072c362002-04-19 14:40:57 +00002307 vg_assert((ThreadId)mutex->__m_owner == tid);
sewardjf8f819e2002-04-17 23:21:37 +00002308
sewardj3b5d8862002-04-20 13:53:23 +00002309 /* Release at max one thread waiting on this mutex. */
2310 release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
sewardje663cb92002-04-12 10:26:32 +00002311
sewardj3b5d8862002-04-20 13:53:23 +00002312 /* Our (tid's) pth_unlock() returns with 0 (success). */
sewardjc3bd5f52002-05-01 03:24:23 +00002313 SET_EDX(tid, 0); /* Success. */
sewardje663cb92002-04-12 10:26:32 +00002314}
2315
2316
sewardj6072c362002-04-19 14:40:57 +00002317/* -----------------------------------------------------------
2318 CONDITION VARIABLES
2319 -------------------------------------------------------- */
sewardje663cb92002-04-12 10:26:32 +00002320
sewardj6072c362002-04-19 14:40:57 +00002321/* The relevant native types are as follows:
2322 (copied from /usr/include/bits/pthreadtypes.h)
sewardj77e466c2002-04-14 02:29:29 +00002323
sewardj6072c362002-04-19 14:40:57 +00002324 -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER
2325 typedef struct
2326 {
2327 struct _pthread_fastlock __c_lock; -- Protect against concurrent access
2328 _pthread_descr __c_waiting; -- Threads waiting on this condition
2329 } pthread_cond_t;
sewardj77e466c2002-04-14 02:29:29 +00002330
sewardj6072c362002-04-19 14:40:57 +00002331 -- Attribute for conditionally variables.
2332 typedef struct
2333 {
2334 int __dummy;
2335 } pthread_condattr_t;
sewardj77e466c2002-04-14 02:29:29 +00002336
sewardj6072c362002-04-19 14:40:57 +00002337 #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0}
sewardj77e466c2002-04-14 02:29:29 +00002338
sewardj3b5d8862002-04-20 13:53:23 +00002339 We don't use any fields of pthread_cond_t for anything at all.
2340 Only the identity of the CVs is important.
sewardj6072c362002-04-19 14:40:57 +00002341
2342 Linux pthreads supports no attributes on condition variables, so we
sewardj3b5d8862002-04-20 13:53:23 +00002343 don't need to think too hard there. */
sewardj6072c362002-04-19 14:40:57 +00002344
sewardj77e466c2002-04-14 02:29:29 +00002345
sewardj5f07b662002-04-23 16:52:51 +00002346static
2347void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid )
2348{
2349 Char msg_buf[100];
2350 pthread_mutex_t* mx;
2351 pthread_cond_t* cv;
2352
sewardjb48e5002002-05-13 00:16:03 +00002353 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002354 && VG_(threads)[tid].status == VgTs_WaitCV
2355 && VG_(threads)[tid].awaken_at != 0xFFFFFFFF);
2356 mx = VG_(threads)[tid].associated_mx;
sewardj5f07b662002-04-23 16:52:51 +00002357 vg_assert(mx != NULL);
sewardj018f7622002-05-15 21:13:39 +00002358 cv = VG_(threads)[tid].associated_cv;
sewardj5f07b662002-04-23 16:52:51 +00002359 vg_assert(cv != NULL);
2360
2361 if (mx->__m_owner == VG_INVALID_THREADID) {
2362 /* Currently unheld; hand it out to thread tid. */
2363 vg_assert(mx->__m_count == 0);
sewardj018f7622002-05-15 21:13:39 +00002364 VG_(threads)[tid].status = VgTs_Runnable;
sewardjc3bd5f52002-05-01 03:24:23 +00002365 SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */
sewardj018f7622002-05-15 21:13:39 +00002366 VG_(threads)[tid].associated_cv = NULL;
2367 VG_(threads)[tid].associated_mx = NULL;
sewardj5f07b662002-04-23 16:52:51 +00002368 mx->__m_owner = (_pthread_descr)tid;
2369 mx->__m_count = 1;
2370
2371 if (VG_(clo_trace_pthread_level) >= 1) {
sewardjc3bd5f52002-05-01 03:24:23 +00002372 VG_(sprintf)(msg_buf,
2373 "pthread_cond_timedwai cv %p: TIMEOUT with mx %p",
2374 cv, mx );
sewardj5f07b662002-04-23 16:52:51 +00002375 print_pthread_event(tid, msg_buf);
2376 }
2377 } else {
2378 /* Currently held. Make thread tid be blocked on it. */
2379 vg_assert(mx->__m_count > 0);
sewardj018f7622002-05-15 21:13:39 +00002380 VG_(threads)[tid].status = VgTs_WaitMX;
sewardjc3bd5f52002-05-01 03:24:23 +00002381 SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */
sewardj018f7622002-05-15 21:13:39 +00002382 VG_(threads)[tid].associated_cv = NULL;
2383 VG_(threads)[tid].associated_mx = mx;
sewardj5f07b662002-04-23 16:52:51 +00002384 if (VG_(clo_trace_pthread_level) >= 1) {
2385 VG_(sprintf)(msg_buf,
2386 "pthread_cond_timedwai cv %p: TIMEOUT -> BLOCK for mx %p",
2387 cv, mx );
2388 print_pthread_event(tid, msg_buf);
2389 }
2390
2391 }
2392}
2393
2394
sewardj3b5d8862002-04-20 13:53:23 +00002395static
2396void release_N_threads_waiting_on_cond ( pthread_cond_t* cond,
2397 Int n_to_release,
2398 Char* caller )
2399{
2400 Int i;
2401 Char msg_buf[100];
2402 pthread_mutex_t* mx;
2403
2404 while (True) {
2405 if (n_to_release == 0)
2406 return;
2407
2408 /* Find a thread waiting on this CV. */
2409 for (i = 1; i < VG_N_THREADS; i++) {
sewardj018f7622002-05-15 21:13:39 +00002410 if (VG_(threads)[i].status == VgTs_Empty)
sewardj3b5d8862002-04-20 13:53:23 +00002411 continue;
sewardj018f7622002-05-15 21:13:39 +00002412 if (VG_(threads)[i].status == VgTs_WaitCV
2413 && VG_(threads)[i].associated_cv == cond)
sewardj3b5d8862002-04-20 13:53:23 +00002414 break;
2415 }
2416 vg_assert(i <= VG_N_THREADS);
2417
2418 if (i == VG_N_THREADS) {
2419 /* Nobody else is waiting on it. */
2420 return;
2421 }
2422
sewardj018f7622002-05-15 21:13:39 +00002423 mx = VG_(threads)[i].associated_mx;
sewardj3b5d8862002-04-20 13:53:23 +00002424 vg_assert(mx != NULL);
2425
2426 if (mx->__m_owner == VG_INVALID_THREADID) {
2427 /* Currently unheld; hand it out to thread i. */
2428 vg_assert(mx->__m_count == 0);
sewardj018f7622002-05-15 21:13:39 +00002429 VG_(threads)[i].status = VgTs_Runnable;
2430 VG_(threads)[i].associated_cv = NULL;
2431 VG_(threads)[i].associated_mx = NULL;
sewardj3b5d8862002-04-20 13:53:23 +00002432 mx->__m_owner = (_pthread_descr)i;
2433 mx->__m_count = 1;
sewardj5f07b662002-04-23 16:52:51 +00002434 /* .m_edx already holds pth_cond_wait success value (0) */
sewardj3b5d8862002-04-20 13:53:23 +00002435
2436 if (VG_(clo_trace_pthread_level) >= 1) {
2437 VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p",
2438 caller, cond, mx );
2439 print_pthread_event(i, msg_buf);
2440 }
2441
2442 } else {
2443 /* Currently held. Make thread i be blocked on it. */
sewardj5f07b662002-04-23 16:52:51 +00002444 vg_assert(mx->__m_count > 0);
sewardj018f7622002-05-15 21:13:39 +00002445 VG_(threads)[i].status = VgTs_WaitMX;
2446 VG_(threads)[i].associated_cv = NULL;
2447 VG_(threads)[i].associated_mx = mx;
sewardjc3bd5f52002-05-01 03:24:23 +00002448 SET_EDX(i, 0); /* pth_cond_wait success value */
sewardj3b5d8862002-04-20 13:53:23 +00002449
2450 if (VG_(clo_trace_pthread_level) >= 1) {
2451 VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p",
2452 caller, cond, mx );
2453 print_pthread_event(i, msg_buf);
2454 }
2455
2456 }
2457
2458 n_to_release--;
2459 }
2460}
2461
2462
/* Implements both pthread_cond_wait() and pthread_cond_timedwait()
   for thread tid.  The caller must hold 'mutex'.  We validate the
   arguments, park the thread on 'cond' (VgTs_WaitCV, recording the
   cond, mutex and wakeup deadline), and then release the mutex.
   The client call's return value is NOT set here: it is set later,
   either by release_N_threads_waiting_on_cond (0) or by
   do_pthread_cond_timedwait_TIMEOUT (ETIMEDOUT). */
static
void do_pthread_cond_wait ( ThreadId tid,
                            pthread_cond_t *cond,
                            pthread_mutex_t *mutex,
                            UInt ms_end )
{
   Char msg_buf[100];

   /* If ms_end == 0xFFFFFFFF, wait forever (no timeout). Otherwise,
      ms_end is the ending millisecond. */

   /* pre: mutex should be a valid mutex and owned by tid. */
   if (VG_(clo_trace_pthread_level) >= 2) {
      VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...",
                            cond, mutex, ms_end );
      print_pthread_event(tid, msg_buf);
   }

   /* Paranoia ...: requester must be valid and runnable. */
   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   if (mutex == NULL || cond == NULL) {
      SET_EDX(tid, EINVAL);
      return;
   }

   /* More paranoia ...: reject structures that do not look like a
      plausibly-initialised mutex (negative __m_count). */
   switch (mutex->__m_kind) {
#     ifndef GLIBC_2_1
      case PTHREAD_MUTEX_TIMED_NP:
      case PTHREAD_MUTEX_ADAPTIVE_NP:
#     endif
#     ifdef GLIBC_2_1
      case PTHREAD_MUTEX_FAST_NP:
#     endif
      case PTHREAD_MUTEX_RECURSIVE_NP:
      case PTHREAD_MUTEX_ERRORCHECK_NP:
         if (mutex->__m_count >= 0) break;
         /* else fall thru */
      default:
         SET_EDX(tid, EINVAL);
         return;
   }

   /* Barf if we don't currently hold the mutex. */
   if (mutex->__m_count == 0 /* nobody holds it */
       || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
      SET_EDX(tid, EINVAL);
      return;
   }

   /* Queue ourselves on the condition.  awaken_at is honoured by the
      scheduler's timeout sweep (do_pthread_cond_timedwait_TIMEOUT). */
   VG_(threads)[tid].status = VgTs_WaitCV;
   VG_(threads)[tid].associated_cv = cond;
   VG_(threads)[tid].associated_mx = mutex;
   VG_(threads)[tid].awaken_at = ms_end;

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf,
                   "pthread_cond_wait cv %p, mx %p: BLOCK",
                   cond, mutex );
      print_pthread_event(tid, msg_buf);
   }

   /* Release the mutex; this may immediately hand it to another
      thread blocked in pthread_mutex_lock. */
   release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " );
}
2531
2532
2533static
2534void do_pthread_cond_signal_or_broadcast ( ThreadId tid,
2535 Bool broadcast,
2536 pthread_cond_t *cond )
2537{
2538 Char msg_buf[100];
2539 Char* caller
2540 = broadcast ? "pthread_cond_broadcast"
2541 : "pthread_cond_signal ";
2542
2543 if (VG_(clo_trace_pthread_level) >= 2) {
2544 VG_(sprintf)(msg_buf, "%s cv %p ...",
2545 caller, cond );
2546 print_pthread_event(tid, msg_buf);
2547 }
2548
2549 /* Paranoia ... */
sewardjb48e5002002-05-13 00:16:03 +00002550 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002551 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj3b5d8862002-04-20 13:53:23 +00002552
2553 if (cond == NULL) {
sewardjc3bd5f52002-05-01 03:24:23 +00002554 SET_EDX(tid, EINVAL);
sewardj3b5d8862002-04-20 13:53:23 +00002555 return;
2556 }
2557
2558 release_N_threads_waiting_on_cond (
2559 cond,
2560 broadcast ? VG_N_THREADS : 1,
2561 caller
2562 );
2563
sewardjc3bd5f52002-05-01 03:24:23 +00002564 SET_EDX(tid, 0); /* success */
sewardj3b5d8862002-04-20 13:53:23 +00002565}
2566
sewardj77e466c2002-04-14 02:29:29 +00002567
sewardj5f07b662002-04-23 16:52:51 +00002568/* -----------------------------------------------------------
2569 THREAD SPECIFIC DATA
2570 -------------------------------------------------------- */
2571
2572static __inline__
2573Bool is_valid_key ( ThreadKey k )
2574{
2575 /* k unsigned; hence no < 0 check */
2576 if (k >= VG_N_THREAD_KEYS) return False;
2577 if (!vg_thread_keys[k].inuse) return False;
2578 return True;
2579}
2580
/* Implements pthread_key_create() for thread tid: allocate the first
   free slot in vg_thread_keys, write its index to *key (marking that
   word readable under --instrument), and return 0 via SET_EDX.
   Panics rather than returning EAGAIN when the key table is full.
   NOTE(review): the 'destructor' argument is traced but never stored
   anywhere in this function, so key destructors are apparently not
   run — confirm whether that is intentional. */
static
void do_pthread_key_create ( ThreadId tid,
                             pthread_key_t* key,
                             void (*destructor)(void*) )
{
   Int i;
   Char msg_buf[100];

   if (VG_(clo_trace_pthread_level) >= 1) {
      VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p",
                            key, destructor );
      print_pthread_event(tid, msg_buf);
   }

   /* ThreadKey indices round-trip through the client's pthread_key_t. */
   vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey));
   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   /* Find the first unused key slot. */
   for (i = 0; i < VG_N_THREAD_KEYS; i++)
      if (!vg_thread_keys[i].inuse)
         break;

   if (i == VG_N_THREAD_KEYS) {
      /* The POSIX-conformant response (EAGAIN) is disabled in favour
         of a hard panic, so the shortage is not silently survivable. */
      /* SET_EDX(tid, EAGAIN);
         return;
      */
      VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;"
                 " increase and recompile");
   }

   vg_thread_keys[i].inuse = True;

   /* TODO: check key for addressibility */
   *key = i;
   if (VG_(clo_instrument))
      VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );

   SET_EDX(tid, 0);
}
2620
2621
2622static
2623void do_pthread_key_delete ( ThreadId tid, pthread_key_t key )
2624{
2625 Char msg_buf[100];
2626 if (VG_(clo_trace_pthread_level) >= 1) {
2627 VG_(sprintf)(msg_buf, "pthread_key_delete key %d",
2628 key );
2629 print_pthread_event(tid, msg_buf);
2630 }
2631
sewardjb48e5002002-05-13 00:16:03 +00002632 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002633 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002634
2635 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002636 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002637 return;
2638 }
2639
2640 vg_thread_keys[key].inuse = False;
2641
2642 /* Optional. We're not required to do this, although it shouldn't
2643 make any difference to programs which use the key/specifics
2644 functions correctly. */
sewardj3b13f0e2002-04-25 20:17:29 +00002645# if 1
sewardj5f07b662002-04-23 16:52:51 +00002646 for (tid = 1; tid < VG_N_THREADS; tid++) {
sewardj018f7622002-05-15 21:13:39 +00002647 if (VG_(threads)[tid].status != VgTs_Empty)
2648 VG_(threads)[tid].specifics[key] = NULL;
sewardj5f07b662002-04-23 16:52:51 +00002649 }
sewardj3b13f0e2002-04-25 20:17:29 +00002650# endif
sewardj5f07b662002-04-23 16:52:51 +00002651}
2652
2653
2654static
2655void do_pthread_getspecific ( ThreadId tid, pthread_key_t key )
2656{
2657 Char msg_buf[100];
2658 if (VG_(clo_trace_pthread_level) >= 1) {
2659 VG_(sprintf)(msg_buf, "pthread_getspecific key %d",
2660 key );
2661 print_pthread_event(tid, msg_buf);
2662 }
2663
sewardjb48e5002002-05-13 00:16:03 +00002664 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002665 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002666
2667 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002668 SET_EDX(tid, (UInt)NULL);
sewardj5f07b662002-04-23 16:52:51 +00002669 return;
2670 }
2671
sewardj018f7622002-05-15 21:13:39 +00002672 SET_EDX(tid, (UInt)VG_(threads)[tid].specifics[key]);
sewardj5f07b662002-04-23 16:52:51 +00002673}
2674
2675
2676static
2677void do_pthread_setspecific ( ThreadId tid,
2678 pthread_key_t key,
2679 void *pointer )
2680{
2681 Char msg_buf[100];
2682 if (VG_(clo_trace_pthread_level) >= 1) {
2683 VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p",
2684 key, pointer );
2685 print_pthread_event(tid, msg_buf);
2686 }
2687
sewardjb48e5002002-05-13 00:16:03 +00002688 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002689 && VG_(threads)[tid].status == VgTs_Runnable);
sewardj5f07b662002-04-23 16:52:51 +00002690
2691 if (!is_valid_key(key)) {
sewardjc3bd5f52002-05-01 03:24:23 +00002692 SET_EDX(tid, EINVAL);
sewardj5f07b662002-04-23 16:52:51 +00002693 return;
2694 }
2695
sewardj018f7622002-05-15 21:13:39 +00002696 VG_(threads)[tid].specifics[key] = pointer;
sewardjc3bd5f52002-05-01 03:24:23 +00002697 SET_EDX(tid, 0);
sewardj5f07b662002-04-23 16:52:51 +00002698}
2699
2700
sewardjb48e5002002-05-13 00:16:03 +00002701/* ---------------------------------------------------
2702 SIGNALS
2703 ------------------------------------------------ */
2704
2705/* See comment in vg_libthread.c:pthread_sigmask() regarding
sewardj018f7622002-05-15 21:13:39 +00002706 deliberate confusion of types sigset_t and vki_sigset_t. Return 0
2707 for OK and 1 for some kind of addressing error, which the
2708 vg_libpthread.c routine turns into return values 0 and EFAULT
2709 respectively. */
sewardjb48e5002002-05-13 00:16:03 +00002710static
2711void do_pthread_sigmask ( ThreadId tid,
sewardj018f7622002-05-15 21:13:39 +00002712 Int vki_how,
sewardjb48e5002002-05-13 00:16:03 +00002713 vki_ksigset_t* newmask,
2714 vki_ksigset_t* oldmask )
2715{
2716 Char msg_buf[100];
2717 if (VG_(clo_trace_pthread_level) >= 1) {
2718 VG_(sprintf)(msg_buf,
sewardj018f7622002-05-15 21:13:39 +00002719 "pthread_sigmask vki_how %d, newmask %p, oldmask %p",
2720 vki_how, newmask, oldmask );
sewardjb48e5002002-05-13 00:16:03 +00002721 print_pthread_event(tid, msg_buf);
2722 }
2723
2724 vg_assert(VG_(is_valid_tid)(tid)
sewardj018f7622002-05-15 21:13:39 +00002725 && VG_(threads)[tid].status == VgTs_Runnable);
sewardjb48e5002002-05-13 00:16:03 +00002726
2727 if (VG_(clo_instrument)) {
2728 /* TODO check newmask/oldmask are addressible/defined */
2729 }
2730
sewardj018f7622002-05-15 21:13:39 +00002731 VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask );
sewardjb48e5002002-05-13 00:16:03 +00002732
sewardj3a951cf2002-05-15 22:25:47 +00002733 if (newmask && VG_(clo_instrument)) {
2734 VGM_(make_readable)( (Addr)newmask, sizeof(vki_ksigset_t) );
2735 }
2736
sewardj018f7622002-05-15 21:13:39 +00002737 /* Success. */
sewardjb48e5002002-05-13 00:16:03 +00002738 SET_EDX(tid, 0);
2739}
2740
2741
/* Implements the blocking half of sigwait() for thread tid: record
   the awaited set in sigs_waited_for, move the thread to
   VgTs_WaitSIG, and force a host-signal-state resync.  No result is
   delivered here — *sig and the return value are filled in later by
   the signal machinery when one of the awaited signals arrives. */
static
void do_sigwait ( ThreadId tid,
                  vki_ksigset_t* set,
                  Int* sig )
{
   vki_ksigset_t irrelevant_sigmask;   /* out-param we discard */
   Char msg_buf[100];

   if (VG_(clo_trace_signals) || VG_(clo_trace_sched)) {
      VG_(sprintf)(msg_buf,
         "suspend due to sigwait(): set %p, sig %p",
         set, sig );
      print_pthread_event(tid, msg_buf);
   }

   vg_assert(VG_(is_valid_tid)(tid)
             && VG_(threads)[tid].status == VgTs_Runnable);

   /* Change SCSS */
   VG_(threads)[tid].sigs_waited_for = *set;
   VG_(threads)[tid].status = VgTs_WaitSIG;

   /* Block everything at host level, then let the lazy SCSS update
      recompute what should actually be deliverable. */
   VG_(block_all_host_signals)( &irrelevant_sigmask );
   VG_(handle_SCSS_change)( False /* lazy update */ );
}
2767
2768
2769static
2770void do_pthread_kill ( ThreadId tid, /* me */
2771 ThreadId thread, /* thread to signal */
2772 Int sig )
2773{
2774 Char msg_buf[100];
2775
2776 if (VG_(clo_trace_signals) || VG_(clo_trace_pthread_level) >= 1) {
2777 VG_(sprintf)(msg_buf,
2778 "pthread_kill thread %d, signo %d",
2779 thread, sig );
2780 print_pthread_event(tid, msg_buf);
2781 }
2782
2783 vg_assert(VG_(is_valid_tid)(tid)
2784 && VG_(threads)[tid].status == VgTs_Runnable);
2785
2786 if (!VG_(is_valid_tid)(tid)) {
2787 SET_EDX(tid, -VKI_ESRCH);
2788 return;
2789 }
2790
2791 if (sig < 1 || sig > VKI_KNSIG) {
2792 SET_EDX(tid, -VKI_EINVAL);
2793 return;
2794 }
2795
2796 VG_(send_signal_to_thread)( thread, sig );
2797 SET_EDX(tid, 0);
sewardjb48e5002002-05-13 00:16:03 +00002798}
2799
2800
sewardje663cb92002-04-12 10:26:32 +00002801/* ---------------------------------------------------------------------
2802 Handle non-trivial client requests.
2803 ------------------------------------------------------------------ */
2804
/* Dispatch a non-trivial client request issued by thread tid.  The
   thread's %EAX points at a UInt array: arg[0] is the request code,
   arg[1..3] are the arguments.  Each case delegates to a do_*
   handler, which is responsible for setting the client-visible
   return value (via SET_EDX) and/or changing the thread's state;
   cases in the MAKE_NOACCESS..DISCARD_TRANSLATIONS group are
   forwarded to the generic VG_(handle_client_request).  Unknown
   request codes are fatal. */
static
void do_nontrivial_clientreq ( ThreadId tid )
{
   UInt* arg = (UInt*)(VG_(threads)[tid].m_eax);
   UInt req_no = arg[0];
   switch (req_no) {

      case VG_USERREQ__PTHREAD_JOIN:
         do_pthread_join( tid, arg[1], (void**)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_COND_WAIT:
         do_pthread_cond_wait( tid,
                               (pthread_cond_t *)(arg[1]),
                               (pthread_mutex_t *)(arg[2]),
                               0xFFFFFFFF /* no timeout */ );
         break;

      case VG_USERREQ__PTHREAD_COND_TIMEDWAIT:
         do_pthread_cond_wait( tid,
                               (pthread_cond_t *)(arg[1]),
                               (pthread_mutex_t *)(arg[2]),
                               arg[3] /* timeout millisecond point */ );
         break;

      case VG_USERREQ__PTHREAD_COND_SIGNAL:
         do_pthread_cond_signal_or_broadcast(
            tid,
            False, /* signal, not broadcast */
            (pthread_cond_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_COND_BROADCAST:
         do_pthread_cond_signal_or_broadcast(
            tid,
            True, /* broadcast, not signal */
            (pthread_cond_t *)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_KEY_CREATE:
         do_pthread_key_create ( tid,
                                 (pthread_key_t*)(arg[1]),
                                 (void(*)(void*))(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_KEY_DELETE:
         do_pthread_key_delete ( tid,
                                 (pthread_key_t)(arg[1]) );
         break;

      case VG_USERREQ__PTHREAD_SETSPECIFIC:
         do_pthread_setspecific ( tid,
                                  (pthread_key_t)(arg[1]),
                                  (void*)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_SIGMASK:
         do_pthread_sigmask ( tid,
                              arg[1],
                              (vki_ksigset_t*)(arg[2]),
                              (vki_ksigset_t*)(arg[3]) );
         break;

      case VG_USERREQ__SIGWAIT:
         do_sigwait ( tid,
                      (vki_ksigset_t*)(arg[1]),
                      (Int*)(arg[2]) );
         break;

      case VG_USERREQ__PTHREAD_KILL:
         do_pthread_kill ( tid, arg[1], arg[2] );
         break;

      case VG_USERREQ__PTHREAD_YIELD:
         do_pthread_yield ( tid );
         /* because this is classified as a non-trivial client
            request, the scheduler should now select a new thread to
            run. */
         break;

      case VG_USERREQ__SET_CANCELSTATE:
         do__set_cancelstate ( tid, arg[1] );
         break;

      case VG_USERREQ__SET_CANCELTYPE:
         do__set_canceltype ( tid, arg[1] );
         break;

      case VG_USERREQ__SET_OR_GET_DETACH:
         do__set_or_get_detach ( tid, arg[1], arg[2] );
         break;

      case VG_USERREQ__SET_CANCELPEND:
         do__set_cancelpend ( tid, arg[1], (void(*)(void*))arg[2] );
         break;

      case VG_USERREQ__WAIT_JOINER:
         do__wait_joiner ( tid, (void*)arg[1] );
         break;

      case VG_USERREQ__QUIT:
         do__quit ( tid );
         break;

      case VG_USERREQ__APPLY_IN_NEW_THREAD:
         do__apply_in_new_thread ( tid, (void*(*)(void*))arg[1],
                                        (void*)arg[2] );
         break;

      /* Memory-state and miscellaneous requests: handled generically,
         with the result delivered straight back to the client. */
      case VG_USERREQ__MAKE_NOACCESS:
      case VG_USERREQ__MAKE_WRITABLE:
      case VG_USERREQ__MAKE_READABLE:
      case VG_USERREQ__DISCARD:
      case VG_USERREQ__CHECK_WRITABLE:
      case VG_USERREQ__CHECK_READABLE:
      case VG_USERREQ__MAKE_NOACCESS_STACK:
      case VG_USERREQ__RUNNING_ON_VALGRIND:
      case VG_USERREQ__DO_LEAK_CHECK:
      case VG_USERREQ__DISCARD_TRANSLATIONS:
         SET_EDX(
            tid,
            VG_(handle_client_request) ( &VG_(threads)[tid], arg )
         );
         break;

      case VG_USERREQ__SIGNAL_RETURNS:
         handle_signal_return(tid);
         break;

      default:
         VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
         VG_(panic)("handle_private_client_pthread_request: "
                    "unknown request");
         /*NOTREACHED*/
         break;
   }
}
2942
2943
sewardj6072c362002-04-19 14:40:57 +00002944/* ---------------------------------------------------------------------
2945 Sanity checking.
2946 ------------------------------------------------------------------ */
2947
2948/* Internal consistency checks on the sched/pthread structures. */
/* Internal consistency checks on the sched/pthread structures.
   Verifies, for every thread slot: (a) the WaitMX/WaitCV invariants
   on associated_mx/associated_cv; (b) that no non-root thread has
   (nearly) exhausted its stack, aborting Valgrind if one has; and
   (c) that sigs_waited_for is non-empty exactly for VgTs_WaitSIG
   threads.  Finally checks that unused TSD keys carry no destructor. */
static
void scheduler_sanity ( void )
{
   pthread_mutex_t* mx;
   pthread_cond_t* cv;
   Int i;

   /* VG_(printf)("scheduler_sanity\n"); */
   for (i = 1; i < VG_N_THREADS; i++) {
      mx = VG_(threads)[i].associated_mx;
      cv = VG_(threads)[i].associated_cv;
      if (VG_(threads)[i].status == VgTs_WaitMX) {
         /* If we're waiting on a MX: (1) the mx is not null, (2, 3)
            it's actually held by someone, since otherwise this thread
            is deadlocked, (4) the mutex's owner is not us, since
            otherwise this thread is also deadlocked. The logic in
            do_pthread_mutex_lock rejects attempts by a thread to lock
            a (non-recursive) mutex which it already owns.

            (2) has been seen to fail sometimes. I don't know why.
            Possibly to do with signals. */
         vg_assert(cv == NULL);
         /* 1 */ vg_assert(mx != NULL);
         /* 2 */ vg_assert(mx->__m_count > 0);
         /* 3 */ vg_assert(VG_(is_valid_tid)((ThreadId)mx->__m_owner));
         /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner);
      } else
      if (VG_(threads)[i].status == VgTs_WaitCV) {
         /* A CV waiter must have both the CV and its parked mutex. */
         vg_assert(cv != NULL);
         vg_assert(mx != NULL);
      } else {
         /* Unfortunately these don't hold true when a sighandler is
            running. To be fixed. */
         /* vg_assert(cv == NULL); */
         /* vg_assert(mx == NULL); */
      }

      if (VG_(threads)[i].status != VgTs_Empty) {
         /* Stack usage = distance from the stack top recorded at
            thread creation down to the current %esp. */
         Int
         stack_used = (Addr)VG_(threads)[i].stack_highest_word
                      - (Addr)VG_(threads)[i].m_esp;
         if (i > 1 /* not the root thread */
             && stack_used
                >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
            VG_(message)(Vg_UserMsg,
               "Warning: STACK OVERFLOW: "
               "thread %d: stack used %d, available %d",
               i, stack_used, VG_PTHREAD_STACK_MIN );
            VG_(message)(Vg_UserMsg,
               "Terminating Valgrind. If thread(s) "
               "really need more stack, increase");
            VG_(message)(Vg_UserMsg,
               "VG_PTHREAD_STACK_SIZE in vg_include.h and recompile.");
            VG_(exit)(1);
         }

         /* WaitSIG threads, and only those, have a non-empty
            sigs_waited_for set. */
         if (VG_(threads)[i].status == VgTs_WaitSIG) {
            vg_assert( ! VG_(kisemptysigset)(
                          & VG_(threads)[i].sigs_waited_for) );
         } else {
            vg_assert( VG_(kisemptysigset)(
                          & VG_(threads)[i].sigs_waited_for) );
         }

      }
   }

   for (i = 0; i < VG_N_THREAD_KEYS; i++) {
      if (!vg_thread_keys[i].inuse)
         vg_assert(vg_thread_keys[i].destructor == NULL);
   }
}
3021
3022
sewardje663cb92002-04-12 10:26:32 +00003023/*--------------------------------------------------------------------*/
3024/*--- end vg_scheduler.c ---*/
3025/*--------------------------------------------------------------------*/