sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- A user-space pthreads implementation. vg_scheduler.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Valgrind, an x86 protected-mode emulator |
| 8 | designed for debugging and profiling binaries on x86-Unixes. |
| 9 | |
| 10 | Copyright (C) 2000-2002 Julian Seward |
| 11 | jseward@acm.org |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file LICENSE. |
| 29 | */ |
| 30 | |
| 31 | #include "vg_include.h" |
| 32 | #include "vg_constants.h" |
| 33 | |
| 34 | #include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and |
| 35 | VG_USERREQ__DO_LEAK_CHECK */ |
| 36 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 37 | /* BORKAGE/ISSUES as of 14 Apr 02 |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 38 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 39 | Note! This pthreads implementation is so poor as to not be |
| 40 | suitable for use by anyone at all! |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 41 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 42 | - Currently, when a signal is run, just the ThreadStatus.status fields |
| 43 | are saved in the signal frame, along with the CPU state. Question: |
| 44 | should I also save and restore: |
| 45 | ThreadStatus.joiner |
| 46 | ThreadStatus.waited_on_mid |
| 47 | ThreadStatus.awaken_at |
| 48 | ThreadStatus.retval |
| 49 | Currently unsure, and so am not doing so. |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 50 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 51 | - Signals interrupting read/write and nanosleep: SA_RESTART settings. |
| 52 | Read/write correctly return with EINTR when SA_RESTART isn't |
| 53 | specified and they are interrupted by a signal. nanosleep just |
| 54 | pretends signals don't exist -- should be fixed. |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 55 | |
sewardj | 75fe189 | 2002-04-14 02:46:33 +0000 | [diff] [blame] | 56 | - Read/write syscall starts: don't crap out when the initial |
| 57 | nonblocking read/write returns an error. |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 58 | |
sewardj | 9a199dc | 2002-04-14 13:01:38 +0000 | [diff] [blame] | 59 | - Get rid of restrictions re use of sigaltstack; they are no longer |
| 60 | needed. |
| 61 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 62 | - Fix signals properly, so that each thread has its own blocking mask. |
| 63 | Currently this isn't done, and (worse?) signals are delivered to |
| 64 | Thread 1 (the root thread) regardless. |
| 65 | |
| 66 | So, what's the deal with signals and mutexes? If a thread is |
| 67 | blocked on a mutex, or for a condition variable for that matter, can |
| 68 | signals still be delivered to it? This has serious consequences -- |
| 69 | deadlocks, etc. |
| 70 | |
sewardj | e462e20 | 2002-04-13 04:09:07 +0000 | [diff] [blame] | 71 | */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 72 | |
| 73 | |
| 74 | /* --------------------------------------------------------------------- |
| 75 | Types and globals for the scheduler. |
| 76 | ------------------------------------------------------------------ */ |
| 77 | |
| 78 | /* type ThreadId is defined in vg_include.h. */ |
| 79 | |
| 80 | /* struct ThreadState is defined in vg_include.h. */ |
| 81 | |
/* Private globals.  A statically allocated array of threads.  NOTE:
   [0] is never used, to simplify the simulation of initialisers for
   LinuxThreads.  Slots whose status is VgTs_Empty are free. */
static ThreadState vg_threads[VG_N_THREADS];

/* The tid of the thread currently in VG_(baseBlock), or
   VG_INVALID_THREADID when no thread's state is loaded there.  It is
   set by VG_(load_thread_state) and cleared again by
   VG_(save_thread_state). */
static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID;


/* vg_oursignalhandler() might longjmp().  Here's the jmp_buf. */
jmp_buf VG_(scheduler_jmpbuf);
/* ... and if so, here's the signal which caused it to do so. */
Int     VG_(longjmpd_on_signal);
| 95 | |
| 96 | |
/* Machinery to keep track of which threads are waiting on which
   fds.  One record describes one outstanding request; a record is
   free when its fd field is -1. */
typedef
   struct {
      /* The thread which made the request. */
      ThreadId tid;

      /* The next two fields describe the request. */
      /* File descriptor waited for.  -1 means this slot is not in use */
      Int      fd;
      /* The syscall number the fd is used in. */
      Int      syscall_no;

      /* False => still waiting for select to tell us the fd is ready
         to go.  True => the fd is ready, but the results have not yet
         been delivered back to the calling thread.  Once the latter
         happens, this entire record is marked as no longer in use, by
         making the fd field be -1.  */
      Bool     ready;
   }
   VgWaitedOnFd;

/* Table of outstanding fd waits; add_waiting_fd() claims the first
   free slot. */
static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS];
| 120 | |
| 121 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 122 | /* Forwards */ |
| 123 | static void do_nontrivial_clientreq ( ThreadId tid ); |
| 124 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 125 | static void scheduler_sanity ( void ); |
| 126 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 127 | |
| 128 | /* --------------------------------------------------------------------- |
| 129 | Helper functions for the scheduler. |
| 130 | ------------------------------------------------------------------ */ |
| 131 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 132 | static __inline__ |
| 133 | Bool is_valid_tid ( ThreadId tid ) |
| 134 | { |
| 135 | /* tid is unsigned, hence no < 0 test. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 136 | if (tid == 0) return False; |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 137 | if (tid >= VG_N_THREADS) return False; |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 138 | return True; |
| 139 | } |
| 140 | |
| 141 | |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 142 | /* For constructing error messages only: try and identify a thread |
| 143 | whose stack this address currently falls within, or return |
| 144 | VG_INVALID_THREADID if it doesn't. A small complication is dealing |
| 145 | with any currently VG_(baseBlock)-resident thread. |
| 146 | */ |
| 147 | ThreadId VG_(identify_stack_addr)( Addr a ) |
| 148 | { |
| 149 | ThreadId tid, tid_to_skip; |
| 150 | |
| 151 | tid_to_skip = VG_INVALID_THREADID; |
| 152 | |
| 153 | /* First check to see if there's a currently-loaded thread in |
| 154 | VG_(baseBlock). */ |
| 155 | if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) { |
| 156 | tid = vg_tid_currently_in_baseBlock; |
| 157 | if (VG_(baseBlock)[VGOFF_(m_esp)] <= a |
| 158 | && a <= vg_threads[tid].stack_highest_word) |
| 159 | return tid; |
| 160 | else |
| 161 | tid_to_skip = tid; |
| 162 | } |
| 163 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 164 | for (tid = 1; tid < VG_N_THREADS; tid++) { |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 165 | if (vg_threads[tid].status == VgTs_Empty) continue; |
| 166 | if (tid == tid_to_skip) continue; |
| 167 | if (vg_threads[tid].m_esp <= a |
| 168 | && a <= vg_threads[tid].stack_highest_word) |
| 169 | return tid; |
| 170 | } |
| 171 | return VG_INVALID_THREADID; |
| 172 | } |
| 173 | |
| 174 | |
sewardj | 15a43e1 | 2002-04-17 19:35:12 +0000 | [diff] [blame] | 175 | /* Print the scheduler status. */ |
| 176 | void VG_(pp_sched_status) ( void ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 177 | { |
| 178 | Int i; |
| 179 | VG_(printf)("\nsched status:\n"); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 180 | for (i = 1; i < VG_N_THREADS; i++) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 181 | if (vg_threads[i].status == VgTs_Empty) continue; |
sewardj | 15a43e1 | 2002-04-17 19:35:12 +0000 | [diff] [blame] | 182 | VG_(printf)("\nThread %d: status = ", i); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 183 | switch (vg_threads[i].status) { |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 184 | case VgTs_Runnable: VG_(printf)("Runnable"); break; |
| 185 | case VgTs_WaitFD: VG_(printf)("WaitFD"); break; |
| 186 | case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)", |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 187 | vg_threads[i].joiner); break; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 188 | case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break; |
| 189 | case VgTs_Sleeping: VG_(printf)("Sleeping"); break; |
| 190 | case VgTs_WaitMX: VG_(printf)("WaitMX"); break; |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 191 | case VgTs_WaitCV: VG_(printf)("WaitCV"); break; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 192 | default: VG_(printf)("???"); break; |
| 193 | } |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 194 | VG_(printf)(", associated_mx = %p, associated_cv = %p\n", |
| 195 | vg_threads[i].associated_mx, |
| 196 | vg_threads[i].associated_cv ); |
sewardj | 15a43e1 | 2002-04-17 19:35:12 +0000 | [diff] [blame] | 197 | VG_(pp_ExeContext)( |
| 198 | VG_(get_ExeContext)( False, vg_threads[i].m_eip, |
| 199 | vg_threads[i].m_ebp )); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 200 | } |
| 201 | VG_(printf)("\n"); |
| 202 | } |
| 203 | |
| 204 | static |
| 205 | void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no ) |
| 206 | { |
| 207 | Int i; |
| 208 | |
| 209 | vg_assert(fd != -1); /* avoid total chaos */ |
| 210 | |
| 211 | for (i = 0; i < VG_N_WAITING_FDS; i++) |
| 212 | if (vg_waiting_fds[i].fd == -1) |
| 213 | break; |
| 214 | |
| 215 | if (i == VG_N_WAITING_FDS) |
| 216 | VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low"); |
| 217 | /* |
| 218 | VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", |
| 219 | tid, fd, i); |
| 220 | */ |
| 221 | vg_waiting_fds[i].fd = fd; |
| 222 | vg_waiting_fds[i].tid = tid; |
| 223 | vg_waiting_fds[i].ready = False; |
| 224 | vg_waiting_fds[i].syscall_no = syscall_no; |
| 225 | } |
| 226 | |
| 227 | |
| 228 | |
| 229 | static |
| 230 | void print_sched_event ( ThreadId tid, Char* what ) |
| 231 | { |
sewardj | 45b4b37 | 2002-04-16 22:50:32 +0000 | [diff] [blame] | 232 | VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what ); |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 233 | } |
| 234 | |
| 235 | |
| 236 | static |
| 237 | void print_pthread_event ( ThreadId tid, Char* what ) |
| 238 | { |
| 239 | VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 240 | } |
| 241 | |
| 242 | |
| 243 | static |
| 244 | Char* name_of_sched_event ( UInt event ) |
| 245 | { |
| 246 | switch (event) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 247 | case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL"; |
| 248 | case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ"; |
| 249 | case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO"; |
| 250 | case VG_TRC_INNER_FASTMISS: return "FASTMISS"; |
| 251 | case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL"; |
| 252 | default: return "??UNKNOWN??"; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | |
/* Create a translation of the client basic block beginning at
   orig_addr, and add it to the translation cache & translation table.
   The translation is made on behalf of thread tid, whose ThreadState
   is handed to VG_(translate).  This probably doesn't really belong
   here, but, hey ...
*/
static
void create_translation_for ( ThreadId tid, Addr orig_addr )
{
   Addr    trans_addr;
   TTEntry tte;
   Int orig_size, trans_size;
   /* Ensure there is space to hold a translation. */
   VG_(maybe_do_lru_pass)();
   /* Translate; this fills in orig_size, trans_addr and trans_size. */
   VG_(translate)( &vg_threads[tid],
                   orig_addr, &orig_size, &trans_addr, &trans_size );
   /* Since the .orig_size and .trans_size fields are
      UShort, be paranoid. */
   vg_assert(orig_size > 0 && orig_size < 65536);
   vg_assert(trans_size > 0 && trans_size < 65536);
   tte.orig_size  = orig_size;
   tte.orig_addr  = orig_addr;
   tte.trans_size = trans_size;
   /* Copy data at trans_addr into the translation cache.
      Returned pointer is to the code, not to the 4-byte
      header. */
   tte.trans_addr = VG_(copy_to_transcache)
                       ( trans_addr, trans_size );
   tte.mru_epoch  = VG_(current_epoch);
   /* Free the intermediary -- was allocated by VG_(emit_code). */
   VG_(jitfree)( (void*)trans_addr );
   /* Add to trans tab and set back pointer. */
   VG_(add_to_trans_tab) ( &tte );
   /* Update stats: both the per-epoch and the overall counters. */
   VG_(this_epoch_in_count) ++;
   VG_(this_epoch_in_osize) += orig_size;
   VG_(this_epoch_in_tsize) += trans_size;
   VG_(overall_in_count) ++;
   VG_(overall_in_osize) += orig_size;
   VG_(overall_in_tsize) += trans_size;
   /* Record translated area for SMC detection. */
   VG_(smc_mark_original) ( orig_addr, orig_size );
}
| 298 | |
| 299 | |
| 300 | /* Allocate a completely empty ThreadState record. */ |
| 301 | static |
| 302 | ThreadId vg_alloc_ThreadState ( void ) |
| 303 | { |
| 304 | Int i; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 305 | for (i = 1; i < VG_N_THREADS; i++) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 306 | if (vg_threads[i].status == VgTs_Empty) |
| 307 | return i; |
| 308 | } |
| 309 | VG_(printf)("vg_alloc_ThreadState: no free slots available\n"); |
| 310 | VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n"); |
| 311 | VG_(panic)("VG_N_THREADS is too low"); |
| 312 | /*NOTREACHED*/ |
| 313 | } |
| 314 | |
| 315 | |
| 316 | ThreadState* VG_(get_thread_state) ( ThreadId tid ) |
| 317 | { |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 318 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 319 | vg_assert(vg_threads[tid].status != VgTs_Empty); |
| 320 | return & vg_threads[tid]; |
| 321 | } |
| 322 | |
| 323 | |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 324 | ThreadState* VG_(get_current_thread_state) ( void ) |
| 325 | { |
| 326 | vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 327 | return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock ); |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 328 | } |
| 329 | |
| 330 | |
| 331 | ThreadId VG_(get_current_tid) ( void ) |
| 332 | { |
| 333 | vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
| 334 | return vg_tid_currently_in_baseBlock; |
| 335 | } |
| 336 | |
| 337 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 338 | /* Copy the saved state of a thread into VG_(baseBlock), ready for it |
| 339 | to be run. */ |
| 340 | __inline__ |
| 341 | void VG_(load_thread_state) ( ThreadId tid ) |
| 342 | { |
| 343 | Int i; |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 344 | vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); |
| 345 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 346 | VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax; |
| 347 | VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx; |
| 348 | VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx; |
| 349 | VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx; |
| 350 | VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi; |
| 351 | VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi; |
| 352 | VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp; |
| 353 | VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp; |
| 354 | VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags; |
| 355 | VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip; |
| 356 | |
| 357 | for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| 358 | VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i]; |
| 359 | |
| 360 | VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax; |
| 361 | VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx; |
| 362 | VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx; |
| 363 | VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx; |
| 364 | VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi; |
| 365 | VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi; |
| 366 | VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp; |
| 367 | VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp; |
| 368 | VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags; |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 369 | |
| 370 | vg_tid_currently_in_baseBlock = tid; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 371 | } |
| 372 | |
| 373 | |
| 374 | /* Copy the state of a thread from VG_(baseBlock), presumably after it |
| 375 | has been descheduled. For sanity-check purposes, fill the vacated |
| 376 | VG_(baseBlock) with garbage so as to make the system more likely to |
| 377 | fail quickly if we erroneously continue to poke around inside |
| 378 | VG_(baseBlock) without first doing a load_thread_state(). |
| 379 | */ |
| 380 | __inline__ |
| 381 | void VG_(save_thread_state) ( ThreadId tid ) |
| 382 | { |
| 383 | Int i; |
| 384 | const UInt junk = 0xDEADBEEF; |
| 385 | |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 386 | vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
| 387 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 388 | vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)]; |
| 389 | vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; |
| 390 | vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; |
| 391 | vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)]; |
| 392 | vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)]; |
| 393 | vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)]; |
| 394 | vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; |
| 395 | vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; |
| 396 | vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; |
| 397 | vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; |
| 398 | |
| 399 | for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| 400 | vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; |
| 401 | |
| 402 | vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; |
| 403 | vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; |
| 404 | vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; |
| 405 | vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; |
| 406 | vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; |
| 407 | vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; |
| 408 | vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; |
| 409 | vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; |
| 410 | vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; |
| 411 | |
| 412 | /* Fill it up with junk. */ |
| 413 | VG_(baseBlock)[VGOFF_(m_eax)] = junk; |
| 414 | VG_(baseBlock)[VGOFF_(m_ebx)] = junk; |
| 415 | VG_(baseBlock)[VGOFF_(m_ecx)] = junk; |
| 416 | VG_(baseBlock)[VGOFF_(m_edx)] = junk; |
| 417 | VG_(baseBlock)[VGOFF_(m_esi)] = junk; |
| 418 | VG_(baseBlock)[VGOFF_(m_edi)] = junk; |
| 419 | VG_(baseBlock)[VGOFF_(m_ebp)] = junk; |
| 420 | VG_(baseBlock)[VGOFF_(m_esp)] = junk; |
| 421 | VG_(baseBlock)[VGOFF_(m_eflags)] = junk; |
| 422 | VG_(baseBlock)[VGOFF_(m_eip)] = junk; |
| 423 | |
| 424 | for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| 425 | VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 426 | |
| 427 | vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 428 | } |
| 429 | |
| 430 | |
/* Run the thread tid for a while, and return a VG_TRC_* value to the
   scheduler indicating what happened.  The thread must be valid and
   Runnable, and VG_(bbs_to_go) must be positive.  The thread's state
   is loaded into VG_(baseBlock) for the duration of the run and saved
   back afterwards, whichever way the run ends. */
static
UInt run_thread_for_a_while ( ThreadId tid )
{
   UInt trc = 0;
   vg_assert(is_valid_tid(tid));
   vg_assert(vg_threads[tid].status == VgTs_Runnable);
   vg_assert(VG_(bbs_to_go) > 0);

   VG_(load_thread_state) ( tid );
   if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) {
      /* try this ... */
      trc = VG_(run_innerloop)();
      /* We get here if the client didn't take a fault. */
   } else {
      /* We get here if the client took a fault, which caused our
         signal handler to longjmp. */
      /* trc is only assigned when VG_(run_innerloop) returns
         normally, so on this path it must still hold its initial
         value. */
      vg_assert(trc == 0);
      trc = VG_TRC_UNRESUMABLE_SIGNAL;
   }
   VG_(save_thread_state) ( tid );
   return trc;
}
| 455 | |
| 456 | |
| 457 | /* Increment the LRU epoch counter. */ |
| 458 | static |
| 459 | void increment_epoch ( void ) |
| 460 | { |
| 461 | VG_(current_epoch)++; |
| 462 | if (VG_(clo_verbosity) > 2) { |
| 463 | UInt tt_used, tc_used; |
| 464 | VG_(get_tt_tc_used) ( &tt_used, &tc_used ); |
| 465 | VG_(message)(Vg_UserMsg, |
| 466 | "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d", |
| 467 | VG_(bbs_done), |
| 468 | VG_(this_epoch_in_count), |
| 469 | VG_(this_epoch_in_osize), |
| 470 | VG_(this_epoch_in_tsize), |
| 471 | VG_(this_epoch_out_count), |
| 472 | VG_(this_epoch_out_osize), |
| 473 | VG_(this_epoch_out_tsize), |
| 474 | tt_used, tc_used |
| 475 | ); |
| 476 | } |
| 477 | VG_(this_epoch_in_count) = 0; |
| 478 | VG_(this_epoch_in_osize) = 0; |
| 479 | VG_(this_epoch_in_tsize) = 0; |
| 480 | VG_(this_epoch_out_count) = 0; |
| 481 | VG_(this_epoch_out_osize) = 0; |
| 482 | VG_(this_epoch_out_tsize) = 0; |
| 483 | } |
| 484 | |
| 485 | |
| 486 | /* Initialise the scheduler. Create a single "main" thread ready to |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 487 | run, with special ThreadId of one. This is called at startup; the |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 488 | caller takes care to park the client's state is parked in |
| 489 | VG_(baseBlock). |
| 490 | */ |
| 491 | void VG_(scheduler_init) ( void ) |
| 492 | { |
| 493 | Int i; |
| 494 | Addr startup_esp; |
| 495 | ThreadId tid_main; |
| 496 | |
| 497 | startup_esp = VG_(baseBlock)[VGOFF_(m_esp)]; |
| 498 | if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) { |
sewardj | 9a199dc | 2002-04-14 13:01:38 +0000 | [diff] [blame] | 499 | VG_(printf)("%%esp at startup = %p is not near %p; aborting\n", |
| 500 | (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 501 | VG_(panic)("unexpected %esp at startup"); |
| 502 | } |
| 503 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 504 | for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) { |
| 505 | vg_threads[i].status = VgTs_Empty; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 506 | vg_threads[i].stack_size = 0; |
| 507 | vg_threads[i].stack_base = (Addr)NULL; |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 508 | vg_threads[i].tid = i; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 509 | } |
| 510 | |
| 511 | for (i = 0; i < VG_N_WAITING_FDS; i++) |
| 512 | vg_waiting_fds[i].fd = -1; /* not in use */ |
| 513 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 514 | /* Assert this is thread zero, which has certain magic |
| 515 | properties. */ |
| 516 | tid_main = vg_alloc_ThreadState(); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 517 | vg_assert(tid_main == 1); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 518 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 519 | vg_threads[tid_main].status = VgTs_Runnable; |
| 520 | vg_threads[tid_main].joiner = VG_INVALID_THREADID; |
| 521 | vg_threads[tid_main].associated_mx = NULL; |
| 522 | vg_threads[tid_main].associated_cv = NULL; |
| 523 | vg_threads[tid_main].retval = NULL; /* not important */ |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 524 | vg_threads[tid_main].stack_highest_word |
| 525 | = vg_threads[tid_main].m_esp /* -4 ??? */; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 526 | |
| 527 | /* Copy VG_(baseBlock) state to tid_main's slot. */ |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 528 | vg_tid_currently_in_baseBlock = tid_main; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 529 | VG_(save_thread_state) ( tid_main ); |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 530 | |
| 531 | /* So now ... */ |
| 532 | vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 533 | } |
| 534 | |
| 535 | |
| 536 | /* What if fd isn't a valid fd? */ |
| 537 | static |
| 538 | void set_fd_nonblocking ( Int fd ) |
| 539 | { |
| 540 | Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| 541 | vg_assert(!VG_(is_kerror)(res)); |
| 542 | res |= VKI_O_NONBLOCK; |
| 543 | res = VG_(fcntl)( fd, VKI_F_SETFL, res ); |
| 544 | vg_assert(!VG_(is_kerror)(res)); |
| 545 | } |
| 546 | |
| 547 | static |
| 548 | void set_fd_blocking ( Int fd ) |
| 549 | { |
| 550 | Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| 551 | vg_assert(!VG_(is_kerror)(res)); |
| 552 | res &= ~VKI_O_NONBLOCK; |
| 553 | res = VG_(fcntl)( fd, VKI_F_SETFL, res ); |
| 554 | vg_assert(!VG_(is_kerror)(res)); |
| 555 | } |
| 556 | |
| 557 | static |
| 558 | Bool fd_is_blockful ( Int fd ) |
| 559 | { |
| 560 | Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| 561 | vg_assert(!VG_(is_kerror)(res)); |
| 562 | return (res & VKI_O_NONBLOCK) ? False : True; |
| 563 | } |
| 564 | |
| 565 | |
| 566 | |
| 567 | /* Do a purely thread-local request for tid, and put the result in its |
| 568 | %EDX, without changing its scheduling state in any way, nor that of |
| 569 | any other threads. Return True if so. |
| 570 | |
| 571 | If the request is non-trivial, return False; a more capable but |
| 572 | slower mechanism will deal with it. |
| 573 | */ |
| 574 | static |
| 575 | Bool maybe_do_trivial_clientreq ( ThreadId tid ) |
| 576 | { |
| 577 | # define SIMPLE_RETURN(vvv) \ |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 578 | { tst->m_edx = (vvv); \ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 579 | return True; \ |
| 580 | } |
| 581 | |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 582 | ThreadState* tst = &vg_threads[tid]; |
| 583 | UInt* arg = (UInt*)(tst->m_eax); |
| 584 | UInt req_no = arg[0]; |
| 585 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 586 | switch (req_no) { |
| 587 | case VG_USERREQ__MALLOC: |
| 588 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 589 | (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 590 | ); |
| 591 | case VG_USERREQ__BUILTIN_NEW: |
| 592 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 593 | (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 594 | ); |
| 595 | case VG_USERREQ__BUILTIN_VEC_NEW: |
| 596 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 597 | (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 598 | ); |
| 599 | case VG_USERREQ__FREE: |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 600 | VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 601 | SIMPLE_RETURN(0); /* irrelevant */ |
| 602 | case VG_USERREQ__BUILTIN_DELETE: |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 603 | VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 604 | SIMPLE_RETURN(0); /* irrelevant */ |
| 605 | case VG_USERREQ__BUILTIN_VEC_DELETE: |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 606 | VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 607 | SIMPLE_RETURN(0); /* irrelevant */ |
| 608 | case VG_USERREQ__CALLOC: |
| 609 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 610 | (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 611 | ); |
| 612 | case VG_USERREQ__REALLOC: |
| 613 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 614 | (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 615 | ); |
| 616 | case VG_USERREQ__MEMALIGN: |
| 617 | SIMPLE_RETURN( |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 618 | (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 619 | ); |
sewardj | 9650c99 | 2002-04-16 03:44:31 +0000 | [diff] [blame] | 620 | |
| 621 | /* These are heavily used. */ |
| 622 | case VG_USERREQ__PTHREAD_GET_THREADID: |
| 623 | SIMPLE_RETURN(tid); |
| 624 | case VG_USERREQ__RUNNING_ON_VALGRIND: |
| 625 | SIMPLE_RETURN(1); |
sewardj | 45b4b37 | 2002-04-16 22:50:32 +0000 | [diff] [blame] | 626 | case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL: |
| 627 | SIMPLE_RETURN(VG_(clo_trace_pthread_level)); |
sewardj | 9650c99 | 2002-04-16 03:44:31 +0000 | [diff] [blame] | 628 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 629 | default: |
| 630 | /* Too hard; wimp out. */ |
| 631 | return False; |
| 632 | } |
| 633 | # undef SIMPLE_RETURN |
| 634 | } |
| 635 | |
| 636 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 637 | /* vthread tid is returning from a signal handler; modify its |
| 638 | stack/regs accordingly. */ |
| 639 | static |
| 640 | void handle_signal_return ( ThreadId tid ) |
| 641 | { |
| 642 | Char msg_buf[100]; |
| 643 | Bool restart_blocked_syscalls; |
| 644 | |
| 645 | vg_assert(is_valid_tid(tid)); |
| 646 | |
| 647 | restart_blocked_syscalls = VG_(signal_returns)(tid); |
| 648 | |
| 649 | if (restart_blocked_syscalls) |
| 650 | /* Easy; we don't have to do anything. */ |
| 651 | return; |
| 652 | |
| 653 | if (vg_threads[tid].status == VgTs_WaitFD) { |
| 654 | vg_assert(vg_threads[tid].m_eax == __NR_read |
| 655 | || vg_threads[tid].m_eax == __NR_write); |
| 656 | /* read() or write() interrupted. Force a return with EINTR. */ |
| 657 | vg_threads[tid].m_eax = -VKI_EINTR; |
| 658 | vg_threads[tid].status = VgTs_Runnable; |
| 659 | if (VG_(clo_trace_sched)) { |
| 660 | VG_(sprintf)(msg_buf, |
| 661 | "read() / write() interrupted by signal; return EINTR" ); |
| 662 | print_sched_event(tid, msg_buf); |
| 663 | } |
| 664 | return; |
| 665 | } |
| 666 | |
| 667 | if (vg_threads[tid].status == VgTs_WaitFD) { |
| 668 | vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); |
| 669 | /* We interrupted a nanosleep(). The right thing to do is to |
| 670 | write the unused time to nanosleep's second param and return |
| 671 | EINTR, but I'm too lazy for that. */ |
| 672 | return; |
| 673 | } |
| 674 | |
| 675 | /* All other cases? Just return. */ |
| 676 | } |
| 677 | |
| 678 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 679 | static |
| 680 | void sched_do_syscall ( ThreadId tid ) |
| 681 | { |
| 682 | UInt saved_eax; |
| 683 | UInt res, syscall_no; |
| 684 | UInt fd; |
| 685 | Bool might_block, assumed_nonblocking; |
| 686 | Bool orig_fd_blockness; |
| 687 | Char msg_buf[100]; |
| 688 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 689 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 690 | vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| 691 | |
| 692 | syscall_no = vg_threads[tid].m_eax; /* syscall number */ |
| 693 | |
| 694 | if (syscall_no == __NR_nanosleep) { |
| 695 | ULong t_now, t_awaken; |
| 696 | struct vki_timespec* req; |
| 697 | req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */ |
| 698 | t_now = VG_(read_microsecond_timer)(); |
| 699 | t_awaken |
| 700 | = t_now |
| 701 | + (ULong)1000000ULL * (ULong)(req->tv_sec) |
| 702 | + (ULong)( (UInt)(req->tv_nsec) / 1000 ); |
| 703 | vg_threads[tid].status = VgTs_Sleeping; |
| 704 | vg_threads[tid].awaken_at = t_awaken; |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 705 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 706 | VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu", |
| 707 | t_now, t_awaken-t_now); |
| 708 | print_sched_event(tid, msg_buf); |
| 709 | } |
| 710 | /* Force the scheduler to run something else for a while. */ |
| 711 | return; |
| 712 | } |
| 713 | |
| 714 | switch (syscall_no) { |
| 715 | case __NR_read: |
| 716 | case __NR_write: |
| 717 | assumed_nonblocking |
| 718 | = False; |
| 719 | might_block |
| 720 | = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */); |
| 721 | break; |
| 722 | default: |
| 723 | might_block = False; |
| 724 | assumed_nonblocking = True; |
| 725 | } |
| 726 | |
| 727 | if (assumed_nonblocking) { |
| 728 | /* We think it's non-blocking. Just do it in the normal way. */ |
| 729 | VG_(perform_assumed_nonblocking_syscall)(tid); |
| 730 | /* The thread is still runnable. */ |
| 731 | return; |
| 732 | } |
| 733 | |
| 734 | /* It might block. Take evasive action. */ |
| 735 | switch (syscall_no) { |
| 736 | case __NR_read: |
| 737 | case __NR_write: |
| 738 | fd = vg_threads[tid].m_ebx; break; |
| 739 | default: |
| 740 | vg_assert(3+3 == 7); |
| 741 | } |
| 742 | |
| 743 | /* Set the fd to nonblocking, and do the syscall, which will return |
| 744 | immediately, in order to lodge a request with the Linux kernel. |
| 745 | We later poll for I/O completion using select(). */ |
| 746 | |
| 747 | orig_fd_blockness = fd_is_blockful(fd); |
| 748 | set_fd_nonblocking(fd); |
| 749 | vg_assert(!fd_is_blockful(fd)); |
| 750 | VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); |
| 751 | |
| 752 | /* This trashes the thread's %eax; we have to preserve it. */ |
| 753 | saved_eax = vg_threads[tid].m_eax; |
| 754 | KERNEL_DO_SYSCALL(tid,res); |
| 755 | |
| 756 | /* Restore original blockfulness of the fd. */ |
| 757 | if (orig_fd_blockness) |
| 758 | set_fd_blocking(fd); |
| 759 | else |
| 760 | set_fd_nonblocking(fd); |
| 761 | |
| 762 | if (res != -VKI_EWOULDBLOCK) { |
| 763 | /* It didn't block; it went through immediately. So finish off |
| 764 | in the normal way. Don't restore %EAX, since that now |
| 765 | (correctly) holds the result of the call. */ |
| 766 | VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); |
| 767 | /* We're still runnable. */ |
| 768 | vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| 769 | |
| 770 | } else { |
| 771 | |
| 772 | /* It would have blocked. First, restore %EAX to what it was |
| 773 | before our speculative call. */ |
| 774 | vg_threads[tid].m_eax = saved_eax; |
| 775 | /* Put this fd in a table of fds on which we are waiting for |
| 776 | completion. The arguments for select() later are constructed |
| 777 | from this table. */ |
| 778 | add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */); |
| 779 | /* Deschedule thread until an I/O completion happens. */ |
| 780 | vg_threads[tid].status = VgTs_WaitFD; |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 781 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 782 | VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd); |
| 783 | print_sched_event(tid, msg_buf); |
| 784 | } |
| 785 | |
| 786 | } |
| 787 | } |
| 788 | |
| 789 | |
| 790 | /* Find out which of the fds in vg_waiting_fds are now ready to go, by |
| 791 | making enquiries with select(), and mark them as ready. We have to |
| 792 | wait for the requesting threads to fall into the the WaitFD state |
| 793 | before we can actually finally deliver the results, so this |
| 794 | procedure doesn't do that; complete_blocked_syscalls() does it. |
| 795 | |
| 796 | It might seem odd that a thread which has done a blocking syscall |
| 797 | is not in WaitFD state; the way this can happen is if it initially |
| 798 | becomes WaitFD, but then a signal is delivered to it, so it becomes |
| 799 | Runnable for a while. In this case we have to wait for the |
| 800 | sighandler to return, whereupon the WaitFD state is resumed, and |
| 801 | only at that point can the I/O result be delivered to it. However, |
| 802 | this point may be long after the fd is actually ready. |
| 803 | |
| 804 | So, poll_for_ready_fds() merely detects fds which are ready. |
| 805 | complete_blocked_syscalls() does the second half of the trick, |
| 806 | possibly much later: it delivers the results from ready fds to |
| 807 | threads in WaitFD state. |
| 808 | */ |
sewardj | 9a199dc | 2002-04-14 13:01:38 +0000 | [diff] [blame] | 809 | static |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 810 | void poll_for_ready_fds ( void ) |
| 811 | { |
| 812 | vki_ksigset_t saved_procmask; |
| 813 | vki_fd_set readfds; |
| 814 | vki_fd_set writefds; |
| 815 | vki_fd_set exceptfds; |
| 816 | struct vki_timeval timeout; |
| 817 | Int fd, fd_max, i, n_ready, syscall_no, n_ok; |
| 818 | ThreadId tid; |
| 819 | Bool rd_ok, wr_ok, ex_ok; |
| 820 | Char msg_buf[100]; |
| 821 | |
sewardj | e462e20 | 2002-04-13 04:09:07 +0000 | [diff] [blame] | 822 | struct vki_timespec* rem; |
| 823 | ULong t_now; |
| 824 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 825 | /* Awaken any sleeping threads whose sleep has expired. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 826 | for (tid = 1; tid < VG_N_THREADS; tid++) |
| 827 | if (vg_threads[tid].status == VgTs_Sleeping) |
| 828 | break; |
| 829 | |
| 830 | /* Avoid pointless calls to VG_(read_microsecond_timer). */ |
| 831 | if (tid < VG_N_THREADS) { |
| 832 | t_now = VG_(read_microsecond_timer)(); |
| 833 | for (tid = 1; tid < VG_N_THREADS; tid++) { |
| 834 | if (vg_threads[tid].status != VgTs_Sleeping) |
| 835 | continue; |
| 836 | if (t_now >= vg_threads[tid].awaken_at) { |
| 837 | /* Resume this thread. Set to zero the remaining-time |
| 838 | (second) arg of nanosleep, since it's used up all its |
| 839 | time. */ |
| 840 | vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); |
| 841 | rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */ |
| 842 | if (rem != NULL) { |
| 843 | rem->tv_sec = 0; |
| 844 | rem->tv_nsec = 0; |
| 845 | } |
| 846 | /* Make the syscall return 0 (success). */ |
| 847 | vg_threads[tid].m_eax = 0; |
| 848 | /* Reschedule this thread. */ |
| 849 | vg_threads[tid].status = VgTs_Runnable; |
| 850 | if (VG_(clo_trace_sched)) { |
| 851 | VG_(sprintf)(msg_buf, "at %lu: nanosleep done", |
| 852 | t_now); |
| 853 | print_sched_event(tid, msg_buf); |
| 854 | } |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 855 | } |
| 856 | } |
| 857 | } |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 858 | |
sewardj | e462e20 | 2002-04-13 04:09:07 +0000 | [diff] [blame] | 859 | /* And look for threads waiting on file descriptors which are now |
| 860 | ready for I/O.*/ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 861 | timeout.tv_sec = 0; |
| 862 | timeout.tv_usec = 0; |
| 863 | |
| 864 | VKI_FD_ZERO(&readfds); |
| 865 | VKI_FD_ZERO(&writefds); |
| 866 | VKI_FD_ZERO(&exceptfds); |
| 867 | fd_max = -1; |
| 868 | for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| 869 | if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| 870 | continue; |
| 871 | if (vg_waiting_fds[i].ready /* already ready? */) |
| 872 | continue; |
| 873 | fd = vg_waiting_fds[i].fd; |
| 874 | /* VG_(printf)("adding QUERY for fd %d\n", fd); */ |
sewardj | e462e20 | 2002-04-13 04:09:07 +0000 | [diff] [blame] | 875 | vg_assert(fd >= 0); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 876 | if (fd > fd_max) |
| 877 | fd_max = fd; |
| 878 | tid = vg_waiting_fds[i].tid; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 879 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 880 | syscall_no = vg_waiting_fds[i].syscall_no; |
| 881 | switch (syscall_no) { |
| 882 | case __NR_read: |
| 883 | VKI_FD_SET(fd, &readfds); break; |
| 884 | case __NR_write: |
| 885 | VKI_FD_SET(fd, &writefds); break; |
| 886 | default: |
| 887 | VG_(panic)("poll_for_ready_fds: unexpected syscall"); |
| 888 | /*NOTREACHED*/ |
| 889 | break; |
| 890 | } |
| 891 | } |
| 892 | |
sewardj | e462e20 | 2002-04-13 04:09:07 +0000 | [diff] [blame] | 893 | /* Short cut: if no fds are waiting, give up now. */ |
| 894 | if (fd_max == -1) |
| 895 | return; |
| 896 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 897 | /* BLOCK ALL SIGNALS. We don't want the complication of select() |
| 898 | getting interrupted. */ |
| 899 | VG_(block_all_host_signals)( &saved_procmask ); |
| 900 | |
| 901 | n_ready = VG_(select) |
| 902 | ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout); |
| 903 | if (VG_(is_kerror)(n_ready)) { |
| 904 | VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready); |
| 905 | VG_(panic)("poll_for_ready_fds: select failed?!"); |
| 906 | /*NOTREACHED*/ |
| 907 | } |
| 908 | |
| 909 | /* UNBLOCK ALL SIGNALS */ |
| 910 | VG_(restore_host_signals)( &saved_procmask ); |
| 911 | |
| 912 | /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */ |
| 913 | |
| 914 | if (n_ready == 0) |
| 915 | return; |
| 916 | |
| 917 | /* Inspect all the fds we know about, and handle any completions that |
| 918 | have happened. */ |
| 919 | /* |
| 920 | VG_(printf)("\n\n"); |
| 921 | for (fd = 0; fd < 100; fd++) |
| 922 | if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) { |
| 923 | VG_(printf)("X"); } else { VG_(printf)("."); }; |
| 924 | VG_(printf)("\n\nfd_max = %d\n", fd_max); |
| 925 | */ |
| 926 | |
| 927 | for (fd = 0; fd <= fd_max; fd++) { |
| 928 | rd_ok = VKI_FD_ISSET(fd, &readfds); |
| 929 | wr_ok = VKI_FD_ISSET(fd, &writefds); |
| 930 | ex_ok = VKI_FD_ISSET(fd, &exceptfds); |
| 931 | |
| 932 | n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0); |
| 933 | if (n_ok == 0) |
| 934 | continue; |
| 935 | if (n_ok > 1) { |
| 936 | VG_(printf)("offending fd = %d\n", fd); |
| 937 | VG_(panic)("poll_for_ready_fds: multiple events on fd"); |
| 938 | } |
| 939 | |
| 940 | /* An I/O event completed for fd. Find the thread which |
| 941 | requested this. */ |
| 942 | for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| 943 | if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| 944 | continue; |
| 945 | if (vg_waiting_fds[i].fd == fd) |
| 946 | break; |
| 947 | } |
| 948 | |
| 949 | /* And a bit more paranoia ... */ |
| 950 | vg_assert(i >= 0 && i < VG_N_WAITING_FDS); |
| 951 | |
| 952 | /* Mark the fd as ready. */ |
| 953 | vg_assert(! vg_waiting_fds[i].ready); |
| 954 | vg_waiting_fds[i].ready = True; |
| 955 | } |
| 956 | } |
| 957 | |
| 958 | |
| 959 | /* See comment attached to poll_for_ready_fds() for explaination. */ |
sewardj | 9a199dc | 2002-04-14 13:01:38 +0000 | [diff] [blame] | 960 | static |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 961 | void complete_blocked_syscalls ( void ) |
| 962 | { |
| 963 | Int fd, i, res, syscall_no; |
| 964 | ThreadId tid; |
| 965 | Char msg_buf[100]; |
| 966 | |
| 967 | /* Inspect all the outstanding fds we know about. */ |
| 968 | |
| 969 | for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| 970 | if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| 971 | continue; |
| 972 | if (! vg_waiting_fds[i].ready) |
| 973 | continue; |
| 974 | |
| 975 | fd = vg_waiting_fds[i].fd; |
| 976 | tid = vg_waiting_fds[i].tid; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 977 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 978 | |
| 979 | /* The thread actually has to be waiting for the I/O event it |
| 980 | requested before we can deliver the result! */ |
| 981 | if (vg_threads[tid].status != VgTs_WaitFD) |
| 982 | continue; |
| 983 | |
| 984 | /* Ok, actually do it! We can safely use %EAX as the syscall |
| 985 | number, because the speculative call made by |
| 986 | sched_do_syscall() doesn't change %EAX in the case where the |
| 987 | call would have blocked. */ |
| 988 | |
| 989 | syscall_no = vg_waiting_fds[i].syscall_no; |
| 990 | vg_assert(syscall_no == vg_threads[tid].m_eax); |
| 991 | KERNEL_DO_SYSCALL(tid,res); |
| 992 | VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); |
| 993 | |
| 994 | /* Reschedule. */ |
| 995 | vg_threads[tid].status = VgTs_Runnable; |
| 996 | /* Mark slot as no longer in use. */ |
| 997 | vg_waiting_fds[i].fd = -1; |
| 998 | /* pp_sched_status(); */ |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 999 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1000 | VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd); |
| 1001 | print_sched_event(tid, msg_buf); |
| 1002 | } |
| 1003 | } |
| 1004 | } |
| 1005 | |
| 1006 | |
| 1007 | static |
| 1008 | void nanosleep_for_a_while ( void ) |
| 1009 | { |
| 1010 | Int res; |
| 1011 | struct vki_timespec req; |
| 1012 | struct vki_timespec rem; |
| 1013 | req.tv_sec = 0; |
| 1014 | req.tv_nsec = 20 * 1000 * 1000; |
| 1015 | res = VG_(nanosleep)( &req, &rem ); |
| 1016 | /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */ |
| 1017 | vg_assert(res == 0); |
| 1018 | } |
| 1019 | |
| 1020 | |
| 1021 | /* --------------------------------------------------------------------- |
| 1022 | The scheduler proper. |
| 1023 | ------------------------------------------------------------------ */ |
| 1024 | |
| 1025 | /* Run user-space threads until either |
| 1026 | * Deadlock occurs |
| 1027 | * One thread asks to shutdown Valgrind |
| 1028 | * The specified number of basic blocks has gone by. |
| 1029 | */ |
| 1030 | VgSchedReturnCode VG_(scheduler) ( void ) |
| 1031 | { |
| 1032 | ThreadId tid, tid_next; |
| 1033 | UInt trc; |
| 1034 | UInt dispatch_ctr_SAVED; |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1035 | Int request_code, done_this_time, n_in_fdwait_or_sleep; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1036 | Char msg_buf[100]; |
| 1037 | Addr trans_addr; |
| 1038 | |
| 1039 | /* For the LRU structures, records when the epoch began. */ |
| 1040 | ULong lru_epoch_started_at = 0; |
| 1041 | |
| 1042 | /* Start with the root thread. tid in general indicates the |
| 1043 | currently runnable/just-finished-running thread. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1044 | tid = 1; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1045 | |
| 1046 | /* This is the top level scheduler loop. It falls into three |
| 1047 | phases. */ |
| 1048 | while (True) { |
| 1049 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1050 | /* ======================= Phase 0 of 3 ======================= |
| 1051 | Be paranoid. Always a good idea. */ |
| 1052 | scheduler_sanity(); |
| 1053 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1054 | /* ======================= Phase 1 of 3 ======================= |
| 1055 | Handle I/O completions and signals. This may change the |
| 1056 | status of various threads. Then select a new thread to run, |
| 1057 | or declare deadlock, or sleep if there are no runnable |
| 1058 | threads but some are blocked on I/O. */ |
| 1059 | |
| 1060 | /* Age the LRU structures if an epoch has been completed. */ |
| 1061 | if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) { |
| 1062 | lru_epoch_started_at = VG_(bbs_done); |
| 1063 | increment_epoch(); |
| 1064 | } |
| 1065 | |
| 1066 | /* Was a debug-stop requested? */ |
| 1067 | if (VG_(bbs_to_go) == 0) |
| 1068 | goto debug_stop; |
| 1069 | |
| 1070 | /* Do the following loop until a runnable thread is found, or |
| 1071 | deadlock is detected. */ |
| 1072 | while (True) { |
| 1073 | |
| 1074 | /* For stats purposes only. */ |
| 1075 | VG_(num_scheduling_events_MAJOR) ++; |
| 1076 | |
| 1077 | /* See if any I/O operations which we were waiting for have |
| 1078 | completed, and, if so, make runnable the relevant waiting |
| 1079 | threads. */ |
| 1080 | poll_for_ready_fds(); |
| 1081 | complete_blocked_syscalls(); |
| 1082 | |
| 1083 | /* See if there are any signals which need to be delivered. If |
| 1084 | so, choose thread(s) to deliver them to, and build signal |
| 1085 | delivery frames on those thread(s) stacks. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1086 | |
| 1087 | /* Be careful about delivering signals to a thread waiting |
| 1088 | for a mutex. In particular, when the handler is running, |
| 1089 | that thread is temporarily apparently-not-waiting for the |
| 1090 | mutex, so if it is unlocked by another thread whilst the |
| 1091 | handler is running, this thread is not informed. When the |
| 1092 | handler returns, the thread resumes waiting on the mutex, |
| 1093 | even if, as a result, it has missed the unlocking of it. |
| 1094 | Potential deadlock. This sounds all very strange, but the |
| 1095 | POSIX standard appears to require this behaviour. */ |
| 1096 | VG_(deliver_signals)( 1 /*HACK*/ ); |
| 1097 | VG_(do_sanity_checks)( 1 /*HACK*/, False ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1098 | |
| 1099 | /* Try and find a thread (tid) to run. */ |
| 1100 | tid_next = tid; |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1101 | n_in_fdwait_or_sleep = 0; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1102 | while (True) { |
| 1103 | tid_next++; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1104 | if (tid_next >= VG_N_THREADS) tid_next = 1; |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1105 | if (vg_threads[tid_next].status == VgTs_WaitFD |
| 1106 | || vg_threads[tid_next].status == VgTs_Sleeping) |
| 1107 | n_in_fdwait_or_sleep ++; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1108 | if (vg_threads[tid_next].status == VgTs_Runnable) |
| 1109 | break; /* We can run this one. */ |
| 1110 | if (tid_next == tid) |
| 1111 | break; /* been all the way round */ |
| 1112 | } |
| 1113 | tid = tid_next; |
| 1114 | |
| 1115 | if (vg_threads[tid].status == VgTs_Runnable) { |
| 1116 | /* Found a suitable candidate. Fall out of this loop, so |
| 1117 | we can advance to stage 2 of the scheduler: actually |
| 1118 | running the thread. */ |
| 1119 | break; |
| 1120 | } |
| 1121 | |
| 1122 | /* We didn't find a runnable thread. Now what? */ |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1123 | if (n_in_fdwait_or_sleep == 0) { |
| 1124 | /* No runnable threads and no prospect of any appearing |
| 1125 | even if we wait for an arbitrary length of time. In |
| 1126 | short, we have a deadlock. */ |
sewardj | 15a43e1 | 2002-04-17 19:35:12 +0000 | [diff] [blame] | 1127 | VG_(pp_sched_status)(); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1128 | return VgSrc_Deadlock; |
| 1129 | } |
| 1130 | |
| 1131 | /* At least one thread is in a fd-wait state. Delay for a |
| 1132 | while, and go round again, in the hope that eventually a |
| 1133 | thread becomes runnable. */ |
| 1134 | nanosleep_for_a_while(); |
| 1135 | // pp_sched_status(); |
| 1136 | // VG_(printf)(".\n"); |
| 1137 | } |
| 1138 | |
| 1139 | |
| 1140 | /* ======================= Phase 2 of 3 ======================= |
| 1141 | Wahey! We've finally decided that thread tid is runnable, so |
| 1142 | we now do that. Run it for as much of a quanta as possible. |
| 1143 | Trivial requests are handled and the thread continues. The |
| 1144 | aim is not to do too many of Phase 1 since it is expensive. */ |
| 1145 | |
| 1146 | if (0) |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1147 | VG_(printf)("SCHED: tid %d\n", tid); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1148 | |
| 1149 | /* Figure out how many bbs to ask vg_run_innerloop to do. Note |
| 1150 | that it decrements the counter before testing it for zero, so |
| 1151 | that if VG_(dispatch_ctr) is set to N you get at most N-1 |
| 1152 | iterations. Also this means that VG_(dispatch_ctr) must |
| 1153 | exceed zero before entering the innerloop. Also also, the |
| 1154 | decrement is done before the bb is actually run, so you |
| 1155 | always get at least one decrement even if nothing happens. |
| 1156 | */ |
| 1157 | if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM) |
| 1158 | VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1; |
| 1159 | else |
| 1160 | VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1; |
| 1161 | |
| 1162 | /* ... and remember what we asked for. */ |
| 1163 | dispatch_ctr_SAVED = VG_(dispatch_ctr); |
| 1164 | |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1165 | /* paranoia ... */ |
| 1166 | vg_assert(vg_threads[tid].tid == tid); |
| 1167 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1168 | /* Actually run thread tid. */ |
| 1169 | while (True) { |
| 1170 | |
| 1171 | /* For stats purposes only. */ |
| 1172 | VG_(num_scheduling_events_MINOR) ++; |
| 1173 | |
| 1174 | if (0) |
| 1175 | VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs", |
| 1176 | tid, VG_(dispatch_ctr) - 1 ); |
| 1177 | |
| 1178 | trc = run_thread_for_a_while ( tid ); |
| 1179 | |
| 1180 | /* Deal quickly with trivial scheduling events, and resume the |
| 1181 | thread. */ |
| 1182 | |
| 1183 | if (trc == VG_TRC_INNER_FASTMISS) { |
| 1184 | vg_assert(VG_(dispatch_ctr) > 0); |
| 1185 | |
| 1186 | /* Trivial event. Miss in the fast-cache. Do a full |
| 1187 | lookup for it. */ |
| 1188 | trans_addr |
| 1189 | = VG_(search_transtab) ( vg_threads[tid].m_eip ); |
| 1190 | if (trans_addr == (Addr)0) { |
| 1191 | /* Not found; we need to request a translation. */ |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1192 | create_translation_for( tid, vg_threads[tid].m_eip ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1193 | trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip ); |
| 1194 | if (trans_addr == (Addr)0) |
| 1195 | VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry"); |
| 1196 | } |
| 1197 | continue; /* with this thread */ |
| 1198 | } |
| 1199 | |
| 1200 | if (trc == VG_TRC_EBP_JMP_CLIENTREQ) { |
| 1201 | Bool is_triv = maybe_do_trivial_clientreq(tid); |
| 1202 | if (is_triv) { |
| 1203 | /* NOTE: a trivial request is something like a call to |
| 1204 | malloc() or free(). It DOES NOT change the |
| 1205 | Runnability of this thread nor the status of any |
| 1206 | other thread; it is purely thread-local. */ |
| 1207 | continue; /* with this thread */ |
| 1208 | } |
| 1209 | } |
| 1210 | |
| 1211 | /* It's a non-trivial event. Give up running this thread and |
| 1212 | handle things the expensive way. */ |
| 1213 | break; |
| 1214 | } |
| 1215 | |
| 1216 | /* ======================= Phase 3 of 3 ======================= |
| 1217 | Handle non-trivial thread requests, mostly pthread stuff. */ |
| 1218 | |
| 1219 | /* Ok, we've fallen out of the dispatcher for a |
| 1220 | non-completely-trivial reason. First, update basic-block |
| 1221 | counters. */ |
| 1222 | |
| 1223 | done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1; |
| 1224 | vg_assert(done_this_time >= 0); |
| 1225 | VG_(bbs_to_go) -= (ULong)done_this_time; |
| 1226 | VG_(bbs_done) += (ULong)done_this_time; |
| 1227 | |
| 1228 | if (0 && trc != VG_TRC_INNER_FASTMISS) |
| 1229 | VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d", |
| 1230 | tid, done_this_time, (Int)trc ); |
| 1231 | |
| 1232 | if (0 && trc != VG_TRC_INNER_FASTMISS) |
| 1233 | VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s", |
| 1234 | tid, VG_(bbs_done), |
| 1235 | name_of_sched_event(trc) ); |
sewardj | 9d1b5d3 | 2002-04-17 19:40:49 +0000 | [diff] [blame] | 1236 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1237 | /* Examine the thread's return code to figure out why it |
| 1238 | stopped, and handle requests. */ |
| 1239 | |
| 1240 | switch (trc) { |
| 1241 | |
| 1242 | case VG_TRC_INNER_FASTMISS: |
| 1243 | VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS"); |
| 1244 | /*NOTREACHED*/ |
| 1245 | break; |
| 1246 | |
| 1247 | case VG_TRC_INNER_COUNTERZERO: |
| 1248 | /* Timeslice is out. Let a new thread be scheduled, |
| 1249 | simply by doing nothing, causing us to arrive back at |
| 1250 | Phase 1. */ |
| 1251 | if (VG_(bbs_to_go) == 0) { |
| 1252 | goto debug_stop; |
| 1253 | } |
| 1254 | vg_assert(VG_(dispatch_ctr) == 0); |
| 1255 | break; |
| 1256 | |
| 1257 | case VG_TRC_UNRESUMABLE_SIGNAL: |
| 1258 | /* It got a SIGSEGV/SIGBUS, which we need to deliver right |
| 1259 | away. Again, do nothing, so we wind up back at Phase |
| 1260 | 1, whereupon the signal will be "delivered". */ |
| 1261 | break; |
| 1262 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1263 | case VG_TRC_EBP_JMP_SYSCALL: |
| 1264 | /* Do a syscall for the vthread tid. This could cause it |
| 1265 | to become non-runnable. */ |
| 1266 | sched_do_syscall(tid); |
| 1267 | break; |
| 1268 | |
| 1269 | case VG_TRC_EBP_JMP_CLIENTREQ: |
| 1270 | /* Do a client request for the vthread tid. Note that |
| 1271 | some requests will have been handled by |
| 1272 | maybe_do_trivial_clientreq(), so we don't expect to see |
| 1273 | those here. |
| 1274 | */ |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1275 | /* The thread's %EAX points at an arg block, the first |
| 1276 | word of which is the request code. */ |
| 1277 | request_code = ((UInt*)(vg_threads[tid].m_eax))[0]; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1278 | if (0) { |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1279 | VG_(sprintf)(msg_buf, "request 0x%x", request_code ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1280 | print_sched_event(tid, msg_buf); |
| 1281 | } |
| 1282 | /* Do a non-trivial client request for thread tid. tid's |
| 1283 | %EAX points to a short vector of argument words, the |
| 1284 | first of which is the request code. The result of the |
| 1285 | request is put in tid's %EDX. Alternatively, perhaps |
| 1286 | the request causes tid to become non-runnable and/or |
| 1287 | other blocked threads become runnable. In general we |
| 1288 | can and often do mess with the state of arbitrary |
| 1289 | threads at this point. */ |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 1290 | if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) { |
| 1291 | return VgSrc_Shutdown; |
| 1292 | } else { |
| 1293 | do_nontrivial_clientreq(tid); |
| 1294 | } |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1295 | break; |
| 1296 | |
| 1297 | default: |
| 1298 | VG_(printf)("\ntrc = %d\n", trc); |
| 1299 | VG_(panic)("VG_(scheduler), phase 3: " |
| 1300 | "unexpected thread return code"); |
| 1301 | /* NOTREACHED */ |
| 1302 | break; |
| 1303 | |
| 1304 | } /* switch (trc) */ |
| 1305 | |
| 1306 | /* That completes Phase 3 of 3. Return now to the top of the |
| 1307 | main scheduler loop, to Phase 1 of 3. */ |
| 1308 | |
| 1309 | } /* top-level scheduler loop */ |
| 1310 | |
| 1311 | |
| 1312 | /* NOTREACHED */ |
| 1313 | VG_(panic)("scheduler: post-main-loop ?!"); |
| 1314 | /* NOTREACHED */ |
| 1315 | |
| 1316 | debug_stop: |
| 1317 | /* If we exited because of a debug stop, print the translation |
| 1318 | of the last block executed -- by translating it again, and |
| 1319 | throwing away the result. */ |
| 1320 | VG_(printf)( |
| 1321 | "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n"); |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1322 | VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1323 | VG_(printf)("\n"); |
| 1324 | VG_(printf)( |
| 1325 | "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n"); |
| 1326 | |
| 1327 | return VgSrc_BbsDone; |
| 1328 | } |
| 1329 | |
| 1330 | |
| 1331 | /* --------------------------------------------------------------------- |
| 1332 | The pthread implementation. |
| 1333 | ------------------------------------------------------------------ */ |
| 1334 | |
| 1335 | #include <pthread.h> |
| 1336 | #include <errno.h> |
| 1337 | |
| 1338 | #if !defined(PTHREAD_STACK_MIN) |
| 1339 | # define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB) |
| 1340 | #endif |
| 1341 | |
| 1342 | /* /usr/include/bits/pthreadtypes.h: |
| 1343 | typedef unsigned long int pthread_t; |
| 1344 | */ |
| 1345 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1346 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1347 | /* ----------------------------------------------------------- |
| 1348 | Thread CREATION, JOINAGE and CANCELLATION. |
| 1349 | -------------------------------------------------------- */ |
| 1350 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1351 | static |
| 1352 | void do_pthread_cancel ( ThreadId tid_canceller, |
| 1353 | pthread_t tid_cancellee ) |
| 1354 | { |
| 1355 | Char msg_buf[100]; |
| 1356 | /* We want make is appear that this thread has returned to |
| 1357 | do_pthread_create_bogusRA with PTHREAD_CANCELED as the |
| 1358 | return value. So: simple: put PTHREAD_CANCELED into %EAX |
| 1359 | and &do_pthread_create_bogusRA into %EIP and keep going! */ |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1360 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1361 | VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller); |
| 1362 | print_sched_event(tid_cancellee, msg_buf); |
| 1363 | } |
| 1364 | vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED; |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 1365 | vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1366 | vg_threads[tid_cancellee].status = VgTs_Runnable; |
| 1367 | } |
| 1368 | |
| 1369 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1370 | static |
| 1371 | void do_pthread_exit ( ThreadId tid, void* retval ) |
| 1372 | { |
| 1373 | Char msg_buf[100]; |
| 1374 | /* We want make is appear that this thread has returned to |
| 1375 | do_pthread_create_bogusRA with retval as the |
| 1376 | return value. So: simple: put retval into %EAX |
| 1377 | and &do_pthread_create_bogusRA into %EIP and keep going! */ |
| 1378 | if (VG_(clo_trace_sched)) { |
| 1379 | VG_(sprintf)(msg_buf, "exiting with %p", retval); |
| 1380 | print_sched_event(tid, msg_buf); |
| 1381 | } |
| 1382 | vg_threads[tid].m_eax = (UInt)retval; |
| 1383 | vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA); |
| 1384 | vg_threads[tid].status = VgTs_Runnable; |
| 1385 | } |
| 1386 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1387 | |
| 1388 | /* Thread tid is exiting, by returning from the function it was |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 1389 | created with. Or possibly due to pthread_exit or cancellation. |
| 1390 | The main complication here is to resume any thread waiting to join |
| 1391 | with this one. */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1392 | static |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 1393 | void handle_pthread_return ( ThreadId tid, void* retval ) |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1394 | { |
| 1395 | ThreadId jnr; /* joiner, the thread calling pthread_join. */ |
| 1396 | UInt* jnr_args; |
| 1397 | void** jnr_thread_return; |
| 1398 | Char msg_buf[100]; |
| 1399 | |
| 1400 | /* Mark it as not in use. Leave the stack in place so the next |
| 1401 | user of this slot doesn't reallocate it. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1402 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1403 | vg_assert(vg_threads[tid].status != VgTs_Empty); |
| 1404 | |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 1405 | vg_threads[tid].retval = retval; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1406 | |
| 1407 | if (vg_threads[tid].joiner == VG_INVALID_THREADID) { |
| 1408 | /* No one has yet done a join on me */ |
| 1409 | vg_threads[tid].status = VgTs_WaitJoiner; |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1410 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1411 | VG_(sprintf)(msg_buf, |
| 1412 | "root fn returns, waiting for a call pthread_join(%d)", |
| 1413 | tid); |
| 1414 | print_sched_event(tid, msg_buf); |
| 1415 | } |
| 1416 | } else { |
| 1417 | /* Some is waiting; make their join call return with success, |
| 1418 | putting my exit code in the place specified by the caller's |
| 1419 | thread_return param. This is all very horrible, since we |
| 1420 | need to consult the joiner's arg block -- pointed to by its |
| 1421 | %EAX -- in order to extract the 2nd param of its pthread_join |
| 1422 | call. TODO: free properly the slot (also below). |
| 1423 | */ |
| 1424 | jnr = vg_threads[tid].joiner; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1425 | vg_assert(is_valid_tid(jnr)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1426 | vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee); |
| 1427 | jnr_args = (UInt*)vg_threads[jnr].m_eax; |
| 1428 | jnr_thread_return = (void**)(jnr_args[2]); |
| 1429 | if (jnr_thread_return != NULL) |
| 1430 | *jnr_thread_return = vg_threads[tid].retval; |
| 1431 | vg_threads[jnr].m_edx = 0; /* success */ |
| 1432 | vg_threads[jnr].status = VgTs_Runnable; |
| 1433 | vg_threads[tid].status = VgTs_Empty; /* bye! */ |
sewardj | 75fe189 | 2002-04-14 02:46:33 +0000 | [diff] [blame] | 1434 | if (VG_(clo_instrument) && tid != 0) |
| 1435 | VGM_(make_noaccess)( vg_threads[tid].stack_base, |
| 1436 | vg_threads[tid].stack_size ); |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1437 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1438 | VG_(sprintf)(msg_buf, |
| 1439 | "root fn returns, to find a waiting pthread_join(%d)", tid); |
| 1440 | print_sched_event(tid, msg_buf); |
| 1441 | VG_(sprintf)(msg_buf, |
| 1442 | "my pthread_join(%d) returned; resuming", tid); |
| 1443 | print_sched_event(jnr, msg_buf); |
| 1444 | } |
| 1445 | } |
| 1446 | |
| 1447 | /* Return value is irrelevant; this thread will not get |
| 1448 | rescheduled. */ |
| 1449 | } |
| 1450 | |
| 1451 | |
| 1452 | static |
| 1453 | void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return ) |
| 1454 | { |
| 1455 | Char msg_buf[100]; |
| 1456 | |
| 1457 | /* jee, the joinee, is the thread specified as an arg in thread |
| 1458 | tid's call to pthread_join. So tid is the join-er. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1459 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1460 | vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| 1461 | |
| 1462 | if (jee == tid) { |
| 1463 | vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */ |
| 1464 | vg_threads[tid].status = VgTs_Runnable; |
| 1465 | return; |
| 1466 | } |
| 1467 | |
| 1468 | if (jee < 0 |
| 1469 | || jee >= VG_N_THREADS |
| 1470 | || vg_threads[jee].status == VgTs_Empty) { |
| 1471 | /* Invalid thread to join to. */ |
| 1472 | vg_threads[tid].m_edx = EINVAL; |
| 1473 | vg_threads[tid].status = VgTs_Runnable; |
| 1474 | return; |
| 1475 | } |
| 1476 | |
| 1477 | if (vg_threads[jee].joiner != VG_INVALID_THREADID) { |
| 1478 | /* Someone already did join on this thread */ |
| 1479 | vg_threads[tid].m_edx = EINVAL; |
| 1480 | vg_threads[tid].status = VgTs_Runnable; |
| 1481 | return; |
| 1482 | } |
| 1483 | |
| 1484 | /* if (vg_threads[jee].detached) ... */ |
| 1485 | |
| 1486 | /* Perhaps the joinee has already finished? If so return |
| 1487 | immediately with its return code, and free up the slot. TODO: |
| 1488 | free it properly (also above). */ |
| 1489 | if (vg_threads[jee].status == VgTs_WaitJoiner) { |
| 1490 | vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID); |
| 1491 | vg_threads[tid].m_edx = 0; /* success */ |
| 1492 | if (thread_return != NULL) |
| 1493 | *thread_return = vg_threads[jee].retval; |
| 1494 | vg_threads[tid].status = VgTs_Runnable; |
| 1495 | vg_threads[jee].status = VgTs_Empty; /* bye! */ |
sewardj | 75fe189 | 2002-04-14 02:46:33 +0000 | [diff] [blame] | 1496 | if (VG_(clo_instrument) && jee != 0) |
| 1497 | VGM_(make_noaccess)( vg_threads[jee].stack_base, |
| 1498 | vg_threads[jee].stack_size ); |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1499 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1500 | VG_(sprintf)(msg_buf, |
| 1501 | "someone called pthread_join() on me; bye!"); |
| 1502 | print_sched_event(jee, msg_buf); |
| 1503 | VG_(sprintf)(msg_buf, |
| 1504 | "my pthread_join(%d) returned immediately", |
| 1505 | jee ); |
| 1506 | print_sched_event(tid, msg_buf); |
| 1507 | } |
| 1508 | return; |
| 1509 | } |
| 1510 | |
| 1511 | /* Ok, so we'll have to wait on jee. */ |
| 1512 | vg_threads[jee].joiner = tid; |
| 1513 | vg_threads[tid].status = VgTs_WaitJoinee; |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1514 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1515 | VG_(sprintf)(msg_buf, |
| 1516 | "blocking on call of pthread_join(%d)", jee ); |
| 1517 | print_sched_event(tid, msg_buf); |
| 1518 | } |
| 1519 | /* So tid's join call does not return just now. */ |
| 1520 | } |
| 1521 | |
| 1522 | |
| 1523 | static |
| 1524 | void do_pthread_create ( ThreadId parent_tid, |
| 1525 | pthread_t* thread, |
| 1526 | pthread_attr_t* attr, |
| 1527 | void* (*start_routine)(void *), |
| 1528 | void* arg ) |
| 1529 | { |
| 1530 | Addr new_stack; |
| 1531 | UInt new_stk_szb; |
| 1532 | ThreadId tid; |
| 1533 | Char msg_buf[100]; |
| 1534 | |
| 1535 | /* Paranoia ... */ |
| 1536 | vg_assert(sizeof(pthread_t) == sizeof(UInt)); |
| 1537 | |
| 1538 | vg_assert(vg_threads[parent_tid].status != VgTs_Empty); |
| 1539 | |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1540 | tid = vg_alloc_ThreadState(); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1541 | |
| 1542 | /* If we've created the main thread's tid, we're in deep trouble :) */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1543 | vg_assert(tid != 1); |
| 1544 | vg_assert(is_valid_tid(tid)); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1545 | |
| 1546 | /* Copy the parent's CPU state into the child's, in a roundabout |
| 1547 | way (via baseBlock). */ |
| 1548 | VG_(load_thread_state)(parent_tid); |
| 1549 | VG_(save_thread_state)(tid); |
| 1550 | |
| 1551 | /* Consider allocating the child a stack, if the one it already has |
| 1552 | is inadequate. */ |
| 1553 | new_stk_szb = PTHREAD_STACK_MIN; |
| 1554 | |
| 1555 | if (new_stk_szb > vg_threads[tid].stack_size) { |
| 1556 | /* Again, for good measure :) We definitely don't want to be |
| 1557 | allocating a stack for the main thread. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1558 | vg_assert(tid != 1); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1559 | /* for now, we don't handle the case of anything other than |
| 1560 | assigning it for the first time. */ |
| 1561 | vg_assert(vg_threads[tid].stack_size == 0); |
| 1562 | vg_assert(vg_threads[tid].stack_base == (Addr)NULL); |
| 1563 | new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb ); |
| 1564 | vg_threads[tid].stack_base = new_stack; |
| 1565 | vg_threads[tid].stack_size = new_stk_szb; |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1566 | vg_threads[tid].stack_highest_word |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1567 | = new_stack + new_stk_szb |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1568 | - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1569 | } |
sewardj | 1e8cdc9 | 2002-04-18 11:37:52 +0000 | [diff] [blame] | 1570 | |
| 1571 | vg_threads[tid].m_esp |
| 1572 | = vg_threads[tid].stack_base |
| 1573 | + vg_threads[tid].stack_size |
| 1574 | - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; |
| 1575 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1576 | if (VG_(clo_instrument)) |
| 1577 | VGM_(make_noaccess)( vg_threads[tid].m_esp, |
| 1578 | VG_AR_CLIENT_STACKBASE_REDZONE_SZB ); |
| 1579 | |
| 1580 | /* push arg */ |
| 1581 | vg_threads[tid].m_esp -= 4; |
| 1582 | * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg; |
| 1583 | |
| 1584 | /* push (magical) return address */ |
| 1585 | vg_threads[tid].m_esp -= 4; |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 1586 | * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1587 | |
| 1588 | if (VG_(clo_instrument)) |
| 1589 | VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 ); |
| 1590 | |
| 1591 | /* this is where we start */ |
| 1592 | vg_threads[tid].m_eip = (UInt)start_routine; |
| 1593 | |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1594 | if (VG_(clo_trace_sched)) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1595 | VG_(sprintf)(msg_buf, |
| 1596 | "new thread, created by %d", parent_tid ); |
| 1597 | print_sched_event(tid, msg_buf); |
| 1598 | } |
| 1599 | |
| 1600 | /* store the thread id in *thread. */ |
| 1601 | // if (VG_(clo_instrument)) |
| 1602 | // ***** CHECK *thread is writable |
| 1603 | *thread = (pthread_t)tid; |
| 1604 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1605 | vg_threads[tid].associated_mx = NULL; |
| 1606 | vg_threads[tid].associated_cv = NULL; |
| 1607 | vg_threads[tid].joiner = VG_INVALID_THREADID; |
| 1608 | vg_threads[tid].status = VgTs_Runnable; |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1609 | |
| 1610 | /* return zero */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1611 | vg_threads[tid].m_edx = 0; /* success */ |
| 1612 | } |
| 1613 | |
| 1614 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1615 | /* ----------------------------------------------------------- |
| 1616 | MUTEXes |
| 1617 | -------------------------------------------------------- */ |
| 1618 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1619 | /* pthread_mutex_t is a struct with at least 5 words: |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1620 | typedef struct |
| 1621 | { |
| 1622 | int __m_reserved; -- Reserved for future use |
| 1623 | int __m_count; -- Depth of recursive locking |
| 1624 | _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck) |
| 1625 | int __m_kind; -- Mutex kind: fast, recursive or errcheck |
| 1626 | struct _pthread_fastlock __m_lock; -- Underlying fast lock |
| 1627 | } pthread_mutex_t; |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1628 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1629 | #define PTHREAD_MUTEX_INITIALIZER \ |
| 1630 | {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER} |
| 1631 | # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \ |
| 1632 | {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER} |
| 1633 | # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \ |
| 1634 | {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER} |
| 1635 | # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ |
| 1636 | {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER} |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1637 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1638 | How we use it: |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1639 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1640 | __m_kind never changes and indicates whether or not it is recursive. |
| 1641 | |
| 1642 | __m_count indicates the lock count; if 0, the mutex is not owned by |
| 1643 | anybody. |
| 1644 | |
| 1645 | __m_owner has a ThreadId value stuffed into it. We carefully arrange |
| 1646 | that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that |
| 1647 | statically initialised mutexes correctly appear |
| 1648 | to belong to nobody. |
| 1649 | |
| 1650 | In summary, a not-in-use mutex is distinguished by having __m_owner |
| 1651 | == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those |
| 1652 | conditions holds, the other should too. |
| 1653 | |
| 1654 | There is no linked list of threads waiting for this mutex. Instead |
| 1655 | a thread in WaitMX state points at the mutex with its associated_mx |
| 1656 | field. This makes _unlock() inefficient, but makes it simple to |
| 1657 | implement the right semantics vis-a-vis signals. |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1658 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1659 | We don't have to deal with mutex initialisation; the client side |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1660 | deals with that for us. |
| 1661 | */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1662 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1663 | /* Helper fns ... */ |
| 1664 | static |
| 1665 | void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex, |
| 1666 | Char* caller ) |
| 1667 | { |
| 1668 | Int i; |
| 1669 | Char msg_buf[100]; |
| 1670 | |
| 1671 | /* Find some arbitrary thread waiting on this mutex, and make it |
| 1672 | runnable. If none are waiting, mark the mutex as not held. */ |
| 1673 | for (i = 1; i < VG_N_THREADS; i++) { |
| 1674 | if (vg_threads[i].status == VgTs_Empty) |
| 1675 | continue; |
| 1676 | if (vg_threads[i].status == VgTs_WaitMX |
| 1677 | && vg_threads[i].associated_mx == mutex) |
| 1678 | break; |
| 1679 | } |
| 1680 | |
| 1681 | vg_assert(i <= VG_N_THREADS); |
| 1682 | if (i == VG_N_THREADS) { |
| 1683 | /* Nobody else is waiting on it. */ |
| 1684 | mutex->__m_count = 0; |
| 1685 | mutex->__m_owner = VG_INVALID_THREADID; |
| 1686 | } else { |
| 1687 | /* Notionally transfer the hold to thread i, whose |
| 1688 | pthread_mutex_lock() call now returns with 0 (success). */ |
| 1689 | /* The .count is already == 1. */ |
| 1690 | vg_assert(vg_threads[i].associated_mx == mutex); |
| 1691 | mutex->__m_owner = (_pthread_descr)i; |
| 1692 | vg_threads[i].status = VgTs_Runnable; |
| 1693 | vg_threads[i].associated_mx = NULL; |
| 1694 | vg_threads[i].m_edx = 0; /* pth_lock() success */ |
| 1695 | |
| 1696 | if (VG_(clo_trace_pthread_level) >= 1) { |
| 1697 | VG_(sprintf)(msg_buf, "%s mx %p: RESUME", |
| 1698 | caller, mutex ); |
| 1699 | print_pthread_event(i, msg_buf); |
| 1700 | } |
| 1701 | } |
| 1702 | } |
| 1703 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1704 | |
| 1705 | static |
| 1706 | void do_pthread_mutex_lock( ThreadId tid, pthread_mutex_t *mutex ) |
| 1707 | { |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1708 | Char msg_buf[100]; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1709 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1710 | if (VG_(clo_trace_pthread_level) >= 2) { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1711 | VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p ...", mutex ); |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1712 | print_pthread_event(tid, msg_buf); |
| 1713 | } |
| 1714 | |
| 1715 | /* Paranoia ... */ |
| 1716 | vg_assert(is_valid_tid(tid) |
| 1717 | && vg_threads[tid].status == VgTs_Runnable); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1718 | |
| 1719 | /* POSIX doesn't mandate this, but for sanity ... */ |
| 1720 | if (mutex == NULL) { |
| 1721 | vg_threads[tid].m_edx = EINVAL; |
| 1722 | return; |
| 1723 | } |
| 1724 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1725 | /* More paranoia ... */ |
| 1726 | switch (mutex->__m_kind) { |
| 1727 | case PTHREAD_MUTEX_TIMED_NP: |
| 1728 | case PTHREAD_MUTEX_RECURSIVE_NP: |
| 1729 | case PTHREAD_MUTEX_ERRORCHECK_NP: |
| 1730 | case PTHREAD_MUTEX_ADAPTIVE_NP: |
| 1731 | if (mutex->__m_count >= 0) break; |
| 1732 | /* else fall thru */ |
| 1733 | default: |
| 1734 | vg_threads[tid].m_edx = EINVAL; |
| 1735 | return; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1736 | } |
| 1737 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1738 | if (mutex->__m_count > 0) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1739 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1740 | vg_assert(is_valid_tid((ThreadId)mutex->__m_owner)); |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1741 | |
| 1742 | /* Someone has it already. */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1743 | if ((ThreadId)mutex->__m_owner == tid) { |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1744 | /* It's locked -- by me! */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1745 | if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) { |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1746 | /* return 0 (success). */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1747 | mutex->__m_count++; |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1748 | vg_threads[tid].m_edx = 0; |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1749 | VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n", |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1750 | tid, mutex, mutex->__m_count); |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1751 | return; |
| 1752 | } else { |
| 1753 | vg_threads[tid].m_edx = EDEADLK; |
| 1754 | return; |
| 1755 | } |
| 1756 | } else { |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1757 | /* Someone else has it; we have to wait. Mark ourselves |
| 1758 | thusly. */ |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1759 | vg_threads[tid].status = VgTs_WaitMX; |
| 1760 | vg_threads[tid].associated_mx = mutex; |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1761 | /* No assignment to %EDX, since we're blocking. */ |
| 1762 | if (VG_(clo_trace_pthread_level) >= 1) { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1763 | VG_(sprintf)(msg_buf, "pthread_mutex_lock mx %p: BLOCK", |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1764 | mutex ); |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1765 | print_pthread_event(tid, msg_buf); |
| 1766 | } |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1767 | return; |
| 1768 | } |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1769 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1770 | } else { |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1771 | /* Nobody owns it. Sanity check ... */ |
| 1772 | vg_assert(mutex->__m_owner == VG_INVALID_THREADID); |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1773 | /* We get it! [for the first time]. */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1774 | mutex->__m_count = 1; |
| 1775 | mutex->__m_owner = (_pthread_descr)tid; |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1776 | vg_assert(vg_threads[tid].associated_mx == NULL); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1777 | /* return 0 (success). */ |
| 1778 | vg_threads[tid].m_edx = 0; |
| 1779 | } |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1780 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1781 | } |
| 1782 | |
| 1783 | |
| 1784 | static |
| 1785 | void do_pthread_mutex_unlock ( ThreadId tid, |
| 1786 | pthread_mutex_t *mutex ) |
| 1787 | { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1788 | Char msg_buf[100]; |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1789 | |
sewardj | 45b4b37 | 2002-04-16 22:50:32 +0000 | [diff] [blame] | 1790 | if (VG_(clo_trace_pthread_level) >= 2) { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1791 | VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex ); |
sewardj | 8937c81 | 2002-04-12 20:12:20 +0000 | [diff] [blame] | 1792 | print_pthread_event(tid, msg_buf); |
| 1793 | } |
| 1794 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1795 | /* Paranoia ... */ |
| 1796 | vg_assert(is_valid_tid(tid) |
| 1797 | && vg_threads[tid].status == VgTs_Runnable); |
| 1798 | |
| 1799 | if (mutex == NULL) { |
| 1800 | vg_threads[tid].m_edx = EINVAL; |
| 1801 | return; |
| 1802 | } |
| 1803 | |
| 1804 | /* More paranoia ... */ |
| 1805 | switch (mutex->__m_kind) { |
| 1806 | case PTHREAD_MUTEX_TIMED_NP: |
| 1807 | case PTHREAD_MUTEX_RECURSIVE_NP: |
| 1808 | case PTHREAD_MUTEX_ERRORCHECK_NP: |
| 1809 | case PTHREAD_MUTEX_ADAPTIVE_NP: |
| 1810 | if (mutex->__m_count >= 0) break; |
| 1811 | /* else fall thru */ |
| 1812 | default: |
| 1813 | vg_threads[tid].m_edx = EINVAL; |
| 1814 | return; |
| 1815 | } |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1816 | |
| 1817 | /* Barf if we don't currently hold the mutex. */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1818 | if (mutex->__m_count == 0 /* nobody holds it */ |
| 1819 | || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) { |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1820 | vg_threads[tid].m_edx = EPERM; |
| 1821 | return; |
| 1822 | } |
| 1823 | |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1824 | /* If it's a multiply-locked recursive mutex, just decrement the |
| 1825 | lock count and return. */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1826 | if (mutex->__m_count > 1) { |
| 1827 | vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP); |
| 1828 | mutex->__m_count --; |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1829 | vg_threads[tid].m_edx = 0; /* success */ |
| 1830 | return; |
| 1831 | } |
| 1832 | |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1833 | /* Now we're sure it is locked exactly once, and by the thread who |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1834 | is now doing an unlock on it. */ |
sewardj | 604ec3c | 2002-04-18 22:38:41 +0000 | [diff] [blame] | 1835 | vg_assert(mutex->__m_count == 1); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1836 | vg_assert((ThreadId)mutex->__m_owner == tid); |
sewardj | f8f819e | 2002-04-17 23:21:37 +0000 | [diff] [blame] | 1837 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1838 | /* Release at max one thread waiting on this mutex. */ |
| 1839 | release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1840 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1841 | /* Our (tid's) pth_unlock() returns with 0 (success). */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1842 | vg_threads[tid].m_edx = 0; /* Success. */ |
| 1843 | } |
| 1844 | |
| 1845 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1846 | /* ----------------------------------------------------------- |
| 1847 | CONDITION VARIABLES |
| 1848 | -------------------------------------------------------- */ |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 1849 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1850 | /* The relevant native types are as follows: |
| 1851 | (copied from /usr/include/bits/pthreadtypes.h) |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 1852 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1853 | -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER |
| 1854 | typedef struct |
| 1855 | { |
| 1856 | struct _pthread_fastlock __c_lock; -- Protect against concurrent access |
| 1857 | _pthread_descr __c_waiting; -- Threads waiting on this condition |
| 1858 | } pthread_cond_t; |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 1859 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1860 | -- Attribute for conditionally variables. |
| 1861 | typedef struct |
| 1862 | { |
| 1863 | int __dummy; |
| 1864 | } pthread_condattr_t; |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 1865 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1866 | #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0} |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 1867 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1868 | We don't use any fields of pthread_cond_t for anything at all. |
| 1869 | Only the identity of the CVs is important. |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1870 | |
| 1871 | Linux pthreads supports no attributes on condition variables, so we |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1872 | don't need to think too hard there. */ |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 1873 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 1874 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 1875 | static |
| 1876 | void release_N_threads_waiting_on_cond ( pthread_cond_t* cond, |
| 1877 | Int n_to_release, |
| 1878 | Char* caller ) |
| 1879 | { |
| 1880 | Int i; |
| 1881 | Char msg_buf[100]; |
| 1882 | pthread_mutex_t* mx; |
| 1883 | |
| 1884 | while (True) { |
| 1885 | if (n_to_release == 0) |
| 1886 | return; |
| 1887 | |
| 1888 | /* Find a thread waiting on this CV. */ |
| 1889 | for (i = 1; i < VG_N_THREADS; i++) { |
| 1890 | if (vg_threads[i].status == VgTs_Empty) |
| 1891 | continue; |
| 1892 | if (vg_threads[i].status == VgTs_WaitCV |
| 1893 | && vg_threads[i].associated_cv == cond) |
| 1894 | break; |
| 1895 | } |
| 1896 | vg_assert(i <= VG_N_THREADS); |
| 1897 | |
| 1898 | if (i == VG_N_THREADS) { |
| 1899 | /* Nobody else is waiting on it. */ |
| 1900 | return; |
| 1901 | } |
| 1902 | |
| 1903 | mx = vg_threads[i].associated_mx; |
| 1904 | vg_assert(mx != NULL); |
| 1905 | |
| 1906 | if (mx->__m_owner == VG_INVALID_THREADID) { |
| 1907 | /* Currently unheld; hand it out to thread i. */ |
| 1908 | vg_assert(mx->__m_count == 0); |
| 1909 | vg_threads[i].status = VgTs_Runnable; |
| 1910 | vg_threads[i].associated_cv = NULL; |
| 1911 | vg_threads[i].associated_mx = NULL; |
| 1912 | mx->__m_owner = (_pthread_descr)i; |
| 1913 | mx->__m_count = 1; |
| 1914 | vg_threads[i].m_edx = 0; /* pthread_cond_wait returns success */ |
| 1915 | |
| 1916 | if (VG_(clo_trace_pthread_level) >= 1) { |
| 1917 | VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p", |
| 1918 | caller, cond, mx ); |
| 1919 | print_pthread_event(i, msg_buf); |
| 1920 | } |
| 1921 | |
| 1922 | } else { |
| 1923 | /* Currently held. Make thread i be blocked on it. */ |
| 1924 | vg_threads[i].status = VgTs_WaitMX; |
| 1925 | vg_threads[i].associated_cv = NULL; |
| 1926 | vg_threads[i].associated_mx = mx; |
| 1927 | |
| 1928 | if (VG_(clo_trace_pthread_level) >= 1) { |
| 1929 | VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p", |
| 1930 | caller, cond, mx ); |
| 1931 | print_pthread_event(i, msg_buf); |
| 1932 | } |
| 1933 | |
| 1934 | } |
| 1935 | |
| 1936 | n_to_release--; |
| 1937 | } |
| 1938 | } |
| 1939 | |
| 1940 | |
| 1941 | static |
| 1942 | void do_pthread_cond_wait ( ThreadId tid, |
| 1943 | pthread_cond_t *cond, |
| 1944 | pthread_mutex_t *mutex ) |
| 1945 | { |
| 1946 | Char msg_buf[100]; |
| 1947 | |
| 1948 | /* pre: mutex should be a valid mutex and owned by tid. */ |
| 1949 | if (VG_(clo_trace_pthread_level) >= 2) { |
| 1950 | VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p ...", |
| 1951 | cond, mutex ); |
| 1952 | print_pthread_event(tid, msg_buf); |
| 1953 | } |
| 1954 | |
| 1955 | /* Paranoia ... */ |
| 1956 | vg_assert(is_valid_tid(tid) |
| 1957 | && vg_threads[tid].status == VgTs_Runnable); |
| 1958 | |
| 1959 | if (mutex == NULL || cond == NULL) { |
| 1960 | vg_threads[tid].m_edx = EINVAL; |
| 1961 | return; |
| 1962 | } |
| 1963 | |
| 1964 | /* More paranoia ... */ |
| 1965 | switch (mutex->__m_kind) { |
| 1966 | case PTHREAD_MUTEX_TIMED_NP: |
| 1967 | case PTHREAD_MUTEX_RECURSIVE_NP: |
| 1968 | case PTHREAD_MUTEX_ERRORCHECK_NP: |
| 1969 | case PTHREAD_MUTEX_ADAPTIVE_NP: |
| 1970 | if (mutex->__m_count >= 0) break; |
| 1971 | /* else fall thru */ |
| 1972 | default: |
| 1973 | vg_threads[tid].m_edx = EINVAL; |
| 1974 | return; |
| 1975 | } |
| 1976 | |
| 1977 | /* Barf if we don't currently hold the mutex. */ |
| 1978 | if (mutex->__m_count == 0 /* nobody holds it */ |
| 1979 | || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) { |
| 1980 | vg_threads[tid].m_edx = EINVAL; |
| 1981 | return; |
| 1982 | } |
| 1983 | |
| 1984 | /* Queue ourselves on the condition. */ |
| 1985 | vg_threads[tid].status = VgTs_WaitCV; |
| 1986 | vg_threads[tid].associated_cv = cond; |
| 1987 | vg_threads[tid].associated_mx = mutex; |
| 1988 | |
| 1989 | if (VG_(clo_trace_pthread_level) >= 1) { |
| 1990 | VG_(sprintf)(msg_buf, |
| 1991 | "pthread_cond_wait cv %p, mx %p: BLOCK", |
| 1992 | cond, mutex ); |
| 1993 | print_pthread_event(tid, msg_buf); |
| 1994 | } |
| 1995 | |
| 1996 | /* Release the mutex. */ |
| 1997 | release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " ); |
| 1998 | } |
| 1999 | |
| 2000 | |
| 2001 | static |
| 2002 | void do_pthread_cond_signal_or_broadcast ( ThreadId tid, |
| 2003 | Bool broadcast, |
| 2004 | pthread_cond_t *cond ) |
| 2005 | { |
| 2006 | Char msg_buf[100]; |
| 2007 | Char* caller |
| 2008 | = broadcast ? "pthread_cond_broadcast" |
| 2009 | : "pthread_cond_signal "; |
| 2010 | |
| 2011 | if (VG_(clo_trace_pthread_level) >= 2) { |
| 2012 | VG_(sprintf)(msg_buf, "%s cv %p ...", |
| 2013 | caller, cond ); |
| 2014 | print_pthread_event(tid, msg_buf); |
| 2015 | } |
| 2016 | |
| 2017 | /* Paranoia ... */ |
| 2018 | vg_assert(is_valid_tid(tid) |
| 2019 | && vg_threads[tid].status == VgTs_Runnable); |
| 2020 | |
| 2021 | if (cond == NULL) { |
| 2022 | vg_threads[tid].m_edx = EINVAL; |
| 2023 | return; |
| 2024 | } |
| 2025 | |
| 2026 | release_N_threads_waiting_on_cond ( |
| 2027 | cond, |
| 2028 | broadcast ? VG_N_THREADS : 1, |
| 2029 | caller |
| 2030 | ); |
| 2031 | |
| 2032 | vg_threads[tid].m_edx = 0; /* success */ |
| 2033 | } |
| 2034 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 2035 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2036 | /* --------------------------------------------------------------------- |
| 2037 | Handle non-trivial client requests. |
| 2038 | ------------------------------------------------------------------ */ |
| 2039 | |
| 2040 | static |
| 2041 | void do_nontrivial_clientreq ( ThreadId tid ) |
| 2042 | { |
| 2043 | UInt* arg = (UInt*)(vg_threads[tid].m_eax); |
| 2044 | UInt req_no = arg[0]; |
| 2045 | switch (req_no) { |
| 2046 | |
| 2047 | case VG_USERREQ__PTHREAD_CREATE: |
| 2048 | do_pthread_create( tid, |
| 2049 | (pthread_t*)arg[1], |
| 2050 | (pthread_attr_t*)arg[2], |
| 2051 | (void*(*)(void*))arg[3], |
| 2052 | (void*)arg[4] ); |
| 2053 | break; |
| 2054 | |
sewardj | bc5b99f | 2002-04-13 00:08:51 +0000 | [diff] [blame] | 2055 | case VG_USERREQ__PTHREAD_RETURNS: |
| 2056 | handle_pthread_return( tid, (void*)arg[1] ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2057 | break; |
| 2058 | |
| 2059 | case VG_USERREQ__PTHREAD_JOIN: |
| 2060 | do_pthread_join( tid, arg[1], (void**)(arg[2]) ); |
| 2061 | break; |
| 2062 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2063 | case VG_USERREQ__PTHREAD_MUTEX_LOCK: |
| 2064 | do_pthread_mutex_lock( tid, (pthread_mutex_t *)(arg[1]) ); |
| 2065 | break; |
| 2066 | |
| 2067 | case VG_USERREQ__PTHREAD_MUTEX_UNLOCK: |
| 2068 | do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) ); |
| 2069 | break; |
| 2070 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2071 | case VG_USERREQ__PTHREAD_CANCEL: |
| 2072 | do_pthread_cancel( tid, (pthread_t)(arg[1]) ); |
| 2073 | break; |
| 2074 | |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 2075 | case VG_USERREQ__PTHREAD_EXIT: |
| 2076 | do_pthread_exit( tid, (void*)(arg[1]) ); |
| 2077 | break; |
| 2078 | |
| 2079 | case VG_USERREQ__PTHREAD_COND_WAIT: |
| 2080 | do_pthread_cond_wait( tid, |
| 2081 | (pthread_cond_t *)(arg[1]), |
| 2082 | (pthread_mutex_t *)(arg[2]) ); |
| 2083 | break; |
| 2084 | |
| 2085 | case VG_USERREQ__PTHREAD_COND_SIGNAL: |
| 2086 | do_pthread_cond_signal_or_broadcast( |
| 2087 | tid, |
| 2088 | False, /* signal, not broadcast */ |
| 2089 | (pthread_cond_t *)(arg[1]) ); |
| 2090 | break; |
| 2091 | |
| 2092 | case VG_USERREQ__PTHREAD_COND_BROADCAST: |
| 2093 | do_pthread_cond_signal_or_broadcast( |
| 2094 | tid, |
| 2095 | True, /* broadcast, not signal */ |
| 2096 | (pthread_cond_t *)(arg[1]) ); |
| 2097 | break; |
| 2098 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2099 | case VG_USERREQ__MAKE_NOACCESS: |
| 2100 | case VG_USERREQ__MAKE_WRITABLE: |
| 2101 | case VG_USERREQ__MAKE_READABLE: |
| 2102 | case VG_USERREQ__DISCARD: |
| 2103 | case VG_USERREQ__CHECK_WRITABLE: |
| 2104 | case VG_USERREQ__CHECK_READABLE: |
| 2105 | case VG_USERREQ__MAKE_NOACCESS_STACK: |
| 2106 | case VG_USERREQ__RUNNING_ON_VALGRIND: |
| 2107 | case VG_USERREQ__DO_LEAK_CHECK: |
sewardj | 8c82451 | 2002-04-14 04:16:48 +0000 | [diff] [blame] | 2108 | vg_threads[tid].m_edx |
| 2109 | = VG_(handle_client_request) ( &vg_threads[tid], arg ); |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2110 | break; |
| 2111 | |
sewardj | 77e466c | 2002-04-14 02:29:29 +0000 | [diff] [blame] | 2112 | case VG_USERREQ__SIGNAL_RETURNS: |
| 2113 | handle_signal_return(tid); |
| 2114 | break; |
sewardj | 54cacf0 | 2002-04-12 23:24:59 +0000 | [diff] [blame] | 2115 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2116 | default: |
| 2117 | VG_(printf)("panic'd on private request = 0x%x\n", arg[0] ); |
| 2118 | VG_(panic)("handle_private_client_pthread_request: " |
| 2119 | "unknown request"); |
| 2120 | /*NOTREACHED*/ |
| 2121 | break; |
| 2122 | } |
| 2123 | } |
| 2124 | |
| 2125 | |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 2126 | /* --------------------------------------------------------------------- |
| 2127 | Sanity checking. |
| 2128 | ------------------------------------------------------------------ */ |
| 2129 | |
| 2130 | /* Internal consistency checks on the sched/pthread structures. */ |
| 2131 | static |
| 2132 | void scheduler_sanity ( void ) |
| 2133 | { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 2134 | pthread_mutex_t* mx; |
| 2135 | pthread_cond_t* cv; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 2136 | Int i; |
| 2137 | /* VG_(printf)("scheduler_sanity\n"); */ |
| 2138 | for (i = 1; i < VG_N_THREADS; i++) { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 2139 | mx = vg_threads[i].associated_mx; |
| 2140 | cv = vg_threads[i].associated_cv; |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 2141 | if (vg_threads[i].status == VgTs_WaitMX) { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 2142 | vg_assert(cv == NULL); |
| 2143 | vg_assert(mx != NULL); |
| 2144 | vg_assert(mx->__m_count > 0); |
| 2145 | vg_assert(is_valid_tid((ThreadId)mx->__m_owner)); |
| 2146 | vg_assert(i != (ThreadId)mx->__m_owner); |
| 2147 | /* otherwise thread i would be deadlocked. */ |
| 2148 | } else |
| 2149 | if (vg_threads[i].status == VgTs_WaitCV) { |
| 2150 | vg_assert(cv != NULL); |
| 2151 | vg_assert(mx != NULL); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 2152 | } else { |
sewardj | 3b5d886 | 2002-04-20 13:53:23 +0000 | [diff] [blame^] | 2153 | vg_assert(cv == NULL); |
| 2154 | vg_assert(mx == NULL); |
sewardj | 6072c36 | 2002-04-19 14:40:57 +0000 | [diff] [blame] | 2155 | } |
| 2156 | } |
| 2157 | } |
| 2158 | |
| 2159 | |
sewardj | e663cb9 | 2002-04-12 10:26:32 +0000 | [diff] [blame] | 2160 | /*--------------------------------------------------------------------*/ |
| 2161 | /*--- end vg_scheduler.c ---*/ |
| 2162 | /*--------------------------------------------------------------------*/ |