| |
| /*--------------------------------------------------------------------*/ |
| /*--- A user-space pthreads implementation. vg_scheduler.c ---*/ |
| /*--------------------------------------------------------------------*/ |
| |
| /* |
| This file is part of Valgrind, an x86 protected-mode emulator |
| designed for debugging and profiling binaries on x86-Unixes. |
| |
| Copyright (C) 2000-2002 Julian Seward |
| jseward@acm.org |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307, USA. |
| |
| The GNU General Public License is contained in the file LICENSE. |
| */ |
| |
| #include "vg_include.h" |
| #include "vg_constants.h" |
| |
| #include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and |
| VG_USERREQ__DO_LEAK_CHECK */ |
| |
| /* BORKAGE/ISSUES as of 14 Apr 02 |
| |
| Note! This pthreads implementation is so poor as to not be |
| suitable for use by anyone at all! |
| |
| - Currently, when a signal is delivered, just the ThreadState.status |
| field is saved in the signal frame, along with the CPU state. Question: |
| should I also save and restore: |
| ThreadState.joiner |
| ThreadState.associated_mx |
| ThreadState.awaken_at |
| ThreadState.retval |
| Currently unsure, and so am not doing so. |
| |
| - Signals interrupting read/write and nanosleep: SA_RESTART settings. |
| Read/write correctly return with EINTR when SA_RESTART isn't |
| specified and they are interrupted by a signal. nanosleep just |
| pretends signals don't exist -- should be fixed. |
| |
| - Read/write syscall starts: don't crap out when the initial |
| nonblocking read/write returns an error. |
| |
| - Get rid of restrictions re use of sigaltstack; they are no longer |
| needed. |
| |
| - Fix signals properly, so that each thread has its own blocking mask. |
| Currently this isn't done, and (worse?) signals are delivered to |
| Thread 1 (the root thread) regardless. |
| |
| So, what's the deal with signals and mutexes? If a thread is |
| blocked on a mutex, or for a condition variable for that matter, can |
| signals still be delivered to it? This has serious consequences -- |
| deadlocks, etc. |
| |
| */ |
| |
| |
| /* --------------------------------------------------------------------- |
| Types and globals for the scheduler. |
| ------------------------------------------------------------------ */ |
| |
| /* type ThreadId is defined in vg_include.h. */ |
| |
| /* struct ThreadState is defined in vg_include.h. */ |
| |
| /* Private globals. A statically allocated array of threads. NOTE: |
| [0] is never used, to simplify the simulation of initialisers for |
| LinuxThreads. */ |
| static ThreadState vg_threads[VG_N_THREADS]; |
| |
| /* The tid of the thread currently in VG_(baseBlock). */ |
| static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; |
| |
| |
| /* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ |
| jmp_buf VG_(scheduler_jmpbuf); |
| /* ... and if so, here's the signal which caused it to do so. */ |
| Int VG_(longjmpd_on_signal); |
| |
| |
| /* Machinery to keep track of which threads are waiting on which |
| fds. */ |
| typedef |
| struct { |
| /* The thread which made the request. */ |
| ThreadId tid; |
| |
| /* The next two fields describe the request. */ |
| /* File descriptor waited for. -1 means this slot is not in use */ |
| Int fd; |
| /* The syscall number the fd is used in. */ |
| Int syscall_no; |
| |
| /* False => still waiting for select to tell us the fd is ready |
| to go. True => the fd is ready, but the results have not yet |
| been delivered back to the calling thread. Once the latter |
| happens, this entire record is marked as no longer in use, by |
| making the fd field be -1. */ |
| Bool ready; |
| } |
| VgWaitedOnFd; |
| |
| static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS]; |
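| |
| /* Lifecycle of a slot (summarising the machinery below): |
| add_waiting_fd() fills a free slot with ready == False; |
| poll_for_ready_fds() later sets ready = True once select() reports |
| the fd usable; complete_blocked_syscalls() then re-runs the syscall, |
| delivers the result, and frees the slot by setting fd back to -1. */ |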
| |
| |
| /* Forwards */ |
| static void do_nontrivial_clientreq ( ThreadId tid ); |
| |
| static void scheduler_sanity ( void ); |
| |
| |
| /* --------------------------------------------------------------------- |
| Helper functions for the scheduler. |
| ------------------------------------------------------------------ */ |
| |
| static __inline__ |
| Bool is_valid_tid ( ThreadId tid ) |
| { |
| /* tid is unsigned, hence no < 0 test. */ |
| if (tid == 0) return False; |
| if (tid >= VG_N_THREADS) return False; |
| return True; |
| } |
| |
| |
| /* For constructing error messages only: try to identify a thread |
| whose stack this address currently falls within, or return |
| VG_INVALID_THREADID if it doesn't fall within any. A small |
| complication is dealing with any currently VG_(baseBlock)-resident |
| thread. |
| */ |
| ThreadId VG_(identify_stack_addr)( Addr a ) |
| { |
| ThreadId tid, tid_to_skip; |
| |
| tid_to_skip = VG_INVALID_THREADID; |
| |
| /* First check to see if there's a currently-loaded thread in |
| VG_(baseBlock). */ |
| if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) { |
| tid = vg_tid_currently_in_baseBlock; |
| if (VG_(baseBlock)[VGOFF_(m_esp)] <= a |
| && a <= vg_threads[tid].stack_highest_word) |
| return tid; |
| else |
| tid_to_skip = tid; |
| } |
| |
| for (tid = 1; tid < VG_N_THREADS; tid++) { |
| if (vg_threads[tid].status == VgTs_Empty) continue; |
| if (tid == tid_to_skip) continue; |
| if (vg_threads[tid].m_esp <= a |
| && a <= vg_threads[tid].stack_highest_word) |
| return tid; |
| } |
| return VG_INVALID_THREADID; |
| } |
| |
| |
| /* Print the scheduler status. */ |
| void VG_(pp_sched_status) ( void ) |
| { |
| Int i; |
| VG_(printf)("\nsched status:\n"); |
| for (i = 1; i < VG_N_THREADS; i++) { |
| if (vg_threads[i].status == VgTs_Empty) continue; |
| VG_(printf)("\nThread %d: status = ", i); |
| switch (vg_threads[i].status) { |
| case VgTs_Runnable: VG_(printf)("Runnable"); break; |
| case VgTs_WaitFD: VG_(printf)("WaitFD"); break; |
| case VgTs_WaitJoiner: VG_(printf)("WaitJoiner(%d)", |
| vg_threads[i].joiner); break; |
| case VgTs_WaitJoinee: VG_(printf)("WaitJoinee"); break; |
| case VgTs_Sleeping: VG_(printf)("Sleeping"); break; |
| case VgTs_WaitMX: VG_(printf)("WaitMX"); break; |
| case VgTs_WaitCV: VG_(printf)("WaitCV"); break; |
| default: VG_(printf)("???"); break; |
| } |
| VG_(printf)(", associated_mx = %p, associated_cv = %p\n", |
| vg_threads[i].associated_mx, |
| vg_threads[i].associated_cv ); |
| VG_(pp_ExeContext)( |
| VG_(get_ExeContext)( False, vg_threads[i].m_eip, |
| vg_threads[i].m_ebp )); |
| } |
| VG_(printf)("\n"); |
| } |
| |
| static |
| void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no ) |
| { |
| Int i; |
| |
| vg_assert(fd != -1); /* avoid total chaos */ |
| |
| for (i = 0; i < VG_N_WAITING_FDS; i++) |
| if (vg_waiting_fds[i].fd == -1) |
| break; |
| |
| if (i == VG_N_WAITING_FDS) |
| VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low"); |
| /* |
| VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", |
| tid, fd, i); |
| */ |
| vg_waiting_fds[i].fd = fd; |
| vg_waiting_fds[i].tid = tid; |
| vg_waiting_fds[i].ready = False; |
| vg_waiting_fds[i].syscall_no = syscall_no; |
| } |
| |
| |
| |
| static |
| void print_sched_event ( ThreadId tid, Char* what ) |
| { |
| VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what ); |
| } |
| |
| |
| static |
| void print_pthread_event ( ThreadId tid, Char* what ) |
| { |
| VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what ); |
| } |
| |
| |
| static |
| Char* name_of_sched_event ( UInt event ) |
| { |
| switch (event) { |
| case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL"; |
| case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ"; |
| case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO"; |
| case VG_TRC_INNER_FASTMISS: return "FASTMISS"; |
| case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL"; |
| default: return "??UNKNOWN??"; |
| } |
| } |
| |
| |
| /* Create a translation of the client basic block beginning at |
| orig_addr, and add it to the translation cache & translation table. |
| This probably doesn't really belong here, but, hey ... |
| */ |
| static |
| void create_translation_for ( ThreadId tid, Addr orig_addr ) |
| { |
| Addr trans_addr; |
| TTEntry tte; |
| Int orig_size, trans_size; |
| /* Ensure there is space to hold a translation. */ |
| VG_(maybe_do_lru_pass)(); |
| VG_(translate)( &vg_threads[tid], |
| orig_addr, &orig_size, &trans_addr, &trans_size ); |
| /* Copy data at trans_addr into the translation cache. |
| Returned pointer is to the code, not to the 4-byte |
| header. */ |
| /* Since the .orig_size and .trans_size fields are |
| UShort, be paranoid. */ |
| vg_assert(orig_size > 0 && orig_size < 65536); |
| vg_assert(trans_size > 0 && trans_size < 65536); |
| tte.orig_size = orig_size; |
| tte.orig_addr = orig_addr; |
| tte.trans_size = trans_size; |
| tte.trans_addr = VG_(copy_to_transcache) |
| ( trans_addr, trans_size ); |
| tte.mru_epoch = VG_(current_epoch); |
| /* Free the intermediary -- was allocated by VG_(emit_code). */ |
| VG_(jitfree)( (void*)trans_addr ); |
| /* Add to trans tab and set back pointer. */ |
| VG_(add_to_trans_tab) ( &tte ); |
| /* Update stats. */ |
| VG_(this_epoch_in_count) ++; |
| VG_(this_epoch_in_osize) += orig_size; |
| VG_(this_epoch_in_tsize) += trans_size; |
| VG_(overall_in_count) ++; |
| VG_(overall_in_osize) += orig_size; |
| VG_(overall_in_tsize) += trans_size; |
| /* Record translated area for SMC detection. */ |
| VG_(smc_mark_original) ( orig_addr, orig_size ); |
| } |
| |
| |
| /* Allocate a completely empty ThreadState record. */ |
| static |
| ThreadId vg_alloc_ThreadState ( void ) |
| { |
| Int i; |
| for (i = 1; i < VG_N_THREADS; i++) { |
| if (vg_threads[i].status == VgTs_Empty) |
| return i; |
| } |
| VG_(printf)("vg_alloc_ThreadState: no free slots available\n"); |
| VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n"); |
| VG_(panic)("VG_N_THREADS is too low"); |
| /*NOTREACHED*/ |
| } |
| |
| |
| ThreadState* VG_(get_thread_state) ( ThreadId tid ) |
| { |
| vg_assert(is_valid_tid(tid)); |
| vg_assert(vg_threads[tid].status != VgTs_Empty); |
| return & vg_threads[tid]; |
| } |
| |
| |
| ThreadState* VG_(get_current_thread_state) ( void ) |
| { |
| vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
| return VG_(get_thread_state) ( vg_tid_currently_in_baseBlock ); |
| } |
| |
| |
| ThreadId VG_(get_current_tid) ( void ) |
| { |
| vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
| return vg_tid_currently_in_baseBlock; |
| } |
| |
| |
| /* Copy the saved state of a thread into VG_(baseBlock), ready for it |
| to be run. */ |
| __inline__ |
| void VG_(load_thread_state) ( ThreadId tid ) |
| { |
| Int i; |
| vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); |
| |
| VG_(baseBlock)[VGOFF_(m_eax)] = vg_threads[tid].m_eax; |
| VG_(baseBlock)[VGOFF_(m_ebx)] = vg_threads[tid].m_ebx; |
| VG_(baseBlock)[VGOFF_(m_ecx)] = vg_threads[tid].m_ecx; |
| VG_(baseBlock)[VGOFF_(m_edx)] = vg_threads[tid].m_edx; |
| VG_(baseBlock)[VGOFF_(m_esi)] = vg_threads[tid].m_esi; |
| VG_(baseBlock)[VGOFF_(m_edi)] = vg_threads[tid].m_edi; |
| VG_(baseBlock)[VGOFF_(m_ebp)] = vg_threads[tid].m_ebp; |
| VG_(baseBlock)[VGOFF_(m_esp)] = vg_threads[tid].m_esp; |
| VG_(baseBlock)[VGOFF_(m_eflags)] = vg_threads[tid].m_eflags; |
| VG_(baseBlock)[VGOFF_(m_eip)] = vg_threads[tid].m_eip; |
| |
| for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| VG_(baseBlock)[VGOFF_(m_fpustate) + i] = vg_threads[tid].m_fpu[i]; |
| |
| VG_(baseBlock)[VGOFF_(sh_eax)] = vg_threads[tid].sh_eax; |
| VG_(baseBlock)[VGOFF_(sh_ebx)] = vg_threads[tid].sh_ebx; |
| VG_(baseBlock)[VGOFF_(sh_ecx)] = vg_threads[tid].sh_ecx; |
| VG_(baseBlock)[VGOFF_(sh_edx)] = vg_threads[tid].sh_edx; |
| VG_(baseBlock)[VGOFF_(sh_esi)] = vg_threads[tid].sh_esi; |
| VG_(baseBlock)[VGOFF_(sh_edi)] = vg_threads[tid].sh_edi; |
| VG_(baseBlock)[VGOFF_(sh_ebp)] = vg_threads[tid].sh_ebp; |
| VG_(baseBlock)[VGOFF_(sh_esp)] = vg_threads[tid].sh_esp; |
| VG_(baseBlock)[VGOFF_(sh_eflags)] = vg_threads[tid].sh_eflags; |
| |
| vg_tid_currently_in_baseBlock = tid; |
| } |
| |
| |
| /* Copy the state of a thread from VG_(baseBlock), presumably after it |
| has been descheduled. For sanity-check purposes, fill the vacated |
| VG_(baseBlock) with garbage so as to make the system more likely to |
| fail quickly if we erroneously continue to poke around inside |
| VG_(baseBlock) without first doing a load_thread_state(). |
| */ |
| __inline__ |
| void VG_(save_thread_state) ( ThreadId tid ) |
| { |
| Int i; |
| const UInt junk = 0xDEADBEEF; |
| |
| vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); |
| |
| vg_threads[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)]; |
| vg_threads[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; |
| vg_threads[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; |
| vg_threads[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)]; |
| vg_threads[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)]; |
| vg_threads[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)]; |
| vg_threads[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; |
| vg_threads[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; |
| vg_threads[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; |
| vg_threads[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; |
| |
| for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| vg_threads[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; |
| |
| vg_threads[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; |
| vg_threads[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; |
| vg_threads[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; |
| vg_threads[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; |
| vg_threads[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; |
| vg_threads[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; |
| vg_threads[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; |
| vg_threads[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; |
| vg_threads[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; |
| |
| /* Fill it up with junk. */ |
| VG_(baseBlock)[VGOFF_(m_eax)] = junk; |
| VG_(baseBlock)[VGOFF_(m_ebx)] = junk; |
| VG_(baseBlock)[VGOFF_(m_ecx)] = junk; |
| VG_(baseBlock)[VGOFF_(m_edx)] = junk; |
| VG_(baseBlock)[VGOFF_(m_esi)] = junk; |
| VG_(baseBlock)[VGOFF_(m_edi)] = junk; |
| VG_(baseBlock)[VGOFF_(m_ebp)] = junk; |
| VG_(baseBlock)[VGOFF_(m_esp)] = junk; |
| VG_(baseBlock)[VGOFF_(m_eflags)] = junk; |
| VG_(baseBlock)[VGOFF_(m_eip)] = junk; |
| |
| for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) |
| VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; |
| |
| vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; |
| } |
| |
| |
| /* Run the thread tid for a while, and return a VG_TRC_* value to the |
| scheduler indicating what happened. */ |
| static |
| UInt run_thread_for_a_while ( ThreadId tid ) |
| { |
| UInt trc = 0; |
| vg_assert(is_valid_tid(tid)); |
| vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| vg_assert(VG_(bbs_to_go) > 0); |
| |
| VG_(load_thread_state) ( tid ); |
| if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) { |
| /* try this ... */ |
| trc = VG_(run_innerloop)(); |
| /* We get here if the client didn't take a fault. */ |
| } else { |
| /* We get here if the client took a fault, which caused our |
| signal handler to longjmp. */ |
| vg_assert(trc == 0); |
| trc = VG_TRC_UNRESUMABLE_SIGNAL; |
| } |
| VG_(save_thread_state) ( tid ); |
| return trc; |
| } |
| |
| |
| /* Increment the LRU epoch counter. */ |
| static |
| void increment_epoch ( void ) |
| { |
| VG_(current_epoch)++; |
| if (VG_(clo_verbosity) > 2) { |
| UInt tt_used, tc_used; |
| VG_(get_tt_tc_used) ( &tt_used, &tc_used ); |
| VG_(message)(Vg_UserMsg, |
| "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d", |
| VG_(bbs_done), |
| VG_(this_epoch_in_count), |
| VG_(this_epoch_in_osize), |
| VG_(this_epoch_in_tsize), |
| VG_(this_epoch_out_count), |
| VG_(this_epoch_out_osize), |
| VG_(this_epoch_out_tsize), |
| tt_used, tc_used |
| ); |
| } |
| VG_(this_epoch_in_count) = 0; |
| VG_(this_epoch_in_osize) = 0; |
| VG_(this_epoch_in_tsize) = 0; |
| VG_(this_epoch_out_count) = 0; |
| VG_(this_epoch_out_osize) = 0; |
| VG_(this_epoch_out_tsize) = 0; |
| } |
| |
| |
| /* Initialise the scheduler. Create a single "main" thread ready to |
| run, with special ThreadId of one. This is called at startup; the |
| caller takes care to park the client's state in VG_(baseBlock). |
| */ |
| void VG_(scheduler_init) ( void ) |
| { |
| Int i; |
| Addr startup_esp; |
| ThreadId tid_main; |
| |
| startup_esp = VG_(baseBlock)[VGOFF_(m_esp)]; |
| if ((startup_esp & VG_STARTUP_STACK_MASK) != VG_STARTUP_STACK_MASK) { |
| VG_(printf)("%%esp at startup = %p is not near %p; aborting\n", |
| (void*)startup_esp, (void*)VG_STARTUP_STACK_MASK); |
| VG_(panic)("unexpected %esp at startup"); |
| } |
| |
| for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) { |
| vg_threads[i].status = VgTs_Empty; |
| vg_threads[i].stack_size = 0; |
| vg_threads[i].stack_base = (Addr)NULL; |
| vg_threads[i].tid = i; |
| } |
| |
| for (i = 0; i < VG_N_WAITING_FDS; i++) |
| vg_waiting_fds[i].fd = -1; /* not in use */ |
| |
| /* Assert this is thread one, which has certain magic |
| properties. */ |
| tid_main = vg_alloc_ThreadState(); |
| vg_assert(tid_main == 1); |
| |
| vg_threads[tid_main].status = VgTs_Runnable; |
| vg_threads[tid_main].joiner = VG_INVALID_THREADID; |
| vg_threads[tid_main].associated_mx = NULL; |
| vg_threads[tid_main].associated_cv = NULL; |
| vg_threads[tid_main].retval = NULL; /* not important */ |
| vg_threads[tid_main].stack_highest_word |
| = vg_threads[tid_main].m_esp /* -4 ??? */; |
| |
| /* Copy VG_(baseBlock) state to tid_main's slot. */ |
| vg_tid_currently_in_baseBlock = tid_main; |
| VG_(save_thread_state) ( tid_main ); |
| |
| /* So now the root thread's state lives in its slot, and |
| VG_(baseBlock) is vacant until the scheduler next loads a thread. */ |
| vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); |
| } |
| |
| |
| /* What if fd isn't a valid fd? */ |
| static |
| void set_fd_nonblocking ( Int fd ) |
| { |
| Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| vg_assert(!VG_(is_kerror)(res)); |
| res |= VKI_O_NONBLOCK; |
| res = VG_(fcntl)( fd, VKI_F_SETFL, res ); |
| vg_assert(!VG_(is_kerror)(res)); |
| } |
| |
| static |
| void set_fd_blocking ( Int fd ) |
| { |
| Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| vg_assert(!VG_(is_kerror)(res)); |
| res &= ~VKI_O_NONBLOCK; |
| res = VG_(fcntl)( fd, VKI_F_SETFL, res ); |
| vg_assert(!VG_(is_kerror)(res)); |
| } |
| |
| static |
| Bool fd_is_blockful ( Int fd ) |
| { |
| Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); |
| vg_assert(!VG_(is_kerror)(res)); |
| return (res & VKI_O_NONBLOCK) ? False : True; |
| } |
| |
| |
| |
| /* Do a purely thread-local request for tid, and put the result in its |
| %EDX, without changing its scheduling state in any way, nor that of |
| any other thread. If the request was handled here, return True. |
| |
| If the request is non-trivial, return False; a more capable but |
| slower mechanism will deal with it. |
| */ |
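| /* (Layout of a request, as used below: the client's %EAX points at a |
| short vector of words; arg[0] is the request code and arg[1..] are |
| its parameters. Any result is written to the client's %EDX via |
| SIMPLE_RETURN. See also the Phase 3 comments in VG_(scheduler).) */ |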
| static |
| Bool maybe_do_trivial_clientreq ( ThreadId tid ) |
| { |
| # define SIMPLE_RETURN(vvv) \ |
| { tst->m_edx = (vvv); \ |
| return True; \ |
| } |
| |
| ThreadState* tst = &vg_threads[tid]; |
| UInt* arg = (UInt*)(tst->m_eax); |
| UInt req_no = arg[0]; |
| |
| switch (req_no) { |
| case VG_USERREQ__MALLOC: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc ) |
| ); |
| case VG_USERREQ__BUILTIN_NEW: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew ) |
| ); |
| case VG_USERREQ__BUILTIN_VEC_NEW: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec ) |
| ); |
| case VG_USERREQ__FREE: |
| VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc ); |
| SIMPLE_RETURN(0); /* irrelevant */ |
| case VG_USERREQ__BUILTIN_DELETE: |
| VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew ); |
| SIMPLE_RETURN(0); /* irrelevant */ |
| case VG_USERREQ__BUILTIN_VEC_DELETE: |
| VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec ); |
| SIMPLE_RETURN(0); /* irrelevant */ |
| case VG_USERREQ__CALLOC: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] ) |
| ); |
| case VG_USERREQ__REALLOC: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] ) |
| ); |
| case VG_USERREQ__MEMALIGN: |
| SIMPLE_RETURN( |
| (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] ) |
| ); |
| |
| /* These are heavily used. */ |
| case VG_USERREQ__PTHREAD_GET_THREADID: |
| SIMPLE_RETURN(tid); |
| case VG_USERREQ__RUNNING_ON_VALGRIND: |
| SIMPLE_RETURN(1); |
| case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL: |
| SIMPLE_RETURN(VG_(clo_trace_pthread_level)); |
| |
| default: |
| /* Too hard; wimp out. */ |
| return False; |
| } |
| # undef SIMPLE_RETURN |
| } |
| |
| |
| /* vthread tid is returning from a signal handler; modify its |
| stack/regs accordingly. */ |
| static |
| void handle_signal_return ( ThreadId tid ) |
| { |
| Char msg_buf[100]; |
| Bool restart_blocked_syscalls; |
| |
| vg_assert(is_valid_tid(tid)); |
| |
| restart_blocked_syscalls = VG_(signal_returns)(tid); |
| |
| if (restart_blocked_syscalls) |
| /* Easy; we don't have to do anything. */ |
| return; |
| |
| if (vg_threads[tid].status == VgTs_WaitFD) { |
| vg_assert(vg_threads[tid].m_eax == __NR_read |
| || vg_threads[tid].m_eax == __NR_write); |
| /* read() or write() interrupted. Force a return with EINTR. */ |
| vg_threads[tid].m_eax = -VKI_EINTR; |
| vg_threads[tid].status = VgTs_Runnable; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "read() / write() interrupted by signal; return EINTR" ); |
| print_sched_event(tid, msg_buf); |
| } |
| return; |
| } |
| |
| if (vg_threads[tid].status == VgTs_Sleeping) { |
| vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); |
| /* We interrupted a nanosleep(). The right thing to do is to |
| write the unused time to nanosleep's second param and return |
| EINTR, but I'm too lazy for that. */ |
| return; |
| } |
| |
| /* All other cases? Just return. */ |
| } |
| |
| |
| static |
| void sched_do_syscall ( ThreadId tid ) |
| { |
| UInt saved_eax; |
| UInt res, syscall_no; |
| UInt fd; |
| Bool might_block, assumed_nonblocking; |
| Bool orig_fd_blockness; |
| Char msg_buf[100]; |
| |
| vg_assert(is_valid_tid(tid)); |
| vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| |
| syscall_no = vg_threads[tid].m_eax; /* syscall number */ |
| |
| if (syscall_no == __NR_nanosleep) { |
| ULong t_now, t_awaken; |
| struct vki_timespec* req; |
| req = (struct vki_timespec*)vg_threads[tid].m_ebx; /* arg1 */ |
| t_now = VG_(read_microsecond_timer)(); |
| t_awaken |
| = t_now |
| + (ULong)1000000ULL * (ULong)(req->tv_sec) |
| + (ULong)( (UInt)(req->tv_nsec) / 1000 ); |
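| /* e.g. req = { tv_sec = 1, tv_nsec = 500000000 } gives |
| t_awaken = t_now + 1500000 microseconds. */ |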
| vg_threads[tid].status = VgTs_Sleeping; |
| vg_threads[tid].awaken_at = t_awaken; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, "at %lu: nanosleep for %lu", |
| t_now, t_awaken-t_now); |
| print_sched_event(tid, msg_buf); |
| } |
| /* Force the scheduler to run something else for a while. */ |
| return; |
| } |
| |
| switch (syscall_no) { |
| case __NR_read: |
| case __NR_write: |
| assumed_nonblocking |
| = False; |
| might_block |
| = fd_is_blockful(vg_threads[tid].m_ebx /* arg1 */); |
| break; |
| default: |
| might_block = False; |
| assumed_nonblocking = True; |
| } |
| |
| if (assumed_nonblocking) { |
| /* We think it's non-blocking. Just do it in the normal way. */ |
| VG_(perform_assumed_nonblocking_syscall)(tid); |
| /* The thread is still runnable. */ |
| return; |
| } |
| |
| /* It might block. Take evasive action. */ |
| switch (syscall_no) { |
| case __NR_read: |
| case __NR_write: |
| fd = vg_threads[tid].m_ebx; break; |
| default: |
| vg_assert(3+3 == 7); /* never happens: syscall_no was checked above */ |
| } |
| |
| /* Set the fd to nonblocking, and do the syscall, which will return |
| immediately, in order to lodge a request with the Linux kernel. |
| We later poll for I/O completion using select(). */ |
| |
| orig_fd_blockness = fd_is_blockful(fd); |
| set_fd_nonblocking(fd); |
| vg_assert(!fd_is_blockful(fd)); |
| VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); |
| |
| /* This trashes the thread's %eax; we have to preserve it. */ |
| saved_eax = vg_threads[tid].m_eax; |
| KERNEL_DO_SYSCALL(tid,res); |
| |
| /* Restore original blockfulness of the fd. */ |
| if (orig_fd_blockness) |
| set_fd_blocking(fd); |
| else |
| set_fd_nonblocking(fd); |
| |
| if (res != -VKI_EWOULDBLOCK) { |
| /* It didn't block; it went through immediately. So finish off |
| in the normal way. Don't restore %EAX, since that now |
| (correctly) holds the result of the call. */ |
| VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); |
| /* We're still runnable. */ |
| vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| |
| } else { |
| |
| /* It would have blocked. First, restore %EAX to what it was |
| before our speculative call. */ |
| vg_threads[tid].m_eax = saved_eax; |
| /* Put this fd in a table of fds on which we are waiting for |
| completion. The arguments for select() later are constructed |
| from this table. */ |
| add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */); |
| /* Deschedule thread until an I/O completion happens. */ |
| vg_threads[tid].status = VgTs_WaitFD; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd); |
| print_sched_event(tid, msg_buf); |
| } |
| |
| } |
| } |
| |
| |
| /* Find out which of the fds in vg_waiting_fds are now ready to go, by |
| making enquiries with select(), and mark them as ready. We have to |
| wait for the requesting threads to fall into the WaitFD state |
| before we can actually finally deliver the results, so this |
| procedure doesn't do that; complete_blocked_syscalls() does it. |
| |
| It might seem odd that a thread which has done a blocking syscall |
| is not in WaitFD state; the way this can happen is if it initially |
| becomes WaitFD, but then a signal is delivered to it, so it becomes |
| Runnable for a while. In this case we have to wait for the |
| sighandler to return, whereupon the WaitFD state is resumed, and |
| only at that point can the I/O result be delivered to it. However, |
| this point may be long after the fd is actually ready. |
| |
| So, poll_for_ready_fds() merely detects fds which are ready. |
| complete_blocked_syscalls() does the second half of the trick, |
| possibly much later: it delivers the results from ready fds to |
| threads in WaitFD state. |
| */ |
| static |
| void poll_for_ready_fds ( void ) |
| { |
| vki_ksigset_t saved_procmask; |
| vki_fd_set readfds; |
| vki_fd_set writefds; |
| vki_fd_set exceptfds; |
| struct vki_timeval timeout; |
| Int fd, fd_max, i, n_ready, syscall_no, n_ok; |
| ThreadId tid; |
| Bool rd_ok, wr_ok, ex_ok; |
| Char msg_buf[100]; |
| |
| struct vki_timespec* rem; |
| ULong t_now; |
| |
| /* Awaken any sleeping threads whose sleep has expired. */ |
| for (tid = 1; tid < VG_N_THREADS; tid++) |
| if (vg_threads[tid].status == VgTs_Sleeping) |
| break; |
| |
| /* Avoid pointless calls to VG_(read_microsecond_timer). */ |
| if (tid < VG_N_THREADS) { |
| t_now = VG_(read_microsecond_timer)(); |
| for (tid = 1; tid < VG_N_THREADS; tid++) { |
| if (vg_threads[tid].status != VgTs_Sleeping) |
| continue; |
| if (t_now >= vg_threads[tid].awaken_at) { |
| /* Resume this thread. Set to zero the remaining-time |
| (second) arg of nanosleep, since it's used up all its |
| time. */ |
| vg_assert(vg_threads[tid].m_eax == __NR_nanosleep); |
| rem = (struct vki_timespec *)vg_threads[tid].m_ecx; /* arg2 */ |
| if (rem != NULL) { |
| rem->tv_sec = 0; |
| rem->tv_nsec = 0; |
| } |
| /* Make the syscall return 0 (success). */ |
| vg_threads[tid].m_eax = 0; |
| /* Reschedule this thread. */ |
| vg_threads[tid].status = VgTs_Runnable; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, "at %lu: nanosleep done", |
| t_now); |
| print_sched_event(tid, msg_buf); |
| } |
| } |
| } |
| } |
| |
| /* And look for threads waiting on file descriptors which are now |
| ready for I/O.*/ |
| timeout.tv_sec = 0; |
| timeout.tv_usec = 0; |
| |
| VKI_FD_ZERO(&readfds); |
| VKI_FD_ZERO(&writefds); |
| VKI_FD_ZERO(&exceptfds); |
| fd_max = -1; |
| for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| continue; |
| if (vg_waiting_fds[i].ready /* already ready? */) |
| continue; |
| fd = vg_waiting_fds[i].fd; |
| /* VG_(printf)("adding QUERY for fd %d\n", fd); */ |
| vg_assert(fd >= 0); |
| if (fd > fd_max) |
| fd_max = fd; |
| tid = vg_waiting_fds[i].tid; |
| vg_assert(is_valid_tid(tid)); |
| syscall_no = vg_waiting_fds[i].syscall_no; |
| switch (syscall_no) { |
| case __NR_read: |
| VKI_FD_SET(fd, &readfds); break; |
| case __NR_write: |
| VKI_FD_SET(fd, &writefds); break; |
| default: |
| VG_(panic)("poll_for_ready_fds: unexpected syscall"); |
| /*NOTREACHED*/ |
| break; |
| } |
| } |
| |
| /* Short cut: if no fds are waiting, give up now. */ |
| if (fd_max == -1) |
| return; |
| |
| /* BLOCK ALL SIGNALS. We don't want the complication of select() |
| getting interrupted. */ |
| VG_(block_all_host_signals)( &saved_procmask ); |
| |
| n_ready = VG_(select) |
| ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout); |
| if (VG_(is_kerror)(n_ready)) { |
| VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready); |
| VG_(panic)("poll_for_ready_fds: select failed?!"); |
| /*NOTREACHED*/ |
| } |
| |
| /* UNBLOCK ALL SIGNALS */ |
| VG_(restore_host_signals)( &saved_procmask ); |
| |
| /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */ |
| |
| if (n_ready == 0) |
| return; |
| |
| /* Inspect all the fds we know about, and handle any completions that |
| have happened. */ |
| /* |
| VG_(printf)("\n\n"); |
| for (fd = 0; fd < 100; fd++) |
| if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) { |
| VG_(printf)("X"); } else { VG_(printf)("."); }; |
| VG_(printf)("\n\nfd_max = %d\n", fd_max); |
| */ |
| |
| for (fd = 0; fd <= fd_max; fd++) { |
| rd_ok = VKI_FD_ISSET(fd, &readfds); |
| wr_ok = VKI_FD_ISSET(fd, &writefds); |
| ex_ok = VKI_FD_ISSET(fd, &exceptfds); |
| |
| n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0); |
| if (n_ok == 0) |
| continue; |
| if (n_ok > 1) { |
| VG_(printf)("offending fd = %d\n", fd); |
| VG_(panic)("poll_for_ready_fds: multiple events on fd"); |
| } |
| |
| /* An I/O event completed for fd. Find the thread which |
| requested this. */ |
| for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| continue; |
| if (vg_waiting_fds[i].fd == fd) |
| break; |
| } |
| |
| /* And a bit more paranoia ... */ |
| vg_assert(i >= 0 && i < VG_N_WAITING_FDS); |
| |
| /* Mark the fd as ready. */ |
| vg_assert(! vg_waiting_fds[i].ready); |
| vg_waiting_fds[i].ready = True; |
| } |
| } |
| |
| |
| /* See comment attached to poll_for_ready_fds() for explanation. */ |
| static |
| void complete_blocked_syscalls ( void ) |
| { |
| Int fd, i, res, syscall_no; |
| ThreadId tid; |
| Char msg_buf[100]; |
| |
| /* Inspect all the outstanding fds we know about. */ |
| |
| for (i = 0; i < VG_N_WAITING_FDS; i++) { |
| if (vg_waiting_fds[i].fd == -1 /* not in use */) |
| continue; |
| if (! vg_waiting_fds[i].ready) |
| continue; |
| |
| fd = vg_waiting_fds[i].fd; |
| tid = vg_waiting_fds[i].tid; |
| vg_assert(is_valid_tid(tid)); |
| |
| /* The thread actually has to be waiting for the I/O event it |
| requested before we can deliver the result! */ |
| if (vg_threads[tid].status != VgTs_WaitFD) |
| continue; |
| |
| /* Ok, actually do it! We can safely use %EAX as the syscall |
| number, because the speculative call made by |
| sched_do_syscall() doesn't change %EAX in the case where the |
| call would have blocked. */ |
| |
| syscall_no = vg_waiting_fds[i].syscall_no; |
| vg_assert(syscall_no == vg_threads[tid].m_eax); |
| KERNEL_DO_SYSCALL(tid,res); |
| VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); |
| |
| /* Reschedule. */ |
| vg_threads[tid].status = VgTs_Runnable; |
| /* Mark slot as no longer in use. */ |
| vg_waiting_fds[i].fd = -1; |
| /* pp_sched_status(); */ |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd); |
| print_sched_event(tid, msg_buf); |
| } |
| } |
| } |
| |
| |
| static |
| void nanosleep_for_a_while ( void ) |
| { |
| Int res; |
| struct vki_timespec req; |
| struct vki_timespec rem; |
| req.tv_sec = 0; |
| req.tv_nsec = 20 * 1000 * 1000; |
| res = VG_(nanosleep)( &req, &rem ); |
| /* VG_(printf)("after ns, unused = %d\n", rem.tv_nsec ); */ |
| vg_assert(res == 0); |
| } |
| |
| |
| /* --------------------------------------------------------------------- |
| The scheduler proper. |
| ------------------------------------------------------------------ */ |
| |
| /* Run user-space threads until either |
| * Deadlock occurs |
| * One thread asks to shutdown Valgrind |
| * The specified number of basic blocks has gone by. |
| */ |
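| /* These three conditions correspond to the return codes |
| VgSrc_Deadlock, VgSrc_Shutdown and VgSrc_BbsDone respectively. */ |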
| VgSchedReturnCode VG_(scheduler) ( void ) |
| { |
| ThreadId tid, tid_next; |
| UInt trc; |
| UInt dispatch_ctr_SAVED; |
| Int request_code, done_this_time, n_in_fdwait_or_sleep; |
| Char msg_buf[100]; |
| Addr trans_addr; |
| |
| /* For the LRU structures, records when the epoch began. */ |
| ULong lru_epoch_started_at = 0; |
| |
| /* Start with the root thread. tid in general indicates the |
| currently runnable/just-finished-running thread. */ |
| tid = 1; |
| |
| /* This is the top level scheduler loop. It falls into three |
| phases. */ |
| while (True) { |
| |
| /* ======================= Phase 0 of 3 ======================= |
| Be paranoid. Always a good idea. */ |
| scheduler_sanity(); |
| |
| /* ======================= Phase 1 of 3 ======================= |
| Handle I/O completions and signals. This may change the |
| status of various threads. Then select a new thread to run, |
| or declare deadlock, or sleep if there are no runnable |
| threads but some are blocked on I/O. */ |
| |
| /* Age the LRU structures if an epoch has been completed. */ |
| if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) { |
| lru_epoch_started_at = VG_(bbs_done); |
| increment_epoch(); |
| } |
| |
| /* Was a debug-stop requested? */ |
| if (VG_(bbs_to_go) == 0) |
| goto debug_stop; |
| |
| /* Do the following loop until a runnable thread is found, or |
| deadlock is detected. */ |
| while (True) { |
| |
| /* For stats purposes only. */ |
| VG_(num_scheduling_events_MAJOR) ++; |
| |
| /* See if any I/O operations which we were waiting for have |
| completed, and, if so, make runnable the relevant waiting |
| threads. */ |
| poll_for_ready_fds(); |
| complete_blocked_syscalls(); |
| |
| /* See if there are any signals which need to be delivered. If |
| so, choose thread(s) to deliver them to, and build signal |
| delivery frames on those thread(s) stacks. */ |
| |
| /* Be careful about delivering signals to a thread waiting |
| for a mutex. In particular, when the handler is running, |
| that thread is temporarily apparently-not-waiting for the |
| mutex, so if it is unlocked by another thread whilst the |
| handler is running, this thread is not informed. When the |
| handler returns, the thread resumes waiting on the mutex, |
| even if, as a result, it has missed the unlocking of it. |
| Potential deadlock. This sounds all very strange, but the |
| POSIX standard appears to require this behaviour. */ |
| VG_(deliver_signals)( 1 /*HACK*/ ); |
| VG_(do_sanity_checks)( 1 /*HACK*/, False ); |
| |
| /* Try and find a thread (tid) to run. */ |
| tid_next = tid; |
| n_in_fdwait_or_sleep = 0; |
| while (True) { |
| tid_next++; |
| if (tid_next >= VG_N_THREADS) tid_next = 1; |
| if (vg_threads[tid_next].status == VgTs_WaitFD |
| || vg_threads[tid_next].status == VgTs_Sleeping) |
| n_in_fdwait_or_sleep ++; |
| if (vg_threads[tid_next].status == VgTs_Runnable) |
| break; /* We can run this one. */ |
| if (tid_next == tid) |
| break; /* been all the way round */ |
| } |
| tid = tid_next; |
| |
| if (vg_threads[tid].status == VgTs_Runnable) { |
| /* Found a suitable candidate. Fall out of this loop, so |
| we can advance to stage 2 of the scheduler: actually |
| running the thread. */ |
| break; |
| } |
| |
| /* We didn't find a runnable thread. Now what? */ |
| if (n_in_fdwait_or_sleep == 0) { |
| /* No runnable threads and no prospect of any appearing |
| even if we wait for an arbitrary length of time. In |
| short, we have a deadlock. */ |
| VG_(pp_sched_status)(); |
| return VgSrc_Deadlock; |
| } |
| |
| /* At least one thread is in a fd-wait state. Delay for a |
| while, and go round again, in the hope that eventually a |
| thread becomes runnable. */ |
| nanosleep_for_a_while(); |
| // pp_sched_status(); |
| // VG_(printf)(".\n"); |
| } |
| |
| |
| /* ======================= Phase 2 of 3 ======================= |
| Wahey! We've finally decided that thread tid is runnable, so |
| we now do that. Run it for as much of a quantum as possible. |
| Trivial requests are handled and the thread continues. The |
| aim is to avoid doing Phase 1 too often, since it is expensive. */ |
| |
| if (0) |
| VG_(printf)("SCHED: tid %d\n", tid); |
| |
| /* Figure out how many bbs to ask vg_run_innerloop to do. Note |
| that it decrements the counter before testing it for zero, so |
| that if VG_(dispatch_ctr) is set to N you get at most N-1 |
| iterations. Also this means that VG_(dispatch_ctr) must |
| exceed zero before entering the innerloop. Also also, the |
| decrement is done before the bb is actually run, so you |
| always get at least one decrement even if nothing happens. |
| */ |
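| /* Worked example (illustrative only): if VG_SCHEDULING_QUANTUM were, |
| say, 50, then VG_(dispatch_ctr) would be set to 51 below, allowing |
| at most 50 bbs to run before the dispatcher returns |
| VG_TRC_INNER_COUNTERZERO. */ |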
| if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM) |
| VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1; |
| else |
| VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1; |
| |
| /* ... and remember what we asked for. */ |
| dispatch_ctr_SAVED = VG_(dispatch_ctr); |
| |
| /* paranoia ... */ |
| vg_assert(vg_threads[tid].tid == tid); |
| |
| /* Actually run thread tid. */ |
| while (True) { |
| |
| /* For stats purposes only. */ |
| VG_(num_scheduling_events_MINOR) ++; |
| |
| if (0) |
| VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs", |
| tid, VG_(dispatch_ctr) - 1 ); |
| |
| trc = run_thread_for_a_while ( tid ); |
| |
| /* Deal quickly with trivial scheduling events, and resume the |
| thread. */ |
| |
| if (trc == VG_TRC_INNER_FASTMISS) { |
| vg_assert(VG_(dispatch_ctr) > 0); |
| |
| /* Trivial event. Miss in the fast-cache. Do a full |
| lookup for it. */ |
| trans_addr |
| = VG_(search_transtab) ( vg_threads[tid].m_eip ); |
| if (trans_addr == (Addr)0) { |
| /* Not found; we need to request a translation. */ |
| create_translation_for( tid, vg_threads[tid].m_eip ); |
| trans_addr = VG_(search_transtab) ( vg_threads[tid].m_eip ); |
| if (trans_addr == (Addr)0) |
| VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry"); |
| } |
| continue; /* with this thread */ |
| } |
| |
| if (trc == VG_TRC_EBP_JMP_CLIENTREQ) { |
| Bool is_triv = maybe_do_trivial_clientreq(tid); |
| if (is_triv) { |
| /* NOTE: a trivial request is something like a call to |
| malloc() or free(). It DOES NOT change the |
| Runnability of this thread nor the status of any |
| other thread; it is purely thread-local. */ |
| continue; /* with this thread */ |
| } |
| } |
| |
| /* It's a non-trivial event. Give up running this thread and |
| handle things the expensive way. */ |
| break; |
| } |
| |
| /* ======================= Phase 3 of 3 ======================= |
| Handle non-trivial thread requests, mostly pthread stuff. */ |
| |
| /* Ok, we've fallen out of the dispatcher for a |
| non-completely-trivial reason. First, update basic-block |
| counters. */ |
| |
| done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1; |
| vg_assert(done_this_time >= 0); |
| VG_(bbs_to_go) -= (ULong)done_this_time; |
| VG_(bbs_done) += (ULong)done_this_time; |
| |
| if (0 && trc != VG_TRC_INNER_FASTMISS) |
| VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d", |
| tid, done_this_time, (Int)trc ); |
| |
| if (0 && trc != VG_TRC_INNER_FASTMISS) |
| VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s", |
| tid, VG_(bbs_done), |
| name_of_sched_event(trc) ); |
| |
| /* Examine the thread's return code to figure out why it |
| stopped, and handle requests. */ |
| |
| switch (trc) { |
| |
| case VG_TRC_INNER_FASTMISS: |
| VG_(panic)("VG_(scheduler): VG_TRC_INNER_FASTMISS"); |
| /*NOTREACHED*/ |
| break; |
| |
| case VG_TRC_INNER_COUNTERZERO: |
| /* Timeslice is out. Let a new thread be scheduled, |
| simply by doing nothing, causing us to arrive back at |
| Phase 1. */ |
| if (VG_(bbs_to_go) == 0) { |
| goto debug_stop; |
| } |
| vg_assert(VG_(dispatch_ctr) == 0); |
| break; |
| |
| case VG_TRC_UNRESUMABLE_SIGNAL: |
| /* It got a SIGSEGV/SIGBUS, which we need to deliver right |
| away. Again, do nothing, so we wind up back at Phase |
| 1, whereupon the signal will be "delivered". */ |
| break; |
| |
| case VG_TRC_EBP_JMP_SYSCALL: |
| /* Do a syscall for the vthread tid. This could cause it |
| to become non-runnable. */ |
| sched_do_syscall(tid); |
| break; |
| |
| case VG_TRC_EBP_JMP_CLIENTREQ: |
| /* Do a client request for the vthread tid. Note that |
| some requests will have been handled by |
| maybe_do_trivial_clientreq(), so we don't expect to see |
| those here. |
| */ |
| /* The thread's %EAX points at an arg block, the first |
| word of which is the request code. */ |
| request_code = ((UInt*)(vg_threads[tid].m_eax))[0]; |
| if (0) { |
| VG_(sprintf)(msg_buf, "request 0x%x", request_code ); |
| print_sched_event(tid, msg_buf); |
| } |
| /* Do a non-trivial client request for thread tid. tid's |
| %EAX points to a short vector of argument words, the |
| first of which is the request code. The result of the |
| request is put in tid's %EDX. Alternatively, perhaps |
| the request causes tid to become non-runnable and/or |
| other blocked threads become runnable. In general we |
| can and often do mess with the state of arbitrary |
| threads at this point. */ |
| if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) { |
| return VgSrc_Shutdown; |
| } else { |
| do_nontrivial_clientreq(tid); |
| } |
| break; |
| |
| default: |
| VG_(printf)("\ntrc = %d\n", trc); |
| VG_(panic)("VG_(scheduler), phase 3: " |
| "unexpected thread return code"); |
| /* NOTREACHED */ |
| break; |
| |
| } /* switch (trc) */ |
| |
| /* That completes Phase 3 of 3. Return now to the top of the |
| main scheduler loop, to Phase 1 of 3. */ |
| |
| } /* top-level scheduler loop */ |
| |
| |
| /* NOTREACHED */ |
| VG_(panic)("scheduler: post-main-loop ?!"); |
| /* NOTREACHED */ |
| |
| debug_stop: |
| /* If we exited because of a debug stop, print the translation |
| of the last block executed -- by translating it again, and |
| throwing away the result. */ |
| VG_(printf)( |
| "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n"); |
| VG_(translate)( &vg_threads[tid], vg_threads[tid].m_eip, NULL, NULL, NULL ); |
| VG_(printf)("\n"); |
| VG_(printf)( |
| "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n"); |
| |
| return VgSrc_BbsDone; |
| } |
| |
| |
| /* --------------------------------------------------------------------- |
| The pthread implementation. |
| ------------------------------------------------------------------ */ |
| |
| #include <pthread.h> |
| #include <errno.h> |
| |
| #if !defined(PTHREAD_STACK_MIN) |
| # define PTHREAD_STACK_MIN (16384 - VG_AR_CLIENT_STACKBASE_REDZONE_SZB) |
| #endif |
| |
| /* /usr/include/bits/pthreadtypes.h: |
| typedef unsigned long int pthread_t; |
| */ |
| |
| |
| /* ----------------------------------------------------------- |
| Thread CREATION, JOINAGE and CANCELLATION. |
| -------------------------------------------------------- */ |
| |
| static |
| void do_pthread_cancel ( ThreadId tid_canceller, |
| pthread_t tid_cancellee ) |
| { |
| Char msg_buf[100]; |
| /* We want to make it appear that this thread has returned to |
| VG_(pthreadreturn_bogusRA) with PTHREAD_CANCELED as the |
| return value. So, simply: put PTHREAD_CANCELED into %EAX |
| and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */ |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, "cancelled by %d", tid_canceller); |
| print_sched_event(tid_cancellee, msg_buf); |
| } |
| vg_threads[tid_cancellee].m_eax = (UInt)PTHREAD_CANCELED; |
| vg_threads[tid_cancellee].m_eip = (UInt)&VG_(pthreadreturn_bogusRA); |
| vg_threads[tid_cancellee].status = VgTs_Runnable; |
| } |
| |
| |
| static |
| void do_pthread_exit ( ThreadId tid, void* retval ) |
| { |
| Char msg_buf[100]; |
| /* We want to make it appear that this thread has returned to |
| VG_(pthreadreturn_bogusRA) with retval as the |
| return value. So, simply: put retval into %EAX |
| and &VG_(pthreadreturn_bogusRA) into %EIP and keep going! */ |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, "exiting with %p", retval); |
| print_sched_event(tid, msg_buf); |
| } |
| vg_threads[tid].m_eax = (UInt)retval; |
| vg_threads[tid].m_eip = (UInt)&VG_(pthreadreturn_bogusRA); |
| vg_threads[tid].status = VgTs_Runnable; |
| } |
| |
| |
| /* Thread tid is exiting, by returning from the function it was |
| created with. Or possibly due to pthread_exit or cancellation. |
| The main complication here is to resume any thread waiting to join |
| with this one. */ |
| static |
| void handle_pthread_return ( ThreadId tid, void* retval ) |
| { |
| ThreadId jnr; /* joiner, the thread calling pthread_join. */ |
| UInt* jnr_args; |
| void** jnr_thread_return; |
| Char msg_buf[100]; |
| |
| /* Mark it as not in use. Leave the stack in place so the next |
| user of this slot doesn't reallocate it. */ |
| vg_assert(is_valid_tid(tid)); |
| vg_assert(vg_threads[tid].status != VgTs_Empty); |
| |
| vg_threads[tid].retval = retval; |
| |
| if (vg_threads[tid].joiner == VG_INVALID_THREADID) { |
| /* No one has yet done a join on me */ |
| vg_threads[tid].status = VgTs_WaitJoiner; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "root fn returns, waiting for a call pthread_join(%d)", |
| tid); |
| print_sched_event(tid, msg_buf); |
| } |
| } else { |
| /* Someone is waiting; make their join call return with success, |
| putting my exit code in the place specified by the caller's |
| thread_return param. This is all very horrible, since we |
| need to consult the joiner's arg block -- pointed to by its |
| %EAX -- in order to extract the 2nd param of its pthread_join |
| call. TODO: free properly the slot (also below). |
| */ |
| jnr = vg_threads[tid].joiner; |
| vg_assert(is_valid_tid(jnr)); |
| vg_assert(vg_threads[jnr].status == VgTs_WaitJoinee); |
| jnr_args = (UInt*)vg_threads[jnr].m_eax; |
| jnr_thread_return = (void**)(jnr_args[2]); |
| if (jnr_thread_return != NULL) |
| *jnr_thread_return = vg_threads[tid].retval; |
| vg_threads[jnr].m_edx = 0; /* success */ |
| vg_threads[jnr].status = VgTs_Runnable; |
| vg_threads[tid].status = VgTs_Empty; /* bye! */ |
| if (VG_(clo_instrument) && tid != 0) |
| VGM_(make_noaccess)( vg_threads[tid].stack_base, |
| vg_threads[tid].stack_size ); |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "root fn returns, to find a waiting pthread_join(%d)", tid); |
| print_sched_event(tid, msg_buf); |
| VG_(sprintf)(msg_buf, |
| "my pthread_join(%d) returned; resuming", tid); |
| print_sched_event(jnr, msg_buf); |
| } |
| } |
| |
| /* Return value is irrelevant; this thread will not get |
| rescheduled. */ |
| } |
| |
| |
| static |
| void do_pthread_join ( ThreadId tid, ThreadId jee, void** thread_return ) |
| { |
| Char msg_buf[100]; |
| |
| /* jee, the joinee, is the thread specified as an arg in thread |
| tid's call to pthread_join. So tid is the join-er. */ |
| vg_assert(is_valid_tid(tid)); |
| vg_assert(vg_threads[tid].status == VgTs_Runnable); |
| |
| if (jee == tid) { |
| vg_threads[tid].m_edx = EDEADLK; /* libc constant, not a kernel one */ |
| vg_threads[tid].status = VgTs_Runnable; |
| return; |
| } |
| |
| if (jee < 0 |
| || jee >= VG_N_THREADS |
| || vg_threads[jee].status == VgTs_Empty) { |
| /* Invalid thread to join to. */ |
| vg_threads[tid].m_edx = EINVAL; |
| vg_threads[tid].status = VgTs_Runnable; |
| return; |
| } |
| |
| if (vg_threads[jee].joiner != VG_INVALID_THREADID) { |
| /* Someone already did join on this thread */ |
| vg_threads[tid].m_edx = EINVAL; |
| vg_threads[tid].status = VgTs_Runnable; |
| return; |
| } |
| |
| /* if (vg_threads[jee].detached) ... */ |
| |
| /* Perhaps the joinee has already finished? If so return |
| immediately with its return code, and free up the slot. TODO: |
| free it properly (also above). */ |
| if (vg_threads[jee].status == VgTs_WaitJoiner) { |
| vg_assert(vg_threads[jee].joiner == VG_INVALID_THREADID); |
| vg_threads[tid].m_edx = 0; /* success */ |
| if (thread_return != NULL) |
| *thread_return = vg_threads[jee].retval; |
| vg_threads[tid].status = VgTs_Runnable; |
| vg_threads[jee].status = VgTs_Empty; /* bye! */ |
| if (VG_(clo_instrument) && jee != 0) |
| VGM_(make_noaccess)( vg_threads[jee].stack_base, |
| vg_threads[jee].stack_size ); |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "someone called pthread_join() on me; bye!"); |
| print_sched_event(jee, msg_buf); |
| VG_(sprintf)(msg_buf, |
| "my pthread_join(%d) returned immediately", |
| jee ); |
| print_sched_event(tid, msg_buf); |
| } |
| return; |
| } |
| |
| /* Ok, so we'll have to wait on jee. */ |
| vg_threads[jee].joiner = tid; |
| vg_threads[tid].status = VgTs_WaitJoinee; |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "blocking on call of pthread_join(%d)", jee ); |
| print_sched_event(tid, msg_buf); |
| } |
| /* So tid's join call does not return just now. */ |
| } |
| |
| |
| static |
| void do_pthread_create ( ThreadId parent_tid, |
| pthread_t* thread, |
| pthread_attr_t* attr, |
| void* (*start_routine)(void *), |
| void* arg ) |
| { |
| Addr new_stack; |
| UInt new_stk_szb; |
| ThreadId tid; |
| Char msg_buf[100]; |
| |
| /* Paranoia ... */ |
| vg_assert(sizeof(pthread_t) == sizeof(UInt)); |
| |
| vg_assert(vg_threads[parent_tid].status != VgTs_Empty); |
| |
| tid = vg_alloc_ThreadState(); |
| |
| /* If we've created the main thread's tid, we're in deep trouble :) */ |
| vg_assert(tid != 1); |
| vg_assert(is_valid_tid(tid)); |
| |
| /* Copy the parent's CPU state into the child's, in a roundabout |
| way (via baseBlock). */ |
| VG_(load_thread_state)(parent_tid); |
| VG_(save_thread_state)(tid); |
| |
| /* Consider allocating the child a stack, if the one it already has |
| is inadequate. */ |
| new_stk_szb = PTHREAD_STACK_MIN; |
| |
| if (new_stk_szb > vg_threads[tid].stack_size) { |
| /* Again, for good measure :) We definitely don't want to be |
| allocating a stack for the main thread. */ |
| vg_assert(tid != 1); |
| /* for now, we don't handle the case of anything other than |
| assigning it for the first time. */ |
| vg_assert(vg_threads[tid].stack_size == 0); |
| vg_assert(vg_threads[tid].stack_base == (Addr)NULL); |
| new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb ); |
| vg_threads[tid].stack_base = new_stack; |
| vg_threads[tid].stack_size = new_stk_szb; |
| vg_threads[tid].stack_highest_word |
| = new_stack + new_stk_szb |
| - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? */; |
| } |
| |
| vg_threads[tid].m_esp |
| = vg_threads[tid].stack_base |
| + vg_threads[tid].stack_size |
| - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; |
| |
| if (VG_(clo_instrument)) |
| VGM_(make_noaccess)( vg_threads[tid].m_esp, |
| VG_AR_CLIENT_STACKBASE_REDZONE_SZB ); |
| |
| /* push arg */ |
| vg_threads[tid].m_esp -= 4; |
| * (UInt*)(vg_threads[tid].m_esp) = (UInt)arg; |
| |
| /* push (magical) return address */ |
| vg_threads[tid].m_esp -= 4; |
| * (UInt*)(vg_threads[tid].m_esp) = (UInt)VG_(pthreadreturn_bogusRA); |
| |
| if (VG_(clo_instrument)) |
| VGM_(make_readable)( vg_threads[tid].m_esp, 2 * 4 ); |
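| |
| /* The child's stack is now set up as follows (low to high addresses): |
| m_esp -> (UInt)VG_(pthreadreturn_bogusRA) -- fake return address |
| m_esp+4 -> arg -- argument for start_routine |
| with the VG_AR_CLIENT_STACKBASE_REDZONE_SZB no-access redzone just |
| above those two words. When start_routine returns, it "returns" to |
| the bogus RA, which routes control into the pthread-return |
| machinery. */ |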
| |
| /* this is where we start */ |
| vg_threads[tid].m_eip = (UInt)start_routine; |
| |
| if (VG_(clo_trace_sched)) { |
| VG_(sprintf)(msg_buf, |
| "new thread, created by %d", parent_tid ); |
| print_sched_event(tid, msg_buf); |
| } |
| |
| /* store the thread id in *thread. */ |
| // if (VG_(clo_instrument)) |
| // ***** CHECK *thread is writable |
| *thread = (pthread_t)tid; |
| |
| vg_threads[tid].associated_mx = NULL; |
| vg_threads[tid].associated_cv = NULL; |
| vg_threads[tid].joiner = VG_INVALID_THREADID; |
| vg_threads[tid].status = VgTs_Runnable; |
| |
| /* return zero */ |
| vg_threads[tid].m_edx = 0; /* success */ |
| } |
| |
| |
| /* ----------------------------------------------------------- |
| MUTEXes |
| -------------------------------------------------------- */ |
| |
| /* pthread_mutex_t is a struct with at least 5 words: |
| typedef struct |
| { |
| int __m_reserved; -- Reserved for future use |
| int __m_count; -- Depth of recursive locking |
| _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck) |
| int __m_kind; -- Mutex kind: fast, recursive or errcheck |
| struct _pthread_fastlock __m_lock; -- Underlying fast lock |
| } pthread_mutex_t; |
| |
| #define PTHREAD_MUTEX_INITIALIZER \ |
| {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER} |
| # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \ |
| {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER} |
| # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \ |
| {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER} |
| # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ |
| {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER} |
| |
| How we use it: |
| |
| __m_kind never changes and indicates whether or not it is recursive. |
| |
| __m_count indicates the lock count; if 0, the mutex is not owned by |
| anybody. |
| |
| __m_owner has a ThreadId value stuffed into it. We carefully arrange |
| that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that |
| statically initialised mutexes correctly appear |
| to belong to nobody. |
| |
| In summary, a not-in-use mutex is distinguished by having __m_owner |
| == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those |
| conditions holds, the other should too. (A small sanity-check |
| sketch of this invariant follows this comment.) |
| |
| There is no linked list of threads waiting for this mutex. Instead |
| a thread in WaitMX state points at the mutex with its associated_mx |
| field. This makes _unlock() inefficient, but makes it simple to |
| implement the right semantics vis-a-vis signals. |
| |
| We don't have to deal with mutex initialisation; the client side |
| deals with that for us. |
| */ |
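| |
| /* A minimal sanity-check sketch of the not-in-use invariant described |
| above. It is illustrative only: the helper name mutex_is_unheld is |
| not part of the original code and it is not called from anywhere in |
| this file; the field names and casts are those used below. */ |
| static __inline__ |
| Bool mutex_is_unheld ( pthread_mutex_t* mutex ) |
| { |
| if (mutex->__m_count == 0) { |
| /* not held: the owner field must also denote "nobody" */ |
| vg_assert((ThreadId)mutex->__m_owner == VG_INVALID_THREADID); |
| return True; |
| } else { |
| /* held: there must be a real owner */ |
| vg_assert((ThreadId)mutex->__m_owner != VG_INVALID_THREADID); |
| return False; |
| } |
| } |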
| |
| /* Helper fns ... */ |
| static |
| void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex, |
| Char* caller ) |
| { |
| Int i; |
| Char msg_buf[100]; |
| |
| /* Find some arbitrary thread waiting on this mutex, and make it |
| runnable. If none are waiting, mark the mutex as not held. */ |
| for (i = 1; i < VG_N_THREADS; i++) { |
| if (vg_threads[i].status == VgTs_Empty) |
| continue; |
| if (vg_threads[i].status == VgTs_WaitMX |
| && vg_threads[i].associated_mx == mutex) |
| break; |
| } |
| |
| vg_assert(i <= VG_N_THREADS); |
| if (i == VG_N_THREADS) { |
| /* Nobody else is waiting on it. */ |
| mutex->__m_count = 0; |
| mutex->__m_owner = VG_INVALID_THREADID; |
| } else { |
| /* Notionally transfer the hold to thread i, whose |
| pthread_mutex_lock() call now returns with 0 (success). */ |
| /* The .count is already == 1. */ |
| vg_assert(vg_threads[i].associated_mx == mutex); |
| mutex->__m_owner = (_pthread_descr)i; |
| vg_threads[i].status = VgTs_Runnable; |
| vg_threads[i].associated_mx = NULL; |
| vg_threads[i].m_edx = 0; /* pth_lock() success */ |
| |
| if (VG_(clo_trace_pthread_level) >= 1) { |
| VG_(sprintf)(msg_buf, "%s mx %p: RESUME", |
| caller, mutex ); |
| print_pthread_event(i, msg_buf); |
| } |
| } |
| } |
| |
| |
| static |
| void do_pthread_mutex_lock( ThreadId tid, |
| Bool is_trylock, |
| pthread_mutex_t *mutex ) |
| { |
| Char msg_buf[100]; |
| Char* caller |
| = is_trylock ? "pthread_mutex_trylock" |
| : "pthread_mutex_lock "; |
| |
| if (VG_(clo_trace_pthread_level) >= 2) { |
| VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex ); |
| print_pthread_event(tid, msg_buf); |
| } |
| |
| /* Paranoia ... */ |
| vg_assert(is_valid_tid(tid) |
| && vg_threads[tid].status == VgTs_Runnable); |
| |
| /* POSIX doesn't mandate this, but for sanity ... */ |
| if (mutex == NULL) { |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* More paranoia ... */ |
| switch (mutex->__m_kind) { |
| case PTHREAD_MUTEX_TIMED_NP: |
| case PTHREAD_MUTEX_RECURSIVE_NP: |
| case PTHREAD_MUTEX_ERRORCHECK_NP: |
| case PTHREAD_MUTEX_ADAPTIVE_NP: |
| if (mutex->__m_count >= 0) break; |
| /* else fall thru */ |
| default: |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| if (mutex->__m_count > 0) { |
| |
| vg_assert(is_valid_tid((ThreadId)mutex->__m_owner)); |
| |
| /* Someone has it already. */ |
| if ((ThreadId)mutex->__m_owner == tid) { |
| /* It's locked -- by me! */ |
| if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) { |
| /* return 0 (success). */ |
| mutex->__m_count++; |
| vg_threads[tid].m_edx = 0; |
| VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n", |
| tid, mutex, mutex->__m_count); |
| return; |
| } else { |
| if (is_trylock) |
| vg_threads[tid].m_edx = EBUSY; |
| else |
| vg_threads[tid].m_edx = EDEADLK; |
| return; |
| } |
| } else { |
| /* Someone else has it; we have to wait. Mark ourselves |
| accordingly. */ |
| /* GUARD: __m_count > 0 && __m_owner is valid */ |
| if (is_trylock) { |
| /* caller is polling; so return immediately. */ |
| vg_threads[tid].m_edx = EBUSY; |
| } else { |
| vg_threads[tid].status = VgTs_WaitMX; |
| vg_threads[tid].associated_mx = mutex; |
| /* No assignment to %EDX, since we're blocking. */ |
| if (VG_(clo_trace_pthread_level) >= 1) { |
| VG_(sprintf)(msg_buf, "%s mx %p: BLOCK", |
| caller, mutex ); |
| print_pthread_event(tid, msg_buf); |
| } |
| } |
| return; |
| } |
| |
| } else { |
| /* Nobody owns it. Sanity check ... */ |
| vg_assert(mutex->__m_owner == VG_INVALID_THREADID); |
| /* We get it! [for the first time]. */ |
| mutex->__m_count = 1; |
| mutex->__m_owner = (_pthread_descr)tid; |
| vg_assert(vg_threads[tid].associated_mx == NULL); |
| /* return 0 (success). */ |
| vg_threads[tid].m_edx = 0; |
| } |
| |
| } |
| |
| |
| static |
| void do_pthread_mutex_unlock ( ThreadId tid, |
| pthread_mutex_t *mutex ) |
| { |
| Char msg_buf[100]; |
| |
| if (VG_(clo_trace_pthread_level) >= 2) { |
| VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex ); |
| print_pthread_event(tid, msg_buf); |
| } |
| |
| /* Paranoia ... */ |
| vg_assert(is_valid_tid(tid) |
| && vg_threads[tid].status == VgTs_Runnable); |
| |
| if (mutex == NULL) { |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* More paranoia ... */ |
| switch (mutex->__m_kind) { |
| case PTHREAD_MUTEX_TIMED_NP: |
| case PTHREAD_MUTEX_RECURSIVE_NP: |
| case PTHREAD_MUTEX_ERRORCHECK_NP: |
| case PTHREAD_MUTEX_ADAPTIVE_NP: |
| if (mutex->__m_count >= 0) break; |
| /* else fall thru */ |
| default: |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* Barf if we don't currently hold the mutex. */ |
| if (mutex->__m_count == 0 /* nobody holds it */ |
| || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) { |
| vg_threads[tid].m_edx = EPERM; |
| return; |
| } |
| |
| /* If it's a multiply-locked recursive mutex, just decrement the |
| lock count and return. */ |
| if (mutex->__m_count > 1) { |
| vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP); |
| mutex->__m_count --; |
| vg_threads[tid].m_edx = 0; /* success */ |
| return; |
| } |
| |
| /* Now we're sure it is locked exactly once, and by the thread who |
| is now doing an unlock on it. */ |
| vg_assert(mutex->__m_count == 1); |
| vg_assert((ThreadId)mutex->__m_owner == tid); |
| |
| /* Release at max one thread waiting on this mutex. */ |
| release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_unlock" ); |
| |
| /* Our (tid's) pth_unlock() returns with 0 (success). */ |
| vg_threads[tid].m_edx = 0; /* Success. */ |
| } |
| |
| |
| /* ----------------------------------------------------------- |
| CONDITION VARIABLES |
| -------------------------------------------------------- */ |
| |
| /* The relevant native types are as follows: |
| (copied from /usr/include/bits/pthreadtypes.h) |
| |
| -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER) |
| typedef struct |
| { |
| struct _pthread_fastlock __c_lock; -- Protect against concurrent access |
| _pthread_descr __c_waiting; -- Threads waiting on this condition |
| } pthread_cond_t; |
| |
| -- Attribute for condition variables. |
| typedef struct |
| { |
| int __dummy; |
| } pthread_condattr_t; |
| |
| #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0} |
| |
| We don't use any fields of pthread_cond_t for anything at all. |
| Only the identity of the CVs is important. |
| |
| Linux pthreads supports no attributes on condition variables, so we |
| don't need to think too hard there. */ |
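| |
| /* Illustrative sketch only, compiled out: since only the identity of |
| a CV matters, finding its waiters is just a pointer comparison |
| against each thread's associated_cv field. The helper name is |
| hypothetical. */ |
| #if 0 |
| static Int example_count_waiters_on_cv ( pthread_cond_t* cond ) |
| { |
| Int i, n = 0; |
| for (i = 1; i < VG_N_THREADS; i++) { |
| if (vg_threads[i].status == VgTs_WaitCV |
| && vg_threads[i].associated_cv == cond) |
| n++; |
| } |
| return n; |
| } |
| #endif |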
| |
| |
| static |
| void release_N_threads_waiting_on_cond ( pthread_cond_t* cond, |
| Int n_to_release, |
| Char* caller ) |
| { |
| Int i; |
| Char msg_buf[100]; |
| pthread_mutex_t* mx; |
| |
| while (True) { |
| if (n_to_release == 0) |
| return; |
| |
| /* Find a thread waiting on this CV. */ |
| for (i = 1; i < VG_N_THREADS; i++) { |
| if (vg_threads[i].status == VgTs_Empty) |
| continue; |
| if (vg_threads[i].status == VgTs_WaitCV |
| && vg_threads[i].associated_cv == cond) |
| break; |
| } |
| vg_assert(i <= VG_N_THREADS); |
| |
| if (i == VG_N_THREADS) { |
| /* Nobody else is waiting on it. */ |
| return; |
| } |
| |
| mx = vg_threads[i].associated_mx; |
| vg_assert(mx != NULL); |
| |
| if (mx->__m_owner == VG_INVALID_THREADID) { |
| /* Currently unheld; hand it out to thread i. */ |
| vg_assert(mx->__m_count == 0); |
| vg_threads[i].status = VgTs_Runnable; |
| vg_threads[i].associated_cv = NULL; |
| vg_threads[i].associated_mx = NULL; |
| mx->__m_owner = (_pthread_descr)i; |
| mx->__m_count = 1; |
| vg_threads[i].m_edx = 0; /* pthread_cond_wait returns success */ |
| |
| if (VG_(clo_trace_pthread_level) >= 1) { |
| VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p", |
| caller, cond, mx ); |
| print_pthread_event(i, msg_buf); |
| } |
| |
| } else { |
| /* Currently held. Make thread i be blocked on it. */ |
| vg_assert(mx->__m_count > 0); |
| vg_assert(is_valid_tid((ThreadId)mx->__m_owner)); |
| vg_threads[i].status = VgTs_WaitMX; |
| vg_threads[i].associated_cv = NULL; |
| vg_threads[i].associated_mx = mx; |
| |
| if (VG_(clo_trace_pthread_level) >= 1) { |
| VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p", |
| caller, cond, mx ); |
| print_pthread_event(i, msg_buf); |
| } |
| |
| } |
| |
| n_to_release--; |
| } |
| } |
| |
| |
| static |
| void do_pthread_cond_wait ( ThreadId tid, |
| pthread_cond_t *cond, |
| pthread_mutex_t *mutex ) |
| { |
| Char msg_buf[100]; |
| |
| /* pre: mutex should be a valid mutex and owned by tid. */ |
| if (VG_(clo_trace_pthread_level) >= 2) { |
| VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p ...", |
| cond, mutex ); |
| print_pthread_event(tid, msg_buf); |
| } |
| |
| /* Paranoia ... */ |
| vg_assert(is_valid_tid(tid) |
| && vg_threads[tid].status == VgTs_Runnable); |
| |
| if (mutex == NULL || cond == NULL) { |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* More paranoia ... */ |
| switch (mutex->__m_kind) { |
| case PTHREAD_MUTEX_TIMED_NP: |
| case PTHREAD_MUTEX_RECURSIVE_NP: |
| case PTHREAD_MUTEX_ERRORCHECK_NP: |
| case PTHREAD_MUTEX_ADAPTIVE_NP: |
| if (mutex->__m_count >= 0) break; |
| /* else fall thru */ |
| default: |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* Barf if we don't currently hold the mutex. */ |
| if (mutex->__m_count == 0 /* nobody holds it */ |
| || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) { |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| /* Queue ourselves on the condition. */ |
| vg_threads[tid].status = VgTs_WaitCV; |
| vg_threads[tid].associated_cv = cond; |
| vg_threads[tid].associated_mx = mutex; |
| |
| if (VG_(clo_trace_pthread_level) >= 1) { |
| VG_(sprintf)(msg_buf, |
| "pthread_cond_wait cv %p, mx %p: BLOCK", |
| cond, mutex ); |
| print_pthread_event(tid, msg_buf); |
| } |
| |
| /* Release the mutex. */ |
| release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " ); |
| } |
| |
| |
| static |
| void do_pthread_cond_signal_or_broadcast ( ThreadId tid, |
| Bool broadcast, |
| pthread_cond_t *cond ) |
| { |
| Char msg_buf[100]; |
| Char* caller |
| = broadcast ? "pthread_cond_broadcast" |
| : "pthread_cond_signal "; |
| |
| if (VG_(clo_trace_pthread_level) >= 2) { |
| VG_(sprintf)(msg_buf, "%s cv %p ...", |
| caller, cond ); |
| print_pthread_event(tid, msg_buf); |
| } |
| |
| /* Paranoia ... */ |
| vg_assert(is_valid_tid(tid) |
| && vg_threads[tid].status == VgTs_Runnable); |
| |
| if (cond == NULL) { |
| vg_threads[tid].m_edx = EINVAL; |
| return; |
| } |
| |
| release_N_threads_waiting_on_cond ( |
| cond, |
| broadcast ? VG_N_THREADS : 1, |
| caller |
| ); |
| |
| vg_threads[tid].m_edx = 0; /* success */ |
| } |
| |
| |
| /* --------------------------------------------------------------------- |
| Handle non-trivial client requests. |
| ------------------------------------------------------------------ */ |
| |
| static |
| void do_nontrivial_clientreq ( ThreadId tid ) |
| { |
| UInt* arg = (UInt*)(vg_threads[tid].m_eax); |
| UInt req_no = arg[0]; |
| switch (req_no) { |
| |
| case VG_USERREQ__PTHREAD_CREATE: |
| do_pthread_create( tid, |
| (pthread_t*)arg[1], |
| (pthread_attr_t*)arg[2], |
| (void*(*)(void*))arg[3], |
| (void*)arg[4] ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_RETURNS: |
| handle_pthread_return( tid, (void*)arg[1] ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_JOIN: |
| do_pthread_join( tid, arg[1], (void**)(arg[2]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_MUTEX_LOCK: |
| do_pthread_mutex_lock( tid, False, (pthread_mutex_t *)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK: |
| do_pthread_mutex_lock( tid, True, (pthread_mutex_t *)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_MUTEX_UNLOCK: |
| do_pthread_mutex_unlock( tid, (pthread_mutex_t *)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_CANCEL: |
| do_pthread_cancel( tid, (pthread_t)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_EXIT: |
| do_pthread_exit( tid, (void*)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_COND_WAIT: |
| do_pthread_cond_wait( tid, |
| (pthread_cond_t *)(arg[1]), |
| (pthread_mutex_t *)(arg[2]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_COND_SIGNAL: |
| do_pthread_cond_signal_or_broadcast( |
| tid, |
| False, /* signal, not broadcast */ |
| (pthread_cond_t *)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__PTHREAD_COND_BROADCAST: |
| do_pthread_cond_signal_or_broadcast( |
| tid, |
| True, /* broadcast, not signal */ |
| (pthread_cond_t *)(arg[1]) ); |
| break; |
| |
| case VG_USERREQ__MAKE_NOACCESS: |
| case VG_USERREQ__MAKE_WRITABLE: |
| case VG_USERREQ__MAKE_READABLE: |
| case VG_USERREQ__DISCARD: |
| case VG_USERREQ__CHECK_WRITABLE: |
| case VG_USERREQ__CHECK_READABLE: |
| case VG_USERREQ__MAKE_NOACCESS_STACK: |
| case VG_USERREQ__RUNNING_ON_VALGRIND: |
| case VG_USERREQ__DO_LEAK_CHECK: |
| vg_threads[tid].m_edx |
| = VG_(handle_client_request) ( &vg_threads[tid], arg ); |
| break; |
| |
| case VG_USERREQ__SIGNAL_RETURNS: |
| handle_signal_return(tid); |
| break; |
| |
| default: |
| VG_(printf)("panic'd on private request = 0x%x\n", arg[0] ); |
| VG_(panic)("handle_private_client_pthread_request: " |
| "unknown request"); |
| /*NOTREACHED*/ |
| break; |
| } |
| } |
| |
| |
| /* --------------------------------------------------------------------- |
| Sanity checking. |
| ------------------------------------------------------------------ */ |
| |
| /* Internal consistency checks on the sched/pthread structures. */ |
| static |
| void scheduler_sanity ( void ) |
| { |
| pthread_mutex_t* mx; |
| pthread_cond_t* cv; |
| Int i; |
| /* VG_(printf)("scheduler_sanity\n"); */ |
| for (i = 1; i < VG_N_THREADS; i++) { |
| mx = vg_threads[i].associated_mx; |
| cv = vg_threads[i].associated_cv; |
| if (vg_threads[i].status == VgTs_WaitMX) { |
| /* If we're waiting on a MX: (1) the mx is not null, (2, 3) |
| it's actually held by someone, since otherwise this thread |
| is deadlocked, (4) the mutex's owner is not us, since |
| otherwise this thread is also deadlocked. The logic in |
| do_pthread_mutex_lock rejects attempts by a thread to lock |
| a (non-recursive) mutex which it already owns. |
| |
| (2) has been seen to fail sometimes. I don't know why. |
| Possibly to do with signals. */ |
| vg_assert(cv == NULL); |
| /* 1 */ vg_assert(mx != NULL); |
| /* 2 */ vg_assert(mx->__m_count > 0); |
| /* 3 */ vg_assert(is_valid_tid((ThreadId)mx->__m_owner)); |
| /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner); |
| } else |
| if (vg_threads[i].status == VgTs_WaitCV) { |
| vg_assert(cv != NULL); |
| vg_assert(mx != NULL); |
| } else { |
| /* Unfortunately these don't hold true when a sighandler is |
| running. To be fixed. */ |
| /* vg_assert(cv == NULL); */ |
| /* vg_assert(mx == NULL); */ |
| } |
| } |
| } |
| |
| |
| /*--------------------------------------------------------------------*/ |
| /*--- end vg_scheduler.c ---*/ |
| /*--------------------------------------------------------------------*/ |