jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Proxy LWP machinery. vg_proxylwp.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Valgrind, an extensible x86 protected-mode |
| 8 | emulator for monitoring program execution on x86-Unixes. |
| 9 | |
| 10 | Copyright (C) 2000-2003 Julian Seward |
| 11 | jseward@acm.org |
| 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | |
| 32 | #include "vg_include.h" |
| 33 | |
| 34 | /* We need our own copy of VG_(do_syscall)() to handle a special |
| 35 | race-condition. If we've got signals unblocked, and we take a |
| 36 | signal in the gap either just before or after the syscall, we may |
| 37 | end up not running the syscall at all, or running it more than |
| 38 | once. |
| 39 | |
| 40 | The solution is to make the signal handler derive the proxy's |
| 41 | precise state by looking to see which eip it is executing at |
| 42 | exception time. |
| 43 | |
| 44 | Ranges: |
| 45 | |
| 46 | sys_before ... sys_restarted: |
| 47 | Setting up register arguments and running state. If |
| 48 | interrupted, then the syscall should be considered to return |
| 49 | ERESTARTSYS. |
| 50 | |
| 51 | sys_restarted: |
| 52 | If interrupted and eip==sys_restarted, then either the syscall |
| 53 | was about to start running, or it has run, was interrupted and |
| 54 | the kernel wants to restart it. eax still contains the |
| 55 | syscall number. If interrupted, then the syscall return value |
| 56 | should be ERESTARTSYS. |
| 57 | |
| 58 | sys_after: |
| 59 | If interrupted and eip==sys_after, the syscall either just |
| 60 | finished, or it was interrupted and the kernel doesn't want to |
| 61 | restart it. Either way, eax equals the correct return value |
| 62 | (either the actual return value, or EINTR). |
| 63 | |
| 64 | sys_after ... sys_done: |
| 65 | System call is complete, but the state hasn't been updated, |
| 66 | nor has the result been written back. eax contains the return |
| 67 | value. |
| 68 | */ |
| 69 | |
/* States of the proxy LWP state machine (stored in ProxyLWP.state). */
enum PXState
{
   PXS_BAD        = -1,
   PXS_WaitReq    =  0,   /* waiting for a request */
   PXS_RunSyscall =  1,   /* running a syscall */
   PXS_IntReply   =  2,   /* request interrupted - need to send reply */
   PXS_SysDone    =  3,   /* small window between syscall complete
                             and results written out */
   PXS_SigACK     =  4,   /* waiting for a signal ACK */
};
| 80 | |
/* Message types exchanged between the scheduler ("sched") and a proxy
   LWP ("proxy"); the direction each type travels is noted per entry. */
enum RequestType
{
   PX_BAD        = -1,
   PX_SetSigmask =  0,   /* sched->proxy; proxy->sched */
   PX_RunSyscall =  1,   /* sched->proxy; proxy->sched */
   PX_Signal     =  2,   /* proxy->sched */
   PX_SigACK     =  3,   /* sched->proxy */
   PX_Ping       =  4,   /* use for sanity-checking */
   PX_Exiting    =  5,   /* reply sent by proxy for exit sync */
};
| 90 | |
/* Hand-written x86 syscall stub.

   sys              - syscall number (loaded into %eax)
   arg1..arg6       - syscall arguments (loaded into %ebx, %ecx, %edx,
                      %esi, %edi, %ebp respectively)
   result           - where the value left in %eax by "int $0x80" is stored
   statep/poststate - if statep is non-NULL, *statep is set to poststate
                      after the result has been stored

   The local labels .sys_before, .sys_restarted, .sys_after and
   .sys_done mark the instruction ranges a signal handler uses to work
   out how far the syscall had progressed; their addresses are exported
   through the read-only words sys_before, sys_restarted, sys_after and
   sys_done declared after the asm block. */
extern void do_thread_syscall(Int sys,
                              Int arg1, Int arg2, Int arg3, Int arg4, Int arg5, Int arg6,
                              Int *result, enum PXState *statep, enum PXState poststate);

asm(
".text\n"
" .type do_thread_syscall,@function\n"

"do_thread_syscall:\n"
/* Save the four registers that are overwritten with syscall arguments
   below.  These four pushes are why every argument offset below is
   written as 16+N: N is the offset the argument had from %esp on entry. */
" push %esi\n"
" push %edi\n"
" push %ebx\n"
" push %ebp\n"
".sys_before:\n"
" movl 16+ 4(%esp),%eax\n" /* syscall */
" movl 16+ 8(%esp),%ebx\n" /* arg1 */
" movl 16+12(%esp),%ecx\n" /* arg2 */
" movl 16+16(%esp),%edx\n" /* arg3 */
" movl 16+20(%esp),%esi\n" /* arg4 */
" movl 16+24(%esp),%edi\n" /* arg5 */
" movl 16+28(%esp),%ebp\n" /* arg6 */
".sys_restarted:\n"
" int $0x80\n"
".sys_after:\n"
" movl 16+32(%esp),%ebx\n" /* ebx = Int *res */
" movl %eax, (%ebx)\n" /* write the syscall retval */

" movl 16+36(%esp),%ebx\n" /* ebx = enum PXState * */
" testl %ebx, %ebx\n"
" jz 1f\n" /* statep == NULL: skip the state update */

" movl 16+40(%esp),%ecx\n" /* write the post state (must be after retval write) */
" movl %ecx,(%ebx)\n"

".sys_done:\n" /* OK, all clear from here */
/* Restore the saved registers in reverse order of the pushes. */
"1: popl %ebp\n"
" popl %ebx\n"
" popl %edi\n"
" popl %esi\n"
" ret\n"
" .size do_thread_syscall,.-do_thread_syscall\n"
".previous\n"

/* Export the addresses of the local labels as read-only data words so
   that C code can compare an interrupted eip against them. */
".section .rodata\n"
"sys_before: .long .sys_before\n"
"sys_restarted: .long .sys_restarted\n"
"sys_after: .long .sys_after\n"
"sys_done: .long .sys_done\n"
".previous\n"
);
/* Addresses of the labels inside do_thread_syscall (defined in the
   .rodata section of the asm block above). */
extern const Addr sys_before, sys_restarted, sys_after, sys_done;
| 142 | |
| 143 | /* Run a syscall for a particular thread, getting the arguments from |
| 144 | the thread's registers, and returning the result in the thread's |
| 145 | eax. |
| 146 | |
| 147 | Assumes that the only thread state which matters is the contents of |
| 148 | %eax-%ebp and the return value in %eax. |
| 149 | */ |
| 150 | static void thread_syscall(Int syscallno, ThreadState *tst, |
| 151 | enum PXState *state , enum PXState poststate) |
| 152 | { |
| 153 | do_thread_syscall(syscallno, /* syscall no. */ |
| 154 | tst->m_ebx, /* arg 1 */ |
| 155 | tst->m_ecx, /* arg 2 */ |
| 156 | tst->m_edx, /* arg 3 */ |
| 157 | tst->m_esi, /* arg 4 */ |
| 158 | tst->m_edi, /* arg 5 */ |
| 159 | tst->m_ebp, /* arg 6 */ |
| 160 | &tst->m_eax, /* result */ |
| 161 | state, /* state to update */ |
| 162 | poststate); /* state when syscall has finished */ |
| 163 | } |
| 164 | |
| 165 | #define VG_PROXY_MAGIC 0xef83b192 |
| 166 | struct ProxyLWP { |
| 167 | UInt magic; /* magic number */ |
| 168 | ThreadId tid; /* scheduler's tid */ |
| 169 | ThreadState *tst; /* thread state */ |
| 170 | Int lwp; /* kernel's ID for LWP */ |
| 171 | Int exitcode; /* ProxyLWP exit code */ |
| 172 | |
| 173 | Int topx, frommain; /* pipe fds */ |
| 174 | vki_ksiginfo_t siginfo; /* received signal */ |
| 175 | Bool terminating; /* in the middle of exiting */ |
| 176 | |
| 177 | /* State of proxy */ |
| 178 | enum PXState state; |
| 179 | |
| 180 | jmp_buf jumpbuf; |
| 181 | }; |
| 182 | |
| 183 | static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype); |
| 184 | |
| 185 | struct PX_Request { |
| 186 | enum RequestType request; |
| 187 | |
| 188 | vki_ksigset_t sigmask; /* sigmask applied by SigACK */ |
| 189 | }; |
| 190 | |
| 191 | /* All replies are multiplexed over a single pipe, so we need to disinguish them */ |
| 192 | struct PX_Reply { |
| 193 | ThreadId tid; /* tid this reply pertains to */ |
| 194 | enum RequestType req; /* what this relates to */ |
| 195 | |
| 196 | union { |
| 197 | Int syscallno; /* system call completed */ |
| 198 | vki_ksiginfo_t siginfo; /* signal */ |
| 199 | }; |
| 200 | }; |
| 201 | |
| 202 | /* results pipe */ |
| 203 | static Int result_send = -1, result_recv = -1; |
| 204 | |
| 205 | /* reentrant printf for proxy use */ |
| 206 | #if 0 |
| 207 | static void px_printf(const Char *fmt, ...) |
| 208 | { |
| 209 | Char buf[1024]; |
| 210 | Char *cp = buf; |
| 211 | va_list vargs; |
| 212 | |
| 213 | void addbuf(Char c) { *cp++ = c; } |
| 214 | |
| 215 | cp += VG_(sprintf)(buf, "[%d, %d]: ", VG_(getpid)(), VG_(gettid)()); |
| 216 | |
| 217 | va_start(vargs,fmt); |
| 218 | VG_(vprintf)(addbuf, fmt, vargs); |
| 219 | va_end(vargs); |
| 220 | VG_(send_bytes_to_logging_sink)(buf, cp-buf); |
| 221 | } |
| 222 | #else |
| 223 | static void px_printf(const Char *fmt, ...) |
| 224 | { |
| 225 | } |
| 226 | #endif |
| 227 | |
| 228 | static const Char *pxs_name(enum PXState s) |
| 229 | { |
| 230 | switch(s) { |
| 231 | #define S(x) case PXS_##x: return #x |
| 232 | S(BAD); |
| 233 | S(WaitReq); |
| 234 | S(RunSyscall); |
| 235 | S(IntReply); |
| 236 | S(SysDone); |
| 237 | S(SigACK); |
| 238 | #undef S |
| 239 | default: return "???"; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | static const Char *px_name(enum RequestType r) |
| 244 | { |
| 245 | switch(r) { |
| 246 | #define S(x) case PX_##x: return #x |
| 247 | S(BAD); |
| 248 | S(SetSigmask); |
| 249 | S(RunSyscall); |
| 250 | S(Signal); |
| 251 | S(SigACK); |
| 252 | S(Ping); |
| 253 | S(Exiting); |
| 254 | #undef S |
| 255 | default: return "???"; |
| 256 | } |
| 257 | } |
| 258 | |
/* Byte offset, from the start of a proxy's page, at which its ProxyLWP
   structure lives: the structure occupies the last sizeof(ProxyLWP)
   bytes of the page. */
#define PROXYLWP_OFFSET	(VKI_BYTES_PER_PAGE - sizeof(ProxyLWP))
/* Round address/pointer p down to the start of the page containing it,
   yielding a UChar *.  Relies on VKI_BYTES_PER_PAGE being a power of
   two for the mask to be valid. */
#define ROUNDDN(p)	((UChar *)((Addr)(p) & ~(VKI_BYTES_PER_PAGE-1)))
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 261 | |
| 262 | /* |
| 263 | Allocate a page for the ProxyLWP and its stack. |
| 264 | |
| 265 | This uses the trick for finding the LWP's private data by knowing |
| 266 | that the stack is a single page, and that the ProxyLWP structure is |
| 267 | at the end of it. Therefore, given any %esp in the stack, you can |
| 268 | find the ProxyLWP structure (see LWP_TSD()). |
| 269 | */ |
| 270 | static ProxyLWP *LWP_alloc(void) |
| 271 | { |
| 272 | UChar *p = VG_(get_memory_from_mmap)(VKI_BYTES_PER_PAGE, "alloc_LWP"); |
| 273 | ProxyLWP *ret; |
| 274 | vg_assert(p == ROUNDDN(p)); /* px must be page aligned */ |
| 275 | |
| 276 | ret = (ProxyLWP *)(p + PROXYLWP_OFFSET); |
| 277 | |
| 278 | ret->magic = VG_PROXY_MAGIC; |
| 279 | |
| 280 | return ret; |
| 281 | } |
| 282 | |
| 283 | /* Free a thread structure */ |
| 284 | static void LWP_free(ProxyLWP *px) |
| 285 | { |
| 286 | UChar *p = ROUNDDN(px); |
| 287 | |
| 288 | vg_assert(px->magic == VG_PROXY_MAGIC); |
| 289 | px->magic = 0; |
| 290 | vg_assert((p + PROXYLWP_OFFSET) == (UChar *)px); |
| 291 | |
| 292 | VG_(munmap)(p, VKI_BYTES_PER_PAGE); |
| 293 | } |
| 294 | |
| 295 | /* Get a particular ProxyLWP's LWP structure from its esp (relies on |
| 296 | stacks being page aligned, with the ProxyLWP structure at the |
| 297 | end). */ |
| 298 | static inline ProxyLWP *LWP_TSD(void *esp) |
| 299 | { |
| 300 | UChar *p = ROUNDDN(esp); |
| 301 | ProxyLWP *ret; |
| 302 | |
| 303 | ret = (ProxyLWP *)(p + PROXYLWP_OFFSET); |
| 304 | vg_assert(ret->magic == VG_PROXY_MAGIC); |
| 305 | |
| 306 | return ret; |
| 307 | } |
| 308 | |
| 309 | /* Get top of stack */ |
| 310 | static inline void *LWP_stack(ProxyLWP *px) |
| 311 | { |
| 312 | vg_assert(px->magic == VG_PROXY_MAGIC); |
| 313 | |
| 314 | return (void *)(((void **)px) - 1); |
| 315 | } |
| 316 | |
| 317 | static void proxy_fork_cleanup(ThreadId tid); |
| 318 | |
| 319 | /* Init the proxy mechanism */ |
| 320 | void VG_(proxy_init)(void) |
| 321 | { |
| 322 | Int p[2]; |
| 323 | Int res; |
| 324 | |
| 325 | /* this will ignore any duplicate registrations */ |
| 326 | VG_(atfork)(NULL, NULL, proxy_fork_cleanup); |
| 327 | |
| 328 | vg_assert(result_recv == -1); |
| 329 | vg_assert(result_send == -1); |
| 330 | |
| 331 | res = VG_(pipe)(p); |
| 332 | vg_assert(res == 0); |
| 333 | |
| 334 | result_recv = VG_(safe_fd)(p[0]); |
| 335 | result_send = VG_(safe_fd)(p[1]); |
| 336 | |
| 337 | /* Make reading end non-blocking */ |
| 338 | VG_(fcntl)(result_recv, VKI_F_SETFL, VKI_O_NONBLOCK); |
| 339 | } |
| 340 | |
| 341 | /* After fork, the forking thread is in a strange state of having a |
| 342 | couple of pipes still linked to the parent. */ |
| 343 | static void proxy_fork_cleanup(ThreadId tid) |
| 344 | { |
| 345 | ThreadId t; |
| 346 | |
| 347 | VG_(close)(result_recv); |
| 348 | VG_(close)(result_send); |
| 349 | |
| 350 | result_recv = result_send = -1; |
| 351 | |
| 352 | VG_(proxy_init)(); |
| 353 | |
| 354 | for(t = 1; t < VG_N_THREADS; t++) { |
| 355 | ThreadState *tst = VG_(get_ThreadState)(t); |
| 356 | ProxyLWP *proxy = tst->proxy; |
| 357 | |
| 358 | if (tst->status == VgTs_Empty) { |
| 359 | vg_assert(proxy == NULL); |
| 360 | continue; |
| 361 | } |
| 362 | |
| 363 | vg_assert(proxy != NULL); |
| 364 | |
| 365 | /* We need to do a manual teardown, since the proxy this structure |
| 366 | describes is our parent's */ |
| 367 | VG_(close)(proxy->topx); |
| 368 | VG_(close)(proxy->frommain); |
| 369 | |
| 370 | LWP_free(proxy); |
| 371 | tst->proxy = NULL; |
| 372 | } |
| 373 | |
| 374 | /* Create a proxy for calling thread |
| 375 | |
| 376 | We need to temporarily set the state back to Runnable for |
| 377 | proxy_create's benefit. |
| 378 | */ |
| 379 | |
| 380 | { |
| 381 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 382 | |
| 383 | vg_assert(tst->proxy == NULL); |
| 384 | vg_assert(tst->status == VgTs_WaitSys); |
| 385 | tst->status = VgTs_Runnable; |
| 386 | VG_(proxy_create)(tid); |
| 387 | VG_(proxy_setsigmask)(tid); |
| 388 | tst->status = VgTs_WaitSys; |
| 389 | } |
| 390 | } |
| 391 | |
/* Return the fd of the reading end of the results pipe (-1 if the
   pipe is not currently open). */
Int VG_(proxy_resfd)(void)
{
   return result_recv;
}
| 396 | |
| 397 | void VG_(proxy_shutdown)(void) |
| 398 | { |
| 399 | VG_(close)(result_recv); |
| 400 | VG_(close)(result_send); |
| 401 | |
| 402 | result_recv = result_send = -1; |
| 403 | } |
| 404 | |
| 405 | /* This is called from within a proxy LWP signal handler. This |
| 406 | function records the siginfo, then longjmps back into the proxy |
| 407 | main state machine loop. The presumption is that the signal |
| 408 | handler is being run with all signals blocked; the longjmp is |
| 409 | there to make sure they stay masked until the application thread is |
| 410 | ready to run its signal handler. */ |
| 411 | void VG_(proxy_handlesig)(const vki_ksiginfo_t *siginfo, |
| 412 | const struct vki_sigcontext *sigcontext) |
| 413 | { |
| 414 | UChar local; |
| 415 | ProxyLWP *px = LWP_TSD(&local); |
| 416 | Addr eip = sigcontext->eip; |
| 417 | Int eax = sigcontext->eax; |
| 418 | |
| 419 | vg_assert(siginfo->si_signo != 0); |
| 420 | if (px->siginfo.si_signo != 0) { |
| 421 | px_printf("proxy_handlesig: tid %d already has %d pending, new sig %d\n", |
| 422 | px->lwp, px->siginfo.si_signo, siginfo->si_signo); |
| 423 | } |
| 424 | vg_assert(px->siginfo.si_signo == 0); |
| 425 | |
| 426 | px->siginfo = *siginfo; |
| 427 | |
| 428 | px_printf("proxy got signal %d\n", siginfo->si_signo); |
| 429 | |
| 430 | /* First look to see if the EIP is within our interesting ranges |
| 431 | near a syscall to work out what should happen. */ |
| 432 | if (sys_before <= eip && eip <= sys_restarted) { |
| 433 | /* We are before the syscall actually ran, or it did run and |
| 434 | wants to be restarted. Either way, set the return code to |
| 435 | indicate a restart. This is not really any different from |
| 436 | anywhere else, except that we can make some assertions about |
| 437 | the proxy and machine state here. */ |
| 438 | vg_assert(px->state == PXS_RunSyscall); |
| 439 | vg_assert(px->tst->m_eax == -VKI_ERESTARTSYS); |
| 440 | } else if (sys_after <= eip && eip <= sys_done) { |
| 441 | /* We're after the syscall. Either it was interrupted by the |
| 442 | signal, or the syscall completed normally. In either case |
| 443 | eax contains the correct syscall return value, and the new |
| 444 | state is effectively PXS_SysDone. */ |
| 445 | vg_assert(px->state == PXS_RunSyscall || px->state == PXS_SysDone); |
| 446 | px->state = PXS_SysDone; |
| 447 | px->tst->m_eax = eax; |
| 448 | } |
| 449 | px_printf(" signalled in state %s\n", pxs_name(px->state)); |
| 450 | |
| 451 | __builtin_longjmp(px->jumpbuf, 1); |
| 452 | } |
| 453 | |
| 454 | static Bool send_reply(const struct PX_Reply *reply) |
| 455 | { |
| 456 | const Int size = sizeof(struct PX_Reply); |
| 457 | |
| 458 | return VG_(write)(result_send, reply, size) == size; |
| 459 | } |
| 460 | |
| 461 | static Bool recv_reply(struct PX_Reply *reply) |
| 462 | { |
| 463 | const Int size = sizeof(struct PX_Reply); |
| 464 | |
| 465 | return VG_(read)(result_recv, reply, size) == size; |
| 466 | } |
| 467 | |
/* Proxy LWP thread.  This is run as a separate cloned() thread, so it
   MUST NOT touch any core Valgrind data structures directly: the only
   exception is while we're running a PX_RunSyscall command, we may
   look at and update the thread's register state.  It interacts with
   the rest of Valgrind by receiving messages through its pipe and
   sending results through result_send.

   `v` is this proxy's ProxyLWP.

   The body is a loop with two parts: a signal-driven part, entered
   when the signal handler longjmps to px->jumpbuf, and the main state
   machine, which reads a struct PX_Request from px->frommain, acts on
   it, and sends a struct PX_Reply when one is due.

   On the way out a PX_Exiting reply is sent, frommain is closed, and
   the return value is also stored in px->exitcode.  Return values:
      0   EOF on the request pipe (normal exit)
      1   read from the request pipe failed
      2   partial read of a request
      3   unexpected request type
      4   failed or incomplete write of a reply
      5   unexpected state in the main state machine
      10  signalled after reading a PX_Exiting or PX_Signal request
      11  signalled after reading a PX_SigACK request
      13  signalled while in PXS_IntReply or PXS_SigACK
      33  signalled while in PXS_BAD
      44  failed or incomplete write of the signal notification
   (1000 is only the initial value of `ret`; every jump to `out` sets
   one of the codes above first.) */
static Int proxylwp(void *v)
{
   ProxyLWP *px = (ProxyLWP *)v;
   Int frommain = px->frommain;
   ThreadState *tst = px->tst;
   vki_ksigset_t allsig;
   vki_ksigset_t appsigmask;	/* signal mask the client has asked for */
   Int ret = 1000;
   static const vki_kstack_t ss = { .ss_flags = VKI_SS_DISABLE };

   /* Block everything until we're told otherwise (LWP should have
      been started with all signals blocked anyway) */
   VG_(ksigfillset)(&allsig);
   VG_(ksigdelset)(&allsig, VKI_SIGVGKILL);	/* but allow SIGVGKILL to interrupt */

   VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);

   appsigmask = allsig;

   /* no signal stack for us */
   VG_(ksigaltstack)(&ss, NULL);

   for(;;) {
      /* NOTE(review): `reply` and `req` are written by the main state
         machine below and then read by the signal-driven part after a
         longjmp lands on the setjmp that follows; this presumably
         relies on them keeping their values across the jump - confirm. */
      struct PX_Reply reply, sigreply;
      struct PX_Request req;
      Int res;

      if (__builtin_setjmp(px->jumpbuf)) {
	 /* We were hit by a signal.  This is the signal-driven part
	    of the state machine.

	    This code prepares a reply which is suitable for whatever
	    was interrupted by this signal.  If "no reply" is the
	    right response, then it sets reply.req = PX_BAD.

	    NOTE: the ST:N notation represents the correspondence
	    between states where we can be interrupted in the main
	    state machine loop, and where those states are handled
	    here.
	 */

	 if (px->siginfo.si_signo != VKI_SIGVGKILL) {
	    /* First, send the signal info */
	    sigreply.tid = px->tid;
	    sigreply.req = PX_Signal;
	    sigreply.siginfo = px->siginfo;

	    if (!send_reply(&sigreply)) {
	       ret = 44;		/* incomplete or failed write */
	       goto out;
	    }
	 } else {
	    /* We got VKI_SIGVGKILL, which means we just skip all the
	       below and get back to the state machine - probably to
	       exit. */
	    px->state = PXS_WaitReq;
	    px->siginfo.si_signo = 0;
	    goto state_machine;
	 }

	 /* Signal has been reported; clear the pending slot so the
	    handler can record the next one. */
	 px->siginfo.si_signo = 0;

	 /* Now work out what our new state is, and what to do on the way. */
	 switch(px->state) {
	 case PXS_WaitReq:
	    /* We were interrupted while waiting for a request.  See
	       if we had actually read the request, and do the
	       appropriate thing if so. */
	    reply.req = req.request;
	    reply.tid = px->tid;

	    switch(req.request) {
	    case PX_BAD:
	       /* ST:1 */
	       /* nothing read; just wait for SigACK */
	       px->state = PXS_SigACK;
	       break;

	    case PX_RunSyscall:
	       /* ST:2 */
	       /* They asked for a syscall, but we were signalled
		  before even getting started.  Claim the syscall was
		  interrupted.

		  XXX how to distinguish between restartable and
		  non-restartable syscalls?  Does it matter?
	       */
	       reply.syscallno = tst->syscallno;

	       tst->m_eax = -VKI_ERESTARTSYS;
	       px->state = PXS_IntReply;
	       break;

	    case PX_SetSigmask:
	       /* ST:2 */
	       /* ST:3 */
	       /* They asked for a signal mask update.  Ignore it,
		  because they're going to give us a new mask when
		  they send a SigACK, and we want all signals blocked
		  in the meantime.  However, we set the state to
		  PXS_IntReply to make sure the reply from the
		  PX_SetSigmask is sent. */
	       vg_assert(reply.req == PX_SetSigmask);
	       px->state = PXS_IntReply;
	       break;

	    case PX_Ping:
	       /* ST:2 */
	       /* We read a Ping request, so we need to send a Ping
		  reply. */
	       vg_assert(reply.req == PX_Ping);
	       px->state = PXS_IntReply;
	       break;

	    case PX_Exiting:
	    case PX_Signal:
	       ret = 10;		/* completely bogus - no one should send us a signal */
	       goto out;

	    case PX_SigACK:
	       ret = 11;		/* Also bogus.  No way we should get a
					   signal while waiting for a
					   SigACK. */
	       goto out;
	    }
	    break;

	 case PXS_RunSyscall:
	    /* ST:4 */
	    /* We were actually running the syscall when interrupted.
	       reply should already be set up, including return in eax. */
	    vg_assert(reply.req == PX_RunSyscall);
	    vg_assert(reply.syscallno == tst->syscallno);
	    vg_assert(tst->status == VgTs_WaitSys);
	    px->state = PXS_IntReply;
	    break;

	 case PXS_SysDone:
	    /* The syscall is done; we just need to send the results
	       back. */
	    vg_assert(reply.req == PX_RunSyscall);
	    vg_assert(reply.syscallno == tst->syscallno);
	    px->state = PXS_IntReply;
	    break;

	 case PXS_IntReply:
	 case PXS_SigACK:
	    ret = 13;			/* Bogus.  Same as ret=11 above. */
	    goto out;

	 case PXS_BAD:
	    ret = 33;
	    goto out;
	 }

	 /* End of signal handling states.  If the scheduler LWP is
	    currently running application code, tell it to drop back
	    into the scheduler loop ASAP to handle the signal. */
	 if (VG_(clo_lowlat_signals))
	    VG_(need_resched)(px->tid);
      }

     state_machine:
      px_printf("proxylwp main: state %s\n", pxs_name(px->state));

      switch(px->state) {
      case PXS_WaitReq:
      case PXS_SigACK:
	 req.request = PX_BAD;	/* init request so we know if the read() read anything */

	 if (px->state == PXS_WaitReq) {
	    /* allow signals when waiting for a normal request */
	    VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
	 }

	 /* ST:1 */

	 res = VG_(read)(frommain, &req, sizeof(req));

	 /* ST:2 */

	 /* process message with signals blocked */
	 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);

	 if (res == 0) {
	    ret = 0;
	    goto out;		/* EOF - we're quitting */
	 }

	 if (res < 0) {
	    px_printf("read(frommain) failed %d\n", res);
	    ret = 1;		/* error */
	    goto out;
	 }
	 if (res != sizeof(req)) {
	    ret = 2;		/* error - partial read */
	    goto out;
	 }

	 px_printf("read req: %s\n", px_name(req.request));

	 /* By default the reply echoes the request type; individual
	    cases below override reply.req when no reply is wanted. */
	 reply.tid = px->tid;
	 reply.req = req.request;

	 switch(req.request) {
	 case PX_Ping:
	    /* do nothing; just send reply */
	    break;

	 case PX_SigACK:
	    /* The thread ACKed the signal, and sent the mask they
	       want while running the handler. */
	    vg_assert(px->state == PXS_SigACK);
	    appsigmask = req.sigmask;
	    VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL);	/* but allow SIGVGKILL to interrupt */
	    px->state = PXS_WaitReq;
	    reply.req = PX_BAD;	/* don't reply */
	    break;

	 case PX_SetSigmask:
	    appsigmask = req.sigmask;
	    VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL);	/* but allow SIGVGKILL to interrupt */

	    vg_assert(px->state == PXS_WaitReq ||
		      px->state == PXS_SigACK);

	    if (px->state != PXS_SigACK) {
	       /* If we're not waiting for a PX_SigACK, set the app's
		  mask to get at least one of the pending signals,
		  which will be delivered synchronously, so that some
		  progress is made before we tell the client the mask
		  has been set.  Then reset the mask back to all
		  blocked. */
	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
	       /* ST:3 */
	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
	    } else {
	       /* Waiting for SigACK.  We want all signals blocked,
		  and when the SigACK arrives, it will give us the
		  thread's signal mask for its handler. */
	    }
	    break;

	 case PX_RunSyscall:
	    /* Run a syscall for our thread; results will be poked
	       back into tst */
	    reply.syscallno = tst->syscallno;

	    vg_assert(px->state == PXS_WaitReq ||
		      px->state == PXS_SigACK);
	    if (px->state == PXS_SigACK) {
	       /* If we're in the middle of signal handling, make the
		  client's syscalls fail with ERESTARTSYS until its signal
		  handler runs - there should be at most one, if it was
		  on the way to us as we got the signal.
	       */
	       px_printf("RunSyscall in SigACK: rejecting syscall %d with ERESTARTSYS\n",
			 reply.syscallno);
	       tst->m_eax = -VKI_ERESTARTSYS;
	    } else {
	       Int syscallno = tst->syscallno;

	       px->state = PXS_RunSyscall;
	       /* If we're interrupted before we get to the syscall
		  itself, we want the syscall restarted. */
	       tst->m_eax = -VKI_ERESTARTSYS;

	       /* set our process group ID to match parent */
	       if (VG_(getpgrp)() != VG_(main_pgrp))
		  VG_(setpgid)(0, VG_(main_pgrp));

	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);

	       /* ST:4 */

	       /* thread_syscall moves px->state to PXS_SysDone once
		  the result has been written back. */
	       thread_syscall(syscallno, tst, &px->state, PXS_SysDone);

	       /* ST:5 */

	       VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
	       /* whew - made it here without being interrupted */
	       px->state = PXS_WaitReq;

	       if (VG_(clo_lowlat_syscalls))
		  VG_(need_resched)(px->tid);
	    }
	    break;

	 case PX_BAD:
	 case PX_Signal:
	 case PX_Exiting:
	    /* we never expect to see these */
	    ret = 3;
	    goto out;
	 }
	 break;

      case PXS_IntReply:
	 /* This state only exists so that we fall out and write the
	    interrupted syscall reply before moving to SigACK */
	 px->state = PXS_SigACK;
	 break;

      case PXS_RunSyscall:
      case PXS_SysDone:
      case PXS_BAD:
      default:
	 /* Never expect to see these states here */
	 ret = 5;
	 goto out;
      }

      /* If we have something sensible to say, say it */
      if (reply.req != PX_BAD) {
	 px_printf("sending reply %s\n", px_name(reply.req));

	 if (!send_reply(&reply)) {
	    ret = 4;		/* error - didn't write full message */
	    goto out;
	 }
	 /* mark the reply as sent so it is not sent twice */
	 reply.req = PX_BAD;
      }
   }

  out:
   px_printf("proxy exiting with ret=%d\n", ret);

   {
      /* Tell the other side we are going away.  Only req and tid are
	 filled in, and the result of the write is not checked. */
      struct PX_Reply reply;
      reply.req = PX_Exiting;
      reply.tid = px->tid;
      px_printf("exit: sending %s\n", px_name(reply.req));

      send_reply(&reply);
   }

   px->frommain = -1;
   VG_(close)(frommain);

   px->exitcode = ret;
   return ret;
}
| 815 | |
| 816 | /* Send a signal to a proxy LWP */ |
| 817 | void VG_(proxy_sendsig)(ThreadId tid, Int sig) |
| 818 | { |
| 819 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 820 | ProxyLWP *proxy = tst->proxy; |
| 821 | Int lwp; |
| 822 | |
| 823 | if (proxy == NULL) |
| 824 | return; |
| 825 | |
| 826 | lwp = proxy->lwp; /* proxy->lwp may change async */ |
| 827 | |
| 828 | if (lwp != 0) { |
| 829 | /* SIGKILL and SIGSTOP always apply to all threads (need to |
| 830 | route for route_signals case?) */ |
| 831 | if (sig == VKI_SIGKILL || sig == VKI_SIGSTOP) |
| 832 | VG_(kkill)(VG_(main_pid), sig); |
| 833 | else |
| 834 | VG_(ktkill)(lwp, sig); |
| 835 | } |
| 836 | |
| 837 | /* If a thread is sending a signal to itself and the signal isn't |
| 838 | blocked (ie, it will be delivered), wait until the signal |
| 839 | message gets sent back, thus making the signal synchronous. */ |
| 840 | if (sig != 0 && |
| 841 | !VG_(is_sig_ign)(sig) && |
| 842 | tid == VG_(get_current_or_recent_tid)() && |
| 843 | !VG_(ksigismember)(&tst->eff_sig_mask, sig)) { |
| 844 | /* If the LWP is actually blocked in a sigtimedwait, then it |
| 845 | will eat the signal rather than make it pending and deliver |
| 846 | it by the normal mechanism. In this case, just wait for the |
| 847 | syscall to dinish. */ |
| 848 | if (tst->status == VgTs_WaitSys && tst->syscallno == __NR_rt_sigtimedwait) |
| 849 | sys_wait_results(True, tid, PX_RunSyscall); |
| 850 | else |
| 851 | sys_wait_results(True, tid, PX_Signal); |
| 852 | } |
| 853 | } |
| 854 | |
| 855 | /* If a thread is blocked in a syscall, this function will interrupt |
| 856 | the proxy LWP's syscall by hitting it with a VKI_SIGVGINT signal. |
| 857 | This signal will not be reported to the client application. */ |
| 858 | void VG_(proxy_abort_syscall)(ThreadId tid) |
| 859 | { |
| 860 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 861 | ProxyLWP *proxy = tst->proxy; |
| 862 | Int lwp; |
| 863 | |
| 864 | if (tst->status != VgTs_WaitSys) |
| 865 | return; |
| 866 | |
| 867 | vg_assert(proxy != NULL); |
| 868 | |
| 869 | lwp = proxy->lwp; |
| 870 | |
| 871 | if (lwp != 0) |
| 872 | VG_(ktkill)(lwp, VKI_SIGVGINT); |
| 873 | |
| 874 | sys_wait_results(True, tid, PX_RunSyscall); |
| 875 | |
| 876 | vg_assert(tst->status == VgTs_Runnable); |
| 877 | } |
| 878 | |
| 879 | static Int do_futex(void *addr, Int op, Int val, struct vki_timespec *time, void *addr2) |
| 880 | { |
| 881 | return VG_(do_syscall)(__NR_futex, addr, op, val, time, addr2); |
| 882 | } |
| 883 | |
| 884 | #define VKI_FUTEX_WAIT 0 |
| 885 | #define VKI_FUTEX_WAKE 1 |
| 886 | #define VKI_FUTEX_FD 2 |
| 887 | #define VKI_FUTEX_REQUEUE 3 |
| 888 | |
| 889 | static Int have_futex = -1; /* -1 -> unknown */ |
| 890 | |
| 891 | /* |
| 892 | Create a proxy LWP using whatever varient of clone makes the most |
| 893 | sense for the current kernel. We use futexes for termination |
| 894 | notification wherever possible. Returns 0 on success, or a -ve |
| 895 | error code on failure. |
| 896 | */ |
| 897 | static Int proxy_clone(ProxyLWP *proxy) |
| 898 | { |
| 899 | Int ret; |
| 900 | |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 901 | if (VG_(clo_assume_24)) |
| 902 | have_futex = 0; |
| 903 | |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 904 | if (have_futex == -1) |
| 905 | have_futex = do_futex(NULL, VKI_FUTEX_WAKE, 0, NULL, NULL) != -VKI_ENOSYS; |
| 906 | |
| 907 | if (have_futex) { |
| 908 | ret = VG_(clone)(proxylwp, |
| 909 | LWP_stack(proxy), |
| 910 | VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM | |
| 911 | VKI_CLONE_SIGHAND | VKI_CLONE_THREAD | |
| 912 | VKI_CLONE_PARENT_SETTID | |
| 913 | VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED, |
| 914 | proxy, &proxy->lwp, &proxy->lwp); |
| 915 | } else { |
| 916 | VG_(do_signal_routing) = True; /* XXX True, it seems kernels |
| 917 | which have futex also have |
| 918 | sensible signal handling, but |
| 919 | it would be nice to test it |
| 920 | directly. */ |
| 921 | |
| 922 | ret = VG_(clone)(proxylwp, |
| 923 | LWP_stack(proxy), |
| 924 | VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM | |
| 925 | VKI_CLONE_SIGHAND | VKI_CLONE_THREAD, |
| 926 | proxy, NULL, NULL); |
| 927 | proxy->lwp = ret; |
| 928 | } |
| 929 | |
| 930 | return (ret < 0) ? ret : 0; |
| 931 | } |
| 932 | |
| 933 | /* Wait on a proxy LWP. Returns True if the LWP has exited. */ |
| 934 | static Bool proxy_wait(ProxyLWP *proxy, Bool block, Int *status) |
| 935 | { |
| 936 | Bool ret = False; |
| 937 | |
| 938 | if (have_futex == -1) |
| 939 | return False; |
| 940 | |
| 941 | if (have_futex) { |
| 942 | if (block) { |
| 943 | Int lwp = proxy->lwp; |
| 944 | |
| 945 | while(proxy->lwp != 0) |
| 946 | do_futex(&proxy->lwp, VKI_FUTEX_WAIT, lwp, NULL, NULL); |
| 947 | |
| 948 | if (status) |
| 949 | *status = proxy->exitcode; |
| 950 | ret = True; |
| 951 | } else { |
| 952 | if (proxy->lwp == 0) { |
| 953 | *status = proxy->exitcode; |
| 954 | ret = True; |
| 955 | } |
| 956 | } |
| 957 | } else { |
| 958 | Int flags = VKI__WCLONE; |
| 959 | Int res; |
| 960 | |
| 961 | if (!block) |
| 962 | flags |= VKI_WNOHANG; |
| 963 | res = VG_(waitpid)(proxy->lwp, status, flags); |
| 964 | if (res == proxy->lwp) { |
| 965 | vg_assert(*status == proxy->exitcode); |
| 966 | ret = True; |
| 967 | } |
| 968 | } |
| 969 | |
| 970 | return ret; |
| 971 | } |
| 972 | |
| 973 | /* Create a proxy for a new thread */ |
| 974 | void VG_(proxy_create)(ThreadId tid) |
| 975 | { |
| 976 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 977 | ProxyLWP *proxy; |
| 978 | Int p[2]; |
| 979 | vki_ksigset_t mask; |
| 980 | Int ret; |
| 981 | |
| 982 | vg_assert(tst->proxy == NULL); |
| 983 | vg_assert(tst->status == VgTs_Runnable); |
| 984 | |
| 985 | proxy = LWP_alloc(); |
| 986 | |
| 987 | VG_(pipe)(p); |
| 988 | |
| 989 | proxy->tid = tid; |
| 990 | proxy->tst = tst; |
| 991 | proxy->siginfo.si_signo = 0; |
| 992 | proxy->frommain = VG_(safe_fd)(p[0]); |
| 993 | proxy->topx = VG_(safe_fd)(p[1]); |
| 994 | proxy->state = PXS_WaitReq; /* start by waiting for requests */ |
| 995 | proxy->terminating = False; |
| 996 | |
| 997 | /* Make sure proxy LWP starts with all signals blocked (not even |
| 998 | SEGV, BUS, ILL or FPE) */ |
| 999 | VG_(block_all_host_signals)(&mask); |
| 1000 | |
| 1001 | ret = proxy_clone(proxy); |
| 1002 | if (ret < 0) { |
| 1003 | VG_(printf)("Error %d trying to create proxy LWP for tid %d\n", |
| 1004 | ret, tid); |
| 1005 | VG_(core_panic)("Can't start proxy LWPs"); |
| 1006 | } |
| 1007 | |
| 1008 | VG_(restore_all_host_signals)(&mask); |
| 1009 | |
| 1010 | tst->proxy = proxy; |
| 1011 | } |
| 1012 | |
| 1013 | /* Clean up proxy after thread dies */ |
| 1014 | void VG_(proxy_delete)(ThreadId tid, Bool force) |
| 1015 | { |
| 1016 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 1017 | ProxyLWP *proxy = tst->proxy; |
| 1018 | Bool res; |
| 1019 | Int status; |
| 1020 | Int lwp; |
| 1021 | |
| 1022 | if (proxy == NULL) |
| 1023 | return; /* nothing to do */ |
| 1024 | |
| 1025 | lwp = proxy->lwp; |
| 1026 | |
| 1027 | #if 0 |
| 1028 | MAYBE_PRINTF("VG_(proxy_delete)(tid=%d (lwp=%d), force=%s; tst->status=%d\n", |
| 1029 | tid, lwp, force ? "true" : "false", tst->status); |
| 1030 | #endif |
| 1031 | vg_assert(proxy->tid == tid); |
| 1032 | if (proxy->terminating) |
| 1033 | return; /* already going away */ |
| 1034 | |
| 1035 | proxy->terminating = True; |
| 1036 | |
| 1037 | VG_(close)(proxy->topx); |
| 1038 | proxy->topx = -1; |
| 1039 | |
| 1040 | /* proxy thread will close proxy->frommain itself */ |
| 1041 | |
| 1042 | if (force && lwp != 0) { |
| 1043 | /* wouldn't need to force it if it were already dead */ |
| 1044 | vg_assert(tst->status != VgTs_Empty); |
| 1045 | //VG_(printf)("kill %d with SIGVGKILL\n", lwp); |
| 1046 | VG_(ktkill)(lwp, VKI_SIGVGKILL); |
| 1047 | } else |
| 1048 | vg_assert(tst->status == VgTs_Empty); /* just killed */ |
| 1049 | |
| 1050 | status = -1; |
| 1051 | res = False; |
| 1052 | |
| 1053 | /* We need to wait for the PX_Exiting message before doing the |
| 1054 | proxy_wait, because if we don't read the results pipe, the proxy |
| 1055 | may be blocked writing to it, causing a deadlock with us as we |
| 1056 | wait for it to exit. */ |
| 1057 | sys_wait_results(True, tid, PX_Exiting); |
| 1058 | res = proxy_wait(proxy, True, &status); |
| 1059 | |
| 1060 | if (!res || status != 0) |
| 1061 | VG_(printf)("proxy %d for tid %d exited status %d, res %d\n", |
| 1062 | lwp, tid, status, res); |
| 1063 | |
| 1064 | LWP_free(proxy); |
| 1065 | tst->proxy = NULL; |
| 1066 | } |
| 1067 | |
| 1068 | /* Read back the results of any completed syscalls. |
| 1069 | |
| 1070 | At this point, there should be only one pending syscall per thread. |
| 1071 | Those threads should be in VgTs_WaitSys state. Each syscall return |
| 1072 | may have multiple signals associated with it, so we read those and |
| 1073 | set up some pending signals in our signal simulation. When we |
| 1074 | finally get the message saying the syscall is complete, we mark the |
| 1075 | thread as runnable and return. |
| 1076 | |
| 1077 | If block is set to True, then this call will block until anything |
| 1078 | happens (ie, some progress was made). |
| 1079 | |
| 1080 | If reqtype != PX_BAD, then this will block until some reply for |
| 1081 | that request type appears (assuming you're expecting that kind of |
| 1082 | reply, otherwise it will block forever). If tid != 0, then it will |
| 1083 | wait for a reply for that particular tid. |
| 1084 | */ |
| 1085 | static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype) |
| 1086 | { |
| 1087 | Bool found_reply = (reqtype == PX_BAD); |
| 1088 | struct PX_Reply res; |
| 1089 | |
| 1090 | vg_assert(VG_(gettid)() == VG_(main_pid)); |
| 1091 | |
| 1092 | do { |
| 1093 | if (reqtype != PX_BAD || block) { |
| 1094 | /* wait for activity on recv_res */ |
| 1095 | struct vki_pollfd pollfd; |
| 1096 | Int ret; |
| 1097 | |
| 1098 | /* result_recv could be -1 if we're asking for results before any |
| 1099 | syscalls are issued - which is OK - but we can't block on |
| 1100 | it. */ |
| 1101 | vg_assert(result_recv != -1); |
| 1102 | |
| 1103 | pollfd.fd = result_recv; |
| 1104 | pollfd.events = VKI_POLLIN; |
| 1105 | |
| 1106 | do { |
| 1107 | ret = VG_(poll)(&pollfd, 1, -1); |
| 1108 | } while(ret == -VKI_EINTR); |
| 1109 | |
| 1110 | if (ret <= 0) { |
| 1111 | VG_(printf)("sys_wait_results: poll failed fd=%d errno=%d\n", |
| 1112 | pollfd.fd, ret); |
| 1113 | return; |
| 1114 | } |
| 1115 | } |
| 1116 | |
| 1117 | while(recv_reply(&res)) { |
| 1118 | ThreadState *tst; |
| 1119 | |
| 1120 | if (reqtype != PX_BAD && |
| 1121 | res.req == reqtype && |
| 1122 | (tid == 0 || tid == res.tid)) |
| 1123 | found_reply = True; |
| 1124 | |
| 1125 | tst = VG_(get_ThreadState)(res.tid); |
| 1126 | |
| 1127 | switch(res.req) { |
| 1128 | case PX_SetSigmask: |
| 1129 | /* Don't need to do anything */ |
| 1130 | if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls)) |
| 1131 | VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_SetSigmask for TID %d", |
| 1132 | res.tid); |
| 1133 | break; |
| 1134 | |
| 1135 | case PX_RunSyscall: |
| 1136 | if (VG_(clo_trace_syscalls)) |
| 1137 | VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_RunSyscall for TID %d: syscall %d result %d", |
| 1138 | res.tid, tst->syscallno, tst->m_eax); |
| 1139 | |
| 1140 | if (tst->status != VgTs_WaitSys) |
| 1141 | VG_(printf)("tid %d in status %d\n", |
| 1142 | tst->tid, tst->status); |
| 1143 | |
| 1144 | vg_assert(res.syscallno == tst->syscallno); |
| 1145 | vg_assert(tst->status == VgTs_WaitSys); |
| 1146 | |
| 1147 | VG_(post_syscall)(res.tid); |
| 1148 | break; |
| 1149 | |
| 1150 | case PX_Signal: |
| 1151 | if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls)) |
| 1152 | VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_Signal for TID %d, signal %d", |
| 1153 | res.tid, res.siginfo.si_signo); |
| 1154 | |
| 1155 | vg_assert(res.siginfo.si_signo != 0); |
| 1156 | if (VG_(threads)[res.tid].proxy && |
| 1157 | !VG_(threads)[res.tid].proxy->terminating) |
| 1158 | VG_(deliver_signal)(res.tid, &res.siginfo, True); |
| 1159 | break; |
| 1160 | |
| 1161 | case PX_Ping: |
| 1162 | /* Got a ping response. Great. */ |
| 1163 | break; |
| 1164 | |
| 1165 | case PX_Exiting: |
| 1166 | /* They're exiting. Hooray! */ |
| 1167 | break; |
| 1168 | |
| 1169 | case PX_BAD: |
| 1170 | case PX_SigACK: |
| 1171 | default: |
| 1172 | VG_(core_panic)("sys_wait_results: got PX_BAD/PX_SigACK!\n"); |
| 1173 | } |
| 1174 | } |
| 1175 | } while(!found_reply); |
| 1176 | } |
| 1177 | |
| 1178 | /* External version */ |
| 1179 | void VG_(proxy_results)(void) |
| 1180 | { |
| 1181 | sys_wait_results(False, 0, PX_BAD); |
| 1182 | } |
| 1183 | |
fitzhardinge | a09a1b5 | 2003-11-07 23:09:48 +0000 | [diff] [blame^] | 1184 | void VG_(proxy_wait_sys)(ThreadId tid) |
| 1185 | { |
| 1186 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 1187 | |
| 1188 | vg_assert(tst->status == VgTs_WaitSys); |
| 1189 | |
| 1190 | sys_wait_results(True, tid, PX_RunSyscall); |
| 1191 | } |
| 1192 | |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1193 | /* Tell proxy about it's thread's updated signal mask */ |
| 1194 | void VG_(proxy_setsigmask)(ThreadId tid) |
| 1195 | { |
| 1196 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 1197 | ProxyLWP *proxy = tst->proxy; |
| 1198 | Int res; |
| 1199 | struct PX_Request req; |
| 1200 | |
| 1201 | vg_assert(proxy != NULL); |
| 1202 | vg_assert(proxy->tid == tid); |
| 1203 | |
| 1204 | req.request = PX_SetSigmask; |
| 1205 | req.sigmask = tst->sig_mask; |
| 1206 | |
| 1207 | tst->eff_sig_mask = tst->sig_mask; |
| 1208 | |
| 1209 | /* clear the results pipe before we try to write to a proxy to |
| 1210 | prevent a deadlock */ |
| 1211 | VG_(proxy_results)(); |
| 1212 | res = VG_(write)(proxy->topx, &req, sizeof(req)); |
| 1213 | vg_assert(res == sizeof(req)); |
| 1214 | |
| 1215 | /* wait for proxy to ack mask update; mask changes don't really |
| 1216 | have to be synchronous, but they do have to be fully ordered |
| 1217 | with respect to each other (ie, if thread A then thread B |
| 1218 | updates their signal masks, A's update must be done before B's |
| 1219 | is). */ |
| 1220 | sys_wait_results(True, tid, PX_SetSigmask); |
| 1221 | } |
| 1222 | |
| 1223 | void VG_(proxy_sigack)(ThreadId tid, const vki_ksigset_t *mask) |
| 1224 | { |
| 1225 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 1226 | ProxyLWP *proxy = tst->proxy; |
| 1227 | Int res; |
| 1228 | struct PX_Request req; |
| 1229 | |
| 1230 | vg_assert(proxy != NULL); |
| 1231 | vg_assert(proxy->tid == tid); |
| 1232 | |
| 1233 | if (proxy_wait(proxy, False, NULL)) |
| 1234 | return; |
| 1235 | |
| 1236 | req.request = PX_SigACK; |
| 1237 | req.sigmask = *mask; |
| 1238 | |
| 1239 | tst->eff_sig_mask = *mask; |
| 1240 | |
| 1241 | #if 0 |
| 1242 | /* Clear the results pipe before we try to write to a proxy to |
| 1243 | prevent a deadlock. |
| 1244 | |
| 1245 | XXX this breaks things. This is called as a result of a |
| 1246 | PX_Signal message, and is called from within sys_wait_results. |
| 1247 | If that sys_wait_results was blocking of a particular message, |
| 1248 | it will never wake up if we eat those messages by calling |
| 1249 | sys_wait_results ourselves from here. Maybe make |
| 1250 | sys_wait_results non-recursive? |
| 1251 | */ |
| 1252 | VG_(proxy_results)(); |
| 1253 | #endif |
| 1254 | |
| 1255 | res = VG_(write)(proxy->topx, &req, sizeof(req)); |
| 1256 | vg_assert(res == sizeof(req)); |
| 1257 | } |
| 1258 | |
| 1259 | /* Wait for a signal to be delivered to any thread */ |
| 1260 | void VG_(proxy_waitsig)(void) |
| 1261 | { |
| 1262 | if (VG_(do_signal_routing)) |
| 1263 | VG_(route_signals)(); |
| 1264 | else |
| 1265 | sys_wait_results(True, VG_INVALID_THREADID /* any */, PX_Signal); |
| 1266 | } |
| 1267 | |
| 1268 | /* Issue a syscall to the thread's ProxyLWP */ |
| 1269 | Int VG_(sys_issue)(int tid) |
| 1270 | { |
| 1271 | ThreadState *tst = VG_(get_ThreadState)(tid); |
| 1272 | ProxyLWP *proxy = tst->proxy; |
| 1273 | Int res; |
| 1274 | struct PX_Request req; |
| 1275 | |
| 1276 | vg_assert(proxy != NULL); |
| 1277 | vg_assert(proxy->tid == tid); |
fitzhardinge | a09a1b5 | 2003-11-07 23:09:48 +0000 | [diff] [blame^] | 1278 | vg_assert(tst->status == VgTs_WaitSys); |
| 1279 | |
| 1280 | /* Clear the results pipe before we try to write to a proxy to |
| 1281 | prevent a deadlock (the proxyLWP may be trying to write a result |
| 1282 | back to the scheduler LWP, and therefore not be reading its |
| 1283 | input pipe, which would then block the write below). |
| 1284 | |
| 1285 | XXX I think this can't happen - the pipe has 4k of buffering, |
| 1286 | and can therefore fit many messages, but we can only have one |
| 1287 | outstanding - the write below will not block forever. Fetching |
| 1288 | results here can cause all kinds of confusion, because we |
| 1289 | definitely don't want the complexity of trying to deliver a |
| 1290 | signal right now. |
| 1291 | */ |
| 1292 | if (0) |
| 1293 | VG_(proxy_results)(); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1294 | |
| 1295 | req.request = PX_RunSyscall; |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 1296 | |
| 1297 | tst->syscallno = tst->m_eax; |
| 1298 | tst->m_eax = -VKI_ERESTARTSYS; |
| 1299 | |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1300 | res = VG_(write)(proxy->topx, &req, sizeof(req)); |
| 1301 | |
| 1302 | if (res != sizeof(req)) { |
fitzhardinge | a09a1b5 | 2003-11-07 23:09:48 +0000 | [diff] [blame^] | 1303 | VG_(message)(Vg_DebugMsg, "sys_issue: write to tid %d failed %d (not %d)\n", |
| 1304 | tid, res, sizeof(req)); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1305 | } |
| 1306 | return 0; |
| 1307 | } |
| 1308 | |
| 1309 | /* Relatively expensive sanity tests for the syscall machinery */ |
| 1310 | void VG_(proxy_sanity)(void) |
| 1311 | { |
| 1312 | Int tid; |
| 1313 | Bool sane = True; |
| 1314 | static const struct PX_Request req = { .request = PX_Ping }; |
| 1315 | |
| 1316 | for(tid = 0; tid < VG_N_THREADS; tid++) { |
| 1317 | ThreadState *tst = &VG_(threads)[tid]; |
| 1318 | ProxyLWP *px; |
| 1319 | Int status; |
| 1320 | Int ret; |
| 1321 | |
| 1322 | if (tst->status == VgTs_Empty) |
| 1323 | continue; |
| 1324 | |
| 1325 | if (tst->proxy == NULL) { |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 1326 | VG_(message)(Vg_DebugMsg, "TID %d: NULL proxy"); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1327 | sane = False; |
| 1328 | continue; |
| 1329 | } |
| 1330 | |
| 1331 | px = tst->proxy; |
| 1332 | |
| 1333 | if (px->tid != tid) { |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 1334 | VG_(message)(Vg_DebugMsg, |
| 1335 | "TID %d: proxy LWP %d doesn't have right tid (%d)\n", |
| 1336 | tid, px->lwp, px->tid); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1337 | sane = False; |
| 1338 | } |
| 1339 | |
| 1340 | if (proxy_wait(px, False, &status)) { |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 1341 | VG_(message)(Vg_DebugMsg, |
| 1342 | "TID %d: proxy LWP %d exited with status %d\n", |
| 1343 | tid, px->lwp, status); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1344 | sane = False; |
| 1345 | continue; |
| 1346 | } |
| 1347 | |
| 1348 | /* No point checking if proxy is busy in a syscall, but all |
| 1349 | other times it should respond promptly. */ |
| 1350 | if (tst->status != VgTs_WaitSys) { |
| 1351 | ret = VG_(write)(px->topx, &req, sizeof(req)); |
| 1352 | if (ret != sizeof(req)) { |
fitzhardinge | 89f9a32 | 2003-10-30 07:25:59 +0000 | [diff] [blame] | 1353 | VG_(message)(Vg_DebugMsg, |
| 1354 | "TID %d: failed to write PX_Ping to lwp %d: %d\n", |
| 1355 | tid, px->lwp, ret); |
jsgf | 855d93d | 2003-10-13 22:26:55 +0000 | [diff] [blame] | 1356 | sane = False; |
| 1357 | } |
| 1358 | sys_wait_results(True, tid, PX_Ping); |
| 1359 | /* Can't make an assertion here, fortunately; this will |
| 1360 | either come back or it won't. */ |
| 1361 | } |
| 1362 | } |
| 1363 | |
| 1364 | vg_assert(sane); |
| 1365 | } |
| 1366 | |
| 1367 | /*--------------------------------------------------------------------*/ |
| 1368 | /*--- Proxy LWP machinery. vg_proxylwp.c ---*/ |
| 1369 | /*--------------------------------------------------------------------*/ |