blob: 9fc470ad3ef3d19d44db02e7a4be4bcfe5ff1f4a [file] [log] [blame]
jsgf855d93d2003-10-13 22:26:55 +00001
2/*--------------------------------------------------------------------*/
3/*--- Proxy LWP machinery. vg_proxylwp.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, an extensible x86 protected-mode
8 emulator for monitoring program execution on x86-Unixes.
9
10 Copyright (C) 2000-2003 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31
32#include "vg_include.h"
33
34/* We need our own copy of VG_(do_syscall)() to handle a special
35 race-condition. If we've got signals unblocked, and we take a
36 signal in the gap either just before or after the syscall, we may
37 end up not running the syscall at all, or running it more than
38 once.
39
40 The solution is to make the signal handler derive the proxy's
41 precise state by looking to see which eip it is executing at
42 exception time.
43
44 Ranges:
45
46 sys_before ... sys_restarted:
47 Setting up register arguments and running state. If
48 interrupted, then the syscall should be considered to return
49 ERESTARTSYS.
50
51 sys_restarted:
52 If interrupted and eip==sys_restarted, then either the syscall
53 was about to start running, or it has run, was interrupted and
54 the kernel wants to restart it. eax still contains the
55 syscall number. If interrupted, then the syscall return value
56 should be ERESTARTSYS.
57
58 sys_after:
59 If interrupted and eip==sys_after, the syscall either just
60 finished, or it was interrupted and the kernel doesn't want to
61 restart it. Either way, eax equals the correct return value
62 (either the actual return value, or EINTR).
63
64 sys_after ... sys_done:
65 System call is complete, but the state hasn't been updated,
66 nor has the result been written back. eax contains the return
67 value.
68*/
69
/* States of a proxy LWP's state machine (stored in ProxyLWP.state). */
enum PXState
{
   PXS_BAD        = -1,  /* sentinel: never a legal state */
   PXS_WaitReq    =  0,  /* waiting for a request */
   PXS_RunSyscall =  1,  /* running a syscall */
   PXS_IntReply   =  2,  /* request interrupted - need to send reply */
   PXS_SysDone    =  3,  /* small window between syscall complete
                            and results written out */
   PXS_SigACK     =  4   /* waiting for a signal ACK */
};
80
/* Message kinds exchanged between the scheduler and a proxy LWP.  The
   direction(s) in which each kind travels is noted per enumerator. */
enum RequestType {
   PX_BAD        = -1,  /* sentinel: no request / no reply */
   PX_SetSigmask =  0,  /* sched->proxy; proxy->sched */
   PX_RunSyscall =  1,  /* sched->proxy; proxy->sched */
   PX_Signal     =  2,  /* proxy->sched */
   PX_SigACK     =  3,  /* sched->proxy */
   PX_Ping       =  4,  /* use for sanity-checking */
   PX_Exiting    =  5   /* reply sent by proxy for exit sync */
};
90
91extern void do_thread_syscall(Int sys,
92 Int arg1, Int arg2, Int arg3, Int arg4, Int arg5, Int arg6,
93 Int *result, enum PXState *statep, enum PXState poststate);
94
95asm(
96".text\n"
97" .type do_thread_syscall,@function\n"
98
99"do_thread_syscall:\n"
100" push %esi\n"
101" push %edi\n"
102" push %ebx\n"
103" push %ebp\n"
104".sys_before:\n"
105" movl 16+ 4(%esp),%eax\n" /* syscall */
106" movl 16+ 8(%esp),%ebx\n" /* arg1 */
107" movl 16+12(%esp),%ecx\n" /* arg2 */
108" movl 16+16(%esp),%edx\n" /* arg3 */
109" movl 16+20(%esp),%esi\n" /* arg4 */
110" movl 16+24(%esp),%edi\n" /* arg5 */
111" movl 16+28(%esp),%ebp\n" /* arg6 */
112".sys_restarted:\n"
113" int $0x80\n"
114".sys_after:\n"
115" movl 16+32(%esp),%ebx\n" /* ebx = Int *res */
116" movl %eax, (%ebx)\n" /* write the syscall retval */
117
118" movl 16+36(%esp),%ebx\n" /* ebx = enum PXState * */
119" testl %ebx, %ebx\n"
120" jz 1f\n"
121
122" movl 16+40(%esp),%ecx\n" /* write the post state (must be after retval write) */
123" movl %ecx,(%ebx)\n"
124
125".sys_done:\n" /* OK, all clear from here */
126"1: popl %ebp\n"
127" popl %ebx\n"
128" popl %edi\n"
129" popl %esi\n"
130" ret\n"
131" .size do_thread_syscall,.-do_thread_syscall\n"
132".previous\n"
133
134".section .rodata\n"
135"sys_before: .long .sys_before\n"
136"sys_restarted: .long .sys_restarted\n"
137"sys_after: .long .sys_after\n"
138"sys_done: .long .sys_done\n"
139".previous\n"
140);
141extern const Addr sys_before, sys_restarted, sys_after, sys_done;
142
143/* Run a syscall for a particular thread, getting the arguments from
144 the thread's registers, and returning the result in the thread's
145 eax.
146
147 Assumes that the only thread state which matters is the contents of
148 %eax-%ebp and the return value in %eax.
149 */
150static void thread_syscall(Int syscallno, ThreadState *tst,
151 enum PXState *state , enum PXState poststate)
152{
153 do_thread_syscall(syscallno, /* syscall no. */
154 tst->m_ebx, /* arg 1 */
155 tst->m_ecx, /* arg 2 */
156 tst->m_edx, /* arg 3 */
157 tst->m_esi, /* arg 4 */
158 tst->m_edi, /* arg 5 */
159 tst->m_ebp, /* arg 6 */
160 &tst->m_eax, /* result */
161 state, /* state to update */
162 poststate); /* state when syscall has finished */
163}
164
165#define VG_PROXY_MAGIC 0xef83b192
166struct ProxyLWP {
167 UInt magic; /* magic number */
168 ThreadId tid; /* scheduler's tid */
169 ThreadState *tst; /* thread state */
170 Int lwp; /* kernel's ID for LWP */
171 Int exitcode; /* ProxyLWP exit code */
172
173 Int topx, frommain; /* pipe fds */
174 vki_ksiginfo_t siginfo; /* received signal */
175 Bool terminating; /* in the middle of exiting */
176
177 /* State of proxy */
178 enum PXState state;
179
180 jmp_buf jumpbuf;
181};
182
183static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype);
184
185struct PX_Request {
186 enum RequestType request;
187
188 vki_ksigset_t sigmask; /* sigmask applied by SigACK */
189};
190
191/* All replies are multiplexed over a single pipe, so we need to disinguish them */
192struct PX_Reply {
193 ThreadId tid; /* tid this reply pertains to */
194 enum RequestType req; /* what this relates to */
195
196 union {
197 Int syscallno; /* system call completed */
198 vki_ksiginfo_t siginfo; /* signal */
199 };
200};
201
202/* results pipe */
203static Int result_send = -1, result_recv = -1;
204
205/* reentrant printf for proxy use */
206#if 0
207static void px_printf(const Char *fmt, ...)
208{
209 Char buf[1024];
210 Char *cp = buf;
211 va_list vargs;
212
213 void addbuf(Char c) { *cp++ = c; }
214
215 cp += VG_(sprintf)(buf, "[%d, %d]: ", VG_(getpid)(), VG_(gettid)());
216
217 va_start(vargs,fmt);
218 VG_(vprintf)(addbuf, fmt, vargs);
219 va_end(vargs);
220 VG_(send_bytes_to_logging_sink)(buf, cp-buf);
221}
222#else
223static void px_printf(const Char *fmt, ...)
224{
225}
226#endif
227
228static const Char *pxs_name(enum PXState s)
229{
230 switch(s) {
231#define S(x) case PXS_##x: return #x
232 S(BAD);
233 S(WaitReq);
234 S(RunSyscall);
235 S(IntReply);
236 S(SysDone);
237 S(SigACK);
238#undef S
239 default: return "???";
240 }
241}
242
243static const Char *px_name(enum RequestType r)
244{
245 switch(r) {
246#define S(x) case PX_##x: return #x
247 S(BAD);
248 S(SetSigmask);
249 S(RunSyscall);
250 S(Signal);
251 S(SigACK);
252 S(Ping);
253 S(Exiting);
254#undef S
255 default: return "???";
256 }
257}
258
/* Byte offset of the ProxyLWP structure within its page: it occupies
   the last sizeof(ProxyLWP) bytes. */
#define PROXYLWP_OFFSET   (VKI_BYTES_PER_PAGE - sizeof(ProxyLWP))

/* Round address `p` down to the start of the page containing it. */
#define ROUNDDN(p)        ((UChar *)((Addr)(p) & ~(VKI_BYTES_PER_PAGE-1)))

262/*
263 Allocate a page for the ProxyLWP and its stack.
264
265 This uses the trick for finding the LWP's private data by knowing
266 that the stack is a single page, and that the ProxyLWP structure is
267 at the end of it. Therefore, given any %esp in the stack, you can
268 find the ProxyLWP structure (see LWP_TSD()).
269 */
270static ProxyLWP *LWP_alloc(void)
271{
272 UChar *p = VG_(get_memory_from_mmap)(VKI_BYTES_PER_PAGE, "alloc_LWP");
273 ProxyLWP *ret;
274 vg_assert(p == ROUNDDN(p)); /* px must be page aligned */
275
276 ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
277
278 ret->magic = VG_PROXY_MAGIC;
279
280 return ret;
281}
282
283/* Free a thread structure */
284static void LWP_free(ProxyLWP *px)
285{
286 UChar *p = ROUNDDN(px);
287
288 vg_assert(px->magic == VG_PROXY_MAGIC);
289 px->magic = 0;
290 vg_assert((p + PROXYLWP_OFFSET) == (UChar *)px);
291
292 VG_(munmap)(p, VKI_BYTES_PER_PAGE);
293}
294
295/* Get a particular ProxyLWP's LWP structure from its esp (relies on
296 stacks being page aligned, with the ProxyLWP structure at the
297 end). */
298static inline ProxyLWP *LWP_TSD(void *esp)
299{
300 UChar *p = ROUNDDN(esp);
301 ProxyLWP *ret;
302
303 ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
304 vg_assert(ret->magic == VG_PROXY_MAGIC);
305
306 return ret;
307}
308
309/* Get top of stack */
310static inline void *LWP_stack(ProxyLWP *px)
311{
312 vg_assert(px->magic == VG_PROXY_MAGIC);
313
314 return (void *)(((void **)px) - 1);
315}
316
317static void proxy_fork_cleanup(ThreadId tid);
318
319/* Init the proxy mechanism */
320void VG_(proxy_init)(void)
321{
322 Int p[2];
323 Int res;
324
325 /* this will ignore any duplicate registrations */
326 VG_(atfork)(NULL, NULL, proxy_fork_cleanup);
327
328 vg_assert(result_recv == -1);
329 vg_assert(result_send == -1);
330
331 res = VG_(pipe)(p);
332 vg_assert(res == 0);
333
334 result_recv = VG_(safe_fd)(p[0]);
335 result_send = VG_(safe_fd)(p[1]);
336
337 /* Make reading end non-blocking */
338 VG_(fcntl)(result_recv, VKI_F_SETFL, VKI_O_NONBLOCK);
339}
340
341/* After fork, the forking thread is in a strange state of having a
342 couple of pipes still linked to the parent. */
343static void proxy_fork_cleanup(ThreadId tid)
344{
345 ThreadId t;
346
347 VG_(close)(result_recv);
348 VG_(close)(result_send);
349
350 result_recv = result_send = -1;
351
352 VG_(proxy_init)();
353
354 for(t = 1; t < VG_N_THREADS; t++) {
355 ThreadState *tst = VG_(get_ThreadState)(t);
356 ProxyLWP *proxy = tst->proxy;
357
358 if (tst->status == VgTs_Empty) {
359 vg_assert(proxy == NULL);
360 continue;
361 }
362
363 vg_assert(proxy != NULL);
364
365 /* We need to do a manual teardown, since the proxy this structure
366 describes is our parent's */
367 VG_(close)(proxy->topx);
368 VG_(close)(proxy->frommain);
369
370 LWP_free(proxy);
371 tst->proxy = NULL;
372 }
373
374 /* Create a proxy for calling thread
375
376 We need to temporarily set the state back to Runnable for
377 proxy_create's benefit.
378 */
379
380 {
381 ThreadState *tst = VG_(get_ThreadState)(tid);
382
383 vg_assert(tst->proxy == NULL);
384 vg_assert(tst->status == VgTs_WaitSys);
385 tst->status = VgTs_Runnable;
386 VG_(proxy_create)(tid);
387 VG_(proxy_setsigmask)(tid);
388 tst->status = VgTs_WaitSys;
389 }
390}
391
392Int VG_(proxy_resfd)(void)
393{
394 return result_recv;
395}
396
397void VG_(proxy_shutdown)(void)
398{
399 VG_(close)(result_recv);
400 VG_(close)(result_send);
401
402 result_recv = result_send = -1;
403}
404
405/* This is called from within a proxy LWP signal handler. This
406 function records the siginfo, then longjmps back into the proxy
407 main state machine loop. The presumption is that the signal
408 handler is being run with all signals blocked; the longjmp is
409 there to make sure they stay masked until the application thread is
410 ready to run its signal handler. */
411void VG_(proxy_handlesig)(const vki_ksiginfo_t *siginfo,
412 const struct vki_sigcontext *sigcontext)
413{
414 UChar local;
415 ProxyLWP *px = LWP_TSD(&local);
416 Addr eip = sigcontext->eip;
417 Int eax = sigcontext->eax;
418
419 vg_assert(siginfo->si_signo != 0);
420 if (px->siginfo.si_signo != 0) {
421 px_printf("proxy_handlesig: tid %d already has %d pending, new sig %d\n",
422 px->lwp, px->siginfo.si_signo, siginfo->si_signo);
423 }
424 vg_assert(px->siginfo.si_signo == 0);
425
426 px->siginfo = *siginfo;
427
428 px_printf("proxy got signal %d\n", siginfo->si_signo);
429
430 /* First look to see if the EIP is within our interesting ranges
431 near a syscall to work out what should happen. */
432 if (sys_before <= eip && eip <= sys_restarted) {
433 /* We are before the syscall actually ran, or it did run and
434 wants to be restarted. Either way, set the return code to
435 indicate a restart. This is not really any different from
436 anywhere else, except that we can make some assertions about
437 the proxy and machine state here. */
438 vg_assert(px->state == PXS_RunSyscall);
439 vg_assert(px->tst->m_eax == -VKI_ERESTARTSYS);
440 } else if (sys_after <= eip && eip <= sys_done) {
441 /* We're after the syscall. Either it was interrupted by the
442 signal, or the syscall completed normally. In either case
443 eax contains the correct syscall return value, and the new
444 state is effectively PXS_SysDone. */
445 vg_assert(px->state == PXS_RunSyscall || px->state == PXS_SysDone);
446 px->state = PXS_SysDone;
447 px->tst->m_eax = eax;
448 }
449 px_printf(" signalled in state %s\n", pxs_name(px->state));
450
451 __builtin_longjmp(px->jumpbuf, 1);
452}
453
454static Bool send_reply(const struct PX_Reply *reply)
455{
456 const Int size = sizeof(struct PX_Reply);
457
458 return VG_(write)(result_send, reply, size) == size;
459}
460
461static Bool recv_reply(struct PX_Reply *reply)
462{
463 const Int size = sizeof(struct PX_Reply);
464
465 return VG_(read)(result_recv, reply, size) == size;
466}
467
468/* Proxy LWP thread. This is run as a separate cloned() thread, so it
469 MUST NOT touch any core Valgrind data structures directly: the only
470 exception is while we're running a PX_RunSyscall command, we may
471 look at and update the thread's register state. It interacts with
472 the rest of Valgrind by receiving messages through its pipe and
473 sending results through result_send. */
474static Int proxylwp(void *v)
475{
476 ProxyLWP *px = (ProxyLWP *)v;
477 Int frommain = px->frommain;
478 ThreadState *tst = px->tst;
479 vki_ksigset_t allsig;
480 vki_ksigset_t appsigmask; /* signal mask the client has asked for */
fitzhardinge89f9a322003-10-30 07:25:59 +0000481 Int ret = 1000;
jsgf855d93d2003-10-13 22:26:55 +0000482 static const vki_kstack_t ss = { .ss_flags = VKI_SS_DISABLE };
483
484 /* Block everything until we're told otherwise (LWP should have
485 been started with all signals blocked anyway) */
486 VG_(ksigfillset)(&allsig);
487 VG_(ksigdelset)(&allsig, VKI_SIGVGKILL); /* but allow SIGVGKILL to interrupt */
488
489 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
490
491 appsigmask = allsig;
492
493 /* no signal stack for us */
494 VG_(ksigaltstack)(&ss, NULL);
495
496 for(;;) {
497 struct PX_Reply reply, sigreply;
498 struct PX_Request req;
499 Int res;
500
501 if (__builtin_setjmp(px->jumpbuf)) {
502 /* We were hit by a signal. This is the signal-driven part
503 of the state machine.
504
505 This code prepares a reply which is suitable for whatever
506 was interrupted by this signal. If "no reply" is the
507 right response, then it sets reply.req = PX_BAD.
508
509 NOTE: the ST:N notation represents the correspondence
510 between states where we can be interrupted in the main
511 state machine loop, and where those states are handled
512 here.
513 */
514
515 if (px->siginfo.si_signo != VKI_SIGVGKILL) {
516 /* First, send the signal info */
517 sigreply.tid = px->tid;
518 sigreply.req = PX_Signal;
519 sigreply.siginfo = px->siginfo;
520
521 if (!send_reply(&sigreply)) {
522 ret = 44; /* incomplete or failed write */
523 goto out;
524 }
525 } else {
526 /* We got VKI_SIGVGKILL, which means we just skip all the
527 below and get back to the state machine - probably to
528 exit. */
529 px->state = PXS_WaitReq;
530 px->siginfo.si_signo = 0;
531 goto state_machine;
532 }
533
534 px->siginfo.si_signo = 0;
535
536 /* Now work out what our new state is, and what to do on the way. */
537 switch(px->state) {
538 case PXS_WaitReq:
539 /* We were interrupted while waiting for a request. See
540 if we had actually read the request, and do the
541 appropriate thing if so. */
542 reply.req = req.request;
543 reply.tid = px->tid;
544
545 switch(req.request) {
546 case PX_BAD:
547 /* ST:1 */
548 /* nothing read; just wait for SigACK */
549 px->state = PXS_SigACK;
550 break;
551
552 case PX_RunSyscall:
553 /* ST:2 */
554 /* They asked for a syscall, but we were signalled
555 before even getting started. Claim the syscall was
556 interrupted.
557
558 XXX how to distunguish between restartable and
559 non-restartable syscalls? Does it matter?
560 */
fitzhardingea09a1b52003-11-07 23:09:48 +0000561 reply.syscallno = tst->syscallno;
jsgf855d93d2003-10-13 22:26:55 +0000562
563 tst->m_eax = -VKI_ERESTARTSYS;
564 px->state = PXS_IntReply;
565 break;
566
567 case PX_SetSigmask:
568 /* ST:2 */
569 /* ST:3 */
570 /* They asked for a signal mask update. Ignore it,
571 because they're going to give us a new mask when
572 they send a SigACK, and we want all signals blocked
573 in the meantime. However, we set the state to
574 PXS_IntReply to make sure the reply from the
575 PX_SetSigmask is sent. */
576 vg_assert(reply.req == PX_SetSigmask);
577 px->state = PXS_IntReply;
578 break;
579
580 case PX_Ping:
581 /* ST:2 */
582 /* We read a Ping request, so we need to send a Ping
583 reply. */
584 vg_assert(reply.req == PX_Ping);
585 px->state = PXS_IntReply;
586 break;
587
588 case PX_Exiting:
589 case PX_Signal:
590 ret = 10; /* completely bogus - noone should send us a signal */
591 goto out;
592
593 case PX_SigACK:
594 ret = 11; /* Also bogus. No way we should get a
595 signal while waiting for a
596 SigACK. */
597 goto out;
598 }
599 break;
600
601 case PXS_RunSyscall:
602 /* ST:4 */
603 /* We were actually running the syscall when interrupted.
604 reply should already be set up, including return in eax. */
605 vg_assert(reply.req == PX_RunSyscall);
606 vg_assert(reply.syscallno == tst->syscallno);
607 vg_assert(tst->status == VgTs_WaitSys);
608 px->state = PXS_IntReply;
609 break;
610
611 case PXS_SysDone:
612 /* The syscall is done; we just need to send the results
613 back. */
614 vg_assert(reply.req == PX_RunSyscall);
615 vg_assert(reply.syscallno == tst->syscallno);
616 px->state = PXS_IntReply;
617 break;
618
619 case PXS_IntReply:
620 case PXS_SigACK:
621 ret = 13; /* Bogus. Same as ret=11 above. */
622 goto out;
623
624 case PXS_BAD:
625 ret = 33;
626 goto out;
627 }
628
629 /* End of signal handling states. If the scheduler LWP is
630 currently running application code, tell it to drop back
631 into the scheduler loop ASAP to handle the signal. */
632 if (VG_(clo_lowlat_signals))
633 VG_(need_resched)(px->tid);
634 }
635
636 state_machine:
637 px_printf("proxylwp main: state %s\n", pxs_name(px->state));
638
639 switch(px->state) {
640 case PXS_WaitReq:
641 case PXS_SigACK:
642 req.request = PX_BAD; /* init request so we know if the read() read anything */
643
644 if (px->state == PXS_WaitReq) {
645 /* allow signals when waiting for a normal request */
646 VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
647 }
648
649 /* ST:1 */
650
651 res = VG_(read)(frommain, &req, sizeof(req));
652
653 /* ST:2 */
654
655 /* process message with signals blocked */
656 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
657
fitzhardinge89f9a322003-10-30 07:25:59 +0000658 if (res == 0) {
659 ret = 0;
jsgf855d93d2003-10-13 22:26:55 +0000660 goto out; /* EOF - we're quitting */
fitzhardinge89f9a322003-10-30 07:25:59 +0000661 }
jsgf855d93d2003-10-13 22:26:55 +0000662
663 if (res < 0) {
664 px_printf("read(frommain) failed %d\n", res);
665 ret = 1; /* error */
666 goto out;
667 }
668 if (res != sizeof(req)) {
669 ret = 2; /* error - partial read */
670 goto out;
671 }
672
673 px_printf("read req: %s\n", px_name(req.request));
674
675 reply.tid = px->tid;
676 reply.req = req.request;
677
678 switch(req.request) {
679 case PX_Ping:
680 /* do nothing; just send reply */
681 break;
682
683 case PX_SigACK:
684 /* The thread ACKed the signal, and sent the mask they
685 want while running the handler. */
686 vg_assert(px->state == PXS_SigACK);
687 appsigmask = req.sigmask;
688 VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL); /* but allow SIGVGKILL to interrupt */
689 px->state = PXS_WaitReq;
690 reply.req = PX_BAD; /* don't reply */
691 break;
692
693 case PX_SetSigmask:
694 appsigmask = req.sigmask;
695 VG_(ksigdelset)(&appsigmask, VKI_SIGVGKILL); /* but allow SIGVGKILL to interrupt */
696
697 vg_assert(px->state == PXS_WaitReq ||
698 px->state == PXS_SigACK);
699
700 if (px->state != PXS_SigACK) {
701 /* If we're not waiting for a PX_SigACK, set the apps mask
702 to get at least one of the pending signals, which will
703 be delivered synchronously, so that some progress is
704 made before the we tell the client the mask has been
705 set.. Then reset the mask back to all blocked. */
706 VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
707 /* ST:3 */
708 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
709 } else {
710 /* Waiting for SigACK. We want all signals blocked,
711 and when the SigACK arrives, it will give us the
712 thread's signal mask for its handler. */
713 }
714 break;
715
716 case PX_RunSyscall:
717 /* Run a syscall for our thread; results will be poked
718 back into tst */
fitzhardinge89f9a322003-10-30 07:25:59 +0000719 reply.syscallno = tst->syscallno;
jsgf855d93d2003-10-13 22:26:55 +0000720
721 vg_assert(px->state == PXS_WaitReq ||
722 px->state == PXS_SigACK);
723 if (px->state == PXS_SigACK) {
724 /* If we're in the middle of signal handling, make the
725 client's syscalls fail with ERESTARTSYS until its signal
726 handler runs - there should be at most one, if it was
727 on the way to us as we got the signal.
728 */
729 px_printf("RunSyscall in SigACK: rejecting syscall %d with ERESTARTSYS\n",
730 reply.syscallno);
731 tst->m_eax = -VKI_ERESTARTSYS;
732 } else {
fitzhardinge89f9a322003-10-30 07:25:59 +0000733 Int syscallno = tst->syscallno;
jsgf855d93d2003-10-13 22:26:55 +0000734
735 px->state = PXS_RunSyscall;
736 /* If we're interrupted before we get to the syscall
737 itself, we want the syscall restarted. */
738 tst->m_eax = -VKI_ERESTARTSYS;
739
740 /* set our process group ID to match parent */
741 if (VG_(getpgrp)() != VG_(main_pgrp))
742 VG_(setpgid)(0, VG_(main_pgrp));
743
744 VG_(ksigprocmask)(VKI_SIG_SETMASK, &appsigmask, NULL);
745
746 /* ST:4 */
747
748 thread_syscall(syscallno, tst, &px->state, PXS_SysDone);
749
750 /* ST:5 */
751
752 VG_(ksigprocmask)(VKI_SIG_SETMASK, &allsig, NULL);
753 /* whew - made it here without being interrupted */
754 px->state = PXS_WaitReq;
755
756 if (VG_(clo_lowlat_syscalls))
757 VG_(need_resched)(px->tid);
758 }
759 break;
760
761 case PX_BAD:
762 case PX_Signal:
763 case PX_Exiting:
764 /* we never expect to see these */
765 ret = 3;
766 goto out;
767 }
768 break;
769
770 case PXS_IntReply:
771 /* This state only exists so that we fall out and write the
772 interrupted syscall reply before moving to SigACK */
773 px->state = PXS_SigACK;
774 break;
775
776 case PXS_RunSyscall:
777 case PXS_SysDone:
778 case PXS_BAD:
779 default:
780 /* Never expect to see these states here */
781 ret = 5;
782 goto out;
783 }
784
785 /* If we have something sensible to say, say it */
786 if (reply.req != PX_BAD) {
787 px_printf("sending reply %s\n", px_name(reply.req));
788
789 if (!send_reply(&reply)) {
790 ret = 4; /* error - didn't write full message */
791 goto out;
792 }
793 reply.req = PX_BAD;
794 }
795 }
796
797 out:
798 px_printf("proxy exiting with ret=%d\n", ret);
799
800 {
801 struct PX_Reply reply;
802 reply.req = PX_Exiting;
803 reply.tid = px->tid;
804 px_printf("exit: sending %s\n", px_name(reply.req));
805
806 send_reply(&reply);
807 }
808
809 px->frommain = -1;
810 VG_(close)(frommain);
811
812 px->exitcode = ret;
813 return ret;
814}
815
816/* Send a signal to a proxy LWP */
817void VG_(proxy_sendsig)(ThreadId tid, Int sig)
818{
819 ThreadState *tst = VG_(get_ThreadState)(tid);
820 ProxyLWP *proxy = tst->proxy;
821 Int lwp;
822
823 if (proxy == NULL)
824 return;
825
826 lwp = proxy->lwp; /* proxy->lwp may change async */
827
828 if (lwp != 0) {
829 /* SIGKILL and SIGSTOP always apply to all threads (need to
830 route for route_signals case?) */
831 if (sig == VKI_SIGKILL || sig == VKI_SIGSTOP)
832 VG_(kkill)(VG_(main_pid), sig);
833 else
834 VG_(ktkill)(lwp, sig);
835 }
836
837 /* If a thread is sending a signal to itself and the signal isn't
838 blocked (ie, it will be delivered), wait until the signal
839 message gets sent back, thus making the signal synchronous. */
840 if (sig != 0 &&
841 !VG_(is_sig_ign)(sig) &&
842 tid == VG_(get_current_or_recent_tid)() &&
843 !VG_(ksigismember)(&tst->eff_sig_mask, sig)) {
844 /* If the LWP is actually blocked in a sigtimedwait, then it
845 will eat the signal rather than make it pending and deliver
846 it by the normal mechanism. In this case, just wait for the
847 syscall to dinish. */
848 if (tst->status == VgTs_WaitSys && tst->syscallno == __NR_rt_sigtimedwait)
849 sys_wait_results(True, tid, PX_RunSyscall);
850 else
851 sys_wait_results(True, tid, PX_Signal);
852 }
853}
854
855/* If a thread is blocked in a syscall, this function will interrupt
856 the proxy LWP's syscall by hitting it with a VKI_SIGVGINT signal.
857 This signal will not be reported to the client application. */
858void VG_(proxy_abort_syscall)(ThreadId tid)
859{
860 ThreadState *tst = VG_(get_ThreadState)(tid);
861 ProxyLWP *proxy = tst->proxy;
862 Int lwp;
863
864 if (tst->status != VgTs_WaitSys)
865 return;
866
867 vg_assert(proxy != NULL);
868
869 lwp = proxy->lwp;
870
871 if (lwp != 0)
872 VG_(ktkill)(lwp, VKI_SIGVGINT);
873
874 sys_wait_results(True, tid, PX_RunSyscall);
875
876 vg_assert(tst->status == VgTs_Runnable);
877}
878
879static Int do_futex(void *addr, Int op, Int val, struct vki_timespec *time, void *addr2)
880{
881 return VG_(do_syscall)(__NR_futex, addr, op, val, time, addr2);
882}
883
/* futex operation codes, passed as the `op` argument of do_futex() */
#define VKI_FUTEX_WAIT     0
#define VKI_FUTEX_WAKE     1
#define VKI_FUTEX_FD       2
#define VKI_FUTEX_REQUEUE  3
888
889static Int have_futex = -1; /* -1 -> unknown */
890
891/*
892 Create a proxy LWP using whatever varient of clone makes the most
893 sense for the current kernel. We use futexes for termination
894 notification wherever possible. Returns 0 on success, or a -ve
895 error code on failure.
896*/
897static Int proxy_clone(ProxyLWP *proxy)
898{
899 Int ret;
900
fitzhardinge89f9a322003-10-30 07:25:59 +0000901 if (VG_(clo_assume_24))
902 have_futex = 0;
903
jsgf855d93d2003-10-13 22:26:55 +0000904 if (have_futex == -1)
905 have_futex = do_futex(NULL, VKI_FUTEX_WAKE, 0, NULL, NULL) != -VKI_ENOSYS;
906
907 if (have_futex) {
908 ret = VG_(clone)(proxylwp,
909 LWP_stack(proxy),
910 VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
911 VKI_CLONE_SIGHAND | VKI_CLONE_THREAD |
912 VKI_CLONE_PARENT_SETTID |
913 VKI_CLONE_CHILD_CLEARTID | VKI_CLONE_DETACHED,
914 proxy, &proxy->lwp, &proxy->lwp);
915 } else {
916 VG_(do_signal_routing) = True; /* XXX True, it seems kernels
917 which have futex also have
918 sensible signal handling, but
919 it would be nice to test it
920 directly. */
921
922 ret = VG_(clone)(proxylwp,
923 LWP_stack(proxy),
924 VKI_CLONE_FS | VKI_CLONE_FILES | VKI_CLONE_VM |
925 VKI_CLONE_SIGHAND | VKI_CLONE_THREAD,
926 proxy, NULL, NULL);
927 proxy->lwp = ret;
928 }
929
930 return (ret < 0) ? ret : 0;
931}
932
933/* Wait on a proxy LWP. Returns True if the LWP has exited. */
934static Bool proxy_wait(ProxyLWP *proxy, Bool block, Int *status)
935{
936 Bool ret = False;
937
938 if (have_futex == -1)
939 return False;
940
941 if (have_futex) {
942 if (block) {
943 Int lwp = proxy->lwp;
944
945 while(proxy->lwp != 0)
946 do_futex(&proxy->lwp, VKI_FUTEX_WAIT, lwp, NULL, NULL);
947
948 if (status)
949 *status = proxy->exitcode;
950 ret = True;
951 } else {
952 if (proxy->lwp == 0) {
953 *status = proxy->exitcode;
954 ret = True;
955 }
956 }
957 } else {
958 Int flags = VKI__WCLONE;
959 Int res;
960
961 if (!block)
962 flags |= VKI_WNOHANG;
963 res = VG_(waitpid)(proxy->lwp, status, flags);
964 if (res == proxy->lwp) {
965 vg_assert(*status == proxy->exitcode);
966 ret = True;
967 }
968 }
969
970 return ret;
971}
972
973/* Create a proxy for a new thread */
974void VG_(proxy_create)(ThreadId tid)
975{
976 ThreadState *tst = VG_(get_ThreadState)(tid);
977 ProxyLWP *proxy;
978 Int p[2];
979 vki_ksigset_t mask;
980 Int ret;
981
982 vg_assert(tst->proxy == NULL);
983 vg_assert(tst->status == VgTs_Runnable);
984
985 proxy = LWP_alloc();
986
987 VG_(pipe)(p);
988
989 proxy->tid = tid;
990 proxy->tst = tst;
991 proxy->siginfo.si_signo = 0;
992 proxy->frommain = VG_(safe_fd)(p[0]);
993 proxy->topx = VG_(safe_fd)(p[1]);
994 proxy->state = PXS_WaitReq; /* start by waiting for requests */
995 proxy->terminating = False;
996
997 /* Make sure proxy LWP starts with all signals blocked (not even
998 SEGV, BUS, ILL or FPE) */
999 VG_(block_all_host_signals)(&mask);
1000
1001 ret = proxy_clone(proxy);
1002 if (ret < 0) {
1003 VG_(printf)("Error %d trying to create proxy LWP for tid %d\n",
1004 ret, tid);
1005 VG_(core_panic)("Can't start proxy LWPs");
1006 }
1007
1008 VG_(restore_all_host_signals)(&mask);
1009
1010 tst->proxy = proxy;
1011}
1012
1013/* Clean up proxy after thread dies */
1014void VG_(proxy_delete)(ThreadId tid, Bool force)
1015{
1016 ThreadState *tst = VG_(get_ThreadState)(tid);
1017 ProxyLWP *proxy = tst->proxy;
1018 Bool res;
1019 Int status;
1020 Int lwp;
1021
1022 if (proxy == NULL)
1023 return; /* nothing to do */
1024
1025 lwp = proxy->lwp;
1026
1027#if 0
1028 MAYBE_PRINTF("VG_(proxy_delete)(tid=%d (lwp=%d), force=%s; tst->status=%d\n",
1029 tid, lwp, force ? "true" : "false", tst->status);
1030#endif
1031 vg_assert(proxy->tid == tid);
1032 if (proxy->terminating)
1033 return; /* already going away */
1034
1035 proxy->terminating = True;
1036
1037 VG_(close)(proxy->topx);
1038 proxy->topx = -1;
1039
1040 /* proxy thread will close proxy->frommain itself */
1041
1042 if (force && lwp != 0) {
1043 /* wouldn't need to force it if it were already dead */
1044 vg_assert(tst->status != VgTs_Empty);
1045 //VG_(printf)("kill %d with SIGVGKILL\n", lwp);
1046 VG_(ktkill)(lwp, VKI_SIGVGKILL);
1047 } else
1048 vg_assert(tst->status == VgTs_Empty); /* just killed */
1049
1050 status = -1;
1051 res = False;
1052
1053 /* We need to wait for the PX_Exiting message before doing the
1054 proxy_wait, because if we don't read the results pipe, the proxy
1055 may be blocked writing to it, causing a deadlock with us as we
1056 wait for it to exit. */
1057 sys_wait_results(True, tid, PX_Exiting);
1058 res = proxy_wait(proxy, True, &status);
1059
1060 if (!res || status != 0)
1061 VG_(printf)("proxy %d for tid %d exited status %d, res %d\n",
1062 lwp, tid, status, res);
1063
1064 LWP_free(proxy);
1065 tst->proxy = NULL;
1066}
1067
1068/* Read back the results of any completed syscalls.
1069
1070 At this point, there should be only one pending syscall per thread.
1071 Those threads should be in VgTs_WaitSys state. Each syscall return
1072 may have multiple signals associated with it, so we read those and
1073 set up some pending signals in our signal simulation. When we
1074 finally get the message saying the syscall is complete, we mark the
1075 thread as runnable and return.
1076
1077 If block is set to True, then this call will block until anything
1078 happens (ie, some progress was made).
1079
1080 If reqtype != PX_BAD, then this will block until some reply for
1081 that request type appears (assuming you're expecting that kind of
1082 reply, otherwise it will block forever). If tid != 0, then it will
1083 wait for a reply for that particular tid.
1084 */
1085static void sys_wait_results(Bool block, ThreadId tid, enum RequestType reqtype)
1086{
1087 Bool found_reply = (reqtype == PX_BAD);
1088 struct PX_Reply res;
1089
1090 vg_assert(VG_(gettid)() == VG_(main_pid));
1091
1092 do {
1093 if (reqtype != PX_BAD || block) {
1094 /* wait for activity on recv_res */
1095 struct vki_pollfd pollfd;
1096 Int ret;
1097
1098 /* result_recv could be -1 if we're asking for results before any
1099 syscalls are issued - which is OK - but we can't block on
1100 it. */
1101 vg_assert(result_recv != -1);
1102
1103 pollfd.fd = result_recv;
1104 pollfd.events = VKI_POLLIN;
1105
1106 do {
1107 ret = VG_(poll)(&pollfd, 1, -1);
1108 } while(ret == -VKI_EINTR);
1109
1110 if (ret <= 0) {
1111 VG_(printf)("sys_wait_results: poll failed fd=%d errno=%d\n",
1112 pollfd.fd, ret);
1113 return;
1114 }
1115 }
1116
1117 while(recv_reply(&res)) {
1118 ThreadState *tst;
1119
1120 if (reqtype != PX_BAD &&
1121 res.req == reqtype &&
1122 (tid == 0 || tid == res.tid))
1123 found_reply = True;
1124
1125 tst = VG_(get_ThreadState)(res.tid);
1126
1127 switch(res.req) {
1128 case PX_SetSigmask:
1129 /* Don't need to do anything */
1130 if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls))
1131 VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_SetSigmask for TID %d",
1132 res.tid);
1133 break;
1134
1135 case PX_RunSyscall:
1136 if (VG_(clo_trace_syscalls))
1137 VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_RunSyscall for TID %d: syscall %d result %d",
1138 res.tid, tst->syscallno, tst->m_eax);
1139
1140 if (tst->status != VgTs_WaitSys)
1141 VG_(printf)("tid %d in status %d\n",
1142 tst->tid, tst->status);
1143
1144 vg_assert(res.syscallno == tst->syscallno);
1145 vg_assert(tst->status == VgTs_WaitSys);
1146
1147 VG_(post_syscall)(res.tid);
1148 break;
1149
1150 case PX_Signal:
1151 if (VG_(clo_trace_signals) || VG_(clo_trace_syscalls))
1152 VG_(message)(Vg_DebugMsg, "sys_wait_results: got PX_Signal for TID %d, signal %d",
1153 res.tid, res.siginfo.si_signo);
1154
1155 vg_assert(res.siginfo.si_signo != 0);
1156 if (VG_(threads)[res.tid].proxy &&
1157 !VG_(threads)[res.tid].proxy->terminating)
1158 VG_(deliver_signal)(res.tid, &res.siginfo, True);
1159 break;
1160
1161 case PX_Ping:
1162 /* Got a ping response. Great. */
1163 break;
1164
1165 case PX_Exiting:
1166 /* They're exiting. Hooray! */
1167 break;
1168
1169 case PX_BAD:
1170 case PX_SigACK:
1171 default:
1172 VG_(core_panic)("sys_wait_results: got PX_BAD/PX_SigACK!\n");
1173 }
1174 }
1175 } while(!found_reply);
1176}
1177
1178/* External version */
1179void VG_(proxy_results)(void)
1180{
1181 sys_wait_results(False, 0, PX_BAD);
1182}
1183
fitzhardingea09a1b52003-11-07 23:09:48 +00001184void VG_(proxy_wait_sys)(ThreadId tid)
1185{
1186 ThreadState *tst = VG_(get_ThreadState)(tid);
1187
1188 vg_assert(tst->status == VgTs_WaitSys);
1189
1190 sys_wait_results(True, tid, PX_RunSyscall);
1191}
1192
jsgf855d93d2003-10-13 22:26:55 +00001193/* Tell proxy about it's thread's updated signal mask */
1194void VG_(proxy_setsigmask)(ThreadId tid)
1195{
1196 ThreadState *tst = VG_(get_ThreadState)(tid);
1197 ProxyLWP *proxy = tst->proxy;
1198 Int res;
1199 struct PX_Request req;
1200
1201 vg_assert(proxy != NULL);
1202 vg_assert(proxy->tid == tid);
1203
1204 req.request = PX_SetSigmask;
1205 req.sigmask = tst->sig_mask;
1206
1207 tst->eff_sig_mask = tst->sig_mask;
1208
1209 /* clear the results pipe before we try to write to a proxy to
1210 prevent a deadlock */
1211 VG_(proxy_results)();
1212 res = VG_(write)(proxy->topx, &req, sizeof(req));
1213 vg_assert(res == sizeof(req));
1214
1215 /* wait for proxy to ack mask update; mask changes don't really
1216 have to be synchronous, but they do have to be fully ordered
1217 with respect to each other (ie, if thread A then thread B
1218 updates their signal masks, A's update must be done before B's
1219 is). */
1220 sys_wait_results(True, tid, PX_SetSigmask);
1221}
1222
1223void VG_(proxy_sigack)(ThreadId tid, const vki_ksigset_t *mask)
1224{
1225 ThreadState *tst = VG_(get_ThreadState)(tid);
1226 ProxyLWP *proxy = tst->proxy;
1227 Int res;
1228 struct PX_Request req;
1229
1230 vg_assert(proxy != NULL);
1231 vg_assert(proxy->tid == tid);
1232
1233 if (proxy_wait(proxy, False, NULL))
1234 return;
1235
1236 req.request = PX_SigACK;
1237 req.sigmask = *mask;
1238
1239 tst->eff_sig_mask = *mask;
1240
1241#if 0
1242 /* Clear the results pipe before we try to write to a proxy to
1243 prevent a deadlock.
1244
1245 XXX this breaks things. This is called as a result of a
1246 PX_Signal message, and is called from within sys_wait_results.
1247 If that sys_wait_results was blocking of a particular message,
1248 it will never wake up if we eat those messages by calling
1249 sys_wait_results ourselves from here. Maybe make
1250 sys_wait_results non-recursive?
1251 */
1252 VG_(proxy_results)();
1253#endif
1254
1255 res = VG_(write)(proxy->topx, &req, sizeof(req));
1256 vg_assert(res == sizeof(req));
1257}
1258
1259/* Wait for a signal to be delivered to any thread */
1260void VG_(proxy_waitsig)(void)
1261{
1262 if (VG_(do_signal_routing))
1263 VG_(route_signals)();
1264 else
1265 sys_wait_results(True, VG_INVALID_THREADID /* any */, PX_Signal);
1266}
1267
1268/* Issue a syscall to the thread's ProxyLWP */
1269Int VG_(sys_issue)(int tid)
1270{
1271 ThreadState *tst = VG_(get_ThreadState)(tid);
1272 ProxyLWP *proxy = tst->proxy;
1273 Int res;
1274 struct PX_Request req;
1275
1276 vg_assert(proxy != NULL);
1277 vg_assert(proxy->tid == tid);
fitzhardingea09a1b52003-11-07 23:09:48 +00001278 vg_assert(tst->status == VgTs_WaitSys);
1279
1280 /* Clear the results pipe before we try to write to a proxy to
1281 prevent a deadlock (the proxyLWP may be trying to write a result
1282 back to the scheduler LWP, and therefore not be reading its
1283 input pipe, which would then block the write below).
1284
1285 XXX I think this can't happen - the pipe has 4k of buffering,
1286 and can therefore fit many messages, but we can only have one
1287 outstanding - the write below will not block forever. Fetching
1288 results here can cause all kinds of confusion, because we
1289 definitely don't want the complexity of trying to deliver a
1290 signal right now.
1291 */
1292 if (0)
1293 VG_(proxy_results)();
jsgf855d93d2003-10-13 22:26:55 +00001294
1295 req.request = PX_RunSyscall;
fitzhardinge89f9a322003-10-30 07:25:59 +00001296
1297 tst->syscallno = tst->m_eax;
1298 tst->m_eax = -VKI_ERESTARTSYS;
1299
jsgf855d93d2003-10-13 22:26:55 +00001300 res = VG_(write)(proxy->topx, &req, sizeof(req));
1301
1302 if (res != sizeof(req)) {
fitzhardingea09a1b52003-11-07 23:09:48 +00001303 VG_(message)(Vg_DebugMsg, "sys_issue: write to tid %d failed %d (not %d)\n",
1304 tid, res, sizeof(req));
jsgf855d93d2003-10-13 22:26:55 +00001305 }
1306 return 0;
1307}
1308
1309/* Relatively expensive sanity tests for the syscall machinery */
1310void VG_(proxy_sanity)(void)
1311{
1312 Int tid;
1313 Bool sane = True;
1314 static const struct PX_Request req = { .request = PX_Ping };
1315
1316 for(tid = 0; tid < VG_N_THREADS; tid++) {
1317 ThreadState *tst = &VG_(threads)[tid];
1318 ProxyLWP *px;
1319 Int status;
1320 Int ret;
1321
1322 if (tst->status == VgTs_Empty)
1323 continue;
1324
1325 if (tst->proxy == NULL) {
fitzhardinge89f9a322003-10-30 07:25:59 +00001326 VG_(message)(Vg_DebugMsg, "TID %d: NULL proxy");
jsgf855d93d2003-10-13 22:26:55 +00001327 sane = False;
1328 continue;
1329 }
1330
1331 px = tst->proxy;
1332
1333 if (px->tid != tid) {
fitzhardinge89f9a322003-10-30 07:25:59 +00001334 VG_(message)(Vg_DebugMsg,
1335 "TID %d: proxy LWP %d doesn't have right tid (%d)\n",
1336 tid, px->lwp, px->tid);
jsgf855d93d2003-10-13 22:26:55 +00001337 sane = False;
1338 }
1339
1340 if (proxy_wait(px, False, &status)) {
fitzhardinge89f9a322003-10-30 07:25:59 +00001341 VG_(message)(Vg_DebugMsg,
1342 "TID %d: proxy LWP %d exited with status %d\n",
1343 tid, px->lwp, status);
jsgf855d93d2003-10-13 22:26:55 +00001344 sane = False;
1345 continue;
1346 }
1347
1348 /* No point checking if proxy is busy in a syscall, but all
1349 other times it should respond promptly. */
1350 if (tst->status != VgTs_WaitSys) {
1351 ret = VG_(write)(px->topx, &req, sizeof(req));
1352 if (ret != sizeof(req)) {
fitzhardinge89f9a322003-10-30 07:25:59 +00001353 VG_(message)(Vg_DebugMsg,
1354 "TID %d: failed to write PX_Ping to lwp %d: %d\n",
1355 tid, px->lwp, ret);
jsgf855d93d2003-10-13 22:26:55 +00001356 sane = False;
1357 }
1358 sys_wait_results(True, tid, PX_Ping);
1359 /* Can't make an assertion here, fortunately; this will
1360 either come back or it won't. */
1361 }
1362 }
1363
1364 vg_assert(sane);
1365}
1366
1367/*--------------------------------------------------------------------*/
1368/*--- Proxy LWP machinery. vg_proxylwp.c ---*/
1369/*--------------------------------------------------------------------*/