Cleanup of the dispatch mechanism.  Now syscall returns and the final
request to shutdown valgrind are done with the client request
mechanism too.  This is much better than having to check all
call/return addresses.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@57 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index b1b1b32..9fb6a23 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -54,14 +54,20 @@
 /* Magic values that %ebp might be set to when returning to the
    dispatcher.  The only other legitimate value is to point to the
    start of VG_(baseBlock).  These also are return values from
-   VG_(run_innerloop) to the scheduler. */
-#define VG_TRC_EBP_JMP_SPECIAL    17
-#define VG_TRC_EBP_JMP_SYSCALL    19
-#define VG_TRC_EBP_JMP_CLIENTREQ  23
+   VG_(run_innerloop) to the scheduler.
 
-#define VG_TRC_INNER_COUNTERZERO  29  /* ebp can't have this; sched return only */
-#define VG_TRC_INNER_FASTMISS     31  /* ditto.  Means fast-cache miss. */
-#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* ditto; got sigsegv/sigbus */
+   EBP means %ebp can legitimately have this value when a basic block
+   returns to the dispatch loop.  TRC means that this value is a valid
+   thread return code, which the dispatch loop may return to the
+   scheduler.  */
+#define VG_TRC_EBP_JMP_STKADJ     17 /* EBP only; handled by dispatcher */
+#define VG_TRC_EBP_JMP_SYSCALL    19 /* EBP and TRC */
+#define VG_TRC_EBP_JMP_CLIENTREQ  23 /* EBP and TRC */
+
+#define VG_TRC_INNER_COUNTERZERO  29  /* TRC only; means bb ctr == 0 */
+#define VG_TRC_INNER_FASTMISS     31  /* TRC only; means fast-cache miss. */
+#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* TRC only; got sigsegv/sigbus */
+
 
 /* Debugging hack for assembly code ... sigh. */
 #if 0
@@ -95,6 +101,10 @@
 #define VG_SMC_CACHE_SHIFT 6
 
 
+/* Assembly code stubs make these requests ... */
+#define VG_USERREQ__SIGNAL_RETURNS          0x4001
+#define VG_USERREQ__SHUTDOWN_VALGRIND       0x4002 
+
 #endif /* ndef __VG_INCLUDE_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S
index 0f4783b..a1b5569 100644
--- a/coregrind/vg_dispatch.S
+++ b/coregrind/vg_dispatch.S
@@ -80,28 +80,17 @@
 	# Start off dispatching paranoically, since we no longer have
 	# any indication whether or not this might be a special call/ret
 	# transfer.
-	jmp	dispatch_callret_maybe
+	jmp	dispatch_stkadj
 	
 	
 dispatch_main:
 	# Jump here to do a new dispatch.
 	# %eax holds destination (original) address.
 	# %ebp indicates further details of the control transfer
-	# requested to the address in %eax.  The idea is that we 
-	# want to check all jump targets to see if they are either
-	# VG_(signalreturn_bogusRA) or VG_(shutdown), both of which
-	# require special treatment.  However, testing all branch
-	# targets is expensive, and anyway in most cases JITter knows
-	# that a jump cannot be to either of these two.  We therefore
-	# adopt the following trick.
+	# requested to the address in %eax.
 	#
-	# If ebp == & VG_(baseBlock), which is what it started out as,
-	# this is a jump for which the JITter knows no check need be
-	# made.
+	# If ebp == & VG_(baseBlock), just jump next to %eax.
 	# 
-	# If ebp == VG_EBP_JMP_CALLRET, we had better make
-	# the check. 
-	#
 	# If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
 	# continuing at eax.
 	#
@@ -109,12 +98,6 @@
 	# continuing at eax.
 	#
 	# If %ebp has any other value, we panic.
-	#
-	# What the JITter assumes is that VG_(signalreturn_bogusRA) can
-	# only be arrived at from an x86 ret insn, and dually that
-	# VG_(shutdown) can only be arrived at from an x86 call insn.
-	# The net effect is that all call and return targets are checked
-	# but straightforward jumps are not.
 	
 	cmpl	$VG_(baseBlock), %ebp
 	jnz	dispatch_exceptional
@@ -174,8 +157,8 @@
 */
 dispatch_exceptional:
 	# this is jumped to only, not fallen-through from above
-	cmpl	$VG_TRC_EBP_JMP_SPECIAL, %ebp
-	jz	dispatch_callret_maybe
+	cmpl	$VG_TRC_EBP_JMP_STKADJ, %ebp
+	jz	dispatch_stkadj
 	cmpl	$VG_TRC_EBP_JMP_SYSCALL, %ebp
 	jz	dispatch_syscall
 	cmpl	$VG_TRC_EBP_JMP_CLIENTREQ, %ebp
@@ -202,7 +185,7 @@
 	movl	$VG_TRC_EBP_JMP_CLIENTREQ, %eax
 	jmp	run_innerloop_exit
 
-dispatch_callret_maybe:
+dispatch_stkadj:
 	# save %eax in %EIP
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
@@ -213,21 +196,10 @@
 	call	VG_(delete_client_stack_blocks_following_ESP_change)
 	popl	%eax
 	movl	$VG_(baseBlock), %ebp
-	
-	# is this a call/return which we need to mess with
-	cmpl	$VG_(signalreturn_bogusRA), %eax
-	jz	dispatch_callret
-	cmpl	$VG_(shutdown), %eax
-	jz	dispatch_callret
-	
+		
 	# ok, its not interesting.  Handle the normal way.
 	jmp	dispatch_boring
 
-dispatch_callret:
-	# %EIP is up to date here since dispatch_callret_maybe dominates
-	movl	$VG_TRC_EBP_JMP_SPECIAL, %eax
-	jmp	run_innerloop_exit
-
 
 .data
 panic_msg_ebp:
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 0514cf9..e5c2c66 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1076,7 +1076,7 @@
          break;
       case JmpCall:
       case JmpRet: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SPECIAL, R_EBP );
+         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP );
          break;
       case JmpSyscall: 
          emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 72de134..4d1f8c2 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -32,7 +32,46 @@
 
 #include "vg_constants.h"
 
+/* ------------------ SIMULATED CPU HELPERS ------------------ */
+/* A couple of stubs for returns which we want to catch: signal
+   returns and pthread returns.  In the latter case, the thread's
+   return value is in %EAX, so we pass this as the first argument
+   to the request.  In both cases we use the user request mechanism.
+   You need to read the definition of VALGRIND_MAGIC_SEQUENCE
+   in valgrind.h to make sense of this.
+*/
+.global VG_(signalreturn_bogusRA)
+VG_(signalreturn_bogusRA):
+	subl	$20, %esp	# allocate arg block
+	movl	%esp, %edx	# %edx == &_zzq_args[0]
+	movl	$VG_USERREQ__SIGNAL_RETURNS, 0(%edx)	# request
+	movl	$0, 4(%edx)	# arg1
+	movl	$0, 8(%edx)	# arg2
+	movl	$0, 12(%edx)	# arg3
+	movl	$0, 16(%edx)	# arg4
+	movl	%edx, %eax
+	# and now the magic sequence itself:
+	roll $29, %eax
+	roll $3, %eax
+	rorl $27, %eax
+	rorl $5, %eax
+	roll $13, %eax
+	roll $19, %eax
+	# should never get here
+	pushl	$signalreturn_bogusRA_panic_msg
+	call	VG_(panic)
+	
+.data
+signalreturn_bogusRA_panic_msg:
+.ascii	"vg_signalreturn_bogusRA: VG_USERREQ__SIGNAL_RETURNS was missed"
+.byte	0
+.text	
+	
 
+	
+/* ------------------ REAL CPU HELPERS ------------------ */
+/* The rest of this lot run on the real CPU. */
+	
 /* Various helper routines, for instructions which are just too
    darn tedious for the JITter to output code in-line:
 	
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 56ee85d..0833cae 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -409,6 +409,13 @@
 #define VG_USERREQ__PTHREAD_MUTEX_DESTROY   0x3008
 #define VG_USERREQ__PTHREAD_CANCEL          0x3009
 
+/* 
+In vg_constants.h:
+#define VG_USERREQ__SIGNAL_RETURNS          0x4001
+#define VG_USERREQ__SHUTDOWN_VALGRIND       0x4002 
+*/
+
+
 /* ---------------------------------------------------------------------
    Constants pertaining to the simulated CPU state, VG_(baseBlock),
    which need to go here to avoid ugly circularities.
@@ -579,9 +586,6 @@
 extern void VG_(do__NR_sigaction)     ( ThreadId tid );
 extern void VG_(do__NR_sigprocmask)   ( Int how, vki_ksigset_t* set );
 
-/* Bogus return address for signal handlers.  Is never executed. */
-extern void VG_(signalreturn_bogusRA) ( void );
-
 /* Modify the current thread's state once we have detected it is
    returning from a signal handler. */
 extern void VG_(signal_returns) ( ThreadId );
@@ -1516,6 +1520,9 @@
 extern void VG_(helper_value_check1_fail);
 extern void VG_(helper_value_check0_fail);
 
+/* NOT A FUNCTION; a bogus RETURN ADDRESS. */
+extern void VG_(signalreturn_bogusRA)( void );
+
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index f75fcce..e748db1 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -185,7 +185,6 @@
 Char* name_of_sched_event ( UInt event )
 {
    switch (event) {
-      case VG_TRC_EBP_JMP_SPECIAL:    return "JMP_SPECIAL";
       case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
       case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
       case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
@@ -889,7 +888,7 @@
    ThreadId tid, tid_next;
    UInt     trc;
    UInt     dispatch_ctr_SAVED;
-   Int      done_this_time, n_in_fdwait;
+   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
    Char     msg_buf[100];
    Addr     trans_addr;
 
@@ -941,12 +940,13 @@
 
          /* Try and find a thread (tid) to run. */
          tid_next = tid;
-         n_in_fdwait   = 0;
+         n_in_fdwait_or_sleep = 0;
          while (True) {
             tid_next++;
             if (tid_next >= VG_N_THREADS) tid_next = 0;
-            if (vg_threads[tid_next].status == VgTs_WaitFD)
-               n_in_fdwait ++;
+            if (vg_threads[tid_next].status == VgTs_WaitFD
+                || vg_threads[tid_next].status == VgTs_Sleeping)
+               n_in_fdwait_or_sleep ++;
             if (vg_threads[tid_next].status == VgTs_Runnable) 
                break; /* We can run this one. */
             if (tid_next == tid) 
@@ -962,9 +962,10 @@
 	 }
 
          /* We didn't find a runnable thread.  Now what? */
-         if (n_in_fdwait == 0) {
-            /* No runnable threads and non in fd-wait either.  Not
-               good. */
+         if (n_in_fdwait_or_sleep == 0) {
+            /* No runnable threads and no prospect of any appearing
+               even if we wait for an arbitrary length of time.  In
+               short, we have a deadlock. */
 	    pp_sched_status();
             return VgSrc_Deadlock;
          }
@@ -1098,22 +1099,6 @@
                1, whereupon the signal will be "delivered". */
 	    break;
 
-         case VG_TRC_EBP_JMP_SPECIAL: {
-            Addr next_eip = vg_threads[tid].m_eip;
-            if (next_eip == (Addr) & VG_(signalreturn_bogusRA)) {
-               /* vthread tid is returning from a signal handler;
-                  modify its stack/regs accordingly. */
-               VG_(signal_returns)(tid);
-            } 
-            else
-            if (next_eip == (Addr) & VG_(shutdown)) {
-               return VgSrc_Shutdown;
-            } else {
-               VG_(panic)("vg_schedule: VG_TRC_EBP_JMP_SPECIAL");
-            }
-            break;
-         }
-
          case VG_TRC_EBP_JMP_SYSCALL:
             /* Do a syscall for the vthread tid.  This could cause it
                to become non-runnable. */
@@ -1126,9 +1111,11 @@
                maybe_do_trivial_clientreq(), so we don't expect to see
                those here. 
             */
+            /* The thread's %EAX points at an arg block, the first
+               word of which is the request code. */
+            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
             if (0) {
-               VG_(sprintf)(msg_buf, "request 0x%x", 
-                                     vg_threads[tid].m_eax);
+               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
                print_sched_event(tid, msg_buf);
 	    }
 	    /* Do a non-trivial client request for thread tid.  tid's
@@ -1139,7 +1126,11 @@
                other blocked threads become runnable.  In general we
                can and often do mess with the state of arbitrary
                threads at this point. */
-            do_nontrivial_clientreq(tid);
+            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
+               return VgSrc_Shutdown;
+            } else {
+               do_nontrivial_clientreq(tid);
+	    }
             break;
 
          default: 
@@ -1745,6 +1736,12 @@
          vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
 	 break;
 
+      case VG_USERREQ__SIGNAL_RETURNS:
+         /* vthread tid is returning from a signal handler;
+            modify its stack/regs accordingly. */
+         VG_(signal_returns)(tid);
+         break;
+
       default:
          VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
          VG_(panic)("handle_private_client_pthread_request: "
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index ea2826b..33ff722 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -34,7 +34,7 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 #include "vg_unsafe.h"
-
+#include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
 
 /* ---------------------------------------------------------------------
    Signal state for this process.
@@ -122,17 +122,6 @@
 
 
 
-/* This is the bogus return address which the implementation
-   of RET in vg_cpu.c checks for.  If it spots a return to 
-   here, it calls vg_signal_returns().  We should never actually
-   enter this procedure, neither on the real nor simulated CPU.
-*/
-void VG_(signalreturn_bogusRA) ( void )
-{
-   VG_(panic) ( "vg_signalreturn_bogusRA -- something is badly wrong" );
-}
-
-
 /* Set up a stack frame (VgSigContext) for the client's signal
    handler.  This includes the signal number and a bogus return
    address.  */
@@ -210,9 +199,11 @@
 
    tst = VG_(get_thread_state)(tid);
 
-   /* esp is now pointing at the sigNo field in the signal frame. */
+   /* Correctly reestablish the frame base address. */
    esp   = tst->m_esp;
-   frame = (VgSigFrame*)(esp-4);
+   frame = (VgSigFrame*)
+              (esp -4 /* because the handler's RET pops the RA */
+                  +20 /* because signalreturn_bogusRA pushes 5 words */);
 
    vg_assert(frame->magicPI == 0x31415927);
    vg_assert(frame->magicE  == 0x27182818);
diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S
index 3fa965c..2ac2e6d 100644
--- a/coregrind/vg_startup.S
+++ b/coregrind/vg_startup.S
@@ -112,14 +112,29 @@
 	# Note that we will enter here on the synthetic CPU, not
 	# the real one!  So the interpreter must notice when this
 	# procedure is called, and use that as its cue to switch
-	# back to the real CPU.  That means the code placed here is
-	# utterly irrelevant, since it will never get run, but I
-	# place a RET here anyway, since it is the traditional way
-	# to return from a subroutine :-)
+	# back to the real CPU.  As usual we have a client request
+	# to do this.  To make sense of this you need to read the
+	# definition of VALGRIND_MAGIC_SEQUENCE in valgrind.h.
+	pushl	%eax
+	pushl	%edx
+	subl	$20, %esp	# allocate arg block
+	movl	%esp, %eax	# %eax == &_zzq_args[0]
+	movl	$VG_USERREQ__SHUTDOWN_VALGRIND, 0(%eax)	# request
+	# don't bother to fill in arg1 .. 4, not important
+	# and now the magic sequence itself:
+	roll $29, %eax
+	roll $3, %eax
+	rorl $27, %eax
+	rorl $5, %eax
+	roll $13, %eax
+	roll $19, %eax
+	# valgrind now exits.  the following insns are
+	# executed on the real CPU.
+	addl	$20, %esp
+	popl	%edx
+	popl	%eax
 	ret
-
-
-
+	
 .global	VG_(switch_to_real_CPU)
 VG_(switch_to_real_CPU):
 	# Once Valgrind has decided it needs to exit, either
@@ -216,6 +231,7 @@
 .Lend_of_swizzle:
 	.size	VG_(swizzle_esp_then_start_GDB), .Lend_of_swizzle-VG_(swizzle_esp_then_start_GDB)
 
+
 ##--------------------------------------------------------------------##
 ##--- end                                             vg_startup.S ---##
 ##--------------------------------------------------------------------##
diff --git a/vg_constants.h b/vg_constants.h
index b1b1b32..9fb6a23 100644
--- a/vg_constants.h
+++ b/vg_constants.h
@@ -54,14 +54,20 @@
 /* Magic values that %ebp might be set to when returning to the
    dispatcher.  The only other legitimate value is to point to the
    start of VG_(baseBlock).  These also are return values from
-   VG_(run_innerloop) to the scheduler. */
-#define VG_TRC_EBP_JMP_SPECIAL    17
-#define VG_TRC_EBP_JMP_SYSCALL    19
-#define VG_TRC_EBP_JMP_CLIENTREQ  23
+   VG_(run_innerloop) to the scheduler.
 
-#define VG_TRC_INNER_COUNTERZERO  29  /* ebp can't have this; sched return only */
-#define VG_TRC_INNER_FASTMISS     31  /* ditto.  Means fast-cache miss. */
-#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* ditto; got sigsegv/sigbus */
+   EBP means %ebp can legitimately have this value when a basic block
+   returns to the dispatch loop.  TRC means that this value is a valid
+   thread return code, which the dispatch loop may return to the
+   scheduler.  */
+#define VG_TRC_EBP_JMP_STKADJ     17 /* EBP only; handled by dispatcher */
+#define VG_TRC_EBP_JMP_SYSCALL    19 /* EBP and TRC */
+#define VG_TRC_EBP_JMP_CLIENTREQ  23 /* EBP and TRC */
+
+#define VG_TRC_INNER_COUNTERZERO  29  /* TRC only; means bb ctr == 0 */
+#define VG_TRC_INNER_FASTMISS     31  /* TRC only; means fast-cache miss. */
+#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* TRC only; got sigsegv/sigbus */
+
 
 /* Debugging hack for assembly code ... sigh. */
 #if 0
@@ -95,6 +101,10 @@
 #define VG_SMC_CACHE_SHIFT 6
 
 
+/* Assembly code stubs make these requests ... */
+#define VG_USERREQ__SIGNAL_RETURNS          0x4001
+#define VG_USERREQ__SHUTDOWN_VALGRIND       0x4002 
+
 #endif /* ndef __VG_INCLUDE_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/vg_dispatch.S b/vg_dispatch.S
index 0f4783b..a1b5569 100644
--- a/vg_dispatch.S
+++ b/vg_dispatch.S
@@ -80,28 +80,17 @@
 	# Start off dispatching paranoically, since we no longer have
 	# any indication whether or not this might be a special call/ret
 	# transfer.
-	jmp	dispatch_callret_maybe
+	jmp	dispatch_stkadj
 	
 	
 dispatch_main:
 	# Jump here to do a new dispatch.
 	# %eax holds destination (original) address.
 	# %ebp indicates further details of the control transfer
-	# requested to the address in %eax.  The idea is that we 
-	# want to check all jump targets to see if they are either
-	# VG_(signalreturn_bogusRA) or VG_(shutdown), both of which
-	# require special treatment.  However, testing all branch
-	# targets is expensive, and anyway in most cases JITter knows
-	# that a jump cannot be to either of these two.  We therefore
-	# adopt the following trick.
+	# requested to the address in %eax.
 	#
-	# If ebp == & VG_(baseBlock), which is what it started out as,
-	# this is a jump for which the JITter knows no check need be
-	# made.
+	# If ebp == & VG_(baseBlock), just jump next to %eax.
 	# 
-	# If ebp == VG_EBP_JMP_CALLRET, we had better make
-	# the check. 
-	#
 	# If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
 	# continuing at eax.
 	#
@@ -109,12 +98,6 @@
 	# continuing at eax.
 	#
 	# If %ebp has any other value, we panic.
-	#
-	# What the JITter assumes is that VG_(signalreturn_bogusRA) can
-	# only be arrived at from an x86 ret insn, and dually that
-	# VG_(shutdown) can only be arrived at from an x86 call insn.
-	# The net effect is that all call and return targets are checked
-	# but straightforward jumps are not.
 	
 	cmpl	$VG_(baseBlock), %ebp
 	jnz	dispatch_exceptional
@@ -174,8 +157,8 @@
 */
 dispatch_exceptional:
 	# this is jumped to only, not fallen-through from above
-	cmpl	$VG_TRC_EBP_JMP_SPECIAL, %ebp
-	jz	dispatch_callret_maybe
+	cmpl	$VG_TRC_EBP_JMP_STKADJ, %ebp
+	jz	dispatch_stkadj
 	cmpl	$VG_TRC_EBP_JMP_SYSCALL, %ebp
 	jz	dispatch_syscall
 	cmpl	$VG_TRC_EBP_JMP_CLIENTREQ, %ebp
@@ -202,7 +185,7 @@
 	movl	$VG_TRC_EBP_JMP_CLIENTREQ, %eax
 	jmp	run_innerloop_exit
 
-dispatch_callret_maybe:
+dispatch_stkadj:
 	# save %eax in %EIP
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
@@ -213,21 +196,10 @@
 	call	VG_(delete_client_stack_blocks_following_ESP_change)
 	popl	%eax
 	movl	$VG_(baseBlock), %ebp
-	
-	# is this a call/return which we need to mess with
-	cmpl	$VG_(signalreturn_bogusRA), %eax
-	jz	dispatch_callret
-	cmpl	$VG_(shutdown), %eax
-	jz	dispatch_callret
-	
+		
 	# ok, its not interesting.  Handle the normal way.
 	jmp	dispatch_boring
 
-dispatch_callret:
-	# %EIP is up to date here since dispatch_callret_maybe dominates
-	movl	$VG_TRC_EBP_JMP_SPECIAL, %eax
-	jmp	run_innerloop_exit
-
 
 .data
 panic_msg_ebp:
diff --git a/vg_from_ucode.c b/vg_from_ucode.c
index 0514cf9..e5c2c66 100644
--- a/vg_from_ucode.c
+++ b/vg_from_ucode.c
@@ -1076,7 +1076,7 @@
          break;
       case JmpCall:
       case JmpRet: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SPECIAL, R_EBP );
+         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP );
          break;
       case JmpSyscall: 
          emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
diff --git a/vg_helpers.S b/vg_helpers.S
index 72de134..4d1f8c2 100644
--- a/vg_helpers.S
+++ b/vg_helpers.S
@@ -32,7 +32,46 @@
 
 #include "vg_constants.h"
 
+/* ------------------ SIMULATED CPU HELPERS ------------------ */
+/* A couple of stubs for returns which we want to catch: signal
+   returns and pthread returns.  In the latter case, the thread's
+   return value is in %EAX, so we pass this as the first argument
+   to the request.  In both cases we use the user request mechanism.
+   You need to read the definition of VALGRIND_MAGIC_SEQUENCE
+   in valgrind.h to make sense of this.
+*/
+.global VG_(signalreturn_bogusRA)
+VG_(signalreturn_bogusRA):
+	subl	$20, %esp	# allocate arg block
+	movl	%esp, %edx	# %edx == &_zzq_args[0]
+	movl	$VG_USERREQ__SIGNAL_RETURNS, 0(%edx)	# request
+	movl	$0, 4(%edx)	# arg1
+	movl	$0, 8(%edx)	# arg2
+	movl	$0, 12(%edx)	# arg3
+	movl	$0, 16(%edx)	# arg4
+	movl	%edx, %eax
+	# and now the magic sequence itself:
+	roll $29, %eax
+	roll $3, %eax
+	rorl $27, %eax
+	rorl $5, %eax
+	roll $13, %eax
+	roll $19, %eax
+	# should never get here
+	pushl	$signalreturn_bogusRA_panic_msg
+	call	VG_(panic)
+	
+.data
+signalreturn_bogusRA_panic_msg:
+.ascii	"vg_signalreturn_bogusRA: VG_USERREQ__SIGNAL_RETURNS was missed"
+.byte	0
+.text	
+	
 
+	
+/* ------------------ REAL CPU HELPERS ------------------ */
+/* The rest of this lot run on the real CPU. */
+	
 /* Various helper routines, for instructions which are just too
    darn tedious for the JITter to output code in-line:
 	
diff --git a/vg_include.h b/vg_include.h
index 56ee85d..0833cae 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -409,6 +409,13 @@
 #define VG_USERREQ__PTHREAD_MUTEX_DESTROY   0x3008
 #define VG_USERREQ__PTHREAD_CANCEL          0x3009
 
+/* 
+In vg_constants.h:
+#define VG_USERREQ__SIGNAL_RETURNS          0x4001
+#define VG_USERREQ__SHUTDOWN_VALGRIND       0x4002 
+*/
+
+
 /* ---------------------------------------------------------------------
    Constants pertaining to the simulated CPU state, VG_(baseBlock),
    which need to go here to avoid ugly circularities.
@@ -579,9 +586,6 @@
 extern void VG_(do__NR_sigaction)     ( ThreadId tid );
 extern void VG_(do__NR_sigprocmask)   ( Int how, vki_ksigset_t* set );
 
-/* Bogus return address for signal handlers.  Is never executed. */
-extern void VG_(signalreturn_bogusRA) ( void );
-
 /* Modify the current thread's state once we have detected it is
    returning from a signal handler. */
 extern void VG_(signal_returns) ( ThreadId );
@@ -1516,6 +1520,9 @@
 extern void VG_(helper_value_check1_fail);
 extern void VG_(helper_value_check0_fail);
 
+/* NOT A FUNCTION; a bogus RETURN ADDRESS. */
+extern void VG_(signalreturn_bogusRA)( void );
+
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
diff --git a/vg_scheduler.c b/vg_scheduler.c
index f75fcce..e748db1 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -185,7 +185,6 @@
 Char* name_of_sched_event ( UInt event )
 {
    switch (event) {
-      case VG_TRC_EBP_JMP_SPECIAL:    return "JMP_SPECIAL";
       case VG_TRC_EBP_JMP_SYSCALL:    return "SYSCALL";
       case VG_TRC_EBP_JMP_CLIENTREQ:  return "CLIENTREQ";
       case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
@@ -889,7 +888,7 @@
    ThreadId tid, tid_next;
    UInt     trc;
    UInt     dispatch_ctr_SAVED;
-   Int      done_this_time, n_in_fdwait;
+   Int      request_code, done_this_time, n_in_fdwait_or_sleep;
    Char     msg_buf[100];
    Addr     trans_addr;
 
@@ -941,12 +940,13 @@
 
          /* Try and find a thread (tid) to run. */
          tid_next = tid;
-         n_in_fdwait   = 0;
+         n_in_fdwait_or_sleep = 0;
          while (True) {
             tid_next++;
             if (tid_next >= VG_N_THREADS) tid_next = 0;
-            if (vg_threads[tid_next].status == VgTs_WaitFD)
-               n_in_fdwait ++;
+            if (vg_threads[tid_next].status == VgTs_WaitFD
+                || vg_threads[tid_next].status == VgTs_Sleeping)
+               n_in_fdwait_or_sleep ++;
             if (vg_threads[tid_next].status == VgTs_Runnable) 
                break; /* We can run this one. */
             if (tid_next == tid) 
@@ -962,9 +962,10 @@
 	 }
 
          /* We didn't find a runnable thread.  Now what? */
-         if (n_in_fdwait == 0) {
-            /* No runnable threads and non in fd-wait either.  Not
-               good. */
+         if (n_in_fdwait_or_sleep == 0) {
+            /* No runnable threads and no prospect of any appearing
+               even if we wait for an arbitrary length of time.  In
+               short, we have a deadlock. */
 	    pp_sched_status();
             return VgSrc_Deadlock;
          }
@@ -1098,22 +1099,6 @@
                1, whereupon the signal will be "delivered". */
 	    break;
 
-         case VG_TRC_EBP_JMP_SPECIAL: {
-            Addr next_eip = vg_threads[tid].m_eip;
-            if (next_eip == (Addr) & VG_(signalreturn_bogusRA)) {
-               /* vthread tid is returning from a signal handler;
-                  modify its stack/regs accordingly. */
-               VG_(signal_returns)(tid);
-            } 
-            else
-            if (next_eip == (Addr) & VG_(shutdown)) {
-               return VgSrc_Shutdown;
-            } else {
-               VG_(panic)("vg_schedule: VG_TRC_EBP_JMP_SPECIAL");
-            }
-            break;
-         }
-
          case VG_TRC_EBP_JMP_SYSCALL:
             /* Do a syscall for the vthread tid.  This could cause it
                to become non-runnable. */
@@ -1126,9 +1111,11 @@
                maybe_do_trivial_clientreq(), so we don't expect to see
                those here. 
             */
+            /* The thread's %EAX points at an arg block, the first
+               word of which is the request code. */
+            request_code = ((UInt*)(vg_threads[tid].m_eax))[0];
             if (0) {
-               VG_(sprintf)(msg_buf, "request 0x%x", 
-                                     vg_threads[tid].m_eax);
+               VG_(sprintf)(msg_buf, "request 0x%x", request_code );
                print_sched_event(tid, msg_buf);
 	    }
 	    /* Do a non-trivial client request for thread tid.  tid's
@@ -1139,7 +1126,11 @@
                other blocked threads become runnable.  In general we
                can and often do mess with the state of arbitrary
                threads at this point. */
-            do_nontrivial_clientreq(tid);
+            if (request_code == VG_USERREQ__SHUTDOWN_VALGRIND) {
+               return VgSrc_Shutdown;
+            } else {
+               do_nontrivial_clientreq(tid);
+	    }
             break;
 
          default: 
@@ -1745,6 +1736,12 @@
          vg_threads[tid].m_edx = VG_(handle_client_request) ( arg );
 	 break;
 
+      case VG_USERREQ__SIGNAL_RETURNS:
+         /* vthread tid is returning from a signal handler;
+            modify its stack/regs accordingly. */
+         VG_(signal_returns)(tid);
+         break;
+
       default:
          VG_(printf)("panic'd on private request = 0x%x\n", arg[0] );
          VG_(panic)("handle_private_client_pthread_request: "
diff --git a/vg_signals.c b/vg_signals.c
index ea2826b..33ff722 100644
--- a/vg_signals.c
+++ b/vg_signals.c
@@ -34,7 +34,7 @@
 #include "vg_include.h"
 #include "vg_constants.h"
 #include "vg_unsafe.h"
-
+#include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
 
 /* ---------------------------------------------------------------------
    Signal state for this process.
@@ -122,17 +122,6 @@
 
 
 
-/* This is the bogus return address which the implementation
-   of RET in vg_cpu.c checks for.  If it spots a return to 
-   here, it calls vg_signal_returns().  We should never actually
-   enter this procedure, neither on the real nor simulated CPU.
-*/
-void VG_(signalreturn_bogusRA) ( void )
-{
-   VG_(panic) ( "vg_signalreturn_bogusRA -- something is badly wrong" );
-}
-
-
 /* Set up a stack frame (VgSigContext) for the client's signal
    handler.  This includes the signal number and a bogus return
    address.  */
@@ -210,9 +199,11 @@
 
    tst = VG_(get_thread_state)(tid);
 
-   /* esp is now pointing at the sigNo field in the signal frame. */
+   /* Correctly reestablish the frame base address. */
    esp   = tst->m_esp;
-   frame = (VgSigFrame*)(esp-4);
+   frame = (VgSigFrame*)
+              (esp -4 /* because the handler's RET pops the RA */
+                  +20 /* because signalreturn_bogusRA pushes 5 words */);
 
    vg_assert(frame->magicPI == 0x31415927);
    vg_assert(frame->magicE  == 0x27182818);
diff --git a/vg_startup.S b/vg_startup.S
index 3fa965c..2ac2e6d 100644
--- a/vg_startup.S
+++ b/vg_startup.S
@@ -112,14 +112,29 @@
 	# Note that we will enter here on the synthetic CPU, not
 	# the real one!  So the interpreter must notice when this
 	# procedure is called, and use that as its cue to switch
-	# back to the real CPU.  That means the code placed here is
-	# utterly irrelevant, since it will never get run, but I
-	# place a RET here anyway, since it is the traditional way
-	# to return from a subroutine :-)
+	# back to the real CPU.  As usual we have a client request
+	# to do this.  To make sense of this you need to read the
+	# definition of VALGRIND_MAGIC_SEQUENCE in valgrind.h.
+	pushl	%eax
+	pushl	%edx
+	subl	$20, %esp	# allocate arg block
+	movl	%esp, %eax	# %eax == &_zzq_args[0]
+	movl	$VG_USERREQ__SHUTDOWN_VALGRIND, 0(%eax)	# request
+	# don't bother to fill in arg1 .. 4, not important
+	# and now the magic sequence itself:
+	roll $29, %eax
+	roll $3, %eax
+	rorl $27, %eax
+	rorl $5, %eax
+	roll $13, %eax
+	roll $19, %eax
+	# valgrind now exits.  the following insns are
+	# executed on the real CPU.
+	addl	$20, %esp
+	popl	%edx
+	popl	%eax
 	ret
-
-
-
+	
 .global	VG_(switch_to_real_CPU)
 VG_(switch_to_real_CPU):
 	# Once Valgrind has decided it needs to exit, either
@@ -216,6 +231,7 @@
 .Lend_of_swizzle:
 	.size	VG_(swizzle_esp_then_start_GDB), .Lend_of_swizzle-VG_(swizzle_esp_then_start_GDB)
 
+
 ##--------------------------------------------------------------------##
 ##--- end                                             vg_startup.S ---##
 ##--------------------------------------------------------------------##