Fix enough stuff so it will start up and run for a few bbs on amd64,
before dying.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3230 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/amd64-linux/ldt.c b/coregrind/amd64-linux/ldt.c
index d8bfc08..3c19585 100644
--- a/coregrind/amd64-linux/ldt.c
+++ b/coregrind/amd64-linux/ldt.c
@@ -129,7 +129,6 @@
if (ldt != NULL)
VG_(arena_free)(VG_AR_CORE, ldt);
}
-#endif
/* Clear a TLS array. */
@@ -149,7 +148,6 @@
}
-#if 0
/* Fish the base field out of an VgLdtEntry. This is the only part we
are particularly interested in. */
diff --git a/coregrind/amd64-linux/syscalls.c b/coregrind/amd64-linux/syscalls.c
index 8ad091d..7eb8789 100644
--- a/coregrind/amd64-linux/syscalls.c
+++ b/coregrind/amd64-linux/syscalls.c
@@ -412,9 +412,9 @@
const UInt VGA_(syscall_table_size) =
sizeof(VGA_(syscall_table)) / sizeof(VGA_(syscall_table)[0]);
-void VG_(clear_TLS_for_thread) ( VgLdtEntry* tls )
-{
-}
+//void VG_(clear_TLS_for_thread) ( VgLdtEntry* tls )
+//{
+//}
/*--------------------------------------------------------------------*/
/*--- end ---*/
diff --git a/coregrind/amd64/amd64_private.h b/coregrind/amd64/amd64_private.h
index 58e1b6d..be9f4fd 100644
--- a/coregrind/amd64/amd64_private.h
+++ b/coregrind/amd64/amd64_private.h
@@ -36,15 +36,9 @@
#include "tool_arch.h" // arch-specific tool stuff
/* ---------------------------------------------------------------------
- Exports of vg_ldt.c
+ Exports of state.c that are not core-visible
------------------------------------------------------------------ */
-#if 0
-/* Alloc & copy, and dealloc. */
-extern VgLdtEntry* VG_(allocate_LDT_for_thread) ( VgLdtEntry* parent_ldt );
-extern void VG_(deallocate_LDT_for_thread) ( VgLdtEntry* ldt );
-#endif
-extern void VG_(clear_TLS_for_thread) ( VgLdtEntry* tls );
#endif // __AMD64_PRIVATE_H
diff --git a/coregrind/amd64/core_arch.h b/coregrind/amd64/core_arch.h
index 6272190..c708449 100644
--- a/coregrind/amd64/core_arch.h
+++ b/coregrind/amd64/core_arch.h
@@ -44,8 +44,6 @@
#define VG_ELF_MACHINE EM_X86_64
#define VG_ELF_CLASS ELFCLASS64
-#define InsnSetArch InsnSetAMD64
-
#define VGA_WORD_SIZE 8
/* ---------------------------------------------------------------------
@@ -71,69 +69,23 @@
#define STACK_FRAME_NEXT(rbp) (((UWord*)rbp)[0])
// Get stack pointer and frame pointer
-#define ARCH_GET_REAL_STACK_PTR(esp) do { \
- I_die_here; \
+#define ARCH_GET_REAL_STACK_PTR(lval) do { \
+ asm("movq %%rsp, %0" : "=r" (lval)); \
} while (0)
-#define ARCH_GET_REAL_FRAME_PTR(ebp) do { \
- I_die_here; \
+#define ARCH_GET_REAL_FRAME_PTR(lval) do { \
+ asm("movq %%rbp, %0" : "=r" (lval)); \
} while (0)
/* ---------------------------------------------------------------------
- LDT type
- ------------------------------------------------------------------ */
-
-// XXX: eventually this will be x86-private, not seen by the core(?)
-
-/* This is the hardware-format for a segment descriptor, ie what the
- x86 actually deals with. It is 8 bytes long. It's ugly. */
-
-typedef struct _LDT_ENTRY {
- union {
- struct {
- UShort LimitLow;
- UShort BaseLow;
- unsigned BaseMid : 8;
- unsigned Type : 5;
- unsigned Dpl : 2;
- unsigned Pres : 1;
- unsigned LimitHi : 4;
- unsigned Sys : 1;
- unsigned Reserved_0 : 1;
- unsigned Default_Big : 1;
- unsigned Granularity : 1;
- unsigned BaseHi : 8;
- } Bits;
- struct {
- UInt word1;
- UInt word2;
- } Words;
- }
- LdtEnt;
-} VgLdtEntry;
-
-/* ---------------------------------------------------------------------
Architecture-specific part of a ThreadState
------------------------------------------------------------------ */
// Architecture-specific part of a ThreadState
// XXX: eventually this should be made abstract, ie. the fields not visible
-// to the core... then VgLdtEntry can be made non-visible to the core
-// also.
+// to the core...
typedef
struct {
- /* Pointer to this thread's Local (Segment) Descriptor Table.
- Starts out as NULL, indicating there is no table, and we hope
- to keep it that way. If the thread does __NR_modify_ldt to
- create entries, we allocate a 8192-entry table at that point.
- This is a straight copy of the Linux kernel's scheme. Don't
- forget to deallocate this at thread exit. */
- VgLdtEntry* ldt;
-
- /* TLS table. This consists of a small number (currently 3) of
- entries from the Global Descriptor Table. */
- VgLdtEntry tls[VKI_GDT_ENTRY_TLS_ENTRIES];
-
/* --- BEGIN vex-mandated guest state --- */
/* Saved machine context. */
diff --git a/coregrind/amd64/dispatch.S b/coregrind/amd64/dispatch.S
index a0b3be9..8c9ad28 100644
--- a/coregrind/amd64/dispatch.S
+++ b/coregrind/amd64/dispatch.S
@@ -35,13 +35,6 @@
/*------------------------------------------------------------*/
/*--- The dispatch loop. ---*/
/*------------------------------------------------------------*/
-
-#define TT_LOOKUP(reg, fail) \
- movq %rax, reg; \
- andq $VG_TT_FAST_MASK, reg; \
- movq VG_(tt_fast)(,reg,8), reg; \
- cmpq %rax, (reg); \
- jnz fail
.globl VG_(run_innerloop)
VG_(run_innerloop):
@@ -71,7 +64,24 @@
/* fetch %RIP into %rax */
movq VG_(instr_ptr_offset), %rsi
movq (%rbp, %rsi, 1), %rax
+
+ /* set host FPU control word to the default mode expected
+ by VEX-generated code. See comments in libvex.h for
+ more info. */
+ finit
+ pushq $0x027F
+ fldcw (%rsp)
+ addq $8, %rsp
+ /* set host SSE control word to the default mode expected
+ by VEX-generated code. */
+ pushq $0x1F80
+ ldmxcsr (%rsp)
+ addq $8, %rsp
+
+ /* set dir flag to known value */
+ cld
+
/* fall into main loop */
/* Here, %rax is the only live (real) register. The entire
@@ -84,16 +94,23 @@
/* Are we out of timeslice? If yes, defer to scheduler. */
subl $1, VG_(dispatch_ctr)
-
jz counter_is_zero
- /* try a fast lookup in the translation cache */
- TT_LOOKUP(%rbx, fast_lookup_failed)
- /* Found a match. Call the tce.payload field. The magic 12
- value is offsetof(TCEntry,payload) on a 64-bit platform. */
- addq $12, %rbx
- call *%rbx
-
+ /* try a fast lookup in the translation cache */
+ movq %rax, %rbx
+ andq $VG_TT_FAST_MASK, %rbx
+ movq VG_(tt_fast)(,%rbx,8), %rcx
+ cmpq %rax, (%rcx)
+ jnz fast_lookup_failed
+ /* increment bb profile counter */
+ movq VG_(tt_fastN)(,%rbx,8), %rdx
+ incl (%rdx)
+
+ /* Found a match. Call tce[1], which is 8 bytes along, since
+ each tce element is a 64-bit int. */
+ addq $8, %rcx
+ call *%rcx
+
/*
%rax holds destination (original) address.
%rbp indicates further details of the control transfer
@@ -110,20 +127,34 @@
jmp dispatch_exceptional
-
-fast_lookup_failed:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, VG_(dispatch_ctr)
- movq $VG_TRC_INNER_FASTMISS, %rax
- jmp run_innerloop_exit
-counter_is_zero:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, VG_(dispatch_ctr)
- movq $VG_TRC_INNER_COUNTERZERO, %rax
- jmp run_innerloop_exit
-
-run_innerloop_exit:
+
+/* All exits from the dispatcher go through here. %rax holds
+ the return value.
+*/
+run_innerloop_exit:
+ /* We're leaving. Check that nobody messed with
+ %mxcsr or %fpucw. We can't mess with %rax here as it
+ holds the tentative return value, but any other is OK. */
+ pushq $0
+ fstcw (%rsp)
+ cmpl $0x027F, (%rsp)
+ popq %r11 /* get rid of the word without trashing %eflags */
+ jnz invariant_violation
+ pushq $0
+ stmxcsr (%rsp)
+ andl $0xFFFFFFC0, (%rsp) /* mask out status flags */
+ cmpl $0x1F80, (%rsp)
+ popq %r11
+ jnz invariant_violation
+ /* otherwise we're OK */
+ jmp run_innerloop_exit_REALLY
+
+invariant_violation:
+ movq $VG_TRC_INVARIANT_FAILED, %rax
+ jmp run_innerloop_exit_REALLY
+
+run_innerloop_exit_REALLY:
popq %rdi
popq %r15
popq %r14
@@ -140,8 +171,8 @@
popq %rbx
ret
-
-
+
+
/* Other ways of getting out of the inner loop. Placed out-of-line to
make it look cleaner.
*/
@@ -150,13 +181,26 @@
cmpq $VG_TRC_INNER_COUNTERZERO, %rbp
jz counter_is_zero
- /* save %rax in %RIP and defer to sched */
+   /* save %rax in %RIP and defer to sched */
movq VG_(instr_ptr_offset), %rsi
movq 0(%rsp), %rdi
movq %rax, (%rdi, %rsi, 1)
movq %rbp, %rax
jmp run_innerloop_exit
+fast_lookup_failed:
+ /* %RIP is up to date here since dispatch_boring dominates */
+ addl $1, VG_(dispatch_ctr)
+ movq $VG_TRC_INNER_FASTMISS, %rax
+ jmp run_innerloop_exit
+
+counter_is_zero:
+ /* %RIP is up to date here since dispatch_boring dominates */
+ addl $1, VG_(dispatch_ctr)
+ movq $VG_TRC_INNER_COUNTERZERO, %rax
+ jmp run_innerloop_exit
+
+
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
diff --git a/coregrind/amd64/state.c b/coregrind/amd64/state.c
index 60ee569..3dc1805 100644
--- a/coregrind/amd64/state.c
+++ b/coregrind/amd64/state.c
@@ -98,25 +98,12 @@
/*--- Thread stuff ---*/
/*------------------------------------------------------------*/
-void VGA_(clear_thread)( ThreadArchState *arch )
-{
- arch->ldt = NULL;
- VG_(clear_TLS_for_thread)(arch->tls);
-}
-
void VGA_(cleanup_thread) ( ThreadArchState *arch )
{
- I_die_here;
-#if 0
- /* Deallocate its LDT, if it ever had one. */
- VG_(deallocate_LDT_for_thread)( arch->ldt );
- arch->ldt = NULL;
-
- /* Clear its TLS array. */
- VG_(clear_TLS_for_thread)( arch->tls );
-#endif
+ /* TODO: deallocate the thread's LDT / GDT ? */
}
+
void VGA_(setup_child) ( ThreadArchState *arch, ThreadArchState *parent_arch )
{
I_die_here;
diff --git a/coregrind/arm/core_arch.h b/coregrind/arm/core_arch.h
index bb23f72..2ad94ae 100644
--- a/coregrind/arm/core_arch.h
+++ b/coregrind/arm/core_arch.h
@@ -45,8 +45,6 @@
#define VG_ELF_MACHINE EM_ARM
#define VG_ELF_CLASS ELFCLASS32
-#define InsnSetArch InsnSetARM
-
#define VGA_WORD_SIZE 4
/* ---------------------------------------------------------------------
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 2cc0231..1c9ad5a 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -1129,7 +1129,7 @@
/* We know the initial ESP is pointing at argc/argv */
VG_(client_argc) = *(Int*)cl_esp;
- VG_(client_argv) = (Char**)(cl_esp + sizeof(Int));
+ VG_(client_argv) = (Char**)(cl_esp + sizeof(HWord));
return cl_esp;
}
diff --git a/coregrind/vg_proxylwp.c b/coregrind/vg_proxylwp.c
index dd236f3..fd7bbdb 100644
--- a/coregrind/vg_proxylwp.c
+++ b/coregrind/vg_proxylwp.c
@@ -868,10 +868,10 @@
if (block) {
Int lwp = proxy->lwp;
-
+VG_(printf)("OINK 503\n");
if(proxy->lwp != 0)
do_futex(&proxy->lwp, VKI_FUTEX_WAIT, lwp, NULL, NULL);
-
+VG_(printf)("OINK 504\n");
if (status)
*status = proxy->exitcode;
ret = True;
@@ -884,7 +884,7 @@
} else {
Int flags = __VKI_WCLONE;
Int res;
-
+VG_(printf)("OINK 506\n");
if (!block)
flags |= VKI_WNOHANG;
res = VG_(waitpid)(proxy->lwp, status, flags);
@@ -961,14 +961,14 @@
vg_assert(proxy->tid == tid);
if (proxy->terminating)
return; /* already going away */
-
+VG_(printf)("OINK 401\n");
proxy->terminating = True;
-
+VG_(printf)("OINK 402\n");
VG_(close)(proxy->topx);
proxy->topx = -1;
-
+VG_(printf)("OINK 403\n");
/* proxy thread will close proxy->frommain itself */
-
+VG_(printf)("OINK 404\n");
if (force && lwp != 0) {
/* wouldn't need to force it if it were already dead */
vg_assert(tst->status != VgTs_Empty);
@@ -979,19 +979,21 @@
status = -1;
res = False;
-
+VG_(printf)("OINK 405\n");
/* We need to wait for the PX_Exiting message before doing the
proxy_wait, because if we don't read the results pipe, the proxy
may be blocked writing to it, causing a deadlock with us as we
wait for it to exit. */
sys_wait_results(True, tid, PX_Exiting, True);
+VG_(printf)("OINK 405a\n");
res = proxy_wait(proxy, True, &status);
-
+VG_(printf)("OINK 406\n");
if ((!res || status != 0) && VG_(clo_verbosity) > 1)
VG_(printf)("proxy %d for tid %d exited status %d, res %d\n",
lwp, tid, status, res);
-
+VG_(printf)("OINK 407\n");
LWP_free(proxy);
+VG_(printf)("OINK 408\n");
tst->proxy = NULL;
}
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 89a17ec..1f130b2 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -208,6 +208,7 @@
case VEX_TRC_JMP_SYSCALL: return "SYSCALL";
case VEX_TRC_JMP_CLIENTREQ: return "CLIENTREQ";
case VEX_TRC_JMP_YIELD: return "YIELD";
+ case VEX_TRC_JMP_NODECODE: return "NODECODE";
case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO";
case VG_TRC_INNER_FASTMISS: return "FASTMISS";
case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL";
@@ -317,6 +318,10 @@
vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
vg_assert(a_vex + 2 * sz_vex == a_spill);
+ vg_assert(VG_(instr_ptr_offset) >= 0);
+ vg_assert(VG_(instr_ptr_offset) <= 10000); /* let's say */
+ vg_assert(sizeof VG_(instr_ptr_offset) == sizeof(HWord));
+
VGP_PUSHCC(VgpRun);
/* there should be no undealt-with signals */
@@ -1075,7 +1080,7 @@
VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d",
tid, done_this_time, (Int)trc );
- if (0 && trc != VG_TRC_INNER_FASTMISS)
+ if (1 && trc != VG_TRC_INNER_FASTMISS)
VG_(message)(Vg_DebugMsg, "thread %d: %llu bbs, event %s",
tid, VG_(bbs_done),
name_of_sched_event(trc) );
@@ -1312,23 +1317,24 @@
void cleanup_after_thread_exited ( ThreadId tid, Bool forcekill )
{
Segment *seg;
-
+VG_(printf)("OINK 40\n");
vg_assert(is_valid_or_empty_tid(tid));
vg_assert(VG_(threads)[tid].status == VgTs_Empty);
-
+VG_(printf)("OINK 41\n");
/* Its stack is now off-limits */
if (VG_(threads)[tid].stack_base) {
seg = VG_(find_segment)( VG_(threads)[tid].stack_base );
VG_TRACK( die_mem_stack, seg->addr, seg->len );
}
-
+VG_(printf)("OINK 42\n");
VGA_(cleanup_thread)( &VG_(threads)[tid].arch );
-
+VG_(printf)("OINK 43\n");
/* Not interested in the timeout anymore */
VG_(threads)[tid].awaken_at = 0xFFFFFFFF;
-
+VG_(printf)("OINK 44\n");
/* Delete proxy LWP */
VG_(proxy_delete)(tid, forcekill);
+VG_(printf)("OINK 45\n");
}
@@ -1404,21 +1410,27 @@
void VG_(nuke_all_threads_except) ( ThreadId me )
{
ThreadId tid;
+ VG_(printf)("HACK HACK HACK: nuke_all_threads_except\n"); return;
+
for (tid = 1; tid < VG_N_THREADS; tid++) {
if (tid == me
|| VG_(threads)[tid].status == VgTs_Empty)
continue;
- if (0)
+ if (1)
VG_(printf)(
"VG_(nuke_all_threads_except): nuking tid %d\n", tid);
+VG_(printf)("OINK 49\n");
VG_(proxy_delete)(tid, True);
+VG_(printf)("OINK 49a\n");
VG_(threads)[tid].status = VgTs_Empty;
VG_(threads)[tid].associated_mx = NULL;
VG_(threads)[tid].associated_cv = NULL;
VG_(threads)[tid].stack_base = (Addr)NULL;
VG_(threads)[tid].stack_size = 0;
cleanup_after_thread_exited( tid, True );
+VG_(printf)("OINK 4\n");
}
+VG_(printf)("OINK 5\n");
}
diff --git a/coregrind/x86/dispatch.S b/coregrind/x86/dispatch.S
index 19489cc..f91d117 100644
--- a/coregrind/x86/dispatch.S
+++ b/coregrind/x86/dispatch.S
@@ -89,20 +89,20 @@
/* Are we out of timeslice? If yes, defer to scheduler. */
subl $1, VG_(dispatch_ctr)
-
jz counter_is_zero
+
/* try a fast lookup in the translation cache */
movl %eax, %ebx
andl $VG_TT_FAST_MASK, %ebx
movl VG_(tt_fast)(,%ebx,4), %ecx
cmpl %eax, (%ecx)
jnz fast_lookup_failed
+ /* increment bb profile counter */
movl VG_(tt_fastN)(,%ebx,4), %edx
incl (%edx)
/* Found a match. Call tce[1], which is 8 bytes along, since
each tce element is a 64-bit int. */
-
addl $8, %ecx
call *%ecx