Merge "upstream integration"
diff --git a/Makefile.android b/Makefile.android
index efc6fc3..4139226 100644
--- a/Makefile.android
+++ b/Makefile.android
@@ -148,15 +148,18 @@
     $(eval LOCAL_AR := $(MY_AR)) \
     $(eval LOCAL_LDLIBS := $(MY_LDLIBS)) \
     $(eval LOCAL_MODULE_TAGS := debug) \
-    $(eval LOCAL_MODULE := $1)
+    $(eval LOCAL_MODULE := $1) \
 
 # Used with start-emulator-library
 end-emulator-library = \
-    $(eval include $(BUILD_HOST_STATIC_LIBRARY))
+    $(eval include $(BUILD_HOST_STATIC_LIBRARY)) \
+    $(eval EMULATOR_MODULE_TYPE := STATIC_LIBRARY)
 
 # A variant of start-emulator-library to start the definition of a host
 # program instead. Use with end-emulator-program
-start-emulator-program = $(call start-emulator-library,$1)
+start-emulator-program = \
+    $(call start-emulator-library,$1) \
+    $(eval EMULATOR_MODULE_TYPE := EXECUTABLES)
 
 # A varient of end-emulator-library for host programs instead
 end-emulator-program = \
diff --git a/Makefile.common b/Makefile.common
index 15d6801..222a1ae 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -348,14 +348,14 @@
     cutils.c \
     d3des.c \
     input.c \
+    iohandler.c \
     ioport.c \
     module.c \
     net-android.c \
     notify.c \
     osdep.c \
-    outputchannel.c \
     path.c \
-    qemu-char-android.c \
+    qemu-char.c \
     qemu-config.c \
     qemu-error.c \
     qemu-malloc.c \
@@ -592,9 +592,10 @@
 ###
 ###  This defines a function that can be used inside a module definition
 ###
-###  $(call gen-hx-header,<input>,<source-files>)
+###  $(call gen-hx-header,<input>,<output>,<source-files>)
 ###
 ###  Where: <input> is the input file, with a .hx suffix (e.g. foo.hx)
+###         <output> is the output file, with a .h or .def suffix
 ###         <source-files> is a list of source files that include the header
 ###
 
@@ -602,16 +603,17 @@
 gen-hx-header = $(eval $(call gen-hx-header-ev,$1,$2,$3))
 
 define gen-hx-header-ev
-intermediates := $$(call intermediates-dir-for,EXECUTABLES,$$(LOCAL_MODULE),true)
+intermediates := $$(call intermediates-dir-for,$$(EMULATOR_MODULE_TYPE),$$(LOCAL_MODULE),true)
 
-QEMU_HEADER_H := $$(intermediates)/$$(1:%.hx=%.h)
+QEMU_HEADER_H := $$(intermediates)/$$2
 $$(QEMU_HEADER_H): PRIVATE_PATH := $$(LOCAL_PATH)
 $$(QEMU_HEADER_H): PRIVATE_CUSTOM_TOOL = $$(PRIVATE_PATH)/hxtool -h < $$< > $$@
 $$(QEMU_HEADER_H): $$(LOCAL_PATH)/$$1 $$(LOCAL_PATH)/hxtool
 	$$(transform-generated-source)
 
 LOCAL_GENERATED_SOURCES += $$(QEMU_HEADER_H)
-_objects := $$(patsubst %,$$(intermediates)/%,$$(2:.c=.o))
+LOCAL_C_INCLUDES += $$(intermediates)
+_objects := $$(patsubst %,$$(intermediates)/%,$$(3:.c=.o))
 $$(_objects): $$(QEMU_HEADER_H)
 endef
 
diff --git a/Makefile.target b/Makefile.target
index fb32582..5f7a8b2 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -29,7 +29,10 @@
     -DNEED_CPU_H \
 
 TCG_TARGET := $(HOST_ARCH)
-ifeq ($(TCG_TARGET),x86)
+ifeq ($(HOST_ARCH),x86)
+  TCG_TARGET := i386
+endif
+ifeq ($(HOST_ARCH),x86_64)
   TCG_TARGET := i386
 endif
 
@@ -132,12 +135,11 @@
 LOCAL_SRC_FILES += $(HW_SOURCES:%=hw/%)
 
 LOCAL_SRC_FILES += \
-    exec.c \
     cpu-exec.c  \
+    exec.c \
     translate-all.c \
     trace.c \
     varint.c \
-    dcache.c \
     softmmu_outside_jit.c \
 
 ##############################################################################
@@ -155,6 +157,7 @@
     target-arm/iwmmxt_helper.c \
     target-arm/neon_helper.c \
     target-arm/helper.c \
+    target-arm/helper-android.c \
     target-arm/translate.c \
     target-arm/machine.c \
     hw/armv7m.c \
@@ -206,6 +209,18 @@
 
 LOCAL_SRC_FILES += $(MCHK_SOURCES:%=memcheck/%)
 
+LOCAL_SRC_FILES += \
+    cpus.c \
+    arch_init.c
+
+# What a mess, os-posix.c depends on the exact values of options
+# which are target specific.
+ifeq ($(HOST_OS),windows)
+    LOCAL_SRC_FILES += os-win32.c oslib-win32.c
+else
+    LOCAL_SRC_FILES += os-posix.c oslib-posix.c
+endif
+$(call gen-hx-header,qemu-options.hx,qemu-options.def,os-posix.c os-win32.c)
 
 $(call end-emulator-library)
 
@@ -248,6 +263,7 @@
     loader.c \
     monitor.c \
     qemu-timer.c \
+    qemu-timer-common.c \
     user-events-qemu.c \
     vl-android.c \
     android/console.c \
@@ -259,8 +275,8 @@
     android/protocol/core-commands-impl.c \
     android/protocol/core-commands-qemu.c \
 
-$(call gen-hx-header,qemu-monitor.hx,monitor.c)
-$(call gen-hx-header,qemu-options.hx,vl-android.c)
+$(call gen-hx-header,qemu-monitor.hx,qemu-monitor.h,monitor.c)
+$(call gen-hx-header,qemu-options.hx,qemu-options.def,vl-android.c qemu-options.h)
 
 ifeq ($(HOST_OS),darwin)
     FRAMEWORKS := OpenGL Cocoa QuickTime ApplicationServices Carbon IOKit
@@ -324,6 +340,7 @@
     loader.c \
     monitor.c \
     qemu-timer.c \
+    qemu-timer-common.c \
     user-events-qemu.c \
     vl-android.c \
     android/cmdline-option.c \
@@ -337,8 +354,8 @@
     android/protocol/ui-commands-qemu.c \
     android/
 
-$(call gen-hx-header,qemu-monitor.hx,monitor.c)
-$(call gen-hx-header,qemu-options.hx,vl-android.c)
+$(call gen-hx-header,qemu-monitor.hx,qemu-monitor.h,monitor.c)
+$(call gen-hx-header,qemu-options.hx,qemu-options.def,vl-android.c qemu-options.h)
 
 # The following files cannot be in static libraries because they contain
 # constructor functions that are otherwise stripped by the final linker
diff --git a/README b/README
index 44ddc25..dfd56f2 100644
--- a/README
+++ b/README
@@ -1,23 +1,3 @@
-This package contains the sources to the Android emulator program.
+Read the documentation in qemu-doc.html.
 
-This program emulates a virtual ARM board that can be used to run Android
-system images on a typical developer machine. To do so, you'll need additionnal
-files provided with the public Android Software Development Kit (SDK).
-
-To download them, go to http://code.google.com/android/
-
-Emulator-specific documentation is available at the following page:
-
-    http://code.google.com/android/reference/emulator.html
-
-Please read the INSTALL file to see how you can rebuild the emulator, or
-build a source distribution package tarball.
-
-Read the CHANGES.TXT file to see what important changes were added since
-the last release.
-
-Note: This program is distributed under the terms of the GNU General Public
-      License, which exact licensing conditions are available in the COPYING
-      file found within this package.
-
-- Android Emulator Team
+Fabrice Bellard.
diff --git a/acl.c b/acl.c
index 311dade..82c2704 100644
--- a/acl.c
+++ b/acl.c
@@ -24,7 +24,6 @@
 
 
 #include "qemu-common.h"
-#include "sysemu.h"
 #include "acl.h"
 
 #ifdef CONFIG_FNMATCH
diff --git a/android-configure.sh b/android-configure.sh
index 5a1748c..4939323 100755
--- a/android-configure.sh
+++ b/android-configure.sh
@@ -482,6 +482,12 @@
         ;;
 esac
 
+case "$TARGET_OS" in
+    linux-*|darwin-*)
+        echo "#define CONFIG_MADVISE  1" >> $config_h
+        ;;
+esac
+
 # the -nand-limits options can only work on non-windows systems
 if [ "$TARGET_OS" != "windows" ] ; then
     echo "#define CONFIG_NAND_LIMITS  1" >> $config_h
diff --git a/android-trace.h b/android-trace.h
new file mode 100644
index 0000000..2d0100d
--- /dev/null
+++ b/android-trace.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 2006-2007 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef TRACE_H
+#define TRACE_H
+
+#include <inttypes.h>
+#include "android-trace_common.h"
+
+extern uint64_t start_time, end_time;
+extern uint64_t elapsed_usecs;
+extern uint64_t Now();
+
+struct TranslationBlock;
+
+// The simulated time, in clock ticks, starting with one.
+extern uint64_t sim_time;
+extern uint64_t trace_static_bb_num(void);;
+
+// This variable == 1 if we are currently tracing, otherwise == 0.
+extern int tracing;
+extern int trace_all_addr;
+extern int trace_cache_miss;
+
+extern void start_tracing();
+extern void stop_tracing();
+extern void trace_init(const char *filename);
+extern void trace_bb_start(uint32_t bb_addr);
+extern void trace_add_insn(uint32_t insn, int is_thumb);
+extern void trace_bb_end();
+
+extern int get_insn_ticks_arm(uint32_t insn);
+extern int get_insn_ticks_thumb(uint32_t  insn);
+
+extern void trace_exception(uint32_t pc);
+extern void trace_bb_helper(uint64_t bb_num, struct TranslationBlock *tb);
+extern void trace_insn_helper();
+extern void sim_dcache_load(uint32_t addr);
+extern void sim_dcache_store(uint32_t addr, uint32_t val);
+extern void sim_dcache_swp(uint32_t addr);
+extern void trace_interpreted_method(uint32_t addr, int call_type);
+
+extern const char *trace_filename;
+extern int tracing;
+extern int trace_cache_miss;
+extern int trace_all_addr;
+
+// Trace process/thread operations
+extern void trace_switch(int pid);
+extern void trace_fork(int tgid, int pid);
+extern void trace_clone(int tgid, int pid);
+extern void trace_exit(int exitcode);
+extern void trace_name(char *name);
+
+#endif /* TRACE_H */
diff --git a/android-trace_common.h b/android-trace_common.h
new file mode 100644
index 0000000..fe84c1a
--- /dev/null
+++ b/android-trace_common.h
@@ -0,0 +1,142 @@
+/* Copyright (C) 2006-2007 The Android Open Source Project
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef TRACE_COMMON_H
+#define TRACE_COMMON_H
+
+#include <inttypes.h>
+
+// This should be the same as OPC_BUF_SIZE
+#define kMaxInsnPerBB 512
+
+#define kMaxNumBasicBlocks 1024
+
+#define kMaxNumAddrs 1024
+
+#define kInsnBufferSize 1024
+
+#define kCompressedSize 8192
+
+#define kMethodEnter		0
+#define kMethodExit		1
+#define kMethodException	2
+#define kNativeEnter		4
+#define kNativeExit		5
+#define kNativeException	6
+
+// The trace identifier string must be less than 16 characters.
+#define TRACE_IDENT "qemu_trace_file"
+#define TRACE_VERSION 2
+
+typedef struct TraceHeader {
+    char	ident[16];
+    int		version;
+    uint32_t	start_sec;
+    uint32_t	start_usec;
+    uint32_t	pdate;
+    uint32_t	ptime;
+    uint32_t	num_used_pids;		// number of distinct process ids used
+    int		first_unused_pid;	// -1 if all 32,768 pids are used (unlikely)
+    uint8_t	padding[4];		// next field is 8-byte aligned
+    uint64_t	num_static_bb;
+    uint64_t	num_static_insn;
+    uint64_t	num_dynamic_bb;
+    uint64_t	num_dynamic_insn;
+    uint64_t	elapsed_usecs;
+} TraceHeader;
+
+typedef struct BBRec {
+    uint64_t	start_time;	// time of first occurrence
+    uint64_t	bb_num;		// basic block number
+    uint32_t	repeat;		// repeat count (= 0 if just one occurrence)
+    uint64_t	time_diff;	// diff from previous time (if repeat > 0)
+} BBRec;
+
+// Define a trace record for addresses that miss in the cache
+typedef struct AddrRec {
+    uint64_t	time;
+    uint32_t	addr;
+} AddrRec;
+
+// Define a trace record for the start time of each instruction
+typedef struct InsnRec {
+    uint64_t	time_diff;	// time difference from last instruction
+    uint32_t	repeat;		// repeat count
+} InsnRec;
+
+// Define record types for process id changes.
+#define kPidEndOfFile		0
+#define kPidFork		1
+#define kPidClone		2
+#define kPidSwitch		3
+#define kPidExec		4
+#define kPidMmap		5
+#define kPidExit		6
+#define kPidKthreadName		7
+#define kPidSymbolAdd		8
+#define kPidSymbolRemove	9
+#define kPidMunmap		10
+#define kPidNoAction		11
+#define kPidName		12
+
+#define bswap16(x) ((((x) & 0xff) << 8) | (((x) >> 8) & 0xff))
+
+#define bswap32(x) ((((x) & 0xff) << 24) | (((x) & 0xff00) << 8) \
+        | (((x) >> 8) & 0xff00) | (((x) >> 24) & 0xff))
+
+#define bswap64(x) (((x) << 56) | (((x) & 0xff00) << 40) \
+        | (((x) & 0xff0000) << 24) | (((x) & 0xff000000ull) << 8) \
+        | (((x) >> 8) & 0xff000000ull) | (((x) >> 24) & 0xff0000) \
+        | (((x) >> 40) & 0xff00) | ((x) >> 56))
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define hostToLE16(x)	(x)
+#define hostToLE32(x)	(x)
+#define hostToLE64(x)	(x)
+#define LE16ToHost(x)	(x)
+#define LE32ToHost(x)	(x)
+#define LE64ToHost(x)	(x)
+#define convert16(x)
+#define convert32(x)
+#define convert64(x)
+#else
+#define hostToLE16(x)	bswap16(x)
+#define hostToLE32(x)	bswap32(x)
+#define hostToLE64(x)	bswap64(x)
+#define LE16ToHost(x)	bswap16(x)
+#define LE32ToHost(x)	bswap32(x)
+#define LE64ToHost(x)	bswap64(x)
+#define convert16(x) (x = bswap16(x))
+#define convert32(x) (x = bswap32(x))
+#define convert64(x) (x = bswap64(x))
+#endif
+
+/* XXX: we wrap 16-bit thumb instructions into 32-bit undefined ARM instructions
+ *      for simplicity reasons. See section 3.13.1 section of the ARM ARM for details
+ *      on the undefined instruction space we're using
+ */
+static __inline__ int   insn_is_thumb(uint32_t   insn)
+{
+    return ((insn & 0xfff000f0) == 0xf7f000f0);
+}
+
+static __inline__ uint32_t  insn_wrap_thumb(uint32_t  insn)
+{
+    return 0xf7f000f0 | ((insn & 0xfff0) << 4) | (insn & 0x000f);
+}
+
+static __inline__ uint32_t  insn_unwrap_thumb(uint32_t  insn)
+{
+    return ((insn >> 4) & 0xfff0) | (insn & 0x000f);
+}
+
+#endif /* TRACE_COMMON_H */
diff --git a/android/config/darwin-x86/config-host.h b/android/config/darwin-x86/config-host.h
index 93efda6..0395918 100644
--- a/android/config/darwin-x86/config-host.h
+++ b/android/config/darwin-x86/config-host.h
@@ -16,3 +16,5 @@
 #define O_LARGEFILE      0
 #define MAP_ANONYMOUS    MAP_ANON
 #define CONFIG_ANDROID       1
+#define CONFIG_POSIX 1
+#define CONFIG_MADVISE 1
diff --git a/android/config/freebsd-x86/config-host.h b/android/config/freebsd-x86/config-host.h
index 5518924..2e44a83 100644
--- a/android/config/freebsd-x86/config-host.h
+++ b/android/config/freebsd-x86/config-host.h
@@ -14,3 +14,5 @@
 #define CONFIG_UNAME_RELEASE ""
 #define CONFIG_SKINS 1
 #define CONFIG_ANDROID 1
+#define CONFIG_POSIX 1
+#define CONFIG_MADVISE 1
diff --git a/android/config/linux-ppc/config-host.h b/android/config/linux-ppc/config-host.h
index 5f7a94f..9faf170 100644
--- a/android/config/linux-ppc/config-host.h
+++ b/android/config/linux-ppc/config-host.h
@@ -15,4 +15,6 @@
 #define HOST_WORDS_BIGENDIAN 1
 #define CONFIG_IOVEC 1
 #define CONFIG_LINUX   1
+#define CONFIG_POSIX 1
 #define CONFIG_ANDROID       1
+#define CONFIG_MADVISE 1
diff --git a/android/config/linux-x86/config-host.h b/android/config/linux-x86/config-host.h
index 8cc2487..70a0eed 100644
--- a/android/config/linux-x86/config-host.h
+++ b/android/config/linux-x86/config-host.h
@@ -15,4 +15,6 @@
 #define CONFIG_KVM_GS_RESTORE   1
 #define CONFIG_IOVEC 1
 #define CONFIG_LINUX   1
+#define CONFIG_POSIX 1
 #define CONFIG_ANDROID       1
+#define CONFIG_MADVISE 1
diff --git a/android/console.c b/android/console.c
index daae2d7..f4273f4 100644
--- a/android/console.c
+++ b/android/console.c
@@ -2058,33 +2058,33 @@
 /********************************************************************************************/
 
 static int
-control_write_out_cb(void* opaque, const char* fmt, va_list ap)
+control_write_out_cb(void* opaque, const char* str, int strsize)
 {
     ControlClient client = opaque;
-    int ret = control_vwrite(client, fmt, ap);
-    return ret;
+    control_control_write(client, str, strsize);
+    return strsize;
 }
 
 static int
-control_write_err_cb(void* opaque, const char* fmt, va_list ap)
+control_write_err_cb(void* opaque, const char* str, int strsize)
 {
     int ret = 0;
     ControlClient client = opaque;
     ret += control_write(client, "KO: ");
-    ret += control_vwrite(client, fmt, ap);
-    return ret;
+    control_control_write(client, str, strsize);
+    return ret + strsize;
 }
 
 static int
 do_snapshot_list( ControlClient  client, char*  args )
 {
-    int ret;
-    OutputChannel *out = output_channel_alloc(client, control_write_out_cb);
-    OutputChannel *err = output_channel_alloc(client, control_write_err_cb);
-    do_info_snapshots_oc(out, err);
-    ret = output_channel_written(err);
-    output_channel_free(out);
-    output_channel_free(err);
+    int64_t ret;
+    Monitor *out = monitor_fake_new(client, control_write_out_cb);
+    Monitor *err = monitor_fake_new(client, control_write_err_cb);
+    do_info_snapshots(out, err);
+    ret = monitor_fake_get_bytes(err);
+    monitor_fake_free(err);
+    monitor_fake_free(out);
 
     return ret > 0;
 }
@@ -2092,17 +2092,17 @@
 static int
 do_snapshot_save( ControlClient  client, char*  args )
 {
-    int ret;
+    int64_t ret;
 
     if (args == NULL) {
         control_write(client, "KO: argument missing, try 'avd snapshot save <name>'\r\n");
         return -1;
     }
 
-    OutputChannel *err = output_channel_alloc(client, control_write_err_cb);
-    do_savevm_oc(err, args);
-    ret = output_channel_written(err);
-    output_channel_free(err);
+    Monitor *err = monitor_fake_new(client, control_write_err_cb);
+    do_savevm(err, args);
+    ret = monitor_fake_get_bytes(err);
+    monitor_fake_free(err);
 
     return ret > 0; // no output on error channel indicates success
 }
@@ -2110,17 +2110,17 @@
 static int
 do_snapshot_load( ControlClient  client, char*  args )
 {
-    int ret;
+    int64_t ret;
 
     if (args == NULL) {
         control_write(client, "KO: argument missing, try 'avd snapshot load <name>'\r\n");
         return -1;
     }
 
-    OutputChannel *err = output_channel_alloc(client, control_write_err_cb);
-    do_loadvm_oc(err, args);
-    ret = output_channel_written(err);
-    output_channel_free(err);
+    Monitor *err = monitor_fake_new(client, control_write_err_cb);
+    do_loadvm(err, args);
+    ret = monitor_fake_get_bytes(err);
+    monitor_fake_free(err);
 
     return ret > 0;
 }
@@ -2128,17 +2128,17 @@
 static int
 do_snapshot_del( ControlClient  client, char*  args )
 {
-    int ret;
+    int64_t ret;
 
     if (args == NULL) {
         control_write(client, "KO: argument missing, try 'avd snapshot del <name>'\r\n");
         return -1;
     }
 
-    OutputChannel *err = output_channel_alloc(client, control_write_err_cb);
-    do_delvm_oc(err, args);
-    ret = output_channel_written(err);
-    output_channel_free(err);
+    Monitor *err = monitor_fake_new(client, control_write_err_cb);
+    do_delvm(err, args);
+    ret = monitor_fake_get_bytes(err);
+    monitor_fake_free(err);
 
     return ret > 0;
 }
diff --git a/android/hw-sensors.c b/android/hw-sensors.c
index c3ab12f..69447a2 100644
--- a/android/hw-sensors.c
+++ b/android/hw-sensors.c
@@ -212,7 +212,7 @@
     cl->sensors     = sensors;
     cl->enabledMask = 0;
     cl->delay_ms    = 800;
-    cl->timer       = qemu_new_timer(vm_clock, _hwSensorClient_tick, cl);
+    cl->timer       = qemu_new_timer_ns(vm_clock, _hwSensorClient_tick, cl);
 
     cl->next         = sensors->clients;
     sensors->clients = cl;
@@ -316,7 +316,7 @@
         _hwSensorClient_send(cl, (uint8_t*) buffer, strlen(buffer));
     }
 
-    now_ns = qemu_get_clock(vm_clock);
+    now_ns = qemu_get_clock_ns(vm_clock);
 
     snprintf(buffer, sizeof buffer, "sync:%lld", now_ns/1000);
     _hwSensorClient_send(cl, (uint8_t*)buffer, strlen(buffer));
diff --git a/android/looper-qemu.c b/android/looper-qemu.c
index 5526f5b..714b48d 100644
--- a/android/looper-qemu.c
+++ b/android/looper-qemu.c
@@ -36,7 +36,7 @@
     if (timeout_ms == DURATION_INFINITE)
         qemu_del_timer(tt);
     else
-        qemu_mod_timer(tt, qemu_get_clock_ns(host_clock) + timeout_ms*1000000);
+        qemu_mod_timer(tt, qemu_get_clock_ms(host_clock) + timeout_ms);
 }
 
 static void
@@ -85,7 +85,7 @@
                    void*          opaque)
 {
     timer->clazz = (LoopTimerClass*) &qlooptimer_class;
-    timer->impl  = qemu_new_timer(host_clock, callback, opaque);
+    timer->impl  = qemu_new_timer_ms(host_clock, callback, opaque);
 }
 
 /**********************************************************************
@@ -370,7 +370,7 @@
 static Duration
 qlooper_now(Looper* ll)
 {
-    return qemu_get_clock_ns(host_clock)/1000000;
+    return qemu_get_clock_ms(host_clock);
 }
 
 extern void qemu_system_shutdown_request(void);
diff --git a/android/protocol/core-commands-impl.c b/android/protocol/core-commands-impl.c
index 4366529..bf6da15 100644
--- a/android/protocol/core-commands-impl.c
+++ b/android/protocol/core-commands-impl.c
@@ -19,7 +19,7 @@
 #include "android/android.h"
 #include "android/globals.h"
 #include "telephony/modem_driver.h"
-#include "trace.h"
+#include "android-trace.h"
 #include "android/looper.h"
 #include "android/async-utils.h"
 #include "android/sync-utils.h"
diff --git a/android/protocol/core-commands-qemu.c b/android/protocol/core-commands-qemu.c
index 03fef64..76b3abb 100644
--- a/android/protocol/core-commands-qemu.c
+++ b/android/protocol/core-commands-qemu.c
@@ -19,7 +19,7 @@
 #include "android/globals.h"
 #include "android/hw-sensors.h"
 #include "telephony/modem_driver.h"
-#include "trace.h"
+#include "android-trace.h"
 #include "audio/audio.h"
 #include "android/protocol/core-commands-api.h"
 
diff --git a/arch_init.c b/arch_init.c
new file mode 100644
index 0000000..b966d8a
--- /dev/null
+++ b/arch_init.c
@@ -0,0 +1,732 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#ifndef _WIN32
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include "config.h"
+#include "monitor.h"
+#include "sysemu.h"
+#include "arch_init.h"
+#include "audio/audio.h"
+#include "hw/irq.h"
+#include "hw/pci.h"
+#include "hw/audiodev.h"
+#include "kvm.h"
+#include "migration.h"
+#include "net.h"
+#include "gdbstub.h"
+#include "hw/smbios.h"
+
+#ifdef TARGET_SPARC
+int graphic_width = 1024;
+int graphic_height = 768;
+int graphic_depth = 8;
+#else
+int graphic_width = 800;
+int graphic_height = 600;
+int graphic_depth = 15;
+#endif
+
+const char arch_config_name[] = CONFIG_QEMU_SHAREDIR "/target-" TARGET_ARCH ".conf";
+
+#if defined(TARGET_ALPHA)
+#define QEMU_ARCH QEMU_ARCH_ALPHA
+#elif defined(TARGET_ARM)
+#define QEMU_ARCH QEMU_ARCH_ARM
+#elif defined(TARGET_CRIS)
+#define QEMU_ARCH QEMU_ARCH_CRIS
+#elif defined(TARGET_I386)
+#define QEMU_ARCH QEMU_ARCH_I386
+#elif defined(TARGET_M68K)
+#define QEMU_ARCH QEMU_ARCH_M68K
+#elif defined(TARGET_LM32)
+#define QEMU_ARCH QEMU_ARCH_LM32
+#elif defined(TARGET_MICROBLAZE)
+#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
+#elif defined(TARGET_MIPS)
+#define QEMU_ARCH QEMU_ARCH_MIPS
+#elif defined(TARGET_PPC)
+#define QEMU_ARCH QEMU_ARCH_PPC
+#elif defined(TARGET_S390X)
+#define QEMU_ARCH QEMU_ARCH_S390X
+#elif defined(TARGET_SH4)
+#define QEMU_ARCH QEMU_ARCH_SH4
+#elif defined(TARGET_SPARC)
+#define QEMU_ARCH QEMU_ARCH_SPARC
+#endif
+
+const uint32_t arch_type = QEMU_ARCH;
+
+#if 1
+/***********************************************************/
+/* ram save/restore */
+
+#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_COMPRESS 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE     0x08
+#define RAM_SAVE_FLAG_EOS      0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+
+static int is_dup_page(uint8_t *page, uint8_t ch)
+{
+    uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
+    uint32_t *array = (uint32_t *)page;
+    int i;
+
+    for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
+        if (array[i] != val) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+static RAMBlock *last_block;
+static ram_addr_t last_offset;
+
+static int ram_save_block(QEMUFile *f)
+{
+    RAMBlock *block = last_block;
+    ram_addr_t offset = last_offset;
+    ram_addr_t current_addr;
+    int bytes_sent = 0;
+
+    if (!block)
+        block = QLIST_FIRST(&ram_list.blocks);
+
+    current_addr = block->offset + offset;
+
+    do {
+        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+            uint8_t *p;
+            int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
+
+            cpu_physical_memory_reset_dirty(current_addr,
+                                            current_addr + TARGET_PAGE_SIZE,
+                                            MIGRATION_DIRTY_FLAG);
+
+            p = block->host + offset;
+
+            if (is_dup_page(p, *p)) {
+                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
+                if (!cont) {
+                    qemu_put_byte(f, strlen(block->idstr));
+                    qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                    strlen(block->idstr));
+                }
+                qemu_put_byte(f, *p);
+                bytes_sent = 1;
+            } else {
+                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
+                if (!cont) {
+                    qemu_put_byte(f, strlen(block->idstr));
+                    qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                    strlen(block->idstr));
+                }
+                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                bytes_sent = TARGET_PAGE_SIZE;
+            }
+
+            break;
+        }
+
+        offset += TARGET_PAGE_SIZE;
+        if (offset >= block->length) {
+            offset = 0;
+            block = QLIST_NEXT(block, next);
+            if (!block)
+                block = QLIST_FIRST(&ram_list.blocks);
+        }
+
+        current_addr = block->offset + offset;
+
+    } while (current_addr != last_block->offset + last_offset);
+
+    last_block = block;
+    last_offset = offset;
+
+    return bytes_sent;
+}
+
+static uint64_t bytes_transferred;
+
+static ram_addr_t ram_save_remaining(void)
+{
+    RAMBlock *block;
+    ram_addr_t count = 0;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        ram_addr_t addr;
+        for (addr = block->offset; addr < block->offset + block->length;
+             addr += TARGET_PAGE_SIZE) {
+            if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
+                count++;
+            }
+        }
+    }
+
+    return count;
+}
+
+uint64_t ram_bytes_remaining(void)
+{
+    return ram_save_remaining() * TARGET_PAGE_SIZE;
+}
+
+uint64_t ram_bytes_transferred(void)
+{
+    return bytes_transferred;
+}
+
+uint64_t ram_bytes_total(void)
+{
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next)
+        total += block->length;
+
+    return total;
+}
+
+static int block_compar(const void *a, const void *b)
+{
+    RAMBlock * const *ablock = a;
+    RAMBlock * const *bblock = b;
+    if ((*ablock)->offset < (*bblock)->offset) {
+        return -1;
+    } else if ((*ablock)->offset > (*bblock)->offset) {
+        return 1;
+    }
+    return 0;
+}
+
+static void sort_ram_list(void)
+{
+    RAMBlock *block, *nblock, **blocks;
+    int n;
+    n = 0;
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        ++n;
+    }
+    blocks = qemu_malloc(n * sizeof *blocks);
+    n = 0;
+    QLIST_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
+        blocks[n++] = block;
+        QLIST_REMOVE(block, next);
+    }
+    qsort(blocks, n, sizeof *blocks, block_compar);
+    while (--n >= 0) {
+        QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
+    }
+    qemu_free(blocks);
+}
+
+int ram_save_live(QEMUFile *f, int stage, void *opaque)
+{
+    ram_addr_t addr;
+    uint64_t bytes_transferred_last;
+    double bwidth = 0;
+    uint64_t expected_time = 0;
+
+    if (stage < 0) {
+        cpu_physical_memory_set_dirty_tracking(0);
+        return 0;
+    }
+
+    if (cpu_physical_sync_dirty_bitmap(0, TARGET_PHYS_ADDR_MAX) != 0) {
+        qemu_file_set_error(f);
+        return 0;
+    }
+
+    if (stage == 1) {
+        RAMBlock *block;
+        bytes_transferred = 0;
+        last_block = NULL;
+        last_offset = 0;
+        sort_ram_list();
+
+        /* Make sure all dirty bits are set */
+        QLIST_FOREACH(block, &ram_list.blocks, next) {
+            for (addr = block->offset; addr < block->offset + block->length;
+                 addr += TARGET_PAGE_SIZE) {
+                if (!cpu_physical_memory_get_dirty(addr,
+                                                   MIGRATION_DIRTY_FLAG)) {
+                    cpu_physical_memory_set_dirty(addr);
+                }
+            }
+        }
+
+        /* Enable dirty memory tracking */
+        cpu_physical_memory_set_dirty_tracking(1);
+
+        qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+
+        QLIST_FOREACH(block, &ram_list.blocks, next) {
+            qemu_put_byte(f, strlen(block->idstr));
+            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+            qemu_put_be64(f, block->length);
+        }
+    }
+
+    bytes_transferred_last = bytes_transferred;
+    bwidth = qemu_get_clock_ns(rt_clock);
+
+    while (!qemu_file_rate_limit(f)) {
+        int bytes_sent;
+
+        bytes_sent = ram_save_block(f);
+        bytes_transferred += bytes_sent;
+        if (bytes_sent == 0) { /* no more blocks */
+            break;
+        }
+    }
+
+    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
+    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
+
+    /* if we haven't transferred anything this round, force expected_time to a
+     * a very high value, but without crashing */
+    if (bwidth == 0) {
+        bwidth = 0.000001;
+    }
+
+    /* try transferring iterative blocks of memory */
+    if (stage == 3) {
+        int bytes_sent;
+
+        /* flush all remaining blocks regardless of rate limiting */
+        while ((bytes_sent = ram_save_block(f)) != 0) {
+            bytes_transferred += bytes_sent;
+        }
+        cpu_physical_memory_set_dirty_tracking(0);
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+
+    return (stage == 2) && (expected_time <= migrate_max_downtime());
+}
+
+static inline void *host_from_stream_offset(QEMUFile *f,
+                                            ram_addr_t offset,
+                                            int flags)
+{
+    static RAMBlock *block = NULL;
+    char id[256];
+    uint8_t len;
+
+    if (flags & RAM_SAVE_FLAG_CONTINUE) {
+        if (!block) {
+            fprintf(stderr, "Ack, bad migration stream!\n");
+            return NULL;
+        }
+
+        return block->host + offset;
+    }
+
+    len = qemu_get_byte(f);
+    qemu_get_buffer(f, (uint8_t *)id, len);
+    id[len] = 0;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (!strncmp(id, block->idstr, sizeof(id)))
+            return block->host + offset;
+    }
+
+    fprintf(stderr, "Can't find block %s!\n", id);
+    return NULL;
+}
+
+int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+    ram_addr_t addr;
+    int flags;
+
+    if (version_id < 3 || version_id > 4) {
+        return -EINVAL;
+    }
+
+    do {
+        addr = qemu_get_be64(f);
+
+        flags = addr & ~TARGET_PAGE_MASK;
+        addr &= TARGET_PAGE_MASK;
+
+        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
+            if (version_id == 3) {
+                if (addr != ram_bytes_total()) {
+                    return -EINVAL;
+                }
+            } else {
+                /* Synchronize RAM block list */
+                char id[256];
+                ram_addr_t length;
+                ram_addr_t total_ram_bytes = addr;
+
+                while (total_ram_bytes) {
+                    RAMBlock *block;
+                    uint8_t len;
+
+                    len = qemu_get_byte(f);
+                    qemu_get_buffer(f, (uint8_t *)id, len);
+                    id[len] = 0;
+                    length = qemu_get_be64(f);
+
+                    QLIST_FOREACH(block, &ram_list.blocks, next) {
+                        if (!strncmp(id, block->idstr, sizeof(id))) {
+                            if (block->length != length)
+                                return -EINVAL;
+                            break;
+                        }
+                    }
+
+                    if (!block) {
+                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
+                                "accept migration\n", id);
+                        return -EINVAL;
+                    }
+
+                    total_ram_bytes -= length;
+                }
+            }
+        }
+
+        if (flags & RAM_SAVE_FLAG_COMPRESS) {
+            void *host;
+            uint8_t ch;
+
+            if (version_id == 3)
+                host = qemu_get_ram_ptr(addr);
+            else
+                host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            ch = qemu_get_byte(f);
+            memset(host, ch, TARGET_PAGE_SIZE);
+#ifndef _WIN32
+            if (ch == 0 &&
+                (!kvm_enabled() || kvm_has_sync_mmu())) {
+                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
+            }
+#endif
+        } else if (flags & RAM_SAVE_FLAG_PAGE) {
+            void *host;
+
+            if (version_id == 3)
+                host = qemu_get_ram_ptr(addr);
+            else
+                host = host_from_stream_offset(f, addr, flags);
+
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        }
+        if (qemu_file_has_error(f)) {
+            return -EIO;
+        }
+    } while (!(flags & RAM_SAVE_FLAG_EOS));
+
+    return 0;
+}
+#endif
+
+void qemu_service_io(void)
+{
+    qemu_notify_event();
+}
+
+#ifdef HAS_AUDIO
+struct soundhw {
+    const char *name;
+    const char *descr;
+    int enabled;
+    int isa;
+    union {
+        int (*init_isa) (qemu_irq *pic);
+        int (*init_pci) (PCIBus *bus);
+    } init;
+};
+
+static struct soundhw soundhw[] = {
+#ifdef HAS_AUDIO_CHOICE
+#if defined(TARGET_I386) || defined(TARGET_MIPS)
+    {
+        "pcspk",
+        "PC speaker",
+        0,
+        1,
+        { .init_isa = pcspk_audio_init }
+    },
+#endif
+
+#ifdef CONFIG_SB16
+    {
+        "sb16",
+        "Creative Sound Blaster 16",
+        0,
+        1,
+        { .init_isa = SB16_init }
+    },
+#endif
+
+#ifdef CONFIG_CS4231A
+    {
+        "cs4231a",
+        "CS4231A",
+        0,
+        1,
+        { .init_isa = cs4231a_init }
+    },
+#endif
+
+#ifdef CONFIG_ADLIB
+    {
+        "adlib",
+#ifdef HAS_YMF262
+        "Yamaha YMF262 (OPL3)",
+#else
+        "Yamaha YM3812 (OPL2)",
+#endif
+        0,
+        1,
+        { .init_isa = Adlib_init }
+    },
+#endif
+
+#ifdef CONFIG_GUS
+    {
+        "gus",
+        "Gravis Ultrasound GF1",
+        0,
+        1,
+        { .init_isa = GUS_init }
+    },
+#endif
+
+#ifdef CONFIG_AC97
+    {
+        "ac97",
+        "Intel 82801AA AC97 Audio",
+        0,
+        0,
+        { .init_pci = ac97_init }
+    },
+#endif
+
+#ifdef CONFIG_ES1370
+    {
+        "es1370",
+        "ENSONIQ AudioPCI ES1370",
+        0,
+        0,
+        { .init_pci = es1370_init }
+    },
+#endif
+
+#ifdef CONFIG_HDA
+    {
+        "hda",
+        "Intel HD Audio",
+        0,
+        0,
+        { .init_pci = intel_hda_and_codec_init }
+    },
+#endif
+
+#endif /* HAS_AUDIO_CHOICE */
+
+    { NULL, NULL, 0, 0, { NULL } }
+};
+
+void select_soundhw(const char *optarg)
+{
+    struct soundhw *c;
+
+    if (*optarg == '?') {
+    show_valid_cards:
+
+        printf("Valid sound card names (comma separated):\n");
+        for (c = soundhw; c->name; ++c) {
+            printf ("%-11s %s\n", c->name, c->descr);
+        }
+        printf("\n-soundhw all will enable all of the above\n");
+        exit(*optarg != '?');
+    }
+    else {
+        size_t l;
+        const char *p;
+        char *e;
+        int bad_card = 0;
+
+        if (!strcmp(optarg, "all")) {
+            for (c = soundhw; c->name; ++c) {
+                c->enabled = 1;
+            }
+            return;
+        }
+
+        p = optarg;
+        while (*p) {
+            e = strchr(p, ',');
+            l = !e ? strlen(p) : (size_t) (e - p);
+
+            for (c = soundhw; c->name; ++c) {
+                if (!strncmp(c->name, p, l) && !c->name[l]) {
+                    c->enabled = 1;
+                    break;
+                }
+            }
+
+            if (!c->name) {
+                if (l > 80) {
+                    fprintf(stderr,
+                            "Unknown sound card name (too big to show)\n");
+                }
+                else {
+                    fprintf(stderr, "Unknown sound card name `%.*s'\n",
+                            (int) l, p);
+                }
+                bad_card = 1;
+            }
+            p += l + (e != NULL);
+        }
+
+        if (bad_card) {
+            goto show_valid_cards;
+        }
+    }
+}
+
+void audio_init(qemu_irq *isa_pic, PCIBus *pci_bus)
+{
+    struct soundhw *c;
+
+    for (c = soundhw; c->name; ++c) {
+        if (c->enabled) {
+            if (c->isa) {
+                if (isa_pic) {
+                    c->init.init_isa(isa_pic);
+                }
+            } else {
+                if (pci_bus) {
+                    c->init.init_pci(pci_bus);
+                }
+            }
+        }
+    }
+}
+#else
+void select_soundhw(const char *optarg)
+{
+}
+void audio_init(qemu_irq *isa_pic, PCIBus *pci_bus)
+{
+}
+#endif
+
+int qemu_uuid_parse(const char *str, uint8_t *uuid)
+{
+    int ret;
+
+    if (strlen(str) != 36) {
+        return -1;
+    }
+
+    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
+                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
+                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
+                 &uuid[15]);
+
+    if (ret != 16) {
+        return -1;
+    }
+#ifdef TARGET_I386
+    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
+#endif
+    return 0;
+}
+
+#if 0
+void do_acpitable_option(const char *optarg)
+{
+#ifdef TARGET_I386
+    if (acpi_table_add(optarg) < 0) {
+        fprintf(stderr, "Wrong acpi table provided\n");
+        exit(1);
+    }
+#endif
+}
+#endif
+
+void do_smbios_option(const char *optarg)
+{
+#ifdef TARGET_I386
+    if (smbios_entry_add(optarg) < 0) {
+        fprintf(stderr, "Wrong smbios provided\n");
+        exit(1);
+    }
+#endif
+}
+
+void cpudef_init(void)
+{
+#if defined(cpudef_setup)
+    cpudef_setup(); /* parse cpu definitions in target config file */
+#endif
+}
+
+int audio_available(void)
+{
+#ifdef HAS_AUDIO
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int kvm_available(void)
+{
+#ifdef CONFIG_KVM
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int xen_available(void)
+{
+#ifdef CONFIG_XEN
+    return 1;
+#else
+    return 0;
+#endif
+}
diff --git a/arch_init.h b/arch_init.h
new file mode 100644
index 0000000..86ebc14
--- /dev/null
+++ b/arch_init.h
@@ -0,0 +1,33 @@
+#ifndef QEMU_ARCH_INIT_H
+#define QEMU_ARCH_INIT_H
+
+extern const char arch_config_name[];
+
+enum {
+    QEMU_ARCH_ALL = -1,
+    QEMU_ARCH_ALPHA = 1,
+    QEMU_ARCH_ARM = 2,
+    QEMU_ARCH_CRIS = 4,
+    QEMU_ARCH_I386 = 8,
+    QEMU_ARCH_M68K = 16,
+    QEMU_ARCH_LM32 = 32,
+    QEMU_ARCH_MICROBLAZE = 64,
+    QEMU_ARCH_MIPS = 128,
+    QEMU_ARCH_PPC = 256,
+    QEMU_ARCH_S390X = 512,
+    QEMU_ARCH_SH4 = 1024,
+    QEMU_ARCH_SPARC = 2048,
+};
+
+extern const uint32_t arch_type;
+
+void select_soundhw(const char *optarg);
+void do_acpitable_option(const char *optarg);
+void do_smbios_option(const char *optarg);
+void cpudef_init(void);
+int audio_available(void);
+void audio_init(qemu_irq *isa_pic, PCIBus *pci_bus);
+int kvm_available(void);
+int xen_available(void);
+
+#endif
diff --git a/arm-dis.c b/arm-dis.c
index fe7ac99..3ece02c 100644
--- a/arm-dis.c
+++ b/arm-dis.c
@@ -1587,7 +1587,7 @@
 }
 
 static void
-arm_decode_shift (long given, fprintf_ftype func, void *stream,
+arm_decode_shift (long given, fprintf_function func, void *stream,
 		  int print_shift)
 {
   func (stream, "%s", arm_regnames[given & 0xf]);
@@ -1633,7 +1633,7 @@
 {
   const struct opcode32 *insn;
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
   unsigned long mask;
   unsigned long value;
   int cond;
@@ -2127,7 +2127,7 @@
 print_arm_address (bfd_vma pc, struct disassemble_info *info, long given)
 {
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
 
   if (((given & 0x000f0000) == 0x000f0000)
       && ((given & 0x02000000) == 0))
@@ -2222,7 +2222,7 @@
 {
   const struct opcode32 *insn;
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
 
   if (thumb)
     {
@@ -2676,7 +2676,7 @@
 {
   const struct opcode32 *insn;
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
 
   if (print_insn_coprocessor (pc, info, given, false))
     return;
@@ -3036,7 +3036,7 @@
 {
   const struct opcode16 *insn;
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
 
   for (insn = thumb_opcodes; insn->assembler; insn++)
     if ((given & insn->mask) == insn->value)
@@ -3312,7 +3312,7 @@
 {
   const struct opcode32 *insn;
   void *stream = info->stream;
-  fprintf_ftype func = info->fprintf_func;
+  fprintf_function func = info->fprintf_func;
 
   if (print_insn_coprocessor (pc, info, given, true))
     return;
@@ -4101,6 +4101,30 @@
        addresses, since the addend is not currently pc-relative.  */
     pc = 0;
 
+  /* We include the hexdump of the instruction. The format here
+     matches that used by objdump and the ARM ARM (in particular,
+     32 bit Thumb instructions are displayed as pairs of halfwords,
+     not as a single word.)  */
+  if (is_thumb)
+    {
+      if (size == 2)
+	{
+	  info->fprintf_func(info->stream, "%04lx       ",
+			     ((unsigned long)given) & 0xffff);
+	}
+      else
+	{
+	  info->fprintf_func(info->stream, "%04lx %04lx  ",
+			     (((unsigned long)given) >> 16) & 0xffff,
+			     ((unsigned long)given) & 0xffff);
+	}
+    }
+  else
+    {
+      info->fprintf_func(info->stream, "%08lx      ",
+			 ((unsigned long)given) & 0xffffffff);
+    }
+
   printer (pc, info, given);
 
   if (is_thumb)
diff --git a/arm-semi.c b/arm-semi.c
index de652c5..421d41a 100644
--- a/arm-semi.c
+++ b/arm-semi.c
@@ -33,7 +33,6 @@
 #define ARM_ANGEL_HEAP_SIZE (128 * 1024 * 1024)
 #else
 #include "qemu-common.h"
-#include "sysemu.h"
 #include "gdbstub.h"
 #endif
 
@@ -373,45 +372,64 @@
 #ifdef CONFIG_USER_ONLY
         /* Build a commandline from the original argv.  */
         {
-            char **arg = ts->info->host_argv;
-            int len = ARG(1);
-            /* lock the buffer on the ARM side */
-            char *cmdline_buffer = (char*)lock_user(VERIFY_WRITE, ARG(0), len, 0);
+            char *arm_cmdline_buffer;
+            const char *host_cmdline_buffer;
 
-            if (!cmdline_buffer)
-                /* FIXME - should this error code be -TARGET_EFAULT ? */
-                return (uint32_t)-1;
+            unsigned int i;
+            unsigned int arm_cmdline_len = ARG(1);
+            unsigned int host_cmdline_len =
+                ts->info->arg_end-ts->info->arg_start;
 
-            s = cmdline_buffer;
-            while (*arg && len > 2) {
-                int n = strlen(*arg);
-
-                if (s != cmdline_buffer) {
-                    *(s++) = ' ';
-                    len--;
-                }
-                if (n >= len)
-                    n = len - 1;
-                memcpy(s, *arg, n);
-                s += n;
-                len -= n;
-                arg++;
+            if (!arm_cmdline_len || host_cmdline_len > arm_cmdline_len) {
+                return -1; /* not enough space to store command line */
             }
-            /* Null terminate the string.  */
-            *s = 0;
-            len = s - cmdline_buffer;
 
-            /* Unlock the buffer on the ARM side.  */
-            unlock_user(cmdline_buffer, ARG(0), len);
+            if (!host_cmdline_len) {
+                /* We special-case the "empty command line" case (argc==0).
+                   Just provide the terminating 0. */
+                arm_cmdline_buffer = lock_user(VERIFY_WRITE, ARG(0), 1, 0);
+                arm_cmdline_buffer[0] = 0;
+                unlock_user(arm_cmdline_buffer, ARG(0), 1);
 
-            /* Adjust the commandline length argument.  */
-            SET_ARG(1, len);
+                /* Adjust the commandline length argument. */
+                SET_ARG(1, 0);
+                return 0;
+            }
 
-            /* Return success if commandline fit into buffer.  */
-            return *arg ? -1 : 0;
+            /* lock the buffers on the ARM side */
+            arm_cmdline_buffer =
+                lock_user(VERIFY_WRITE, ARG(0), host_cmdline_len, 0);
+            host_cmdline_buffer =
+                lock_user(VERIFY_READ, ts->info->arg_start,
+                                       host_cmdline_len, 1);
+
+            if (arm_cmdline_buffer && host_cmdline_buffer)
+            {
+                /* the last argument is zero-terminated;
+                   no need for additional termination */
+                memcpy(arm_cmdline_buffer, host_cmdline_buffer,
+                       host_cmdline_len);
+
+                /* separate arguments by white spaces */
+                for (i = 0; i < host_cmdline_len-1; i++) {
+                    if (arm_cmdline_buffer[i] == 0) {
+                        arm_cmdline_buffer[i] = ' ';
+                    }
+                }
+
+                /* Adjust the commandline length argument. */
+                SET_ARG(1, host_cmdline_len-1);
+            }
+
+            /* Unlock the buffers on the ARM side.  */
+            unlock_user(arm_cmdline_buffer, ARG(0), host_cmdline_len);
+            unlock_user((void*)host_cmdline_buffer, ts->info->arg_start, 0);
+
+            /* Return success if we could return a commandline.  */
+            return (arm_cmdline_buffer && host_cmdline_buffer) ? 0 : -1;
         }
 #else
-      return -1;
+        return -1;
 #endif
     case SYS_HEAPINFO:
         {
diff --git a/audio/audio.c b/audio/audio.c
index 3e60c12..7acd3d7 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1198,7 +1198,7 @@
     AudioState *s = opaque;
 #if 0
 #define  MAX_DIFFS  100
-    int64_t         now  = qemu_get_clock(vm_clock);
+    int64_t         now  = qemu_get_clock_ms(vm_clock);
     static int64_t  last = 0;
     static float    diffs[MAX_DIFFS];
     static int      num_diffs;
@@ -1227,7 +1227,7 @@
 #endif
 
     audio_run ("timer");
-    qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + conf.period.ticks);
+    qemu_mod_timer (s->ts, qemu_get_clock_ns (vm_clock) + conf.period.ticks);
 }
 
 
@@ -1250,7 +1250,7 @@
     AudioState *s = &glob_audio_state;
 
     if (audio_is_timer_needed ()) {
-        qemu_mod_timer (s->ts, qemu_get_clock (vm_clock) + 1);
+        qemu_mod_timer (s->ts, qemu_get_clock_ns (vm_clock) + 1);
     }
     else {
         qemu_del_timer (s->ts);
@@ -1964,7 +1964,7 @@
     QLIST_INIT (&s->cap_head);
     atexit (audio_atexit);
 
-    s->ts = qemu_new_timer (vm_clock, audio_timer, s);
+    s->ts = qemu_new_timer_ns (vm_clock, audio_timer, s);
     if (!s->ts) {
         dolog ("Could not create audio timer\n");
         return;
diff --git a/audio/audio_int.h b/audio/audio_int.h
index c6afe81..2ec0bf0 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -240,13 +240,9 @@
 }
 
 #if defined __GNUC__
-#define GCC_ATTR __attribute__ ((__unused__, __format__ (__printf__, 1, 2)))
 #define INIT_FIELD(f) . f
-#define GCC_FMT_ATTR(n, m) __attribute__ ((__format__ (__printf__, n, m)))
 #else
-#define GCC_ATTR /**/
 #define INIT_FIELD(f) /**/
-#define GCC_FMT_ATTR(n, m)
 #endif
 
 static void GCC_ATTR dolog (const char *fmt, ...)
diff --git a/bswap.h b/bswap.h
index 20caae6..82a7951 100644
--- a/bswap.h
+++ b/bswap.h
@@ -144,6 +144,7 @@
 
 #define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
 #define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
+#define cpu_to_be64wu(p, v) cpu_to_be64w(p, v)
 
 #else
 
@@ -201,6 +202,20 @@
     p1[3] = v & 0xff;
 }
 
+static inline void cpu_to_be64wu(uint64_t *p, uint64_t v)
+{
+    uint8_t *p1 = (uint8_t *)p;
+
+    p1[0] = v >> 56;
+    p1[1] = v >> 48;
+    p1[2] = v >> 40;
+    p1[3] = v >> 32;
+    p1[4] = v >> 24;
+    p1[5] = v >> 16;
+    p1[6] = v >> 8;
+    p1[7] = v & 0xff;
+}
+
 #endif
 
 #ifdef HOST_WORDS_BIGENDIAN
diff --git a/bt-host.c b/bt-host.c
index 6931e7c..095254d 100644
--- a/bt-host.c
+++ b/bt-host.c
@@ -19,7 +19,6 @@
 
 #include "qemu-common.h"
 #include "qemu-char.h"
-#include "sysemu.h"
 #include "net.h"
 #include "bt-host.h"
 
diff --git a/bt-vhci.c b/bt-vhci.c
index 679c5e0..3c57720 100644
--- a/bt-vhci.c
+++ b/bt-vhci.c
@@ -19,7 +19,6 @@
 
 #include "qemu-common.h"
 #include "qemu-char.h"
-#include "sysemu.h"
 #include "net.h"
 #include "hw/bt.h"
 
diff --git a/buffered_file.c b/buffered_file.c
index 1836e7e..a661570 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -14,7 +14,6 @@
 #include "qemu-common.h"
 #include "hw/hw.h"
 #include "qemu-timer.h"
-#include "sysemu.h"
 #include "qemu-char.h"
 #include "buffered_file.h"
 
@@ -206,23 +205,26 @@
     return 0;
 }
 
-static size_t buffered_set_rate_limit(void *opaque, size_t new_rate)
+static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
 {
     QEMUFileBuffered *s = opaque;
-
     if (s->has_error)
         goto out;
 
+    if (new_rate > SIZE_MAX) {
+        new_rate = SIZE_MAX;
+    }
+
     s->xfer_limit = new_rate / 10;
-    
+
 out:
     return s->xfer_limit;
 }
 
-static size_t buffered_get_rate_limit(void *opaque)
+static int64_t buffered_get_rate_limit(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
-  
+
     return s->xfer_limit;
 }
 
@@ -235,7 +237,7 @@
         return;
     }
 
-    qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 100);
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
 
     if (s->freeze_output)
         return;
@@ -271,9 +273,9 @@
                              buffered_set_rate_limit,
 			     buffered_get_rate_limit);
 
-    s->timer = qemu_new_timer(rt_clock, buffered_rate_tick, s);
+    s->timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s);
 
-    qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 100);
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
 
     return s->file;
 }
diff --git a/cache-utils.h b/cache-utils.h
index b45fde4..0b65907 100644
--- a/cache-utils.h
+++ b/cache-utils.h
@@ -9,7 +9,7 @@
 
 extern struct qemu_cache_conf qemu_cache_conf;
 
-extern void qemu_cache_utils_init(char **envp);
+void qemu_cache_utils_init(char **envp);
 
 /* mildly adjusted code from tcg-dyngen.c */
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
diff --git a/compatfd.c b/compatfd.c
index 46b0ae7..bd377c4 100644
--- a/compatfd.c
+++ b/compatfd.c
@@ -26,45 +26,45 @@
 static void *sigwait_compat(void *opaque)
 {
     struct sigfd_compat_info *info = opaque;
-    int err;
     sigset_t all;
 
     sigfillset(&all);
     sigprocmask(SIG_BLOCK, &all, NULL);
 
-    do {
-	siginfo_t siginfo;
+    while (1) {
+        int sig;
+        int err;
 
-	err = sigwaitinfo(&info->mask, &siginfo);
-	if (err == -1 && errno == EINTR) {
-            err = 0;
-            continue;
+        err = sigwait(&info->mask, &sig);
+        if (err != 0) {
+            if (errno == EINTR) {
+                continue;
+            } else {
+                return NULL;
+            }
+        } else {
+            struct qemu_signalfd_siginfo buffer;
+            size_t offset = 0;
+
+            memset(&buffer, 0, sizeof(buffer));
+            buffer.ssi_signo = sig;
+
+            while (offset < sizeof(buffer)) {
+                ssize_t len;
+
+                len = write(info->fd, (char *)&buffer + offset,
+                            sizeof(buffer) - offset);
+                if (len == -1 && errno == EINTR)
+                    continue;
+
+                if (len <= 0) {
+                    return NULL;
+                }
+
+                offset += len;
+            }
         }
-
-	if (err > 0) {
-	    char buffer[128];
-	    size_t offset = 0;
-
-	    memcpy(buffer, &err, sizeof(err));
-	    while (offset < sizeof(buffer)) {
-		ssize_t len;
-
-		len = write(info->fd, buffer + offset,
-			    sizeof(buffer) - offset);
-		if (len == -1 && errno == EINTR)
-		    continue;
-
-		if (len <= 0) {
-		    err = -1;
-		    break;
-		}
-
-		offset += len;
-	    }
-	}
-    } while (err >= 0);
-
-    return NULL;
+    }
 }
 
 static int qemu_signalfd_compat(const sigset_t *mask)
@@ -76,15 +76,18 @@
 
     info = malloc(sizeof(*info));
     if (info == NULL) {
-	errno = ENOMEM;
-	return -1;
+        errno = ENOMEM;
+        return -1;
     }
 
     if (pipe(fds) == -1) {
-	free(info);
-	return -1;
+        free(info);
+        return -1;
     }
 
+    qemu_set_cloexec(fds[0]);
+    qemu_set_cloexec(fds[1]);
+
     memcpy(&info->mask, mask, sizeof(*mask));
     info->fd = fds[1];
 
@@ -100,29 +103,15 @@
 
 int qemu_signalfd(const sigset_t *mask)
 {
-#if defined(SYS_signalfd)
+#if defined(CONFIG_SIGNALFD)
     int ret;
 
     ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
-    if (!(ret == -1 && errno == ENOSYS))
-	return ret;
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+        return ret;
+    }
 #endif
 
     return qemu_signalfd_compat(mask);
 }
-
-int qemu_eventfd(int *fds)
-{
-#if defined(SYS_eventfd)
-    int ret;
-
-    ret = syscall(SYS_eventfd, 0);
-    if (ret >= 0) {
-	fds[0] = fds[1] = ret;
-	return 0;
-    } else if (!(ret == -1 && errno == ENOSYS))
-	return ret;
-#endif
-
-    return pipe(fds);
-}
diff --git a/compatfd.h b/compatfd.h
index 55a111a..fc37915 100644
--- a/compatfd.h
+++ b/compatfd.h
@@ -17,12 +17,27 @@
 #include <signal.h>
 
 struct qemu_signalfd_siginfo {
-    uint32_t ssi_signo;
-    uint8_t pad[124];
+    uint32_t ssi_signo;   /* Signal number */
+    int32_t  ssi_errno;   /* Error number (unused) */
+    int32_t  ssi_code;    /* Signal code */
+    uint32_t ssi_pid;     /* PID of sender */
+    uint32_t ssi_uid;     /* Real UID of sender */
+    int32_t  ssi_fd;      /* File descriptor (SIGIO) */
+    uint32_t ssi_tid;     /* Kernel timer ID (POSIX timers) */
+    uint32_t ssi_band;    /* Band event (SIGIO) */
+    uint32_t ssi_overrun; /* POSIX timer overrun count */
+    uint32_t ssi_trapno;  /* Trap number that caused signal */
+    int32_t  ssi_status;  /* Exit status or signal (SIGCHLD) */
+    int32_t  ssi_int;     /* Integer sent by sigqueue(2) */
+    uint64_t ssi_ptr;     /* Pointer sent by sigqueue(2) */
+    uint64_t ssi_utime;   /* User CPU time consumed (SIGCHLD) */
+    uint64_t ssi_stime;   /* System CPU time consumed (SIGCHLD) */
+    uint64_t ssi_addr;    /* Address that generated signal
+                             (for hardware-generated signals) */
+    uint8_t  pad[48];     /* Pad size to 128 bytes (allow for
+                             additional fields in the future) */
 };
 
 int qemu_signalfd(const sigset_t *mask);
 
-int qemu_eventfd(int *fds);
-
 #endif
diff --git a/console.c b/console.c
index f6694b2..a94aecd 100644
--- a/console.c
+++ b/console.c
@@ -32,6 +32,39 @@
 #define QEMU_RGBA(r, g, b, a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define QEMU_RGB(r, g, b) QEMU_RGBA(r, g, b, 0xff)
 
+#ifdef CONFIG_SKINNING
+// Skinning overwrites these functions to put the skin in between
+DisplayState *qemu_graphic_console_init(vga_hw_update_ptr update,
+                                   vga_hw_invalidate_ptr invalidate,
+                                   vga_hw_screen_dump_ptr screen_dump,
+                                   vga_hw_text_update_ptr text_update,
+                                   void *opaque);
+#undef graphic_console_init
+#define graphic_console_init qemu_graphic_console_init
+
+void original_qemu_console_resize(DisplayState *ds, int width, int height);
+#undef qemu_console_resize
+#define qemu_console_resize original_qemu_console_resize
+#endif
+
+int multitouch_enabled = 0;
+static QEMUDisplayCloseCallback *qemu_display_close_callback = NULL;
+static void *qemu_display_close_callback_opaque = NULL;
+
+void qemu_set_display_close_handler(QEMUDisplayCloseCallback *cb, void *opaque)
+{
+    qemu_display_close_callback = cb;
+    qemu_display_close_callback_opaque = opaque;
+}
+
+int qemu_run_display_close_handler(void)
+{
+    if (qemu_display_close_callback != NULL) {
+        return qemu_display_close_callback(qemu_display_close_callback_opaque);
+    }
+    return 1;
+}
+
 typedef struct TextAttributes {
     uint8_t fgcol:4;
     uint8_t bgcol:4;
@@ -137,6 +170,7 @@
     TextAttributes t_attrib; /* currently active text attributes */
     TextCell *cells;
     int text_x[2], text_y[2], cursor_invalidate;
+    int echo;
 
     int update_x0;
     int update_y0;
@@ -235,8 +269,8 @@
     return color;
 }
 
-static void vga_fill_rect (DisplayState *ds,
-                           int posx, int posy, int width, int height, uint32_t color)
+void vga_fill_rect(DisplayState *ds, int posx, int posy,
+                    int width, int height, uint32_t color)
 {
     uint8_t *d, *d1;
     int x, y, bpp;
@@ -1069,8 +1103,10 @@
 
     if (index >= MAX_CONSOLES)
         return;
+    if (active_console) {
     active_console->g_width = ds_get_width(active_console->ds);
     active_console->g_height = ds_get_height(active_console->ds);
+    }
     s = consoles[index];
     if (s) {
         DisplayState *ds = s->ds;
@@ -1141,7 +1177,7 @@
     /* characters are pending: we send them a bit later (XXX:
        horrible, should change char device API) */
     if (s->out_fifo.count > 0) {
-        qemu_mod_timer(s->kbd_timer, qemu_get_clock(rt_clock) + 1);
+        qemu_mod_timer(s->kbd_timer, qemu_get_clock_ms(rt_clock) + 1);
     }
 }
 
@@ -1184,9 +1220,15 @@
             *q++ = '\033';
             *q++ = '[';
             *q++ = keysym & 0xff;
+        } else if (s->echo && (keysym == '\r' || keysym == '\n')) {
+            console_puts(s->chr, (const uint8_t *) "\r", 1);
+            *q++ = '\n';
         } else {
                 *q++ = keysym;
         }
+        if (s->echo) {
+            console_puts(s->chr, buf, q - buf);
+        }
         if (s->chr->chr_read) {
             qemu_fifo_write(&s->out_fifo, buf, q - buf);
             kbd_send_chars(s);
@@ -1278,38 +1320,40 @@
 {
     DisplaySurface *surface = (DisplaySurface*) qemu_mallocz(sizeof(DisplaySurface));
 
-    surface->width = width;
-    surface->height = height;
-    surface->linesize = width * 4;
-    surface->pf = qemu_default_pixelformat(32);
-#ifdef HOST_WORDS_BIGENDIAN
-    surface->flags = QEMU_ALLOCATED_FLAG | QEMU_BIG_ENDIAN_FLAG;
-#else
-    surface->flags = QEMU_ALLOCATED_FLAG;
-#endif
-    surface->data = (uint8_t*) qemu_mallocz(surface->linesize * surface->height);
-
+    int linesize = width * 4;
+    qemu_alloc_display(surface, width, height, linesize,
+                       qemu_default_pixelformat(32), 0);
     return surface;
 }
 
 static DisplaySurface* defaultallocator_resize_displaysurface(DisplaySurface *surface,
                                           int width, int height)
 {
+    int linesize = width * 4;
+    qemu_alloc_display(surface, width, height, linesize,
+                       qemu_default_pixelformat(32), 0);
+    return surface;
+}
+
+void qemu_alloc_display(DisplaySurface *surface, int width, int height,
+                        int linesize, PixelFormat pf, int newflags)
+{
+    void *data;
     surface->width = width;
     surface->height = height;
-    surface->linesize = width * 4;
-    surface->pf = qemu_default_pixelformat(32);
-    if (surface->flags & QEMU_ALLOCATED_FLAG)
-        surface->data = (uint8_t*) qemu_realloc(surface->data, surface->linesize * surface->height);
-    else
-        surface->data = (uint8_t*) qemu_malloc(surface->linesize * surface->height);
+    surface->linesize = linesize;
+    surface->pf = pf;
+    if (surface->flags & QEMU_ALLOCATED_FLAG) {
+        data = qemu_realloc(surface->data,
+                            surface->linesize * surface->height);
+    } else {
+        data = qemu_malloc(surface->linesize * surface->height);
+    }
+    surface->data = (uint8_t *)data;
+    surface->flags = newflags | QEMU_ALLOCATED_FLAG;
 #ifdef HOST_WORDS_BIGENDIAN
-    surface->flags = QEMU_ALLOCATED_FLAG | QEMU_BIG_ENDIAN_FLAG;
-#else
-    surface->flags = QEMU_ALLOCATED_FLAG;
+    surface->flags |= QEMU_BIG_ENDIAN_FLAG;
 #endif
-
-    return surface;
 }
 
 DisplaySurface* qemu_create_displaysurface_from(int width, int height, int bpp,
@@ -1441,43 +1485,27 @@
 
 static int n_text_consoles;
 static CharDriverState *text_consoles[128];
-static QemuOpts *text_console_opts[128];
 
-static void text_console_do_init(CharDriverState *chr, DisplayState *ds, QemuOpts *opts)
+static void text_console_set_echo(CharDriverState *chr, bool echo)
+{
+    TextConsole *s = chr->opaque;
+
+    s->echo = echo;
+}
+
+static void text_console_do_init(CharDriverState *chr, DisplayState *ds)
 {
     TextConsole *s;
-    unsigned width;
-    unsigned height;
     static int color_inited;
 
-    width = qemu_opt_get_number(opts, "width", 0);
-    if (width == 0)
-        width = qemu_opt_get_number(opts, "cols", 0) * FONT_WIDTH;
+    s = chr->opaque;
 
-    height = qemu_opt_get_number(opts, "height", 0);
-    if (height == 0)
-        height = qemu_opt_get_number(opts, "rows", 0) * FONT_HEIGHT;
-
-    if (width == 0 || height == 0) {
-        s = new_console(ds, TEXT_CONSOLE);
-        width = ds_get_width(s->ds);
-        height = ds_get_height(s->ds);
-    } else {
-        s = new_console(ds, TEXT_CONSOLE_FIXED_SIZE);
-    }
-
-    if (!s) {
-        free(chr);
-        return;
-    }
-    chr->opaque = s;
     chr->chr_write = console_puts;
     chr->chr_send_event = console_send_event;
 
-    s->chr = chr;
     s->out_fifo.buf = s->out_fifo_buf;
     s->out_fifo.buf_size = sizeof(s->out_fifo_buf);
-    s->kbd_timer = qemu_new_timer(rt_clock, kbd_send_chars, s);
+    s->kbd_timer = qemu_new_timer_ms(rt_clock, kbd_send_chars, s);
     s->ds = ds;
 
     if (!color_inited) {
@@ -1489,8 +1517,10 @@
     s->total_height = DEFAULT_BACKSCROLL;
     s->x = 0;
     s->y = 0;
-    s->g_width = width;
-    s->g_height = height;
+    if (s->console_type == TEXT_CONSOLE) {
+        s->g_width = ds_get_width(s->ds);
+        s->g_height = ds_get_height(s->ds);
+    }
 
     s->hw_invalidate = text_console_invalidate;
     s->hw_text_update = text_console_update;
@@ -1526,6 +1556,9 @@
 CharDriverState *text_console_init(QemuOpts *opts)
 {
     CharDriverState *chr;
+    TextConsole *s;
+    unsigned width;
+    unsigned height;
 
     chr = qemu_mallocz(sizeof(CharDriverState));
 
@@ -1534,9 +1567,32 @@
         exit(1);
     }
     text_consoles[n_text_consoles] = chr;
-    text_console_opts[n_text_consoles] = opts;
     n_text_consoles++;
 
+    width = qemu_opt_get_number(opts, "width", 0);
+    if (width == 0)
+        width = qemu_opt_get_number(opts, "cols", 0) * FONT_WIDTH;
+
+    height = qemu_opt_get_number(opts, "height", 0);
+    if (height == 0)
+        height = qemu_opt_get_number(opts, "rows", 0) * FONT_HEIGHT;
+
+    if (width == 0 || height == 0) {
+        s = new_console(NULL, TEXT_CONSOLE);
+    } else {
+        s = new_console(NULL, TEXT_CONSOLE_FIXED_SIZE);
+    }
+
+    if (!s) {
+        free(chr);
+        return NULL;
+    }
+
+    s->chr = chr;
+    s->g_width = width;
+    s->g_height = height;
+    chr->opaque = s;
+    chr->chr_set_echo = text_console_set_echo;
     return chr;
 }
 
@@ -1577,9 +1633,7 @@
     int i;
 
     for (i = 0; i < n_text_consoles; i++) {
-        text_console_do_init(text_consoles[i], ds, text_console_opts[i]);
-        qemu_opts_del(text_console_opts[i]);
-        text_console_opts[i] = NULL;
+        text_console_do_init(text_consoles[i], ds);
     }
 
     n_text_consoles = 0;
diff --git a/console.h b/console.h
index eda9207..97755ab 100644
--- a/console.h
+++ b/console.h
@@ -10,6 +10,7 @@
 #define MOUSE_EVENT_LBUTTON 0x01
 #define MOUSE_EVENT_RBUTTON 0x02
 #define MOUSE_EVENT_MBUTTON 0x04
+extern int multitouch_enabled;
 
 /* identical to the ps/2 keyboard bits */
 #define QEMU_SCROLL_LOCK_LED (1 << 0)
@@ -23,6 +24,10 @@
 #define GUI_REFRESH_INTERVAL 30
 #endif
 
+typedef int QEMUDisplayCloseCallback(void *opaque);
+void qemu_set_display_close_handler(QEMUDisplayCloseCallback *cb, void *opaque);
+int qemu_run_display_close_handler(void);
+
 typedef void QEMUPutKBDEvent(void *opaque, int keycode);
 typedef void QEMUPutLEDEvent(void *opaque, int ledstate);
 typedef void QEMUPutMouseEvent(void *opaque, int dx, int dy, int dz, int buttons_state);
@@ -39,6 +44,14 @@
     QTAILQ_ENTRY(QEMUPutMouseEntry) node;
 } QEMUPutMouseEntry;
 
+typedef struct QEMUPutKBDEntry {
+    QEMUPutKBDEvent *put_kbd_event;
+    void *opaque;
+
+    /* used internally by qemu for handling keyboards */
+    QTAILQ_ENTRY(QEMUPutKBDEntry) next;
+} QEMUPutKBDEntry;
+
 typedef struct QEMUPutLEDEntry {
     QEMUPutLEDEvent *put_led;
     void *opaque;
@@ -46,7 +59,7 @@
 } QEMUPutLEDEntry;
 
 void qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque);
-void qemu_remove_kbd_event_handler(void);
+void qemu_remove_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque);
 QEMUPutMouseEntry *qemu_add_mouse_event_handler(QEMUPutMouseEvent *func,
                                                 void *opaque, int absolute,
                                                 const char *name);
@@ -165,7 +178,13 @@
     void (*dpy_fill)(struct DisplayState *s, int x, int y,
                      int w, int h, uint32_t c);
     void (*dpy_text_cursor)(struct DisplayState *s, int x, int y);
-
+#ifdef CONFIG_GLES2
+    void (*dpy_updatecaption)(void);
+#endif
+#ifdef CONFIG_SKINNING
+    void (*dpy_enablezoom)(struct DisplayState *s, int width, int height);
+    void (*dpy_getresolution)(int *width, int *height);
+#endif
     struct DisplayChangeListener *next;
 };
 
@@ -204,10 +223,14 @@
     struct DisplayState *next;
 };
 
+void vga_fill_rect (DisplayState *ds, int posx, int posy,
+    int width, int height, uint32_t color);
 void register_displaystate(DisplayState *ds);
 DisplayState *get_displaystate(void);
 DisplaySurface* qemu_create_displaysurface_from(int width, int height, int bpp,
                                                 int linesize, uint8_t *data);
+void qemu_alloc_display(DisplaySurface *surface, int width, int height,
+                        int linesize, PixelFormat pf, int newflags);
 PixelFormat qemu_different_endianness_pixelformat(int bpp);
 PixelFormat qemu_default_pixelformat(int bpp);
 
@@ -256,7 +279,6 @@
 }
 
 void unregister_displayupdatelistener(DisplayState *ds, DisplayUpdateListener *dul);
-
 #endif
 
 static inline void dpy_update(DisplayState *s, int x, int y, int w, int h)
@@ -275,6 +297,20 @@
 #endif
 }
 
+#ifdef CONFIG_GLES2
+static inline void dpy_updatecaption(DisplayState *s)
+{
+    struct DisplayChangeListener *dcl = s->listeners;
+    while (dcl != NULL) {
+        if(dcl->dpy_updatecaption != NULL)
+        {
+            dcl->dpy_updatecaption();
+        }
+        dcl = dcl->next;
+    }
+}
+#endif
+
 static inline void dpy_resize(DisplayState *s)
 {
     struct DisplayChangeListener *dcl = s->listeners;
@@ -331,6 +367,26 @@
     }
 }
 
+#ifdef CONFIG_SKINNING
+static inline void dpy_enablezoom(struct DisplayState *s, int width, int height)
+{
+    struct DisplayChangeListener *dcl = s->listeners;
+    while (dcl != NULL) {
+        if (dcl->dpy_enablezoom) dcl->dpy_enablezoom(s, width, height);
+        dcl = dcl->next;
+    }
+}
+
+static inline void dpy_getresolution(struct DisplayState *s, int *width, int *height)
+{
+    struct DisplayChangeListener *dcl = s->listeners;
+    while (dcl != NULL) {
+        if (dcl->dpy_getresolution) dcl->dpy_getresolution(width, height);
+        dcl = dcl->next;
+    }
+}
+#endif
+
 static inline int ds_get_linesize(DisplayState *ds)
 {
     return ds->surface->linesize;
@@ -366,7 +422,7 @@
 {
     if (!(ch & 0xff))
         ch |= ' ';
-    cpu_to_le32wu((uint32_t *) dest, ch);
+    *dest = ch;
 }
 
 typedef void (*vga_hw_update_ptr)(void *);
diff --git a/cpu-all.h b/cpu-all.h
index 01e6838..bf4197b 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -765,15 +765,15 @@
 CPUState *cpu_copy(CPUState *env);
 CPUState *qemu_get_cpu(int cpu);
 
-void cpu_dump_state(CPUState *env, FILE *f,
-                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+#define CPU_DUMP_CODE 0x00010000
+
+void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
                     int flags);
-void cpu_dump_statistics (CPUState *env, FILE *f,
-                          int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
+void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
                           int flags);
 
 void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...)
-    __attribute__ ((__format__ (__printf__, 2, 3)));
+    GCC_FMT_ATTR(2, 3);
 extern CPUState *first_cpu;
 extern CPUState *cpu_single_env;
 
@@ -862,9 +862,31 @@
 /* memory API */
 
 extern int phys_ram_fd;
-extern uint8_t *phys_ram_dirty;
 extern ram_addr_t ram_size;
-extern ram_addr_t last_ram_offset;
+
+/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
+#define RAM_PREALLOC_MASK   (1 << 0)
+
+typedef struct RAMBlock {
+    uint8_t *host;
+    ram_addr_t offset;
+    ram_addr_t length;
+    uint32_t flags;
+    char idstr[256];
+    QLIST_ENTRY(RAMBlock) next;
+#if defined(__linux__) && !defined(TARGET_S390X)
+    int fd;
+#endif
+} RAMBlock;
+
+typedef struct RAMList {
+    uint8_t *phys_dirty;
+    QLIST_HEAD(ram, RAMBlock) blocks;
+} RAMList;
+extern RAMList ram_list;
+
+extern const char *mem_path;
+extern int mem_prealloc;
 
 /* physical memory access */
 
@@ -891,18 +913,44 @@
 /* read dirty bit (return 0 or 1) */
 static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+}
+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
 }
 
 static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
                                                 int dirty_flags)
 {
-    return phys_ram_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
 }
 
 static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
 {
-    phys_ram_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+    ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+}
+
+static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
+                                                      int dirty_flags)
+{
+    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+}
+
+static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
+                                                        int length,
+                                                        int dirty_flags)
+{
+    int i, mask, len;
+    uint8_t *p;
+
+    len = length >> TARGET_PAGE_BITS;
+    mask = ~dirty_flags;
+    p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
+    for (i = 0; i < len; i++) {
+        p[i] &= mask;
+    }
 }
 
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
diff --git a/cpu-common.h b/cpu-common.h
index a422689..9e32177 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -28,10 +28,21 @@
 typedef void CPUWriteMemoryFunc(void *opaque, target_phys_addr_t addr, uint32_t value);
 typedef uint32_t CPUReadMemoryFunc(void *opaque, target_phys_addr_t addr);
 
-void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
+void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
                                          ram_addr_t size,
                                          ram_addr_t phys_offset,
-                                         ram_addr_t region_offset);
+                                      ram_addr_t region_offset,
+                                      bool log_dirty);
+
+static inline void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
+                                                       ram_addr_t size,
+                                                       ram_addr_t phys_offset,
+                                                       ram_addr_t region_offset)
+{
+    cpu_register_physical_memory_log(start_addr, size, phys_offset,
+                                     region_offset, false);
+}
+
 static inline void cpu_register_physical_memory(target_phys_addr_t start_addr,
                                                 ram_addr_t size,
                                                 ram_addr_t phys_offset)
@@ -40,12 +51,19 @@
 }
 
 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr);
-ram_addr_t qemu_ram_alloc(ram_addr_t);
+ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
+                        ram_addr_t size, void *host);
+ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size);
 void qemu_ram_free(ram_addr_t addr);
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should only be used for ram local to a device.  */
 void *qemu_get_ram_ptr(ram_addr_t addr);
+/* Same but slower, to use for migration, where the order of
+ * RAMBlocks must not change. */
+void *qemu_safe_ram_ptr(ram_addr_t addr);
 /* This should not be used by devices.  */
-ram_addr_t qemu_ram_addr_from_host(void *ptr);
+int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 
 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
                            CPUWriteMemoryFunc * const *mem_write,
diff --git a/cpu-exec.c b/cpu-exec.c
index 8cf4c40..62af9f7 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -203,12 +203,14 @@
 {
     CPUWatchpoint *wp;
 
-    if (!env->watchpoint_hit)
-        QTAILQ_FOREACH(wp, &env->watchpoints, entry)
+    if (!env->watchpoint_hit) {
+        QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
             wp->flags &= ~BP_WATCHPOINT_HIT;
-
-    if (debug_excp_handler)
+        }
+    }
+    if (debug_excp_handler) {
         debug_excp_handler(env);
+    }
 }
 
 /* main execution loop */
@@ -255,7 +257,9 @@
     env->cc_x = (env->sr >> 4) & 1;
 #elif defined(TARGET_ALPHA)
 #elif defined(TARGET_ARM)
+#elif defined(TARGET_UNICORE32)
 #elif defined(TARGET_PPC)
+#elif defined(TARGET_LM32)
 #elif defined(TARGET_MICROBLAZE)
 #elif defined(TARGET_MIPS)
 #elif defined(TARGET_SH4)
@@ -280,8 +284,9 @@
                 if (env->exception_index >= EXCP_INTERRUPT) {
                     /* exit request from the cpu execution loop */
                     ret = env->exception_index;
-                    if (ret == EXCP_DEBUG)
+                    if (ret == EXCP_DEBUG) {
                         cpu_handle_debug_exception(env);
+                    }
                     break;
                 } else {
 #if defined(CONFIG_USER_ONLY)
@@ -311,6 +316,8 @@
                     env->old_exception = -1;
 #elif defined(TARGET_PPC)
                     do_interrupt(env);
+#elif defined(TARGET_LM32)
+                    do_interrupt(env);
 #elif defined(TARGET_MICROBLAZE)
                     do_interrupt(env);
 #elif defined(TARGET_MIPS)
@@ -319,6 +326,8 @@
                     do_interrupt(env);
 #elif defined(TARGET_ARM)
                     do_interrupt(env);
+#elif defined(TARGET_UNICORE32)
+                    do_interrupt(env);
 #elif defined(TARGET_SH4)
 		    do_interrupt(env);
 #elif defined(TARGET_ALPHA)
@@ -327,6 +336,8 @@
                     do_interrupt(env);
 #elif defined(TARGET_M68K)
                     do_interrupt(0);
+#elif defined(TARGET_S390X)
+                    do_interrupt(env);
 #endif
                     env->exception_index = -1;
 #endif
@@ -356,7 +367,7 @@
                     }
 #if defined(TARGET_ARM) || defined(TARGET_SPARC) || defined(TARGET_MIPS) || \
     defined(TARGET_PPC) || defined(TARGET_ALPHA) || defined(TARGET_CRIS) || \
-    defined(TARGET_MICROBLAZE)
+    defined(TARGET_MICROBLAZE) || defined(TARGET_LM32) || defined(TARGET_UNICORE32)
                     if (interrupt_request & CPU_INTERRUPT_HALT) {
                         env->interrupt_request &= ~CPU_INTERRUPT_HALT;
                         env->halted = 1;
@@ -390,10 +401,10 @@
                             do_interrupt(EXCP12_MCHK, 0, 0, 0, 0);
                             next_tb = 0;
                         } else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
-                                   (((env->hflags2 & HF2_VINTR_MASK) && 
+                                   (((env->hflags2 & HF2_VINTR_MASK) &&
                                      (env->hflags2 & HF2_HIF_MASK)) ||
-                                    (!(env->hflags2 & HF2_VINTR_MASK) && 
-                                     (env->eflags & IF_MASK && 
+                                    (!(env->hflags2 & HF2_VINTR_MASK) &&
+                                     (env->eflags & IF_MASK &&
                                       !(env->hflags & HF_INHIBIT_IRQ_MASK))))) {
                             int intno;
                             svm_check_intercept(SVM_EXIT_INTR);
@@ -411,7 +422,7 @@
                             next_tb = 0;
 #if !defined(CONFIG_USER_ONLY)
                         } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) &&
-                                   (env->eflags & IF_MASK) && 
+                                   (env->eflags & IF_MASK) &&
                                    !(env->hflags & HF_INHIBIT_IRQ_MASK)) {
                             int intno;
                             /* FIXME: this should respect TPR */
@@ -436,6 +447,13 @@
                             env->interrupt_request &= ~CPU_INTERRUPT_HARD;
                         next_tb = 0;
                     }
+#elif defined(TARGET_LM32)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD)
+                        && (env->ie & IE_IE)) {
+                        env->exception_index = EXCP_IRQ;
+                        do_interrupt(env);
+                        next_tb = 0;
+                    }
 #elif defined(TARGET_MICROBLAZE)
                     if ((interrupt_request & CPU_INTERRUPT_HARD)
                         && (env->sregs[SR_MSR] & MSR_IE)
@@ -447,11 +465,7 @@
                     }
 #elif defined(TARGET_MIPS)
                     if ((interrupt_request & CPU_INTERRUPT_HARD) &&
-                        cpu_mips_hw_interrupts_pending(env) &&
-                        (env->CP0_Status & (1 << CP0St_IE)) &&
-                        !(env->CP0_Status & (1 << CP0St_EXL)) &&
-                        !(env->CP0_Status & (1 << CP0St_ERL)) &&
-                        !(env->hflags & MIPS_HFLAG_DM)) {
+                        cpu_mips_hw_interrupts_pending(env)) {
                         /* Raise it */
                         env->exception_index = EXCP_EXT_INTERRUPT;
                         env->error_code = 0;
@@ -473,9 +487,6 @@
                                 next_tb = 0;
                             }
                         }
-		    } else if (interrupt_request & CPU_INTERRUPT_TIMER) {
-			//do_interrupt(0, 0, 0, 0, 0);
-			env->interrupt_request &= ~CPU_INTERRUPT_TIMER;
 		    }
 #elif defined(TARGET_ARM)
                     if (interrupt_request & CPU_INTERRUPT_FIQ
@@ -500,6 +511,12 @@
                         do_interrupt(env);
                         next_tb = 0;
                     }
+#elif defined(TARGET_UNICORE32)
+                    if (interrupt_request & CPU_INTERRUPT_HARD
+                        && !(env->uncached_asr & ASR_I)) {
+                        do_interrupt(env);
+                        next_tb = 0;
+                    }
 #elif defined(TARGET_SH4)
                     if (interrupt_request & CPU_INTERRUPT_HARD) {
                         do_interrupt(env);
@@ -537,6 +554,12 @@
                         do_interrupt(1);
                         next_tb = 0;
                     }
+#elif defined(TARGET_S390X) && !defined(CONFIG_USER_ONLY)
+                    if ((interrupt_request & CPU_INTERRUPT_HARD) &&
+                        (env->psw.mask & PSW_MASK_EXT)) {
+                        do_interrupt(env);
+                        next_tb = 0;
+                    }
 #endif
                    /* Don't use the cached interupt_request value,
                       do_interrupt may have updated the EXITTB flag. */
@@ -650,8 +673,10 @@
     env->eflags = env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
 #elif defined(TARGET_ARM)
     /* XXX: Save/restore host fpu exception state?.  */
+#elif defined(TARGET_UNICORE32)
 #elif defined(TARGET_SPARC)
 #elif defined(TARGET_PPC)
+#elif defined(TARGET_LM32)
 #elif defined(TARGET_M68K)
     cpu_m68k_flush_flags(env, env->cc_op);
     env->cc_op = CC_OP_FLAGS;
@@ -775,7 +800,7 @@
     if (tb) {
         /* the PC is inside the translated code. It means that we have
            a virtual CPU fault */
-        cpu_restore_state(tb, env, pc, puc);
+        cpu_restore_state(tb, env, pc);
     }
 
     /* we restore the process signal mask as the sigreturn should
@@ -1255,7 +1280,7 @@
         break;
     }
 
-    return handle_cpu_signal(pc, (unsigned long)info->si_addr, 
+    return handle_cpu_signal(pc, (unsigned long)info->si_addr,
                              is_write, &uc->uc_sigmask, puc);
 }
 
diff --git a/cpus.c b/cpus.c
new file mode 100644
index 0000000..470eb32
--- /dev/null
+++ b/cpus.c
@@ -0,0 +1,629 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "config-host.h"
+
+#include "monitor.h"
+#include "sysemu.h"
+#include "gdbstub.h"
+#include "dma.h"
+#include "kvm.h"
+
+#include "cpus.h"
+
+static CPUState *cur_cpu;
+static CPUState *next_cpu;
+
+/***********************************************************/
+void hw_error(const char *fmt, ...)
+{
+    va_list ap;
+    CPUState *env;
+
+    va_start(ap, fmt);
+    fprintf(stderr, "qemu: hardware error: ");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    for(env = first_cpu; env != NULL; env = env->next_cpu) {
+        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
+#ifdef TARGET_I386
+        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
+#else
+        cpu_dump_state(env, stderr, fprintf, 0);
+#endif
+    }
+    va_end(ap);
+    abort();
+}
+
+static void do_vm_stop(int reason)
+{
+    if (vm_running) {
+        cpu_disable_ticks();
+        vm_running = 0;
+        pause_all_vcpus();
+        vm_state_notify(0, reason);
+    }
+}
+
+static int cpu_can_run(CPUState *env)
+{
+    if (env->stop)
+        return 0;
+    if (env->stopped)
+        return 0;
+    return 1;
+}
+
+static int cpu_has_work(CPUState *env)
+{
+    if (env->stop)
+        return 1;
+    if (env->stopped)
+        return 0;
+    if (!env->halted)
+        return 1;
+    if (qemu_cpu_has_work(env))
+        return 1;
+    return 0;
+}
+
+int tcg_has_work(void)
+{
+    CPUState *env;
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu)
+        if (cpu_has_work(env))
+            return 1;
+    return 0;
+}
+
+#ifndef _WIN32
+static int io_thread_fd = -1;
+
+#if 0
+static void qemu_event_increment(void)
+{
+    static const char byte = 0;
+
+    if (io_thread_fd == -1)
+        return;
+
+    write(io_thread_fd, &byte, sizeof(byte));
+}
+#endif
+
+static void qemu_event_read(void *opaque)
+{
+    int fd = (unsigned long)opaque;
+    ssize_t len;
+
+    /* Drain the notify pipe */
+    do {
+        char buffer[512];
+        len = read(fd, buffer, sizeof(buffer));
+    } while ((len == -1 && errno == EINTR) || len > 0);
+}
+
+static int qemu_event_init(void)
+{
+    int err;
+    int fds[2];
+
+    err = pipe(fds);
+    if (err == -1)
+        return -errno;
+
+    err = fcntl_setfl(fds[0], O_NONBLOCK);
+    if (err < 0)
+        goto fail;
+
+    err = fcntl_setfl(fds[1], O_NONBLOCK);
+    if (err < 0)
+        goto fail;
+
+    qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
+                         (void *)(unsigned long)fds[0]);
+
+    io_thread_fd = fds[1];
+    return 0;
+
+fail:
+    close(fds[0]);
+    close(fds[1]);
+    return err;
+}
+#else
+HANDLE qemu_event_handle;
+
+static void dummy_event_handler(void *opaque)
+{
+}
+
+static int qemu_event_init(void)
+{
+    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (!qemu_event_handle) {
+        perror("Failed CreateEvent");
+        return -1;
+    }
+    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
+    return 0;
+}
+
+#if 0
+static void qemu_event_increment(void)
+{
+    SetEvent(qemu_event_handle);
+}
+#endif
+#endif
+
+#ifndef CONFIG_IOTHREAD
+int qemu_init_main_loop(void)
+{
+    return qemu_event_init();
+}
+
+void qemu_init_vcpu(void *_env)
+{
+    CPUState *env = _env;
+
+    if (kvm_enabled())
+        kvm_init_vcpu(env);
+    return;
+}
+
+int qemu_cpu_self(void *env)
+{
+    return 1;
+}
+
+void resume_all_vcpus(void)
+{
+}
+
+void pause_all_vcpus(void)
+{
+}
+
+void qemu_cpu_kick(void *env)
+{
+    return;
+}
+
+void qemu_notify_event(void)
+{
+    CPUState *env = cpu_single_env;
+
+    if (env) {
+        cpu_exit(env);
+#ifdef USE_KQEMU
+        if (env->kqemu_enabled)
+            kqemu_cpu_interrupt(env);
+#endif
+     }
+}
+
+void qemu_mutex_lock_iothread(void)
+{
+}
+
+void qemu_mutex_unlock_iothread(void)
+{
+}
+
+void vm_stop(int reason)
+{
+    do_vm_stop(reason);
+}
+
+#else /* CONFIG_IOTHREAD */
+
+#include "qemu-thread.h"
+
+QemuMutex qemu_global_mutex;
+static QemuMutex qemu_fair_mutex;
+
+static QemuThread io_thread;
+
+static QemuThread *tcg_cpu_thread;
+static QemuCond *tcg_halt_cond;
+
+static int qemu_system_ready;
+/* cpu creation */
+static QemuCond qemu_cpu_cond;
+/* system init */
+static QemuCond qemu_system_cond;
+static QemuCond qemu_pause_cond;
+
+static void block_io_signals(void);
+static void unblock_io_signals(void);
+static int tcg_has_work(void);
+
+int qemu_init_main_loop(void)
+{
+    int ret;
+
+    ret = qemu_event_init();
+    if (ret)
+        return ret;
+
+    qemu_cond_init(&qemu_pause_cond);
+    qemu_mutex_init(&qemu_fair_mutex);
+    qemu_mutex_init(&qemu_global_mutex);
+    qemu_mutex_lock(&qemu_global_mutex);
+
+    unblock_io_signals();
+    qemu_thread_self(&io_thread);
+
+    return 0;
+}
+
+static void qemu_wait_io_event(CPUState *env)
+{
+    while (!tcg_has_work())
+        qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
+
+    qemu_mutex_unlock(&qemu_global_mutex);
+
+    /*
+     * Users of qemu_global_mutex can be starved, having no chance
+     * to acquire it since this path will get to it first.
+     * So use another lock to provide fairness.
+     */
+    qemu_mutex_lock(&qemu_fair_mutex);
+    qemu_mutex_unlock(&qemu_fair_mutex);
+
+    qemu_mutex_lock(&qemu_global_mutex);
+    if (env->stop) {
+        env->stop = 0;
+        env->stopped = 1;
+        qemu_cond_signal(&qemu_pause_cond);
+    }
+}
+
+static int qemu_cpu_exec(CPUState *env);
+
+static void *kvm_cpu_thread_fn(void *arg)
+{
+    CPUState *env = arg;
+
+    block_io_signals();
+    qemu_thread_self(env->thread);
+
+    /* signal CPU creation */
+    qemu_mutex_lock(&qemu_global_mutex);
+    env->created = 1;
+    qemu_cond_signal(&qemu_cpu_cond);
+
+    /* and wait for machine initialization */
+    while (!qemu_system_ready)
+        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+
+    while (1) {
+        if (cpu_can_run(env))
+            qemu_cpu_exec(env);
+        qemu_wait_io_event(env);
+    }
+
+    return NULL;
+}
+
+static void tcg_cpu_exec(void);
+
+static void *tcg_cpu_thread_fn(void *arg)
+{
+    CPUState *env = arg;
+
+    block_io_signals();
+    qemu_thread_self(env->thread);
+
+    /* signal CPU creation */
+    qemu_mutex_lock(&qemu_global_mutex);
+    for (env = first_cpu; env != NULL; env = env->next_cpu)
+        env->created = 1;
+    qemu_cond_signal(&qemu_cpu_cond);
+
+    /* and wait for machine initialization */
+    while (!qemu_system_ready)
+        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+
+    while (1) {
+        tcg_cpu_exec();
+        qemu_wait_io_event(cur_cpu);
+    }
+
+    return NULL;
+}
+
+void qemu_cpu_kick(void *_env)
+{
+    CPUState *env = _env;
+    qemu_cond_broadcast(env->halt_cond);
+    if (kvm_enabled())
+        qemu_thread_signal(env->thread, SIGUSR1);
+}
+
+int qemu_cpu_self(void *env)
+{
+    return (cpu_single_env != NULL);
+}
+
+static void cpu_signal(int sig)
+{
+    if (cpu_single_env)
+        cpu_exit(cpu_single_env);
+}
+
+static void block_io_signals(void)
+{
+    sigset_t set;
+    struct sigaction sigact;
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR2);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+
+    memset(&sigact, 0, sizeof(sigact));
+    sigact.sa_handler = cpu_signal;
+    sigaction(SIGUSR1, &sigact, NULL);
+}
+
+static void unblock_io_signals(void)
+{
+    sigset_t set;
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR2);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+}
+
+static void qemu_signal_lock(unsigned int msecs)
+{
+    qemu_mutex_lock(&qemu_fair_mutex);
+
+    while (qemu_mutex_trylock(&qemu_global_mutex)) {
+        qemu_thread_signal(tcg_cpu_thread, SIGUSR1);
+        if (!qemu_mutex_timedlock(&qemu_global_mutex, msecs))
+            break;
+    }
+    qemu_mutex_unlock(&qemu_fair_mutex);
+}
+
+void qemu_mutex_lock_iothread(void)
+{
+    if (kvm_enabled()) {
+        qemu_mutex_lock(&qemu_fair_mutex);
+        qemu_mutex_lock(&qemu_global_mutex);
+        qemu_mutex_unlock(&qemu_fair_mutex);
+    } else
+        qemu_signal_lock(100);
+}
+
+void qemu_mutex_unlock_iothread(void)
+{
+    qemu_mutex_unlock(&qemu_global_mutex);
+}
+
+static int all_vcpus_paused(void)
+{
+    CPUState *penv = first_cpu;
+
+    while (penv) {
+        if (!penv->stopped)
+            return 0;
+        penv = (CPUState *)penv->next_cpu;
+    }
+
+    return 1;
+}
+
+void pause_all_vcpus(void)
+{
+    CPUState *penv = first_cpu;
+
+    while (penv) {
+        penv->stop = 1;
+        qemu_thread_signal(penv->thread, SIGUSR1);
+        qemu_cpu_kick(penv);
+        penv = (CPUState *)penv->next_cpu;
+    }
+
+    while (!all_vcpus_paused()) {
+        qemu_cond_timedwait(&qemu_pause_cond, &qemu_global_mutex, 100);
+        penv = first_cpu;
+        while (penv) {
+            qemu_thread_signal(penv->thread, SIGUSR1);
+            penv = (CPUState *)penv->next_cpu;
+        }
+    }
+}
+
+void resume_all_vcpus(void)
+{
+    CPUState *penv = first_cpu;
+
+    while (penv) {
+        penv->stop = 0;
+        penv->stopped = 0;
+        qemu_thread_signal(penv->thread, SIGUSR1);
+        qemu_cpu_kick(penv);
+        penv = (CPUState *)penv->next_cpu;
+    }
+}
+
+static void tcg_init_vcpu(void *_env)
+{
+    CPUState *env = _env;
+    /* share a single thread for all cpus with TCG */
+    if (!tcg_cpu_thread) {
+        env->thread = qemu_mallocz(sizeof(QemuThread));
+        env->halt_cond = qemu_mallocz(sizeof(QemuCond));
+        qemu_cond_init(env->halt_cond);
+        qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
+        while (env->created == 0)
+            qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+        tcg_cpu_thread = env->thread;
+        tcg_halt_cond = env->halt_cond;
+    } else {
+        env->thread = tcg_cpu_thread;
+        env->halt_cond = tcg_halt_cond;
+    }
+}
+
+static void kvm_start_vcpu(CPUState *env)
+{
+#if 0
+    kvm_init_vcpu(env);
+    env->thread = qemu_mallocz(sizeof(QemuThread));
+    env->halt_cond = qemu_mallocz(sizeof(QemuCond));
+    qemu_cond_init(env->halt_cond);
+    qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
+    while (env->created == 0)
+        qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+#endif
+}
+
+void qemu_init_vcpu(void *_env)
+{
+    CPUState *env = _env;
+
+    if (kvm_enabled())
+        kvm_start_vcpu(env);
+    else
+        tcg_init_vcpu(env);
+}
+
+void qemu_notify_event(void)
+{
+    qemu_event_increment();
+}
+
+void vm_stop(int reason)
+{
+    QemuThread me;
+    qemu_thread_self(&me);
+
+    if (!qemu_thread_equal(&me, &io_thread)) {
+        qemu_system_vmstop_request(reason);
+        /*
+         * FIXME: should not return to device code in case
+         * vm_stop() has been requested.
+         */
+        if (cpu_single_env) {
+            cpu_exit(cpu_single_env);
+            cpu_single_env->stop = 1;
+        }
+        return;
+    }
+    do_vm_stop(reason);
+}
+
+#endif
+
+static int qemu_cpu_exec(CPUState *env)
+{
+    int ret;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+    ti = profile_getclock();
+#endif
+    if (use_icount) {
+        int64_t count;
+        int decr;
+        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
+        env->icount_decr.u16.low = 0;
+        env->icount_extra = 0;
+        count = qemu_next_icount_deadline();
+        count = (count + (1 << icount_time_shift) - 1)
+                >> icount_time_shift;
+        qemu_icount += count;
+        decr = (count > 0xffff) ? 0xffff : count;
+        count -= decr;
+        env->icount_decr.u16.low = decr;
+        env->icount_extra = count;
+    }
+#ifdef CONFIG_TRACE
+    if (tbflush_requested) {
+        tbflush_requested = 0;
+        tb_flush(env);
+        return EXCP_INTERRUPT;
+    }
+#endif
+
+
+    ret = cpu_exec(env);
+#ifdef CONFIG_PROFILER
+    qemu_time += profile_getclock() - ti;
+#endif
+    if (use_icount) {
+        /* Fold pending instructions back into the
+           instruction counter, and clear the interrupt flag.  */
+        qemu_icount -= (env->icount_decr.u16.low
+                        + env->icount_extra);
+        env->icount_decr.u32 = 0;
+        env->icount_extra = 0;
+    }
+    return ret;
+}
+
+void tcg_cpu_exec(void)
+{
+    int ret = 0;
+
+    if (next_cpu == NULL)
+        next_cpu = first_cpu;
+    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
+        CPUState *env = cur_cpu = next_cpu;
+
+        if (!vm_running)
+            break;
+        if (qemu_timer_alarm_pending()) {
+            break;
+        }
+        if (cpu_can_run(env))
+            ret = qemu_cpu_exec(env);
+        if (ret == EXCP_DEBUG) {
+            gdb_set_stop_cpu(env);
+            debug_requested = 1;
+            break;
+        }
+    }
+}
+
diff --git a/cpus.h b/cpus.h
new file mode 100644
index 0000000..c25f35e
--- /dev/null
+++ b/cpus.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_CPUS_H
+#define QEMU_CPUS_H
+
+void tcg_cpu_exec(void);
+void vm_state_notify(int running, int reason);
+extern int tbflush_requested;
+extern int debug_requested;
+
+void resume_all_vcpus(void);
+void pause_all_vcpus(void);
+int qemu_init_main_loop(void);
+
+#endif /* QEMU_CPUS_H */
diff --git a/cutils.c b/cutils.c
index 036ae3c..f9a7e36 100644
--- a/cutils.c
+++ b/cutils.c
@@ -23,6 +23,7 @@
  */
 #include "qemu-common.h"
 #include "host-utils.h"
+#include <math.h>
 
 void pstrcpy(char *buf, int buf_size, const char *str)
 {
@@ -168,30 +169,50 @@
 }
 
 /*
- * Copies iovecs from src to the end dst until src is completely copied or the
- * total size of the copied iovec reaches size. The size of the last copied
- * iovec is changed in order to fit the specified total size if it isn't a
- * perfect fit already.
+ * Copies iovecs from src to the end of dst. It starts copying after skipping
+ * the given number of bytes in src and copies until src is completely copied
+ * or the total size of the copied iovec reaches size.The size of the last
+ * copied iovec is changed in order to fit the specified total size if it isn't
+ * a perfect fit already.
  */
-void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
+void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
+    size_t size)
 {
     int i;
     size_t done;
+    void *iov_base;
+    uint64_t iov_len;
 
     assert(dst->nalloc != -1);
 
     done = 0;
     for (i = 0; (i < src->niov) && (done != size); i++) {
-        if (done + src->iov[i].iov_len > size) {
-            qemu_iovec_add(dst, src->iov[i].iov_base, size - done);
+        if (skip >= src->iov[i].iov_len) {
+            /* Skip the whole iov */
+            skip -= src->iov[i].iov_len;
+            continue;
+        } else {
+            /* Skip only part (or nothing) of the iov */
+            iov_base = (uint8_t*) src->iov[i].iov_base + skip;
+            iov_len = src->iov[i].iov_len - skip;
+            skip = 0;
+        }
+
+        if (done + iov_len > size) {
+            qemu_iovec_add(dst, iov_base, size - done);
             break;
         } else {
-            qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len);
+            qemu_iovec_add(dst, iov_base, iov_len);
         }
-        done += src->iov[i].iov_len;
+        done += iov_len;
     }
 }
 
+void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
+{
+    qemu_iovec_copy(dst, src, 0, size);
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
@@ -234,6 +255,49 @@
     }
 }
 
+void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count)
+{
+    size_t n;
+    int i;
+
+    for (i = 0; i < qiov->niov && count; ++i) {
+        n = MIN(count, qiov->iov[i].iov_len);
+        memset(qiov->iov[i].iov_base, c, n);
+        count -= n;
+    }
+}
+
+void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
+                            size_t skip)
+{
+    int i;
+    size_t done;
+    void *iov_base;
+    uint64_t iov_len;
+
+    done = 0;
+    for (i = 0; (i < qiov->niov) && (done != count); i++) {
+        if (skip >= qiov->iov[i].iov_len) {
+            /* Skip the whole iov */
+            skip -= qiov->iov[i].iov_len;
+            continue;
+        } else {
+            /* Skip only part (or nothing) of the iov */
+            iov_base = (uint8_t*) qiov->iov[i].iov_base + skip;
+            iov_len = qiov->iov[i].iov_len - skip;
+            skip = 0;
+        }
+
+        if (done + iov_len > count) {
+            memset(iov_base, c, count - done);
+            break;
+        } else {
+            memset(iov_base, c, iov_len);
+        }
+        done += iov_len;
+    }
+}
+
 #ifndef _WIN32
 /* Sets a specific flag */
 int fcntl_setfl(int fd, int flag)
@@ -251,3 +315,97 @@
 }
 #endif
 
+/*
+ * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB or T/t for TB. Default without any postfix
+ * is MB. End pointer will be returned in *end, if not NULL. A valid
+ * value must be terminated by whitespace, ',' or '\0'. Return -1 on
+ * error.
+ */
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix)
+{
+    int64_t retval = -1;
+    char *endptr;
+    unsigned char c, d;
+    int mul_required = 0;
+    double val, mul, integral, fraction;
+
+    errno = 0;
+    val = strtod(nptr, &endptr);
+    if (isnan(val) || endptr == nptr || errno != 0) {
+        goto fail;
+    }
+    fraction = modf(val, &integral);
+    if (fraction != 0) {
+        mul_required = 1;
+    }
+    /*
+     * Any whitespace character is fine for terminating the number,
+     * in addition we accept ',' to handle strings where the size is
+     * part of a multi token argument.
+     */
+    c = *endptr;
+    d = c;
+    if (qemu_isspace(c) || c == '\0' || c == ',') {
+        c = 0;
+        if (default_suffix) {
+            d = default_suffix;
+        } else {
+            d = c;
+        }
+    }
+    switch (qemu_toupper(d)) {
+    case STRTOSZ_DEFSUFFIX_B:
+        mul = 1;
+        if (mul_required) {
+            goto fail;
+        }
+        break;
+    case STRTOSZ_DEFSUFFIX_KB:
+        mul = 1 << 10;
+        break;
+    case 0:
+        if (mul_required) {
+            goto fail;
+        }
+    case STRTOSZ_DEFSUFFIX_MB:
+        mul = 1ULL << 20;
+        break;
+    case STRTOSZ_DEFSUFFIX_GB:
+        mul = 1ULL << 30;
+        break;
+    case STRTOSZ_DEFSUFFIX_TB:
+        mul = 1ULL << 40;
+        break;
+    default:
+        goto fail;
+    }
+    /*
+     * If not terminated by whitespace, ',', or \0, increment endptr
+     * to point to next character, then check that we are terminated
+     * by an appropriate separating character, ie. whitespace, ',', or
+     * \0. If not, we are seeing trailing garbage, thus fail.
+     */
+    if (c != 0) {
+        endptr++;
+        if (!qemu_isspace(*endptr) && *endptr != ',' && *endptr != 0) {
+            goto fail;
+        }
+    }
+    if ((val * mul >= INT64_MAX) || val < 0) {
+        goto fail;
+    }
+    retval = val * mul;
+
+fail:
+    if (end) {
+        *end = endptr;
+    }
+
+    return retval;
+}
+
+int64_t strtosz(const char *nptr, char **end)
+{
+    return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
+}
diff --git a/dcache.c b/dcache.c
deleted file mode 100644
index 7cce505..0000000
--- a/dcache.c
+++ /dev/null
@@ -1,334 +0,0 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "dcache.h"
-#include "cpu.h"
-#include "exec-all.h"
-#include "trace.h"
-#include "varint.h"
-
-extern FILE *ftrace_debug;
-
-int dcache_size = 16 * 1024;
-int dcache_ways = 4;
-int dcache_line_size = 32;
-int dcache_replace_policy = kPolicyRandom;
-int dcache_load_miss_penalty = 30;
-int dcache_store_miss_penalty = 5;
-
-typedef struct Dcache {
-  int		size;
-  int		ways;
-  int		line_size;
-  int		log_line_size;
-  int		rows;
-  uint32_t	addr_mask;
-  int		replace_policy;
-  int		next_way;
-  int		extra_increment_counter;
-  int		*replace;
-  uint32_t	**table;
-  int		load_miss_penalty;
-  int		store_miss_penalty;
-  uint64_t	load_hits;
-  uint64_t	load_misses;
-  uint64_t	store_hits;
-  uint64_t	store_misses;
-} Dcache;
-
-Dcache dcache;
-
-void dcache_cleanup();
-
-// Returns the log2 of "num" rounded up to the nearest integer.
-int log2_roundup(int num)
-{
-  int power2;
-  int exp;
-
-  for (exp = 0, power2 = 1; power2 < num; power2 <<= 1) {
-    exp += 1;
-  }
-  return exp;
-}
-
-void dcache_init(int size, int ways, int line_size, int replace_policy,
-                 int load_miss_penalty, int store_miss_penalty)
-{
-  int ii;
-
-  // Compute the logs of the params, rounded up
-  int log_size = log2_roundup(size);
-  int log_ways = log2_roundup(ways);
-  int log_line_size = log2_roundup(line_size);
-
-  // The number of rows in the table = size / (line_size * ways)
-  int log_rows = log_size - log_line_size - log_ways;
-
-  dcache.size = 1 << log_size;
-  dcache.ways = 1 << log_ways;
-  dcache.line_size = 1 << log_line_size;
-  dcache.log_line_size = log_line_size;
-  dcache.rows = 1 << log_rows;
-  dcache.addr_mask = (1 << log_rows) - 1;
-
-  // Allocate an array of pointers, one for each row
-  uint32_t **table = malloc(sizeof(uint32_t *) << log_rows);
-
-  // Allocate the data for the whole cache in one call to malloc()
-  int data_size = sizeof(uint32_t) << (log_rows + log_ways);
-  uint32_t *data = malloc(data_size);
-
-  // Fill the cache with invalid addresses
-  memset(data, ~0, data_size);
-
-  // Assign the pointers into the data array
-  int rows = dcache.rows;
-  for (ii = 0; ii < rows; ++ii) {
-    table[ii] = &data[ii << log_ways];
-  }
-  dcache.table = table;
-  dcache.replace_policy = replace_policy;
-  dcache.next_way = 0;
-  dcache.extra_increment_counter = 0;
-
-  dcache.replace = NULL;
-  if (replace_policy == kPolicyRoundRobin) {
-    dcache.replace = malloc(sizeof(int) << log_rows);
-    memset(dcache.replace, 0, sizeof(int) << log_rows);
-  }
-  dcache.load_miss_penalty = load_miss_penalty;
-  dcache.store_miss_penalty = store_miss_penalty;
-  dcache.load_hits = 0;
-  dcache.load_misses = 0;
-  dcache.store_hits = 0;
-  dcache.store_misses = 0;
-
-  atexit(dcache_cleanup);
-}
-
-void dcache_stats()
-{
-  uint64_t hits = dcache.load_hits + dcache.store_hits;
-  uint64_t misses = dcache.load_misses + dcache.store_misses;
-  uint64_t total = hits + misses;
-  double hit_per = 0;
-  double miss_per = 0;
-  if (total) {
-    hit_per = 100.0 * hits / total;
-    miss_per = 100.0 * misses / total;
-  }
-  printf("\n");
-  printf("Dcache hits   %10llu %6.2f%%\n", hits, hit_per);
-  printf("Dcache misses %10llu %6.2f%%\n", misses, miss_per);
-  printf("Dcache total  %10llu\n", hits + misses);
-}
-
-void dcache_free()
-{
-  free(dcache.table[0]);
-  free(dcache.table);
-  free(dcache.replace);
-  dcache.table = NULL;
-}
-
-void dcache_cleanup()
-{
-  dcache_stats();
-  dcache_free();
-}
-
-void compress_trace_addresses(TraceAddr *trace_addr)
-{
-  AddrRec *ptr;
-  char *comp_ptr = trace_addr->compressed_ptr;
-  uint32_t prev_addr = trace_addr->prev_addr;
-  uint64_t prev_time = trace_addr->prev_time;
-  AddrRec *last = &trace_addr->buffer[kMaxNumAddrs];
-  for (ptr = trace_addr->buffer; ptr != last; ++ptr) {
-    if (comp_ptr >= trace_addr->high_water_ptr) {
-      uint32_t size = comp_ptr - trace_addr->compressed;
-      fwrite(trace_addr->compressed, sizeof(char), size, trace_addr->fstream);
-      comp_ptr = trace_addr->compressed;
-    }
-
-    int addr_diff = ptr->addr - prev_addr;
-    uint64_t time_diff = ptr->time - prev_time;
-    prev_addr = ptr->addr;
-    prev_time = ptr->time;
-
-    comp_ptr = varint_encode_signed(addr_diff, comp_ptr);
-    comp_ptr = varint_encode(time_diff, comp_ptr);
-  }
-  trace_addr->compressed_ptr = comp_ptr;
-  trace_addr->prev_addr = prev_addr;
-  trace_addr->prev_time = prev_time;
-}
-
-// This function is called by the generated code to simulate
-// a dcache load access.
-void dcache_load(uint32_t addr)
-{
-  int ii;
-  int ways = dcache.ways;
-  uint32_t cache_addr = addr >> dcache.log_line_size;
-  int row = cache_addr & dcache.addr_mask;
-  //printf("ld %lld 0x%x\n", sim_time, addr);
-  for (ii = 0; ii < ways; ++ii) {
-    if (cache_addr == dcache.table[row][ii]) {
-      dcache.load_hits += 1;
-#if 0
-      printf("dcache load hit  addr: 0x%x cache_addr: 0x%x row %d way %d\n",
-             addr, cache_addr, row, ii);
-#endif
-      // If we are tracing all addresses, then include this in the trace.
-      if (trace_all_addr) {
-        AddrRec *next = trace_load.next;
-        next->addr = addr;
-        next->time = sim_time;
-        next += 1;
-        if (next == &trace_load.buffer[kMaxNumAddrs]) {
-          // Compress the trace
-          compress_trace_addresses(&trace_load);
-          next = &trace_load.buffer[0];
-        }
-        trace_load.next = next;
-      }
-      return;
-    }
-  }
-  // This is a cache miss
-
-#if 0
-  if (ftrace_debug)
-    fprintf(ftrace_debug, "t%lld %08x\n", sim_time, addr);
-#endif
-  if (trace_load.fstream) {
-    AddrRec *next = trace_load.next;
-    next->addr = addr;
-    next->time = sim_time;
-    next += 1;
-    if (next == &trace_load.buffer[kMaxNumAddrs]) {
-      // Compress the trace
-      compress_trace_addresses(&trace_load);
-      next = &trace_load.buffer[0];
-    }
-    trace_load.next = next;
-  }
-
-  dcache.load_misses += 1;
-  sim_time += dcache.load_miss_penalty;
-
-  // Pick a way to replace
-  int way;
-  if (dcache.replace_policy == kPolicyRoundRobin) {
-    // Round robin replacement policy
-    way = dcache.replace[row];
-    int next_way = way + 1;
-    if (next_way == dcache.ways)
-      next_way = 0;
-    dcache.replace[row] = next_way;
-  } else {
-    // Random replacement policy
-    way = dcache.next_way;
-    dcache.next_way += 1;
-    if (dcache.next_way >= dcache.ways)
-      dcache.next_way = 0;
-
-    // Every 13 replacements, add an extra increment to the next way
-    dcache.extra_increment_counter += 1;
-    if (dcache.extra_increment_counter == 13) {
-      dcache.extra_increment_counter = 0;
-      dcache.next_way += 1;
-      if (dcache.next_way >= dcache.ways)
-        dcache.next_way = 0;
-    }
-  }
-#if 0
-  printf("dcache load miss addr: 0x%x cache_addr: 0x%x row %d replacing way %d\n",
-         addr, cache_addr, row, way);
-#endif
-  dcache.table[row][way] = cache_addr;
-}
-
-// This function is called by the generated code to simulate
-// a dcache store access.
-void dcache_store(uint32_t addr, uint32_t val)
-{
-  //printf("st %lld 0x%08x val 0x%x\n", sim_time, addr, val);
-
-  int ii;
-  int ways = dcache.ways;
-  uint32_t cache_addr = addr >> dcache.log_line_size;
-  int row = cache_addr & dcache.addr_mask;
-  for (ii = 0; ii < ways; ++ii) {
-    if (cache_addr == dcache.table[row][ii]) {
-      dcache.store_hits += 1;
-#if 0
-      printf("dcache store hit  addr: 0x%x cache_addr: 0x%x row %d way %d\n",
-             addr, cache_addr, row, ii);
-#endif
-      // If we are tracing all addresses, then include this in the trace.
-      if (trace_all_addr) {
-        AddrRec *next = trace_store.next;
-        next->addr = addr;
-        next->time = sim_time;
-        next += 1;
-        if (next == &trace_store.buffer[kMaxNumAddrs]) {
-          // Compress the trace
-          compress_trace_addresses(&trace_store);
-          next = &trace_store.buffer[0];
-        }
-        trace_store.next = next;
-      }
-      return;
-    }
-  }
-  // This is a cache miss
-#if 0
-  printf("dcache store miss addr: 0x%x cache_addr: 0x%x row %d\n",
-         addr, cache_addr, row);
-#endif
-
-#if 0
-  if (ftrace_debug)
-    fprintf(ftrace_debug, "t%lld %08x\n", sim_time, addr);
-#endif
-
-  if (trace_store.fstream) {
-    AddrRec *next = trace_store.next;
-    next->addr = addr;
-    next->time = sim_time;
-    next += 1;
-    if (next == &trace_store.buffer[kMaxNumAddrs]) {
-      // Compress the trace
-      compress_trace_addresses(&trace_store);
-      next = &trace_store.buffer[0];
-    }
-    trace_store.next = next;
-  }
-
-  dcache.store_misses += 1;
-  sim_time += dcache.store_miss_penalty;
-
-  // Assume no write-allocate for now
-}
-
-// This function is called by the generated code to simulate
-// a dcache load and store (swp) access.
-void dcache_swp(uint32_t addr)
-{
-  dcache_load(addr);
-  dcache_store(addr, 0);
-}
diff --git a/dcache.h b/dcache.h
deleted file mode 100644
index 8857600..0000000
--- a/dcache.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright (C) 2007-2008 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-#ifndef DCACHE_H
-#define DCACHE_H
-
-#include <inttypes.h>
-
-// Define constants for the replacement policies
-#define kPolicyRoundRobin 1
-#define kPolicyRandom 2
-
-extern int dcache_size;
-extern int dcache_ways;
-extern int dcache_line_size;
-extern int dcache_replace_policy;
-extern int dcache_load_miss_penalty;
-extern int dcache_store_miss_penalty;
-
-extern void dcache_init(int size, int ways, int line_size, int replace_policy,
-                        int load_miss_penalty, int store_miss_penalty);
-
-#endif /* DCACHE_H */
diff --git a/def-helper.h b/def-helper.h
index 8a88c5b..8a822c7 100644
--- a/def-helper.h
+++ b/def-helper.h
@@ -81,9 +81,29 @@
 #define dh_is_64bit_ptr (TCG_TARGET_REG_BITS == 64)
 #define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t))
 
+#define dh_is_signed_void 0
+#define dh_is_signed_i32 0
+#define dh_is_signed_s32 1
+#define dh_is_signed_i64 0
+#define dh_is_signed_s64 1
+#define dh_is_signed_f32 0
+#define dh_is_signed_f64 0
+#define dh_is_signed_tl  0
+#define dh_is_signed_int 1
+/* ??? This is highly specific to the host cpu.  There are even special
+   extension instructions that may be required, e.g. ia64's addp4.  But
+   for now we don't support any 64-bit targets with 32-bit pointers.  */
+#define dh_is_signed_ptr 0
+#define dh_is_signed_env dh_is_signed_ptr
+#define dh_is_signed(t) dh_is_signed_##t
+
+#define dh_sizemask(t, n) \
+  sizemask |= dh_is_64bit(t) << (n*2); \
+  sizemask |= dh_is_signed(t) << (n*2+1)
+
 #define dh_arg(t, n) \
   args[n - 1] = glue(GET_TCGV_, dh_alias(t))(glue(arg, n)); \
-  sizemask |= dh_is_64bit(t) << n
+  dh_sizemask(t, n)
 
 #define dh_arg_decl(t, n) glue(TCGv_, dh_alias(t)) glue(arg, n)
 
@@ -138,8 +158,8 @@
 static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) dh_arg_decl(t1, 1)) \
 { \
   TCGArg args[1]; \
-  int sizemask; \
-  sizemask = dh_is_64bit(ret); \
+  int sizemask = 0; \
+  dh_sizemask(ret, 0); \
   dh_arg(t1, 1); \
   tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 1, args); \
 }
@@ -149,8 +169,8 @@
     dh_arg_decl(t2, 2)) \
 { \
   TCGArg args[2]; \
-  int sizemask; \
-  sizemask = dh_is_64bit(ret); \
+  int sizemask = 0; \
+  dh_sizemask(ret, 0); \
   dh_arg(t1, 1); \
   dh_arg(t2, 2); \
   tcg_gen_helperN(HELPER(name), flags, sizemask, dh_retvar(ret), 2, args); \
@@ -161,8 +181,8 @@
     dh_arg_decl(t2, 2), dh_arg_decl(t3, 3)) \
 { \
   TCGArg args[3]; \
-  int sizemask; \
-  sizemask = dh_is_64bit(ret); \
+  int sizemask = 0; \
+  dh_sizemask(ret, 0); \
   dh_arg(t1, 1); \
   dh_arg(t2, 2); \
   dh_arg(t3, 3); \
@@ -174,8 +194,8 @@
     dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), dh_arg_decl(t4, 4)) \
 { \
   TCGArg args[4]; \
-  int sizemask; \
-  sizemask = dh_is_64bit(ret); \
+  int sizemask = 0; \
+  dh_sizemask(ret, 0); \
   dh_arg(t1, 1); \
   dh_arg(t2, 2); \
   dh_arg(t3, 3); \
diff --git a/device_tree.c b/device_tree.c
index 426a631..f5d5eb1 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -20,7 +20,6 @@
 
 #include "config.h"
 #include "qemu-common.h"
-#include "sysemu.h"
 #include "device_tree.h"
 #include "hw/loader.h"
 
@@ -74,7 +73,7 @@
 }
 
 int qemu_devtree_setprop(void *fdt, const char *node_path,
-                         const char *property, uint32_t *val_array, int size)
+                         const char *property, void *val_array, int size)
 {
     int offset;
 
diff --git a/device_tree.h b/device_tree.h
index f05c4e7..cecd98f 100644
--- a/device_tree.h
+++ b/device_tree.h
@@ -17,7 +17,7 @@
 void *load_device_tree(const char *filename_path, int *sizep);
 
 int qemu_devtree_setprop(void *fdt, const char *node_path,
-                         const char *property, uint32_t *val_array, int size);
+                         const char *property, void *val_array, int size);
 int qemu_devtree_setprop_cell(void *fdt, const char *node_path,
                               const char *property, uint32_t val);
 int qemu_devtree_setprop_string(void *fdt, const char *node_path,
diff --git a/dis-asm.h b/dis-asm.h
index 9b9657e..296537a 100644
--- a/dis-asm.h
+++ b/dis-asm.h
@@ -9,11 +9,7 @@
 #ifndef DIS_ASM_H
 #define DIS_ASM_H
 
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
+#include "qemu-common.h"
 
 typedef void *PTR;
 typedef uint64_t bfd_vma;
@@ -237,8 +233,6 @@
     } udata;
 } asymbol;
 
-typedef int (*fprintf_ftype) (FILE*, const char*, ...);
-
 enum dis_insn_type {
   dis_noninsn,			/* Not a valid instruction */
   dis_nonbranch,		/* Not a branch instruction */
@@ -261,7 +255,7 @@
    by hand, or using one of the initialization macros below.  */
 
 typedef struct disassemble_info {
-  fprintf_ftype fprintf_func;
+  fprintf_function fprintf_func;
   FILE *stream;
   PTR application_data;
 
@@ -368,47 +362,48 @@
    target address.  Return number of bytes processed.  */
 typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
 
-extern int print_insn_big_mips		(bfd_vma, disassemble_info*);
-extern int print_insn_little_mips	(bfd_vma, disassemble_info*);
-extern int print_insn_i386		(bfd_vma, disassemble_info*);
-extern int print_insn_m68k		(bfd_vma, disassemble_info*);
-extern int print_insn_z8001		(bfd_vma, disassemble_info*);
-extern int print_insn_z8002		(bfd_vma, disassemble_info*);
-extern int print_insn_h8300		(bfd_vma, disassemble_info*);
-extern int print_insn_h8300h		(bfd_vma, disassemble_info*);
-extern int print_insn_h8300s		(bfd_vma, disassemble_info*);
-extern int print_insn_h8500		(bfd_vma, disassemble_info*);
-extern int print_insn_alpha		(bfd_vma, disassemble_info*);
-extern disassembler_ftype arc_get_disassembler (int, int);
-extern int print_insn_arm		(bfd_vma, disassemble_info*);
-extern int print_insn_sparc		(bfd_vma, disassemble_info*);
-extern int print_insn_big_a29k		(bfd_vma, disassemble_info*);
-extern int print_insn_little_a29k	(bfd_vma, disassemble_info*);
-extern int print_insn_i960		(bfd_vma, disassemble_info*);
-extern int print_insn_sh		(bfd_vma, disassemble_info*);
-extern int print_insn_shl		(bfd_vma, disassemble_info*);
-extern int print_insn_hppa		(bfd_vma, disassemble_info*);
-extern int print_insn_m32r		(bfd_vma, disassemble_info*);
-extern int print_insn_m88k		(bfd_vma, disassemble_info*);
-extern int print_insn_mn10200		(bfd_vma, disassemble_info*);
-extern int print_insn_mn10300		(bfd_vma, disassemble_info*);
-extern int print_insn_ns32k		(bfd_vma, disassemble_info*);
-extern int print_insn_big_powerpc	(bfd_vma, disassemble_info*);
-extern int print_insn_little_powerpc	(bfd_vma, disassemble_info*);
-extern int print_insn_rs6000		(bfd_vma, disassemble_info*);
-extern int print_insn_w65		(bfd_vma, disassemble_info*);
-extern int print_insn_d10v		(bfd_vma, disassemble_info*);
-extern int print_insn_v850		(bfd_vma, disassemble_info*);
-extern int print_insn_tic30		(bfd_vma, disassemble_info*);
-extern int print_insn_ppc		(bfd_vma, disassemble_info*);
-extern int print_insn_s390		(bfd_vma, disassemble_info*);
-extern int print_insn_crisv32           (bfd_vma, disassemble_info*);
-extern int print_insn_microblaze        (bfd_vma, disassemble_info*);
-extern int print_insn_ia64              (bfd_vma, disassemble_info*);
+int print_insn_big_mips         (bfd_vma, disassemble_info*);
+int print_insn_little_mips      (bfd_vma, disassemble_info*);
+int print_insn_i386             (bfd_vma, disassemble_info*);
+int print_insn_m68k             (bfd_vma, disassemble_info*);
+int print_insn_z8001            (bfd_vma, disassemble_info*);
+int print_insn_z8002            (bfd_vma, disassemble_info*);
+int print_insn_h8300            (bfd_vma, disassemble_info*);
+int print_insn_h8300h           (bfd_vma, disassemble_info*);
+int print_insn_h8300s           (bfd_vma, disassemble_info*);
+int print_insn_h8500            (bfd_vma, disassemble_info*);
+int print_insn_alpha            (bfd_vma, disassemble_info*);
+disassembler_ftype arc_get_disassembler (int, int);
+int print_insn_arm              (bfd_vma, disassemble_info*);
+int print_insn_sparc            (bfd_vma, disassemble_info*);
+int print_insn_big_a29k         (bfd_vma, disassemble_info*);
+int print_insn_little_a29k      (bfd_vma, disassemble_info*);
+int print_insn_i960             (bfd_vma, disassemble_info*);
+int print_insn_sh               (bfd_vma, disassemble_info*);
+int print_insn_shl              (bfd_vma, disassemble_info*);
+int print_insn_hppa             (bfd_vma, disassemble_info*);
+int print_insn_m32r             (bfd_vma, disassemble_info*);
+int print_insn_m88k             (bfd_vma, disassemble_info*);
+int print_insn_mn10200          (bfd_vma, disassemble_info*);
+int print_insn_mn10300          (bfd_vma, disassemble_info*);
+int print_insn_ns32k            (bfd_vma, disassemble_info*);
+int print_insn_big_powerpc      (bfd_vma, disassemble_info*);
+int print_insn_little_powerpc   (bfd_vma, disassemble_info*);
+int print_insn_rs6000           (bfd_vma, disassemble_info*);
+int print_insn_w65              (bfd_vma, disassemble_info*);
+int print_insn_d10v             (bfd_vma, disassemble_info*);
+int print_insn_v850             (bfd_vma, disassemble_info*);
+int print_insn_tic30            (bfd_vma, disassemble_info*);
+int print_insn_ppc              (bfd_vma, disassemble_info*);
+int print_insn_s390             (bfd_vma, disassemble_info*);
+int print_insn_crisv32          (bfd_vma, disassemble_info*);
+int print_insn_crisv10          (bfd_vma, disassemble_info*);
+int print_insn_microblaze       (bfd_vma, disassemble_info*);
+int print_insn_ia64             (bfd_vma, disassemble_info*);
 
 #if 0
 /* Fetch the disassembler for a given BFD, if that support is available.  */
-extern disassembler_ftype disassembler	(bfd *);
+disassembler_ftype disassembler(bfd *);
 #endif
 
 
@@ -417,21 +412,20 @@
 
 /* Here is a function which callers may wish to use for read_memory_func.
    It gets bytes from a buffer.  */
-extern int buffer_read_memory
-  (bfd_vma, bfd_byte *, int, struct disassemble_info *);
+int buffer_read_memory(bfd_vma, bfd_byte *, int, struct disassemble_info *);
 
 /* This function goes with buffer_read_memory.
    It prints a message using info->fprintf_func and info->stream.  */
-extern void perror_memory (int, bfd_vma, struct disassemble_info *);
+void perror_memory(int, bfd_vma, struct disassemble_info *);
 
 
 /* Just print the address in hex.  This is included for completeness even
    though both GDB and objdump provide their own (to print symbolic
    addresses).  */
-extern void generic_print_address (bfd_vma, struct disassemble_info *);
+void generic_print_address(bfd_vma, struct disassemble_info *);
 
 /* Always true.  */
-extern int generic_symbol_at_address (bfd_vma, struct disassemble_info *);
+int generic_symbol_at_address(bfd_vma, struct disassemble_info *);
 
 /* Macro to initialize a disassemble_info struct.  This should be called
    by all applications creating such a struct.  */
diff --git a/disas.c b/disas.c
index 475aa86..6007916 100644
--- a/disas.c
+++ b/disas.c
@@ -8,10 +8,6 @@
 #include "exec-all.h"
 #include "disas.h"
 
-#ifdef TARGET_I386
-#include "kvm.h"
-#endif
-
 /* Filled in by elfload.c.  Simplistic, but will do for now. */
 struct syminfo *syminfos = NULL;
 
@@ -37,10 +33,6 @@
                     int length,
                     struct disassemble_info *info)
 {
-#ifdef TARGET_I386
-    if (kvm_enabled())
-        cpu_synchronize_state(cpu_single_env, 0);
-#endif
     cpu_memory_rw_debug(cpu_single_env, memaddr, myaddr, length, 0);
     return 0;
 }
@@ -216,8 +208,16 @@
     disasm_info.mach = bfd_mach_alpha;
     print_insn = print_insn_alpha;
 #elif defined(TARGET_CRIS)
+    if (flags != 32) {
+        disasm_info.mach = bfd_mach_cris_v0_v10;
+        print_insn = print_insn_crisv10;
+    } else {
     disasm_info.mach = bfd_mach_cris_v32;
     print_insn = print_insn_crisv32;
+    }
+#elif defined(TARGET_S390X)
+    disasm_info.mach = bfd_mach_s390_64;
+    print_insn = print_insn_s390;
 #elif defined(TARGET_MICROBLAZE)
     disasm_info.mach = bfd_arch_microblaze;
     print_insn = print_insn_microblaze;
@@ -310,11 +310,6 @@
 #endif
     for (pc = (unsigned long)code; size > 0; pc += count, size -= count) {
 	fprintf(out, "0x%08lx:  ", pc);
-#ifdef __arm__
-        /* since data is included in the code, it is better to
-           display code data too */
-        fprintf(out, "%08x  ", (int)bfd_getl32((const bfd_byte *)pc));
-#endif
 	count = print_insn(pc, &disasm_info);
 	fprintf(out, "\n");
 	if (count < 0)
@@ -350,14 +345,15 @@
                      struct disassemble_info *info)
 {
     if (monitor_disas_is_physical) {
-        cpu_physical_memory_rw(memaddr, myaddr, length, 0);
+        cpu_physical_memory_read(memaddr, myaddr, length);
     } else {
         cpu_memory_rw_debug(monitor_disas_env, memaddr,myaddr, length, 0);
     }
     return 0;
 }
 
-static int monitor_fprintf(FILE *stream, const char *fmt, ...)
+static int GCC_FMT_ATTR(2, 3)
+monitor_fprintf(FILE *stream, const char *fmt, ...)
 {
     va_list ap;
     va_start(ap, fmt);
@@ -421,6 +417,9 @@
 #elif defined(TARGET_SH4)
     disasm_info.mach = bfd_mach_sh4;
     print_insn = print_insn_sh;
+#elif defined(TARGET_S390X)
+    disasm_info.mach = bfd_mach_s390_64;
+    print_insn = print_insn_s390;
 #else
     monitor_printf(mon, "0x" TARGET_FMT_lx
                    ": Asm output not supported on this arch\n", pc);
diff --git a/disas.h b/disas.h
index 6a9332d..f9287f7 100644
--- a/disas.h
+++ b/disas.h
@@ -8,11 +8,8 @@
 void disas(FILE *out, void *code, unsigned long size);
 void target_disas(FILE *out, target_ulong code, target_ulong size, int flags);
 
-/* The usual mess... FIXME: Remove this condition once dyngen-exec.h is gone */
-#ifndef __DYNGEN_EXEC_H__
 void monitor_disas(Monitor *mon, CPUState *env,
                    target_ulong pc, int nb_insn, int is_physical, int flags);
-#endif
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(target_ulong orig_addr);
diff --git a/dyngen-exec.h b/dyngen-exec.h
index 283c286..54fe3d6 100644
--- a/dyngen-exec.h
+++ b/dyngen-exec.h
@@ -19,19 +19,7 @@
 #if !defined(__DYNGEN_EXEC_H__)
 #define __DYNGEN_EXEC_H__
 
-/* prevent Solaris from trying to typedef FILE in gcc's
-   include/floatingpoint.h which will conflict with the
-   definition down below */
-#ifdef __sun__
-#define _FILEDEFED
-#endif
-
-/* NOTE: standard headers should be used with special care at this
-   point because host CPU registers are used as global variables. Some
-   host headers do not allow that. */
-#include <stddef.h>
-#include <stdint.h>
-#include <stdbool.h>
+#include "qemu-common.h"
 
 #ifdef __OpenBSD__
 #include <sys/types.h>
@@ -40,15 +28,6 @@
 /* XXX: This may be wrong for 64-bit ILP32 hosts.  */
 typedef void * host_reg_t;
 
-#ifdef CONFIG_BSD
-typedef struct __sFILE FILE;
-#else
-typedef struct FILE FILE;
-#endif
-extern int fprintf(FILE *, const char *, ...);
-extern int fputs(const char *, FILE *);
-extern int printf(const char *, ...);
-
 #if defined(__i386__)
 #define AREG0 "ebp"
 #define AREG1 "ebx"
diff --git a/elf.h b/elf.h
index 7067c90..ffcac7e 100644
--- a/elf.h
+++ b/elf.h
@@ -104,6 +104,9 @@
 
 #define EM_H8_300H      47      /* Hitachi H8/300H */
 #define EM_H8S          48      /* Hitachi H8S     */
+#define EM_LATTICEMICO32 138    /* LatticeMico32 */
+
+#define EM_UNICORE32    110     /* UniCore32 */
 
 /*
  * This is an interim value that we will use until the committee comes
@@ -1191,6 +1194,25 @@
   Elf64_Word n_type;	/* Content type */
 } Elf64_Nhdr;
 
+
+/* This data structure represents a PT_LOAD segment.  */
+struct elf32_fdpic_loadseg {
+  /* Core address to which the segment is mapped.  */
+  Elf32_Addr addr;
+  /* VMA recorded in the program header.  */
+  Elf32_Addr p_vaddr;
+  /* Size of this segment in memory.  */
+  Elf32_Word p_memsz;
+};
+struct elf32_fdpic_loadmap {
+  /* Protocol version number, must be zero.  */
+  Elf32_Half version;
+  /* Number of segments in this map.  */
+  Elf32_Half nsegs;
+  /* The actual memory map.  */
+  struct elf32_fdpic_loadseg segs[/*nsegs*/];
+};
+
 #ifdef ELF_CLASS
 #if ELF_CLASS == ELFCLASS32
 
diff --git a/envlist.h b/envlist.h
index e76d4a1..b9addcc 100644
--- a/envlist.h
+++ b/envlist.h
@@ -7,13 +7,13 @@
 
 typedef struct envlist envlist_t;
 
-extern	envlist_t *envlist_create(void);
-extern	void envlist_free(envlist_t *);
-extern	int envlist_setenv(envlist_t *, const char *);
-extern	int envlist_unsetenv(envlist_t *, const char *);
-extern	int envlist_parse_set(envlist_t *, const char *);
-extern	int envlist_parse_unset(envlist_t *, const char *);
-extern	char **envlist_to_environ(const envlist_t *, size_t *);
+envlist_t *envlist_create(void);
+void envlist_free(envlist_t *);
+int envlist_setenv(envlist_t *, const char *);
+int envlist_unsetenv(envlist_t *, const char *);
+int envlist_parse_set(envlist_t *, const char *);
+int envlist_parse_unset(envlist_t *, const char *);
+char **envlist_to_environ(const envlist_t *, size_t *);
 
 #ifdef __cplusplus
 }
diff --git a/exec-all.h b/exec-all.h
index d2cadd4..91d1bda 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -25,6 +25,15 @@
 /* allow to see translation results - the slowdown should be negligible, so we leave it */
 #define DEBUG_DISAS
 
+/* Page tracking code uses ram addresses in system mode, and virtual
+   addresses in userspace mode.  Define tb_page_addr_t to be an appropriate
+   type.  */
+#if defined(CONFIG_USER_ONLY)
+typedef abi_ulong tb_page_addr_t;
+#else
+typedef ram_addr_t tb_page_addr_t;
+#endif
+
 /* is_jmp field values */
 #define DISAS_NEXT    0 /* next instruction can be analyzed */
 #define DISAS_JUMP    1 /* only pc was modified dynamically */
@@ -60,19 +69,14 @@
 
 void gen_intermediate_code(CPUState *env, struct TranslationBlock *tb);
 void gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb);
-void gen_pc_load(CPUState *env, struct TranslationBlock *tb,
-                 unsigned long searched_pc, int pc_pos, void *puc);
+void restore_state_to_opc(CPUState *env, struct TranslationBlock *tb, int pc_pos);
 
 unsigned long code_gen_max_block_size(void);
 void cpu_gen_init(void);
 int cpu_gen_code(CPUState *env, struct TranslationBlock *tb,
                  int *gen_code_size_ptr);
 int cpu_restore_state(struct TranslationBlock *tb,
-                      CPUState *env, unsigned long searched_pc,
-                      void *puc);
-int cpu_restore_state_copy(struct TranslationBlock *tb,
-                           CPUState *env, unsigned long searched_pc,
-                           void *puc);
+                      CPUState *env, unsigned long searched_pc);
 void cpu_resume_from_signal(CPUState *env1, void *puc);
 void cpu_io_recompile(CPUState *env, void *retaddr);
 TranslationBlock *tb_gen_code(CPUState *env,
@@ -187,7 +191,7 @@
 
 static inline unsigned int tb_phys_hash_func(unsigned long pc)
 {
-    return pc & (CODE_GEN_PHYS_HASH_SIZE - 1);
+    return (pc >> 2) & (CODE_GEN_PHYS_HASH_SIZE - 1);
 }
 
 #ifdef CONFIG_MEMCHECK
@@ -403,59 +407,8 @@
     }
     p = (void *)(unsigned long)addr
         + env1->tlb_table[mmu_idx][page_index].addend;
-    return qemu_ram_addr_from_host(p);
+    return qemu_ram_addr_from_host_nofail(p);
 }
-
-#if 0
-/* Deterministic execution requires that IO only be performed on the last
-   instruction of a TB so that interrupts take effect immediately.  */
-static inline int can_do_io(CPUState *env)
-{
-    if (!use_icount)
-        return 1;
-
-    /* If not executing code then assume we are ok.  */
-    if (!env->current_tb)
-        return 1;
-
-    return env->can_do_io != 0;
-}
-#endif
-#endif /* 0 */
-
-#ifdef CONFIG_KQEMU
-#define KQEMU_MODIFY_PAGE_MASK (0xff & ~(VGA_DIRTY_FLAG | CODE_DIRTY_FLAG))
-
-#define MSR_QPI_COMMBASE 0xfabe0010
-
-int kqemu_init(CPUState *env);
-int kqemu_cpu_exec(CPUState *env);
-void kqemu_flush_page(CPUState *env, target_ulong addr);
-void kqemu_flush(CPUState *env, int global);
-void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr);
-void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr);
-void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
-                        ram_addr_t phys_offset);
-void kqemu_cpu_interrupt(CPUState *env);
-void kqemu_record_dump(void);
-
-extern uint32_t kqemu_comm_base;
-
-extern ram_addr_t kqemu_phys_ram_size;
-extern uint8_t *kqemu_phys_ram_base;
-
-static inline int kqemu_is_ok(CPUState *env)
-{
-    return(env->kqemu_enabled &&
-           (env->cr[0] & CR0_PE_MASK) &&
-           !(env->hflags & HF_INHIBIT_IRQ_MASK) &&
-           (env->eflags & IF_MASK) &&
-           !(env->eflags & VM_MASK) &&
-           (env->kqemu_enabled == 2 ||
-            ((env->hflags & HF_CPL_MASK) == 3 &&
-             (env->eflags & IOPL_MASK) != IOPL_MASK)));
-}
-
 #endif
 
 typedef void (CPUDebugExcpHandler)(CPUState *env);
diff --git a/exec.c b/exec.c
index 003a4cc..f650a9e 100644
--- a/exec.c
+++ b/exec.c
@@ -115,21 +115,9 @@
 
 #if !defined(CONFIG_USER_ONLY)
 int phys_ram_fd;
-uint8_t *phys_ram_dirty;
 static int in_migration;
 
-typedef struct RAMBlock {
-    uint8_t *host;
-    ram_addr_t offset;
-    ram_addr_t length;
-    struct RAMBlock *next;
-} RAMBlock;
-
-static RAMBlock *ram_blocks;
-/* TODO: When we implement (and use) ram deallocation (e.g. for hotplug)
-   then we can no longer assume contiguous ram offsets, and external uses
-   of this variable will break.  */
-ram_addr_t last_ram_offset;
+RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
 #endif
 
 CPUState *first_cpu;
@@ -226,21 +214,21 @@
     DWORD old_protect;
     VirtualProtect(addr, size,
                    PAGE_EXECUTE_READWRITE, &old_protect);
-    
+
 }
 #else
 static void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
-    
+
     page_size = getpagesize();
     start = (unsigned long)addr;
     start &= ~(page_size - 1);
-    
+
     end = (unsigned long)addr + size;
     end += page_size - 1;
     end &= ~(page_size - 1);
-    
+
     mprotect((void *)start, end - start,
              PROT_READ | PROT_WRITE | PROT_EXEC);
 }
@@ -440,7 +428,7 @@
         code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
     /* The code gen buffer location may have constraints depending on
        the host cpu and OS */
-#if defined(__linux__) 
+#if defined(__linux__)
     {
         int flags;
         void *start = NULL;
@@ -479,7 +467,8 @@
             exit(1);
         }
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__)
     {
         int flags;
         void *addr = NULL;
@@ -492,9 +481,16 @@
         /* Cannot map more than that */
         if (code_gen_buffer_size > (800 * 1024 * 1024))
             code_gen_buffer_size = (800 * 1024 * 1024);
+#elif defined(__sparc_v9__)
+        // Map the buffer below 2G, so we can use direct calls and branches
+        flags |= MAP_FIXED;
+        addr = (void *) 0x60000000UL;
+        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
+            code_gen_buffer_size = (512 * 1024 * 1024);
+        }
 #endif
         code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
+                               PROT_WRITE | PROT_READ | PROT_EXEC,
                                flags, -1, 0);
         if (code_gen_buffer == MAP_FAILED) {
             fprintf(stderr, "Could not allocate dynamic translator buffer\n");
@@ -525,6 +521,11 @@
 #if !defined(CONFIG_USER_ONLY)
     io_mem_init();
 #endif
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+    /* There's no guest base to take into account, so go ahead and
+       initialize the prologue now.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
 }
 
 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
@@ -1005,8 +1006,7 @@
                 restore the CPU state */
 
                 current_tb_modified = 1;
-                cpu_restore_state(current_tb, env,
-                                  env->mem_io_pc, NULL);
+                cpu_restore_state(current_tb, env,  env->mem_io_pc);
                 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                      &current_flags);
             }
@@ -1114,7 +1114,7 @@
                    restore the CPU state */
 
             current_tb_modified = 1;
-            cpu_restore_state(current_tb, env, pc, puc);
+            cpu_restore_state(current_tb, env, pc);
             cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                  &current_flags);
         }
@@ -1565,14 +1565,15 @@
     TranslationBlock *tb;
     static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
 
+    spin_lock(&interrupt_lock);
     tb = env->current_tb;
     /* if the cpu is currently executing code, we must unlink it and
        all the potentially executing TB */
-    if (tb && !testandset(&interrupt_lock)) {
+    if (tb) {
         env->current_tb = NULL;
         tb_reset_jump_recursive(tb);
-        resetlock(&interrupt_lock);
     }
+    spin_unlock(&interrupt_lock);
 }
 
 /* mask must never be zero, except for A20 change call */
@@ -1773,11 +1774,11 @@
     /* Discard jump cache entries for any tb which might potentially
        overlap the flushed page.  */
     i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset (&env->tb_jmp_cache[i], 0, 
+    memset (&env->tb_jmp_cache[i], 0,
             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 
     i = tb_jmp_cache_hash_page(addr);
-    memset (&env->tb_jmp_cache[i], 0, 
+    memset (&env->tb_jmp_cache[i], 0,
             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 }
 
@@ -1861,7 +1862,7 @@
 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
                                     target_ulong vaddr)
 {
-    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] |= CODE_DIRTY_FLAG;
+    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
 }
 
 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
@@ -1882,8 +1883,7 @@
 {
     CPUState *env;
     unsigned long length, start1;
-    int i, mask, len;
-    uint8_t *p;
+    int i;
 
     start &= TARGET_PAGE_MASK;
     end = TARGET_PAGE_ALIGN(end);
@@ -1891,18 +1891,14 @@
     length = end - start;
     if (length == 0)
         return;
-    len = length >> TARGET_PAGE_BITS;
-    mask = ~dirty_flags;
-    p = phys_ram_dirty + (start >> TARGET_PAGE_BITS);
-    for(i = 0; i < len; i++)
-        p[i] &= mask;
+    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
 
     /* we modify the TLB cache so that the dirty bit will be set again
        when accessing the range */
-    start1 = (unsigned long)qemu_get_ram_ptr(start);
+    start1 = (unsigned long)qemu_safe_ram_ptr(start);
     /* Chek that we don't span multiple blocks - this breaks the
        address comparisons below.  */
-    if ((unsigned long)qemu_get_ram_ptr(end - 1) - start1
+    if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
             != (end - 1) - start) {
         abort();
     }
@@ -1949,7 +1945,7 @@
     if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
         p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
             + tlb_entry->addend);
-        ram_addr = qemu_ram_addr_from_host(p);
+        ram_addr = qemu_ram_addr_from_host_nofail(p);
         if (!cpu_physical_memory_is_dirty(ram_addr)) {
             tlb_entry->addr_write |= TLB_NOTDIRTY;
         }
@@ -2369,16 +2365,17 @@
    start_addr and region_offset are rounded down to a page boundary
    before calculating this offset.  This should not be a problem unless
    the low bits of start_addr and region_offset differ.  */
-void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
+void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
                                          ram_addr_t size,
                                          ram_addr_t phys_offset,
-                                         ram_addr_t region_offset)
+                                         ram_addr_t region_offset,
+                                         bool log_dirty)
 {
     target_phys_addr_t addr, end_addr;
     PhysPageDesc *p;
     CPUState *env;
     ram_addr_t orig_size = size;
-    void *subpage;
+    subpage_t *subpage;
 
     if (kvm_enabled())
         kvm_set_phys_mem(start_addr, size, phys_offset);
@@ -2389,7 +2386,9 @@
     region_offset &= TARGET_PAGE_MASK;
     size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
     end_addr = start_addr + (target_phys_addr_t)size;
-    for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {
+
+    addr = start_addr;
+    do {
         p = phys_page_find(addr >> TARGET_PAGE_BITS);
         if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
             ram_addr_t orig_memory = p->phys_offset;
@@ -2398,7 +2397,7 @@
 
             CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
                           need_subpage);
-            if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
+            if (need_subpage) {
                 if (!(orig_memory & IO_MEM_SUBPAGE)) {
                     subpage = subpage_init((addr & TARGET_PAGE_MASK),
                                            &p->phys_offset, orig_memory,
@@ -2430,7 +2429,7 @@
                 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
                               end_addr2, need_subpage);
 
-                if (need_subpage || phys_offset & IO_MEM_SUBWIDTH) {
+                if (need_subpage) {
                     subpage = subpage_init((addr & TARGET_PAGE_MASK),
                                            &p->phys_offset, IO_MEM_UNASSIGNED,
                                            addr & TARGET_PAGE_MASK);
@@ -2441,7 +2440,8 @@
             }
         }
         region_offset += TARGET_PAGE_SIZE;
-    }
+        addr += TARGET_PAGE_SIZE;
+    } while (addr != end_addr);
 
     /* since each CPU stores ram addresses in its TLB cache, we must
        reset the modified entries */
@@ -2474,47 +2474,217 @@
         kvm_uncoalesce_mmio_region(addr, size);
 }
 
-ram_addr_t qemu_ram_alloc(ram_addr_t size)
+static ram_addr_t find_ram_offset(ram_addr_t size)
 {
-    RAMBlock *new_block;
+    RAMBlock *block, *next_block;
+    ram_addr_t offset = 0, mingap = ULONG_MAX;
+
+    if (QLIST_EMPTY(&ram_list.blocks))
+        return 0;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        ram_addr_t end, next = ULONG_MAX;
+
+        end = block->offset + block->length;
+
+        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
+            if (next_block->offset >= end) {
+                next = MIN(next, next_block->offset);
+            }
+        }
+        if (next - end >= size && next - end < mingap) {
+            offset =  end;
+            mingap = next - end;
+        }
+    }
+    return offset;
+}
+
+static ram_addr_t last_ram_offset(void)
+{
+    RAMBlock *block;
+    ram_addr_t last = 0;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next)
+        last = MAX(last, block->offset + block->length);
+
+    return last;
+}
+
+ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
+                                   ram_addr_t size, void *host)
+{
+    RAMBlock *new_block, *block;
 
     size = TARGET_PAGE_ALIGN(size);
-    new_block = qemu_malloc(sizeof(*new_block));
+    new_block = qemu_mallocz(sizeof(*new_block));
 
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
-    /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
-    new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
-                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+#if 0
+    if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
+        char *id = dev->parent_bus->info->get_dev_path(dev);
+        if (id) {
+            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
+            qemu_free(id);
+        }
+    }
+#endif
+    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (!strcmp(block->idstr, new_block->idstr)) {
+            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
+                    new_block->idstr);
+            abort();
+        }
+    }
+
+    if (host) {
+        new_block->host = host;
+        new_block->flags |= RAM_PREALLOC_MASK;
+    } else {
+        if (mem_path) {
+#if 0 && defined (__linux__) && !defined(TARGET_S390X)
+            new_block->host = file_ram_alloc(new_block, size, mem_path);
+            if (!new_block->host) {
+                new_block->host = qemu_vmalloc(size);
+                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
+            }
 #else
-    new_block->host = qemu_vmalloc(size);
+            fprintf(stderr, "-mem-path option unsupported\n");
+            exit(1);
+#endif
+        } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+            /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+            new_block->host = mmap((void*)0x1000000, size,
+                                   PROT_EXEC|PROT_READ|PROT_WRITE,
+                                   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+#else
+            new_block->host = qemu_vmalloc(size);
 #endif
 #ifdef MADV_MERGEABLE
-    madvise(new_block->host, size, MADV_MERGEABLE);
+            madvise(new_block->host, size, MADV_MERGEABLE);
 #endif
-    new_block->offset = last_ram_offset;
+        }
+    }
+
+    new_block->offset = find_ram_offset(size);
     new_block->length = size;
 
-    new_block->next = ram_blocks;
-    ram_blocks = new_block;
+    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
 
-    phys_ram_dirty = qemu_realloc(phys_ram_dirty,
-        (last_ram_offset + size) >> TARGET_PAGE_BITS);
-    memset(phys_ram_dirty + (last_ram_offset >> TARGET_PAGE_BITS),
+    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
+                                       last_ram_offset() >> TARGET_PAGE_BITS);
+    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, size >> TARGET_PAGE_BITS);
 
-    last_ram_offset += size;
-
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
 
     return new_block->offset;
 }
 
+ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
+{
+    return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
+}
+
 void qemu_ram_free(ram_addr_t addr)
 {
-    /* TODO: implement this.  */
+    RAMBlock *block;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (addr == block->offset) {
+            QLIST_REMOVE(block, next);
+            if (block->flags & RAM_PREALLOC_MASK) {
+                ;
+            } else if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+                if (block->fd) {
+                    munmap(block->host, block->length);
+                    close(block->fd);
+                } else {
+                    qemu_vfree(block->host);
+                }
+#else
+                abort();
+#endif
+            } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+                munmap(block->host, block->length);
+#else
+                qemu_vfree(block->host);
+#endif
+            }
+            qemu_free(block);
+            return;
+        }
+    }
+
 }
 
+#ifndef _WIN32
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
+{
+#ifndef CONFIG_ANDROID
+    RAMBlock *block;
+    ram_addr_t offset;
+    int flags;
+    void *area, *vaddr;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        offset = addr - block->offset;
+        if (offset < block->length) {
+            vaddr = block->host + offset;
+            if (block->flags & RAM_PREALLOC_MASK) {
+                ;
+            } else {
+                flags = MAP_FIXED;
+                munmap(vaddr, length);
+                if (mem_path) {
+#if defined(__linux__) && !defined(TARGET_S390X)
+                    if (block->fd) {
+#ifdef MAP_POPULATE
+                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
+                            MAP_PRIVATE;
+#else
+                        flags |= MAP_PRIVATE;
+#endif
+                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
+                                    flags, block->fd, offset);
+                    } else {
+                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
+                                    flags, -1, 0);
+                    }
+#else
+                    abort();
+#endif
+                } else {
+#if defined(TARGET_S390X) && defined(CONFIG_KVM)
+                    flags |= MAP_SHARED | MAP_ANONYMOUS;
+                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
+                                flags, -1, 0);
+#else
+                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
+                                flags, -1, 0);
+#endif
+                }
+                if (area != vaddr) {
+                    fprintf(stderr, "Could not remap addr: %lx@%lx\n",
+                            length, addr);
+                    exit(1);
+                }
+                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
+            }
+            return;
+        }
+    }
+#endif /* !CONFIG_ANDROID */
+}
+#endif /* !_WIN32 */
+
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
    With the exception of the softmmu code in this file, this should
    only be used for local memory (e.g. video ram) that the device owns,
@@ -2525,57 +2695,69 @@
  */
 void *qemu_get_ram_ptr(ram_addr_t addr)
 {
-    RAMBlock *prev;
-    RAMBlock **prevp;
     RAMBlock *block;
 
-    prev = NULL;
-    prevp = &ram_blocks;
-    block = ram_blocks;
-    while (block && (block->offset > addr
-                     || block->offset + block->length <= addr)) {
-        if (prev)
-          prevp = &prev->next;
-        prev = block;
-        block = block->next;
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (addr - block->offset < block->length) {
+            /* Move this entry to to start of the list.  */
+            if (block != QLIST_FIRST(&ram_list.blocks)) {
+                QLIST_REMOVE(block, next);
+                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
     }
-    if (!block) {
+            return block->host + (addr - block->offset);
+        }
+    }
+
         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
         abort();
+
+    return NULL;
     }
-    /* Move this entry to to start of the list.  */
-    if (prev) {
-        prev->next = block->next;
-        block->next = *prevp;
-        *prevp = block;
-    }
+
+/* Return a host pointer to ram allocated with qemu_ram_alloc.
+ * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
+ */
+void *qemu_safe_ram_ptr(ram_addr_t addr)
+{
+    RAMBlock *block;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (addr - block->offset < block->length) {
     return block->host + (addr - block->offset);
 }
+    }
+
+    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+    abort();
+
+    return NULL;
+}
+
+int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+{
+    RAMBlock *block;
+    uint8_t *host = ptr;
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        if (host - block->host < block->length) {
+            *ram_addr = block->offset + (host - block->host);
+            return 0;
+    }
+    }
+    return -1;
+}
 
 /* Some of the softmmu routines need to translate from a host pointer
    (typically a TLB entry) back to a ram offset.  */
-ram_addr_t qemu_ram_addr_from_host(void *ptr)
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
 {
-    RAMBlock *prev;
-    RAMBlock **prevp;
-    RAMBlock *block;
-    uint8_t *host = ptr;
+    ram_addr_t ram_addr;
 
-    prev = NULL;
-    prevp = &ram_blocks;
-    block = ram_blocks;
-    while (block && (block->host > host
-                     || block->host + block->length <= host)) {
-        if (prev)
-          prevp = &prev->next;
-        prev = block;
-        block = block->next;
-    }
-    if (!block) {
+    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
         fprintf(stderr, "Bad ram pointer %p\n", ptr);
         abort();
     }
-    return block->offset + (host - block->host);
+    return ram_addr;
 }
 
 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
@@ -2657,16 +2839,16 @@
                                 uint32_t val)
 {
     int dirty_flags;
-    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
         tb_invalidate_phys_page_fast(ram_addr, 1);
-        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
     stb_p(qemu_get_ram_ptr(ram_addr), val);
     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
@@ -2677,16 +2859,16 @@
                                 uint32_t val)
 {
     int dirty_flags;
-    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
         tb_invalidate_phys_page_fast(ram_addr, 2);
-        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
     stw_p(qemu_get_ram_ptr(ram_addr), val);
     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
@@ -2697,16 +2879,16 @@
                                 uint32_t val)
 {
     int dirty_flags;
-    dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
         tb_invalidate_phys_page_fast(ram_addr, 4);
-        dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
+        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
     }
     stl_p(qemu_get_ram_ptr(ram_addr), val);
     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
+    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
@@ -2754,7 +2936,7 @@
                     cpu_abort(env, "check_watchpoint: could not find TB for "
                               "pc=%p", (void *)env->mem_io_pc);
                 }
-                cpu_restore_state(tb, env, env->mem_io_pc, NULL);
+                cpu_restore_state(tb, env, env->mem_io_pc);
                 tb_phys_invalidate(tb, -1);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     env->exception_index = EXCP_DEBUG;
@@ -3158,8 +3340,8 @@
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                     /* set dirty bit */
-                    phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
-                        (0xff & ~CODE_DIRTY_FLAG);
+                    cpu_physical_memory_set_dirty_flags(
+                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                 }
             }
         } else {
@@ -3355,7 +3537,7 @@
 {
     if (buffer != bounce.buffer) {
         if (is_write) {
-            ram_addr_t addr1 = qemu_ram_addr_from_host(buffer);
+            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
             while (access_len) {
                 unsigned l;
                 l = TARGET_PAGE_SIZE;
@@ -3365,8 +3547,8 @@
                     /* invalidate code */
                     tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                     /* set dirty bit */
-                    phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
-                        (0xff & ~CODE_DIRTY_FLAG);
+                    cpu_physical_memory_set_dirty_flags(
+                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                 }
                 addr1 += l;
                 access_len -= l;
@@ -3377,7 +3559,7 @@
     if (is_write) {
         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
     }
-    qemu_free(bounce.buffer);
+    qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
 }
@@ -3500,8 +3682,8 @@
                 /* invalidate code */
                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                 /* set dirty bit */
-                phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
-                    (0xff & ~CODE_DIRTY_FLAG);
+                cpu_physical_memory_set_dirty_flags(
+                    addr1, (0xff & ~CODE_DIRTY_FLAG));
             }
         }
     }
@@ -3569,8 +3751,8 @@
             /* invalidate code */
             tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
             /* set dirty bit */
-            phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
-                (0xff & ~CODE_DIRTY_FLAG);
+            cpu_physical_memory_set_dirty_flags(addr1,
+                (0xff & ~CODE_DIRTY_FLAG));
         }
     }
 }
@@ -3640,11 +3822,11 @@
 
     tb = tb_find_pc((unsigned long)retaddr);
     if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                   retaddr);
     }
     n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
+    cpu_restore_state(tb, env, (unsigned long)retaddr);
     /* Calculate how many instructions had been executed before the fault
        occurred.  */
     n = n - env->icount_decr.u16.low;
@@ -3690,8 +3872,7 @@
 
 #if !defined(CONFIG_USER_ONLY)
 
-void dump_exec_info(FILE *f,
-                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
 {
     int i, target_code_size, max_target_code_size;
     int direct_jmp_count, direct_jmp2_count, cross_page;
@@ -3718,14 +3899,14 @@
     }
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %ld/%ld\n",
+    cpu_fprintf(f, "gen code size       %td/%ld\n",
                 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
-    cpu_fprintf(f, "TB count            %d/%d\n", 
+    cpu_fprintf(f, "TB count            %d/%d\n",
                 nb_tbs, code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                 nb_tbs ? target_code_size / nb_tbs : 0,
                 max_target_code_size);
-    cpu_fprintf(f, "TB avg host size    %d bytes (expansion ratio: %0.1f)\n",
+    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
     cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
diff --git a/fpu/softfloat-macros.h b/fpu/softfloat-macros.h
index 7838228..e82ce23 100644
--- a/fpu/softfloat-macros.h
+++ b/fpu/softfloat-macros.h
@@ -1,3 +1,8 @@
+/*
+ * QEMU float support macros
+ *
+ * Derived from SoftFloat.
+ */
 
 /*============================================================================
 
@@ -31,6 +36,17 @@
 =============================================================================*/
 
 /*----------------------------------------------------------------------------
+| This macro tests for minimum version of the GNU C compiler.
+*----------------------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
+         ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
+#endif
+
+
+/*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
 | bits are shifted off, they are ``jammed'' into the least significant bit of
 | the result by setting the least significant bit to 1.  The value of `count'
@@ -39,9 +55,9 @@
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/
 
-INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+INLINE void shift32RightJamming( uint32_t a, int16 count, uint32_t *zPtr )
 {
-    bits32 z;
+    uint32_t z;
 
     if ( count == 0 ) {
         z = a;
@@ -65,9 +81,9 @@
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/
 
-INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+INLINE void shift64RightJamming( uint64_t a, int16 count, uint64_t *zPtr )
 {
-    bits64 z;
+    uint64_t z;
 
     if ( count == 0 ) {
         z = a;
@@ -101,9 +117,9 @@
 
 INLINE void
  shift64ExtraRightJamming(
-     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
-    bits64 z0, z1;
+    uint64_t z0, z1;
     int8 negCount = ( - count ) & 63;
 
     if ( count == 0 ) {
@@ -138,9 +154,9 @@
 
 INLINE void
  shift128Right(
-     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
-    bits64 z0, z1;
+    uint64_t z0, z1;
     int8 negCount = ( - count ) & 63;
 
     if ( count == 0 ) {
@@ -173,9 +189,9 @@
 
 INLINE void
  shift128RightJamming(
-     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
-    bits64 z0, z1;
+    uint64_t z0, z1;
     int8 negCount = ( - count ) & 63;
 
     if ( count == 0 ) {
@@ -224,16 +240,16 @@
 
 INLINE void
  shift128ExtraRightJamming(
-     bits64 a0,
-     bits64 a1,
-     bits64 a2,
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
      int16 count,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
  )
 {
-    bits64 z0, z1, z2;
+    uint64_t z0, z1, z2;
     int8 negCount = ( - count ) & 63;
 
     if ( count == 0 ) {
@@ -282,7 +298,7 @@
 
 INLINE void
  shortShift128Left(
-     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, int16 count, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
 
     *z1Ptr = a1<<count;
@@ -301,16 +317,16 @@
 
 INLINE void
  shortShift192Left(
-     bits64 a0,
-     bits64 a1,
-     bits64 a2,
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
      int16 count,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
  )
 {
-    bits64 z0, z1, z2;
+    uint64_t z0, z1, z2;
     int8 negCount;
 
     z2 = a2<<count;
@@ -336,9 +352,9 @@
 
 INLINE void
  add128(
-     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
-    bits64 z1;
+    uint64_t z1;
 
     z1 = a1 + b1;
     *z1Ptr = z1;
@@ -356,18 +372,18 @@
 
 INLINE void
  add192(
-     bits64 a0,
-     bits64 a1,
-     bits64 a2,
-     bits64 b0,
-     bits64 b1,
-     bits64 b2,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t b2,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
  )
 {
-    bits64 z0, z1, z2;
+    uint64_t z0, z1, z2;
     int8 carry0, carry1;
 
     z2 = a2 + b2;
@@ -394,7 +410,7 @@
 
 INLINE void
  sub128(
-     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+     uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
 
     *z1Ptr = a1 - b1;
@@ -412,18 +428,18 @@
 
 INLINE void
  sub192(
-     bits64 a0,
-     bits64 a1,
-     bits64 a2,
-     bits64 b0,
-     bits64 b1,
-     bits64 b2,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t b2,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
  )
 {
-    bits64 z0, z1, z2;
+    uint64_t z0, z1, z2;
     int8 borrow0, borrow1;
 
     z2 = a2 - b2;
@@ -446,21 +462,21 @@
 | `z0Ptr' and `z1Ptr'.
 *----------------------------------------------------------------------------*/
 
-INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
 {
-    bits32 aHigh, aLow, bHigh, bLow;
-    bits64 z0, zMiddleA, zMiddleB, z1;
+    uint32_t aHigh, aLow, bHigh, bLow;
+    uint64_t z0, zMiddleA, zMiddleB, z1;
 
     aLow = a;
     aHigh = a>>32;
     bLow = b;
     bHigh = b>>32;
-    z1 = ( (bits64) aLow ) * bLow;
-    zMiddleA = ( (bits64) aLow ) * bHigh;
-    zMiddleB = ( (bits64) aHigh ) * bLow;
-    z0 = ( (bits64) aHigh ) * bHigh;
+    z1 = ( (uint64_t) aLow ) * bLow;
+    zMiddleA = ( (uint64_t) aLow ) * bHigh;
+    zMiddleB = ( (uint64_t) aHigh ) * bLow;
+    z0 = ( (uint64_t) aHigh ) * bHigh;
     zMiddleA += zMiddleB;
-    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
     zMiddleA <<= 32;
     z1 += zMiddleA;
     z0 += ( z1 < zMiddleA );
@@ -478,15 +494,15 @@
 
 INLINE void
  mul128By64To192(
-     bits64 a0,
-     bits64 a1,
-     bits64 b,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t b,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
  )
 {
-    bits64 z0, z1, z2, more1;
+    uint64_t z0, z1, z2, more1;
 
     mul64To128( a1, b, &z1, &z2 );
     mul64To128( a0, b, &z0, &more1 );
@@ -506,18 +522,18 @@
 
 INLINE void
  mul128To256(
-     bits64 a0,
-     bits64 a1,
-     bits64 b0,
-     bits64 b1,
-     bits64 *z0Ptr,
-     bits64 *z1Ptr,
-     bits64 *z2Ptr,
-     bits64 *z3Ptr
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr,
+     uint64_t *z3Ptr
  )
 {
-    bits64 z0, z1, z2, z3;
-    bits64 more1, more2;
+    uint64_t z0, z1, z2, z3;
+    uint64_t more1, more2;
 
     mul64To128( a1, b1, &z2, &z3 );
     mul64To128( a1, b0, &z1, &more2 );
@@ -543,18 +559,18 @@
 | unsigned integer is returned.
 *----------------------------------------------------------------------------*/
 
-static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
 {
-    bits64 b0, b1;
-    bits64 rem0, rem1, term0, term1;
-    bits64 z;
+    uint64_t b0, b1;
+    uint64_t rem0, rem1, term0, term1;
+    uint64_t z;
 
     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
     b0 = b>>32;
     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
     mul64To128( b, z, &term0, &term1 );
     sub128( a0, a1, term0, term1, &rem0, &rem1 );
-    while ( ( (sbits64) rem0 ) < 0 ) {
+    while ( ( (int64_t) rem0 ) < 0 ) {
         z -= LIT64( 0x100000000 );
         b1 = b<<32;
         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
@@ -575,18 +591,18 @@
 | value.
 *----------------------------------------------------------------------------*/
 
-static bits32 estimateSqrt32( int16 aExp, bits32 a )
+static uint32_t estimateSqrt32( int16 aExp, uint32_t a )
 {
-    static const bits16 sqrtOddAdjustments[] = {
+    static const uint16_t sqrtOddAdjustments[] = {
         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
     };
-    static const bits16 sqrtEvenAdjustments[] = {
+    static const uint16_t sqrtEvenAdjustments[] = {
         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
     };
     int8 index;
-    bits32 z;
+    uint32_t z;
 
     index = ( a>>27 ) & 15;
     if ( aExp & 1 ) {
@@ -598,9 +614,9 @@
         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
         z = a / z + z;
         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
-        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
     }
-    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
 
 }
 
@@ -609,8 +625,15 @@
 | `a'.  If `a' is zero, 32 is returned.
 *----------------------------------------------------------------------------*/
 
-static int8 countLeadingZeros32( bits32 a )
+static int8 countLeadingZeros32( uint32_t a )
 {
+#if SOFTFLOAT_GNUC_PREREQ(3, 4)
+    if (a) {
+        return __builtin_clz(a);
+    } else {
+        return 32;
+    }
+#else
     static const int8 countLeadingZerosHigh[] = {
         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -642,7 +665,7 @@
     }
     shiftCount += countLeadingZerosHigh[ a>>24 ];
     return shiftCount;
-
+#endif
 }
 
 /*----------------------------------------------------------------------------
@@ -650,12 +673,19 @@
 | `a'.  If `a' is zero, 64 is returned.
 *----------------------------------------------------------------------------*/
 
-static int8 countLeadingZeros64( bits64 a )
+static int8 countLeadingZeros64( uint64_t a )
 {
+#if SOFTFLOAT_GNUC_PREREQ(3, 4)
+    if (a) {
+        return __builtin_clzll(a);
+    } else {
+        return 64;
+    }
+#else
     int8 shiftCount;
 
     shiftCount = 0;
-    if ( a < ( (bits64) 1 )<<32 ) {
+    if ( a < ( (uint64_t) 1 )<<32 ) {
         shiftCount += 32;
     }
     else {
@@ -663,7 +693,7 @@
     }
     shiftCount += countLeadingZeros32( a );
     return shiftCount;
-
+#endif
 }
 
 /*----------------------------------------------------------------------------
@@ -672,7 +702,7 @@
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/
 
-INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {
 
     return ( a0 == b0 ) && ( a1 == b1 );
@@ -685,7 +715,7 @@
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/
 
-INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {
 
     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
@@ -698,7 +728,7 @@
 | returns 0.
 *----------------------------------------------------------------------------*/
 
-INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {
 
     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
@@ -711,7 +741,7 @@
 | Otherwise, returns 0.
 *----------------------------------------------------------------------------*/
 
-INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
 {
 
     return ( a0 != b0 ) || ( a1 != b1 );
diff --git a/fpu/softfloat-native.c b/fpu/softfloat-native.c
index 049c830..8848651 100644
--- a/fpu/softfloat-native.c
+++ b/fpu/softfloat-native.c
@@ -254,7 +254,7 @@
     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
 }
 
-int float32_is_nan( float32 a1 )
+int float32_is_quiet_nan( float32 a1 )
 {
     float32u u;
     uint64_t a;
@@ -263,6 +263,15 @@
     return ( 0xFF800000 < ( a<<1 ) );
 }
 
+int float32_is_any_nan( float32 a1 )
+{
+    float32u u;
+    uint32_t a;
+    u.f = a1;
+    a = u.i;
+    return (a & ~(1 << 31)) > 0x7f800000U;
+}
+
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE double-precision conversion routines.
 *----------------------------------------------------------------------------*/
@@ -411,17 +420,27 @@
 
 }
 
-int float64_is_nan( float64 a1 )
+int float64_is_quiet_nan( float64 a1 )
 {
     float64u u;
     uint64_t a;
     u.f = a1;
     a = u.i;
 
-    return ( LIT64( 0xFFF0000000000000 ) < (bits64) ( a<<1 ) );
+    return ( LIT64( 0xFFF0000000000000 ) < (uint64_t) ( a<<1 ) );
 
 }
 
+int float64_is_any_nan( float64 a1 )
+{
+    float64u u;
+    uint64_t a;
+    u.f = a1;
+    a = u.i;
+
+    return (a & ~(1ULL << 63)) > LIT64 (0x7FF0000000000000 );
+}
+
 #ifdef FLOATX80
 
 /*----------------------------------------------------------------------------
@@ -500,15 +519,22 @@
     aLow = u.i.low & ~ LIT64( 0x4000000000000000 );
     return
            ( ( u.i.high & 0x7FFF ) == 0x7FFF )
-        && (bits64) ( aLow<<1 )
+        && (uint64_t) ( aLow<<1 )
         && ( u.i.low == aLow );
 }
 
-int floatx80_is_nan( floatx80 a1 )
+int floatx80_is_quiet_nan( floatx80 a1 )
 {
     floatx80u u;
     u.f = a1;
-    return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( u.i.low<<1 );
+    return ( ( u.i.high & 0x7FFF ) == 0x7FFF ) && (uint64_t) ( u.i.low<<1 );
+}
+
+int floatx80_is_any_nan( floatx80 a1 )
+{
+    floatx80u u;
+    u.f = a1;
+    return ((u.i.high & 0x7FFF) == 0x7FFF) && ( u.i.low<<1 );
 }
 
 #endif
diff --git a/fpu/softfloat-native.h b/fpu/softfloat-native.h
index 6da0bcb..6afb74a 100644
--- a/fpu/softfloat-native.h
+++ b/fpu/softfloat-native.h
@@ -172,6 +172,15 @@
 #endif
 
 /*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion constants.
+*----------------------------------------------------------------------------*/
+#define float32_zero (0.0)
+#define float32_one (1.0)
+#define float32_ln2 (0.6931471)
+#define float32_pi (3.1415926)
+#define float32_half (0.5)
+
+/*----------------------------------------------------------------------------
 | Software IEC/IEEE single-precision conversion routines.
 *----------------------------------------------------------------------------*/
 int float32_to_int32( float32  STATUS_PARAM);
@@ -210,7 +219,7 @@
 }
 float32 float32_rem( float32, float32  STATUS_PARAM);
 float32 float32_sqrt( float32  STATUS_PARAM);
-INLINE int float32_eq( float32 a, float32 b STATUS_PARAM)
+INLINE int float32_eq_quiet( float32 a, float32 b STATUS_PARAM)
 {
     return a == b;
 }
@@ -222,7 +231,7 @@
 {
     return a < b;
 }
-INLINE int float32_eq_signaling( float32 a, float32 b STATUS_PARAM)
+INLINE int float32_eq( float32 a, float32 b STATUS_PARAM)
 {
     return a <= b && a >= b;
 }
@@ -237,12 +246,16 @@
 INLINE int float32_unordered( float32 a, float32 b STATUS_PARAM)
 {
     return isunordered(a, b);
-
+}
+INLINE int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM)
+{
+    return isunordered(a, b);
 }
 int float32_compare( float32, float32 STATUS_PARAM );
 int float32_compare_quiet( float32, float32 STATUS_PARAM );
 int float32_is_signaling_nan( float32 );
-int float32_is_nan( float32 );
+int float32_is_quiet_nan( float32 );
+int float32_is_any_nan( float32 );
 
 INLINE float32 float32_abs(float32 a)
 {
@@ -271,12 +284,21 @@
     return fpclassify(a) == FP_ZERO;
 }
 
-INLINE float32 float32_scalbn(float32 a, int n)
+INLINE float32 float32_scalbn(float32 a, int n STATUS_PARAM)
 {
     return scalbnf(a, n);
 }
 
 /*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion constants.
+*----------------------------------------------------------------------------*/
+#define float64_zero (0.0)
+#define float64_one (1.0)
+#define float64_ln2 (0.693147180559945)
+#define float64_pi (3.141592653589793)
+#define float64_half (0.5)
+
+/*----------------------------------------------------------------------------
 | Software IEC/IEEE double-precision conversion routines.
 *----------------------------------------------------------------------------*/
 int float64_to_int32( float64 STATUS_PARAM );
@@ -318,7 +340,7 @@
 }
 float64 float64_rem( float64, float64 STATUS_PARAM );
 float64 float64_sqrt( float64 STATUS_PARAM );
-INLINE int float64_eq( float64 a, float64 b STATUS_PARAM)
+INLINE int float64_eq_quiet( float64 a, float64 b STATUS_PARAM)
 {
     return a == b;
 }
@@ -330,7 +352,7 @@
 {
     return a < b;
 }
-INLINE int float64_eq_signaling( float64 a, float64 b STATUS_PARAM)
+INLINE int float64_eq( float64 a, float64 b STATUS_PARAM)
 {
     return a <= b && a >= b;
 }
@@ -346,12 +368,16 @@
 INLINE int float64_unordered( float64 a, float64 b STATUS_PARAM)
 {
     return isunordered(a, b);
-
+}
+INLINE int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM)
+{
+    return isunordered(a, b);
 }
 int float64_compare( float64, float64 STATUS_PARAM );
 int float64_compare_quiet( float64, float64 STATUS_PARAM );
 int float64_is_signaling_nan( float64 );
-int float64_is_nan( float64 );
+int float64_is_any_nan( float64 );
+int float64_is_quiet_nan( float64 );
 
 INLINE float64 float64_abs(float64 a)
 {
@@ -380,7 +406,7 @@
     return fpclassify(a) == FP_ZERO;
 }
 
-INLINE float64 float64_scalbn(float64 a, int n)
+INLINE float64 float64_scalbn(float64 a, int n STATUS_PARAM)
 {
     return scalbn(a, n);
 }
@@ -388,6 +414,15 @@
 #ifdef FLOATX80
 
 /*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion constants.
+*----------------------------------------------------------------------------*/
+#define floatx80_zero (0.0L)
+#define floatx80_one (1.0L)
+#define floatx80_ln2 (0.69314718055994530943L)
+#define floatx80_pi (3.14159265358979323851L)
+#define floatx80_half (0.5L)
+
+/*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision conversion routines.
 *----------------------------------------------------------------------------*/
 int floatx80_to_int32( floatx80 STATUS_PARAM );
@@ -422,7 +457,7 @@
 }
 floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
 floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
-INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM)
+INLINE int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM)
 {
     return a == b;
 }
@@ -434,7 +469,7 @@
 {
     return a < b;
 }
-INLINE int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM)
+INLINE int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM)
 {
     return a <= b && a >= b;
 }
@@ -450,12 +485,16 @@
 INLINE int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM)
 {
     return isunordered(a, b);
-
+}
+INLINE int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM)
+{
+    return isunordered(a, b);
 }
 int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
 int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
 int floatx80_is_signaling_nan( floatx80 );
-int floatx80_is_nan( floatx80 );
+int floatx80_is_quiet_nan( floatx80 );
+int floatx80_is_any_nan( floatx80 );
 
 INLINE floatx80 floatx80_abs(floatx80 a)
 {
@@ -484,7 +523,7 @@
     return fpclassify(a) == FP_ZERO;
 }
 
-INLINE floatx80 floatx80_scalbn(floatx80 a, int n)
+INLINE floatx80 floatx80_scalbn(floatx80 a, int n STATUS_PARAM)
 {
     return scalbnl(a, n);
 }
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 8e6aceb..9d68aae 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -1,3 +1,8 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
 
 /*============================================================================
 
@@ -30,12 +35,6 @@
 
 =============================================================================*/
 
-#if defined(TARGET_MIPS) || defined(TARGET_HPPA)
-#define SNAN_BIT_IS_ONE		1
-#else
-#define SNAN_BIT_IS_ONE		0
-#endif
-
 /*----------------------------------------------------------------------------
 | Raises the exceptions specified by `flags'.  Floating-point traps can be
 | defined here if desired.  It is currently not possible for such a trap
@@ -53,36 +52,111 @@
 *----------------------------------------------------------------------------*/
 typedef struct {
     flag sign;
-    bits64 high, low;
+    uint64_t high, low;
 } commonNaNT;
 
 /*----------------------------------------------------------------------------
-| The pattern for a default generated single-precision NaN.
+| Returns 1 if the half-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
 *----------------------------------------------------------------------------*/
-#if defined(TARGET_SPARC)
-#define float32_default_nan make_float32(0x7FFFFFFF)
-#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
-#define float32_default_nan make_float32(0x7FC00000)
-#elif defined(TARGET_HPPA)
-#define float32_default_nan make_float32(0x7FA00000)
-#elif SNAN_BIT_IS_ONE
-#define float32_default_nan make_float32(0x7FBFFFFF)
+
+int float16_is_quiet_nan(float16 a_)
+{
+    uint16_t a = float16_val(a_);
+#if SNAN_BIT_IS_ONE
+    return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
 #else
-#define float32_default_nan make_float32(0xFFC00000)
+    return ((a & ~0x8000) >= 0x7c80);
 #endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float16_is_signaling_nan(float16 a_)
+{
+    uint16_t a = float16_val(a_);
+#if SNAN_BIT_IS_ONE
+    return ((a & ~0x8000) >= 0x7c80);
+#else
+    return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the half-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+float16 float16_maybe_silence_nan(float16 a_)
+{
+    if (float16_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+#  if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+        return float16_default_nan;
+#  else
+#    error Rules for silencing a signaling NaN are target-specific
+#  endif
+#else
+        uint16_t a = float16_val(a_);
+        a |= (1 << 9);
+        return make_float16(a);
+#endif
+    }
+    return a_;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the half-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float16ToCommonNaN( float16 a STATUS_PARAM )
+{
+    commonNaNT z;
+
+    if ( float16_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
+    z.sign = float16_val(a) >> 15;
+    z.low = 0;
+    z.high = ((uint64_t) float16_val(a))<<54;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the half-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
+{
+    uint16_t mantissa = a.high>>54;
+
+    if (STATUS(default_nan_mode)) {
+        return float16_default_nan;
+    }
+
+    if (mantissa) {
+        return make_float16(((((uint16_t) a.sign) << 15)
+                             | (0x1F << 10) | mantissa));
+    } else {
+        return float16_default_nan;
+    }
+}
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
 *----------------------------------------------------------------------------*/
 
-int float32_is_nan( float32 a_ )
+int float32_is_quiet_nan( float32 a_ )
 {
     uint32_t a = float32_val(a_);
 #if SNAN_BIT_IS_ONE
     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
 #else
-    return ( 0xFF800000 <= (bits32) ( a<<1 ) );
+    return ( 0xFF800000 <= (uint32_t) ( a<<1 ) );
 #endif
 }
 
@@ -95,13 +169,36 @@
 {
     uint32_t a = float32_val(a_);
 #if SNAN_BIT_IS_ONE
-    return ( 0xFF800000 <= (bits32) ( a<<1 ) );
+    return ( 0xFF800000 <= (uint32_t) ( a<<1 ) );
 #else
     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
 #endif
 }
 
 /*----------------------------------------------------------------------------
+| Returns a quiet NaN if the single-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float32 float32_maybe_silence_nan( float32 a_ )
+{
+    if (float32_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+#  if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+        return float32_default_nan;
+#  else
+#    error Rules for silencing a signaling NaN are target-specific
+#  endif
+#else
+        uint32_t a = float32_val(a_);
+        a |= (1 << 22);
+        return make_float32(a);
+#endif
+    }
+    return a_;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the result of converting the single-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 | exception is raised.
@@ -114,7 +211,7 @@
     if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
     z.sign = float32_val(a)>>31;
     z.low = 0;
-    z.high = ( (bits64) float32_val(a) )<<41;
+    z.high = ( (uint64_t) float32_val(a) )<<41;
     return z;
 }
 
@@ -123,17 +220,134 @@
 | precision floating-point format.
 *----------------------------------------------------------------------------*/
 
-static float32 commonNaNToFloat32( commonNaNT a )
+static float32 commonNaNToFloat32( commonNaNT a STATUS_PARAM)
 {
-    bits32 mantissa = a.high>>41;
+    uint32_t mantissa = a.high>>41;
+
+    if ( STATUS(default_nan_mode) ) {
+        return float32_default_nan;
+    }
+
     if ( mantissa )
         return make_float32(
-            ( ( (bits32) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) );
+            ( ( (uint32_t) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) );
     else
         return float32_default_nan;
 }
 
 /*----------------------------------------------------------------------------
+| Select which NaN to propagate for a two-input operation.
+| IEEE754 doesn't specify all the details of this, so the
+| algorithm is target-specific.
+| The routine is passed various bits of information about the
+| two NaNs and should return 0 to select NaN a and 1 for NaN b.
+| Note that signalling NaNs are always squashed to quiet NaNs
+| by the caller, by calling floatXX_maybe_silence_nan() before
+| returning them.
+|
+| aIsLargerSignificand is only valid if both a and b are NaNs
+| of some kind, and is true if a has the larger significand,
+| or if both a and b have the same significand but a is
+| positive but b is negative. It is only needed for the x87
+| tie-break rule.
+*----------------------------------------------------------------------------*/
+
+#if defined(TARGET_ARM)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+                    flag aIsLargerSignificand)
+{
+    /* ARM mandated NaN propagation rules: take the first of:
+     *  1. A if it is signaling
+     *  2. B if it is signaling
+     *  3. A (quiet)
+     *  4. B (quiet)
+     * A signaling NaN is always quietened before returning it.
+     */
+    if (aIsSNaN) {
+        return 0;
+    } else if (bIsSNaN) {
+        return 1;
+    } else if (aIsQNaN) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+#elif defined(TARGET_MIPS)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+                    flag aIsLargerSignificand)
+{
+    /* According to MIPS specifications, if one of the two operands is
+     * a sNaN, a new qNaN has to be generated. This is done in
+     * floatXX_maybe_silence_nan(). For qNaN inputs the specifications
+     * says: "When possible, this QNaN result is one of the operand QNaN
+     * values." In practice it seems that most implementations choose
+     * the first operand if both operands are qNaN. In short this gives
+     * the following rules:
+     *  1. A if it is signaling
+     *  2. B if it is signaling
+     *  3. A (quiet)
+     *  4. B (quiet)
+     * A signaling NaN is always silenced before returning it.
+     */
+    if (aIsSNaN) {
+        return 0;
+    } else if (bIsSNaN) {
+        return 1;
+    } else if (aIsQNaN) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+#elif defined(TARGET_PPC)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+                   flag aIsLargerSignificand)
+{
+    /* PowerPC propagation rules:
+     *  1. A if it sNaN or qNaN
+     *  2. B if it sNaN or qNaN
+     * A signaling NaN is always silenced before returning it.
+     */
+    if (aIsSNaN || aIsQNaN) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+#else
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+                    flag aIsLargerSignificand)
+{
+    /* This implements x87 NaN propagation rules:
+     * SNaN + QNaN => return the QNaN
+     * two SNaNs => return the one with the larger significand, silenced
+     * two QNaNs => return the one with the larger significand
+     * SNaN and a non-NaN => return the SNaN, silenced
+     * QNaN and a non-NaN => return the QNaN
+     *
+     * If we get down to comparing significands and they are the same,
+     * return the NaN with the positive sign bit (if any).
+     */
+    if (aIsSNaN) {
+        if (bIsSNaN) {
+            return aIsLargerSignificand ? 0 : 1;
+        }
+        return bIsQNaN ? 1 : 0;
+    }
+    else if (aIsQNaN) {
+        if (bIsSNaN || !bIsQNaN)
+            return 0;
+        else {
+            return aIsLargerSignificand ? 0 : 1;
+        }
+    } else {
+        return 1;
+    }
+}
+#endif
+
+/*----------------------------------------------------------------------------
 | Takes two single-precision floating-point values `a' and `b', one of which
 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 | signaling NaN, the invalid exception is raised.
@@ -141,78 +355,52 @@
 
 static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM)
 {
-    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
-    bits32 av, bv, res;
+    flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+    flag aIsLargerSignificand;
+    uint32_t av, bv;
+
+    aIsQuietNaN = float32_is_quiet_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsQuietNaN = float32_is_quiet_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    av = float32_val(a);
+    bv = float32_val(b);
+
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
 
     if ( STATUS(default_nan_mode) )
         return float32_default_nan;
 
-    aIsNaN = float32_is_nan( a );
-    aIsSignalingNaN = float32_is_signaling_nan( a );
-    bIsNaN = float32_is_nan( b );
-    bIsSignalingNaN = float32_is_signaling_nan( b );
-    av = float32_val(a);
-    bv = float32_val(b);
-#if SNAN_BIT_IS_ONE
-    av &= ~0x00400000;
-    bv &= ~0x00400000;
-#else
-    av |= 0x00400000;
-    bv |= 0x00400000;
-#endif
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
-    if ( aIsSignalingNaN ) {
-        if ( bIsSignalingNaN ) goto returnLargerSignificand;
-        res = bIsNaN ? bv : av;
+    if ((uint32_t)(av<<1) < (uint32_t)(bv<<1)) {
+        aIsLargerSignificand = 0;
+    } else if ((uint32_t)(bv<<1) < (uint32_t)(av<<1)) {
+        aIsLargerSignificand = 1;
+    } else {
+        aIsLargerSignificand = (av < bv) ? 1 : 0;
     }
-    else if ( aIsNaN ) {
-        if ( bIsSignalingNaN || ! bIsNaN )
-            res = av;
-        else {
- returnLargerSignificand:
-            if ( (bits32) ( av<<1 ) < (bits32) ( bv<<1 ) )
-                res = bv;
-            else if ( (bits32) ( bv<<1 ) < (bits32) ( av<<1 ) )
-                res = av;
-            else
-                res = ( av < bv ) ? av : bv;
-        }
-    }
-    else {
-        res = bv;
-    }
-    return make_float32(res);
-}
 
-/*----------------------------------------------------------------------------
-| The pattern for a default generated double-precision NaN.
-*----------------------------------------------------------------------------*/
-#if defined(TARGET_SPARC)
-#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF ))
-#elif defined(TARGET_POWERPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
-#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 ))
-#elif defined(TARGET_HPPA)
-#define float64_default_nan make_float64(LIT64( 0x7FF4000000000000 ))
-#elif SNAN_BIT_IS_ONE
-#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF ))
-#else
-#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 ))
-#endif
+    if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+                aIsLargerSignificand)) {
+        return float32_maybe_silence_nan(b);
+    } else {
+        return float32_maybe_silence_nan(a);
+    }
+}
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the double-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
 *----------------------------------------------------------------------------*/
 
-int float64_is_nan( float64 a_ )
+int float64_is_quiet_nan( float64 a_ )
 {
-    bits64 a = float64_val(a_);
+    uint64_t a = float64_val(a_);
 #if SNAN_BIT_IS_ONE
     return
            ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
         && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
 #else
-    return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) );
+    return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) );
 #endif
 }
 
@@ -223,9 +411,9 @@
 
 int float64_is_signaling_nan( float64 a_ )
 {
-    bits64 a = float64_val(a_);
+    uint64_t a = float64_val(a_);
 #if SNAN_BIT_IS_ONE
-    return ( LIT64( 0xFFF0000000000000 ) <= (bits64) ( a<<1 ) );
+    return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) );
 #else
     return
            ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
@@ -234,6 +422,29 @@
 }
 
 /*----------------------------------------------------------------------------
+| Returns a quiet NaN if the double-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float64 float64_maybe_silence_nan( float64 a_ )
+{
+    if (float64_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+#  if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+        return float64_default_nan;
+#  else
+#    error Rules for silencing a signaling NaN are target-specific
+#  endif
+#else
+        uint64_t a = float64_val(a_);
+        a |= LIT64( 0x0008000000000000 );
+        return make_float64(a);
+#endif
+    }
+    return a_;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the result of converting the double-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 | exception is raised.
@@ -255,13 +466,17 @@
 | precision floating-point format.
 *----------------------------------------------------------------------------*/
 
-static float64 commonNaNToFloat64( commonNaNT a )
+static float64 commonNaNToFloat64( commonNaNT a STATUS_PARAM)
 {
-    bits64 mantissa = a.high>>12;
+    uint64_t mantissa = a.high>>12;
+
+    if ( STATUS(default_nan_mode) ) {
+        return float64_default_nan;
+    }
 
     if ( mantissa )
         return make_float64(
-              ( ( (bits64) a.sign )<<63 )
+              ( ( (uint64_t) a.sign )<<63 )
             | LIT64( 0x7FF0000000000000 )
             | ( a.high>>12 ));
     else
@@ -276,105 +491,108 @@
 
 static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM)
 {
-    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
-    bits64 av, bv, res;
+    flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+    flag aIsLargerSignificand;
+    uint64_t av, bv;
+
+    aIsQuietNaN = float64_is_quiet_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsQuietNaN = float64_is_quiet_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    av = float64_val(a);
+    bv = float64_val(b);
+
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
 
     if ( STATUS(default_nan_mode) )
         return float64_default_nan;
 
-    aIsNaN = float64_is_nan( a );
-    aIsSignalingNaN = float64_is_signaling_nan( a );
-    bIsNaN = float64_is_nan( b );
-    bIsSignalingNaN = float64_is_signaling_nan( b );
-    av = float64_val(a);
-    bv = float64_val(b);
-#if SNAN_BIT_IS_ONE
-    av &= ~LIT64( 0x0008000000000000 );
-    bv &= ~LIT64( 0x0008000000000000 );
-#else
-    av |= LIT64( 0x0008000000000000 );
-    bv |= LIT64( 0x0008000000000000 );
-#endif
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
-    if ( aIsSignalingNaN ) {
-        if ( bIsSignalingNaN ) goto returnLargerSignificand;
-        res = bIsNaN ? bv : av;
+    if ((uint64_t)(av<<1) < (uint64_t)(bv<<1)) {
+        aIsLargerSignificand = 0;
+    } else if ((uint64_t)(bv<<1) < (uint64_t)(av<<1)) {
+        aIsLargerSignificand = 1;
+    } else {
+        aIsLargerSignificand = (av < bv) ? 1 : 0;
     }
-    else if ( aIsNaN ) {
-        if ( bIsSignalingNaN || ! bIsNaN )
-            res = av;
-        else {
- returnLargerSignificand:
-            if ( (bits64) ( av<<1 ) < (bits64) ( bv<<1 ) )
-                res = bv;
-            else if ( (bits64) ( bv<<1 ) < (bits64) ( av<<1 ) )
-                res = av;
-            else
-                res = ( av < bv ) ? av : bv;
-        }
+
+    if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+                aIsLargerSignificand)) {
+        return float64_maybe_silence_nan(b);
+    } else {
+        return float64_maybe_silence_nan(a);
     }
-    else {
-        res = bv;
-    }
-    return make_float64(res);
 }
 
 #ifdef FLOATX80
 
 /*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN.  The
-| `high' and `low' values hold the most- and least-significant bits,
-| respectively.
-*----------------------------------------------------------------------------*/
-#if SNAN_BIT_IS_ONE
-#define floatx80_default_nan_high 0x7FFF
-#define floatx80_default_nan_low  LIT64( 0xBFFFFFFFFFFFFFFF )
-#else
-#define floatx80_default_nan_high 0xFFFF
-#define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
-#endif
-
-/*----------------------------------------------------------------------------
 | Returns 1 if the extended double-precision floating-point value `a' is a
-| quiet NaN; otherwise returns 0.
+| quiet NaN; otherwise returns 0. This slightly differs from the same
+| function for other types as floatx80 has an explicit bit.
 *----------------------------------------------------------------------------*/
 
-int floatx80_is_nan( floatx80 a )
+int floatx80_is_quiet_nan( floatx80 a )
 {
 #if SNAN_BIT_IS_ONE
-    bits64 aLow;
+    uint64_t aLow;
 
     aLow = a.low & ~ LIT64( 0x4000000000000000 );
     return
            ( ( a.high & 0x7FFF ) == 0x7FFF )
-        && (bits64) ( aLow<<1 )
+        && (uint64_t) ( aLow<<1 )
         && ( a.low == aLow );
 #else
-    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+    return ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 )));
 #endif
 }
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the extended double-precision floating-point value `a' is a
-| signaling NaN; otherwise returns 0.
+| signaling NaN; otherwise returns 0. This slightly differs from the same
+| function for other types as floatx80 has an explicit bit.
 *----------------------------------------------------------------------------*/
 
 int floatx80_is_signaling_nan( floatx80 a )
 {
 #if SNAN_BIT_IS_ONE
-    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+    return ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 )));
 #else
-    bits64 aLow;
+    uint64_t aLow;
 
     aLow = a.low & ~ LIT64( 0x4000000000000000 );
     return
            ( ( a.high & 0x7FFF ) == 0x7FFF )
-        && (bits64) ( aLow<<1 )
+        && (uint64_t) ( aLow<<1 )
         && ( a.low == aLow );
 #endif
 }
 
 /*----------------------------------------------------------------------------
+| Returns a quiet NaN if the extended double-precision floating point value
+| `a' is a signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_maybe_silence_nan( floatx80 a )
+{
+    if (floatx80_is_signaling_nan(a)) {
+#if SNAN_BIT_IS_ONE
+#  if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+        a.low = floatx80_default_nan_low;
+        a.high = floatx80_default_nan_high;
+#  else
+#    error Rules for silencing a signaling NaN are target-specific
+#  endif
+#else
+        a.low |= LIT64( 0xC000000000000000 );
+        return a;
+#endif
+    }
+    return a;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the result of converting the extended double-precision floating-
 | point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
 | invalid exception is raised.
@@ -385,9 +603,15 @@
     commonNaNT z;
 
     if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
-    z.sign = a.high>>15;
-    z.low = 0;
-    z.high = a.low;
+    if ( a.low >> 63 ) {
+        z.sign = a.high >> 15;
+        z.low = 0;
+        z.high = a.low << 1;
+    } else {
+        z.sign = floatx80_default_nan_high >> 15;
+        z.low = 0;
+        z.high = floatx80_default_nan_low << 1;
+    }
     return z;
 }
 
@@ -396,15 +620,24 @@
 | double-precision floating-point format.
 *----------------------------------------------------------------------------*/
 
-static floatx80 commonNaNToFloatx80( commonNaNT a )
+static floatx80 commonNaNToFloatx80( commonNaNT a STATUS_PARAM)
 {
     floatx80 z;
 
-    if (a.high)
-        z.low = a.high;
-    else
+    if ( STATUS(default_nan_mode) ) {
         z.low = floatx80_default_nan_low;
-    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+
+    if (a.high >> 1) {
+        z.low = LIT64( 0x8000000000000000 ) | a.high >> 1;
+        z.high = ( ( (uint16_t) a.sign )<<15 ) | 0x7FFF;
+    } else {
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+    }
+
     return z;
 }
 
@@ -416,7 +649,15 @@
 
 static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
 {
-    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+    flag aIsLargerSignificand;
+
+    aIsQuietNaN = floatx80_is_quiet_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsQuietNaN = floatx80_is_quiet_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
 
     if ( STATUS(default_nan_mode) ) {
         a.low = floatx80_default_nan_low;
@@ -424,31 +665,19 @@
         return a;
     }
 
-    aIsNaN = floatx80_is_nan( a );
-    aIsSignalingNaN = floatx80_is_signaling_nan( a );
-    bIsNaN = floatx80_is_nan( b );
-    bIsSignalingNaN = floatx80_is_signaling_nan( b );
-#if SNAN_BIT_IS_ONE
-    a.low &= ~LIT64( 0xC000000000000000 );
-    b.low &= ~LIT64( 0xC000000000000000 );
-#else
-    a.low |= LIT64( 0xC000000000000000 );
-    b.low |= LIT64( 0xC000000000000000 );
-#endif
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
-    if ( aIsSignalingNaN ) {
-        if ( bIsSignalingNaN ) goto returnLargerSignificand;
-        return bIsNaN ? b : a;
+    if (a.low < b.low) {
+        aIsLargerSignificand = 0;
+    } else if (b.low < a.low) {
+        aIsLargerSignificand = 1;
+    } else {
+        aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
     }
-    else if ( aIsNaN ) {
-        if ( bIsSignalingNaN || ! bIsNaN ) return a;
- returnLargerSignificand:
-        if ( a.low < b.low ) return b;
-        if ( b.low < a.low ) return a;
-        return ( a.high < b.high ) ? a : b;
-    }
-    else {
-        return b;
+
+    if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+                aIsLargerSignificand)) {
+        return floatx80_maybe_silence_nan(b);
+    } else {
+        return floatx80_maybe_silence_nan(a);
     }
 }
 
@@ -457,23 +686,11 @@
 #ifdef FLOAT128
 
 /*----------------------------------------------------------------------------
-| The pattern for a default generated quadruple-precision NaN.  The `high' and
-| `low' values hold the most- and least-significant bits, respectively.
-*----------------------------------------------------------------------------*/
-#if SNAN_BIT_IS_ONE
-#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
-#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
-#else
-#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
-#define float128_default_nan_low  LIT64( 0x0000000000000000 )
-#endif
-
-/*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
 *----------------------------------------------------------------------------*/
 
-int float128_is_nan( float128 a )
+int float128_is_quiet_nan( float128 a )
 {
 #if SNAN_BIT_IS_ONE
     return
@@ -481,7 +698,7 @@
         && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
 #else
     return
-           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+           ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) )
         && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
 #endif
 }
@@ -495,7 +712,7 @@
 {
 #if SNAN_BIT_IS_ONE
     return
-           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+           ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) )
         && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
 #else
     return
@@ -505,6 +722,29 @@
 }
 
 /*----------------------------------------------------------------------------
+| Returns a quiet NaN if the quadruple-precision floating point value `a' is
+| a signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float128 float128_maybe_silence_nan( float128 a )
+{
+    if (float128_is_signaling_nan(a)) {
+#if SNAN_BIT_IS_ONE
+#  if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+        a.low = float128_default_nan_low;
+        a.high = float128_default_nan_high;
+#  else
+#    error Rules for silencing a signaling NaN are target-specific
+#  endif
+#else
+        a.high |= LIT64( 0x0000800000000000 );
+        return a;
+#endif
+    }
+    return a;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the result of converting the quadruple-precision floating-point NaN
 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 | exception is raised.
@@ -525,12 +765,18 @@
 | precision floating-point format.
 *----------------------------------------------------------------------------*/
 
-static float128 commonNaNToFloat128( commonNaNT a )
+static float128 commonNaNToFloat128( commonNaNT a STATUS_PARAM)
 {
     float128 z;
 
+    if ( STATUS(default_nan_mode) ) {
+        z.low = float128_default_nan_low;
+        z.high = float128_default_nan_high;
+        return z;
+    }
+
     shift128Right( a.high, a.low, 16, &z.high, &z.low );
-    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 );
+    z.high |= ( ( (uint64_t) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 );
     return z;
 }
 
@@ -542,7 +788,15 @@
 
 static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM)
 {
-    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+    flag aIsLargerSignificand;
+
+    aIsQuietNaN = float128_is_quiet_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsQuietNaN = float128_is_quiet_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
 
     if ( STATUS(default_nan_mode) ) {
         a.low = float128_default_nan_low;
@@ -550,31 +804,19 @@
         return a;
     }
 
-    aIsNaN = float128_is_nan( a );
-    aIsSignalingNaN = float128_is_signaling_nan( a );
-    bIsNaN = float128_is_nan( b );
-    bIsSignalingNaN = float128_is_signaling_nan( b );
-#if SNAN_BIT_IS_ONE
-    a.high &= ~LIT64( 0x0000800000000000 );
-    b.high &= ~LIT64( 0x0000800000000000 );
-#else
-    a.high |= LIT64( 0x0000800000000000 );
-    b.high |= LIT64( 0x0000800000000000 );
-#endif
-    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
-    if ( aIsSignalingNaN ) {
-        if ( bIsSignalingNaN ) goto returnLargerSignificand;
-        return bIsNaN ? b : a;
+    if (lt128(a.high<<1, a.low, b.high<<1, b.low)) {
+        aIsLargerSignificand = 0;
+    } else if (lt128(b.high<<1, b.low, a.high<<1, a.low)) {
+        aIsLargerSignificand = 1;
+    } else {
+        aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
     }
-    else if ( aIsNaN ) {
-        if ( bIsSignalingNaN || ! bIsNaN ) return a;
- returnLargerSignificand:
-        if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
-        if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
-        return ( a.high < b.high ) ? a : b;
-    }
-    else {
-        return b;
+
+    if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+                aIsLargerSignificand)) {
+        return float128_maybe_silence_nan(b);
+    } else {
+        return float128_maybe_silence_nan(a);
     }
 }
 
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 0b82797..baba1dc 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1,3 +1,8 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
 
 /*============================================================================
 
@@ -30,8 +35,6 @@
 
 =============================================================================*/
 
-/* FIXME: Flush-To-Zero only effects results.  Denormal inputs should also
-   be flushed to zero.  */
 #include "softfloat.h"
 
 /*----------------------------------------------------------------------------
@@ -69,6 +72,33 @@
 #endif
 
 /*----------------------------------------------------------------------------
+| Returns the fraction bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint32_t extractFloat16Frac(float16 a)
+{
+    return float16_val(a) & 0x3ff;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat16Exp(float16 a)
+{
+    return (float16_val(a) >> 10) & 0x1f;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat16Sign(float16 a)
+{
+    return float16_val(a)>>15;
+}
+
+/*----------------------------------------------------------------------------
 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
 | and 7, and returns the properly rounded 32-bit integer corresponding to the
 | input.  If `zSign' is 1, the input is negated before being converted to an
@@ -79,7 +109,7 @@
 | positive or negative integer is returned.
 *----------------------------------------------------------------------------*/
 
-static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
+static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
 {
     int8 roundingMode;
     flag roundNearestEven;
@@ -110,7 +140,7 @@
     if ( zSign ) z = - z;
     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
         float_raise( float_flag_invalid STATUS_VAR);
-        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+        return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
     }
     if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
     return z;
@@ -129,7 +159,7 @@
 | returned.
 *----------------------------------------------------------------------------*/
 
-static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
+static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
 {
     int8 roundingMode;
     flag roundNearestEven, increment;
@@ -137,7 +167,7 @@
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    increment = ( (sbits64) absZ1 < 0 );
+    increment = ( (int64_t) absZ1 < 0 );
     if ( ! roundNearestEven ) {
         if ( roundingMode == float_round_to_zero ) {
             increment = 0;
@@ -154,7 +184,7 @@
     if ( increment ) {
         ++absZ0;
         if ( absZ0 == 0 ) goto overflow;
-        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
     }
     z = absZ0;
     if ( zSign ) z = - z;
@@ -162,7 +192,7 @@
  overflow:
         float_raise( float_flag_invalid STATUS_VAR);
         return
-              zSign ? (sbits64) LIT64( 0x8000000000000000 )
+              zSign ? (int64_t) LIT64( 0x8000000000000000 )
             : LIT64( 0x7FFFFFFFFFFFFFFF );
     }
     if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
@@ -174,7 +204,7 @@
 | Returns the fraction bits of the single-precision floating-point value `a'.
 *----------------------------------------------------------------------------*/
 
-INLINE bits32 extractFloat32Frac( float32 a )
+INLINE uint32_t extractFloat32Frac( float32 a )
 {
 
     return float32_val(a) & 0x007FFFFF;
@@ -204,6 +234,21 @@
 }
 
 /*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
+{
+    if (STATUS(flush_inputs_to_zero)) {
+        if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
+            float_raise(float_flag_input_denormal STATUS_VAR);
+            return make_float32(float32_val(a) & 0x80000000);
+        }
+    }
+    return a;
+}
+
+/*----------------------------------------------------------------------------
 | Normalizes the subnormal single-precision floating-point value represented
 | by the denormalized significand `aSig'.  The normalized exponent and
 | significand are stored at the locations pointed to by `zExpPtr' and
@@ -211,7 +256,7 @@
 *----------------------------------------------------------------------------*/
 
 static void
- normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+ normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
 {
     int8 shiftCount;
 
@@ -232,11 +277,11 @@
 | significand.
 *----------------------------------------------------------------------------*/
 
-INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
 {
 
     return make_float32(
-          ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
+          ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
 
 }
 
@@ -262,7 +307,7 @@
 | Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
 {
     int8 roundingMode;
     flag roundNearestEven;
@@ -287,10 +332,10 @@
         }
     }
     roundBits = zSig & 0x7F;
-    if ( 0xFD <= (bits16) zExp ) {
+    if ( 0xFD <= (uint16_t) zExp ) {
         if (    ( 0xFD < zExp )
              || (    ( zExp == 0xFD )
-                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
            ) {
             float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
             return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
@@ -325,7 +370,7 @@
 *----------------------------------------------------------------------------*/
 
 static float32
- normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
 {
     int8 shiftCount;
 
@@ -338,7 +383,7 @@
 | Returns the fraction bits of the double-precision floating-point value `a'.
 *----------------------------------------------------------------------------*/
 
-INLINE bits64 extractFloat64Frac( float64 a )
+INLINE uint64_t extractFloat64Frac( float64 a )
 {
 
     return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
@@ -368,6 +413,21 @@
 }
 
 /*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
+{
+    if (STATUS(flush_inputs_to_zero)) {
+        if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
+            float_raise(float_flag_input_denormal STATUS_VAR);
+            return make_float64(float64_val(a) & (1ULL << 63));
+        }
+    }
+    return a;
+}
+
+/*----------------------------------------------------------------------------
 | Normalizes the subnormal double-precision floating-point value represented
 | by the denormalized significand `aSig'.  The normalized exponent and
 | significand are stored at the locations pointed to by `zExpPtr' and
@@ -375,7 +435,7 @@
 *----------------------------------------------------------------------------*/
 
 static void
- normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+ normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
 {
     int8 shiftCount;
 
@@ -396,11 +456,11 @@
 | significand.
 *----------------------------------------------------------------------------*/
 
-INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
 {
 
     return make_float64(
-        ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
+        ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
 
 }
 
@@ -426,7 +486,7 @@
 | Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
 {
     int8 roundingMode;
     flag roundNearestEven;
@@ -451,10 +511,10 @@
         }
     }
     roundBits = zSig & 0x3FF;
-    if ( 0x7FD <= (bits16) zExp ) {
+    if ( 0x7FD <= (uint16_t) zExp ) {
         if (    ( 0x7FD < zExp )
              || (    ( zExp == 0x7FD )
-                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
            ) {
             float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
             return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
@@ -489,7 +549,7 @@
 *----------------------------------------------------------------------------*/
 
 static float64
- normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
 {
     int8 shiftCount;
 
@@ -505,7 +565,7 @@
 | value `a'.
 *----------------------------------------------------------------------------*/
 
-INLINE bits64 extractFloatx80Frac( floatx80 a )
+INLINE uint64_t extractFloatx80Frac( floatx80 a )
 {
 
     return a.low;
@@ -544,7 +604,7 @@
 *----------------------------------------------------------------------------*/
 
 static void
- normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+ normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
 {
     int8 shiftCount;
 
@@ -559,12 +619,12 @@
 | extended double-precision floating-point value, returning the result.
 *----------------------------------------------------------------------------*/
 
-INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
 {
     floatx80 z;
 
     z.low = zSig;
-    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
     return z;
 
 }
@@ -595,7 +655,7 @@
 
 static floatx80
  roundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
  STATUS_PARAM)
 {
     int8 roundingMode;
@@ -632,7 +692,7 @@
         }
     }
     roundBits = zSig0 & roundMask;
-    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
         if (    ( 0x7FFE < zExp )
              || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
            ) {
@@ -650,7 +710,7 @@
             if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
             if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
             zSig0 += roundIncrement;
-            if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            if ( (int64_t) zSig0 < 0 ) zExp = 1;
             roundIncrement = roundMask + 1;
             if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                 roundMask |= roundIncrement;
@@ -673,7 +733,7 @@
     if ( zSig0 == 0 ) zExp = 0;
     return packFloatx80( zSign, zExp, zSig0 );
  precision80:
-    increment = ( (sbits64) zSig1 < 0 );
+    increment = ( (int64_t) zSig1 < 0 );
     if ( ! roundNearestEven ) {
         if ( roundingMode == float_round_to_zero ) {
             increment = 0;
@@ -687,7 +747,7 @@
             }
         }
     }
-    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
         if (    ( 0x7FFE < zExp )
              || (    ( zExp == 0x7FFE )
                   && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
@@ -716,7 +776,7 @@
             if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
             if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
             if ( roundNearestEven ) {
-                increment = ( (sbits64) zSig1 < 0 );
+                increment = ( (int64_t) zSig1 < 0 );
             }
             else {
                 if ( zSign ) {
@@ -729,8 +789,8 @@
             if ( increment ) {
                 ++zSig0;
                 zSig0 &=
-                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
-                if ( (sbits64) zSig0 < 0 ) zExp = 1;
+                    ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+                if ( (int64_t) zSig0 < 0 ) zExp = 1;
             }
             return packFloatx80( zSign, zExp, zSig0 );
         }
@@ -743,7 +803,7 @@
             zSig0 = LIT64( 0x8000000000000000 );
         }
         else {
-            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+            zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
         }
     }
     else {
@@ -764,7 +824,7 @@
 
 static floatx80
  normalizeRoundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
  STATUS_PARAM)
 {
     int8 shiftCount;
@@ -791,7 +851,7 @@
 | floating-point value `a'.
 *----------------------------------------------------------------------------*/
 
-INLINE bits64 extractFloat128Frac1( float128 a )
+INLINE uint64_t extractFloat128Frac1( float128 a )
 {
 
     return a.low;
@@ -803,7 +863,7 @@
 | floating-point value `a'.
 *----------------------------------------------------------------------------*/
 
-INLINE bits64 extractFloat128Frac0( float128 a )
+INLINE uint64_t extractFloat128Frac0( float128 a )
 {
 
     return a.high & LIT64( 0x0000FFFFFFFFFFFF );
@@ -845,11 +905,11 @@
 
 static void
  normalizeFloat128Subnormal(
-     bits64 aSig0,
-     bits64 aSig1,
+     uint64_t aSig0,
+     uint64_t aSig1,
      int32 *zExpPtr,
-     bits64 *zSig0Ptr,
-     bits64 *zSig1Ptr
+     uint64_t *zSig0Ptr,
+     uint64_t *zSig1Ptr
  )
 {
     int8 shiftCount;
@@ -888,12 +948,12 @@
 *----------------------------------------------------------------------------*/
 
 INLINE float128
- packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+ packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
 {
     float128 z;
 
     z.low = zSig1;
-    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+    z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
     return z;
 
 }
@@ -921,14 +981,14 @@
 
 static float128
  roundAndPackFloat128(
-     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
+     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
 {
     int8 roundingMode;
     flag roundNearestEven, increment, isTiny;
 
     roundingMode = STATUS(float_rounding_mode);
     roundNearestEven = ( roundingMode == float_round_nearest_even );
-    increment = ( (sbits64) zSig2 < 0 );
+    increment = ( (int64_t) zSig2 < 0 );
     if ( ! roundNearestEven ) {
         if ( roundingMode == float_round_to_zero ) {
             increment = 0;
@@ -942,7 +1002,7 @@
             }
         }
     }
-    if ( 0x7FFD <= (bits32) zExp ) {
+    if ( 0x7FFD <= (uint32_t) zExp ) {
         if (    ( 0x7FFD < zExp )
              || (    ( zExp == 0x7FFD )
                   && eq128(
@@ -986,7 +1046,7 @@
             zExp = 0;
             if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
             if ( roundNearestEven ) {
-                increment = ( (sbits64) zSig2 < 0 );
+                increment = ( (int64_t) zSig2 < 0 );
             }
             else {
                 if ( zSign ) {
@@ -1022,10 +1082,10 @@
 
 static float128
  normalizeRoundAndPackFloat128(
-     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
+     flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
 {
     int8 shiftCount;
-    bits64 zSig2;
+    uint64_t zSig2;
 
     if ( zSig0 == 0 ) {
         zSig0 = zSig1;
@@ -1059,7 +1119,7 @@
     flag zSign;
 
     if ( a == 0 ) return float32_zero;
-    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+    if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
     zSign = ( a < 0 );
     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
 
@@ -1076,7 +1136,7 @@
     flag zSign;
     uint32 absA;
     int8 shiftCount;
-    bits64 zSig;
+    uint64_t zSig;
 
     if ( a == 0 ) return float64_zero;
     zSign = ( a < 0 );
@@ -1101,7 +1161,7 @@
     flag zSign;
     uint32 absA;
     int8 shiftCount;
-    bits64 zSig;
+    uint64_t zSig;
 
     if ( a == 0 ) return packFloatx80( 0, 0, 0 );
     zSign = ( a < 0 );
@@ -1127,7 +1187,7 @@
     flag zSign;
     uint32 absA;
     int8 shiftCount;
-    bits64 zSig0;
+    uint64_t zSig0;
 
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
@@ -1204,7 +1264,7 @@
     flag zSign;
 
     if ( a == 0 ) return float64_zero;
-    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+    if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
         return packFloat64( 1, 0x43E, 0 );
     }
     zSign = ( a < 0 );
@@ -1258,7 +1318,7 @@
     uint64 absA;
     int8 shiftCount;
     int32 zExp;
-    bits64 zSig0, zSig1;
+    uint64_t zSig0, zSig1;
 
     if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
     zSign = ( a < 0 );
@@ -1295,9 +1355,10 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits32 aSig;
-    bits64 aSig64;
+    uint32_t aSig;
+    uint64_t aSig64;
 
+    a = float32_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
@@ -1325,8 +1386,9 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits32 aSig;
+    uint32_t aSig;
     int32 z;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1337,7 +1399,7 @@
             float_raise( float_flag_invalid STATUS_VAR);
             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
         }
-        return (sbits32) 0x80000000;
+        return (int32_t) 0x80000000;
     }
     else if ( aExp <= 0x7E ) {
         if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
@@ -1345,7 +1407,7 @@
     }
     aSig = ( aSig | 0x00800000 )<<8;
     z = aSig>>( - shiftCount );
-    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+    if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
     }
     if ( aSign ) z = - z;
@@ -1355,6 +1417,55 @@
 
 /*----------------------------------------------------------------------------
 | Returns the result of converting the single-precision floating-point value
+| `a' to the 16-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    uint32_t aSig;
+    int32 z;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    shiftCount = aExp - 0x8E;
+    if ( 0 <= shiftCount ) {
+        if ( float32_val(a) != 0xC7000000 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+                return 0x7FFF;
+            }
+        }
+        return (int32_t) 0xffff8000;
+    }
+    else if ( aExp <= 0x7E ) {
+        if ( aExp | aSig ) {
+            STATUS(float_exception_flags) |= float_flag_inexact;
+        }
+        return 0;
+    }
+    shiftCount -= 0x10;
+    aSig = ( aSig | 0x00800000 )<<8;
+    z = aSig>>( - shiftCount );
+    if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    if ( aSign ) {
+        z = - z;
+    }
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
 | `a' to the 64-bit two's complement integer format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
 | Arithmetic---which means in particular that the conversion is rounded
@@ -1367,8 +1478,9 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits32 aSig;
-    bits64 aSig64, aSigExtra;
+    uint32_t aSig;
+    uint64_t aSig64, aSigExtra;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1379,7 +1491,7 @@
         if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
             return LIT64( 0x7FFFFFFFFFFFFFFF );
         }
-        return (sbits64) LIT64( 0x8000000000000000 );
+        return (int64_t) LIT64( 0x8000000000000000 );
     }
     if ( aExp ) aSig |= 0x00800000;
     aSig64 = aSig;
@@ -1403,9 +1515,10 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits32 aSig;
-    bits64 aSig64;
+    uint32_t aSig;
+    uint64_t aSig64;
     int64 z;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1418,7 +1531,7 @@
                 return LIT64( 0x7FFFFFFFFFFFFFFF );
             }
         }
-        return (sbits64) LIT64( 0x8000000000000000 );
+        return (int64_t) LIT64( 0x8000000000000000 );
     }
     else if ( aExp <= 0x7E ) {
         if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
@@ -1427,7 +1540,7 @@
     aSig64 = aSig | 0x00800000;
     aSig64 <<= 40;
     z = aSig64>>( - shiftCount );
-    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+    if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
     }
     if ( aSign ) z = - z;
@@ -1446,13 +1559,14 @@
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
+    uint32_t aSig;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
     if ( aExp == 0xFF ) {
-        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
+        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloat64( aSign, 0x7FF, 0 );
     }
     if ( aExp == 0 ) {
@@ -1460,7 +1574,7 @@
         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
         --aExp;
     }
-    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+    return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
 
 }
 
@@ -1477,13 +1591,14 @@
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
+    uint32_t aSig;
 
+    a = float32_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
     if ( aExp == 0xFF ) {
-        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
+        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
     if ( aExp == 0 ) {
@@ -1491,7 +1606,7 @@
         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
     }
     aSig |= 0x00800000;
-    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
 
 }
 
@@ -1510,13 +1625,14 @@
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
+    uint32_t aSig;
 
+    a = float32_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
     if ( aExp == 0xFF ) {
-        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
+        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloat128( aSign, 0x7FFF, 0, 0 );
     }
     if ( aExp == 0 ) {
@@ -1524,7 +1640,7 @@
         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
         --aExp;
     }
-    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+    return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
 
 }
 
@@ -1541,9 +1657,10 @@
 {
     flag aSign;
     int16 aExp;
-    bits32 lastBitMask, roundBitsMask;
+    uint32_t lastBitMask, roundBitsMask;
     int8 roundingMode;
-    bits32 z;
+    uint32_t z;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aExp = extractFloat32Exp( a );
     if ( 0x96 <= aExp ) {
@@ -1553,7 +1670,7 @@
         return a;
     }
     if ( aExp <= 0x7E ) {
-        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
+        if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
         STATUS(float_exception_flags) |= float_flag_inexact;
         aSign = extractFloat32Sign( a );
         switch ( STATUS(float_rounding_mode) ) {
@@ -1600,7 +1717,7 @@
 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
 {
     int16 aExp, bExp, zExp;
-    bits32 aSig, bSig, zSig;
+    uint32_t aSig, bSig, zSig;
     int16 expDiff;
 
     aSig = extractFloat32Frac( a );
@@ -1654,7 +1771,7 @@
     aSig |= 0x20000000;
     zSig = ( aSig + bSig )<<1;
     --zExp;
-    if ( (sbits32) zSig < 0 ) {
+    if ( (int32_t) zSig < 0 ) {
         zSig = aSig + bSig;
         ++zExp;
     }
@@ -1674,7 +1791,7 @@
 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
 {
     int16 aExp, bExp, zExp;
-    bits32 aSig, bSig, zSig;
+    uint32_t aSig, bSig, zSig;
     int16 expDiff;
 
     aSig = extractFloat32Frac( a );
@@ -1747,6 +1864,8 @@
 float32 float32_add( float32 a, float32 b STATUS_PARAM )
 {
     flag aSign, bSign;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     aSign = extractFloat32Sign( a );
     bSign = extractFloat32Sign( b );
@@ -1768,6 +1887,8 @@
 float32 float32_sub( float32 a, float32 b STATUS_PARAM )
 {
     flag aSign, bSign;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     aSign = extractFloat32Sign( a );
     bSign = extractFloat32Sign( b );
@@ -1790,9 +1911,12 @@
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
-    bits32 aSig, bSig;
-    bits64 zSig64;
-    bits32 zSig;
+    uint32_t aSig, bSig;
+    uint64_t zSig64;
+    uint32_t zSig;
+
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1830,9 +1954,9 @@
     zExp = aExp + bExp - 0x7F;
     aSig = ( aSig | 0x00800000 )<<7;
     bSig = ( bSig | 0x00800000 )<<8;
-    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+    shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
     zSig = zSig64;
-    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+    if ( 0 <= (int32_t) ( zSig<<1 ) ) {
         zSig <<= 1;
         --zExp;
     }
@@ -1850,7 +1974,9 @@
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
-    bits32 aSig, bSig, zSig;
+    uint32_t aSig, bSig, zSig;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1894,9 +2020,9 @@
         aSig >>= 1;
         ++zExp;
     }
-    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+    zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
     if ( ( zSig & 0x3F ) == 0 ) {
-        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+        zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
     }
     return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
 
@@ -1912,11 +2038,13 @@
 {
     flag aSign, zSign;
     int16 aExp, bExp, expDiff;
-    bits32 aSig, bSig;
-    bits32 q;
-    bits64 aSig64, bSig64, q64;
-    bits32 alternateASig;
-    sbits32 sigMean;
+    uint32_t aSig, bSig;
+    uint32_t q;
+    uint64_t aSig64, bSig64, q64;
+    uint32_t alternateASig;
+    int32_t sigMean;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -1958,7 +2086,7 @@
         q = ( bSig <= aSig );
         if ( q ) aSig -= bSig;
         if ( 0 < expDiff ) {
-            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
             q >>= 32 - expDiff;
             bSig >>= 2;
             aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
@@ -1970,8 +2098,8 @@
     }
     else {
         if ( bSig <= aSig ) aSig -= bSig;
-        aSig64 = ( (bits64) aSig )<<40;
-        bSig64 = ( (bits64) bSig )<<40;
+        aSig64 = ( (uint64_t) aSig )<<40;
+        bSig64 = ( (uint64_t) bSig )<<40;
         expDiff -= 64;
         while ( 0 < expDiff ) {
             q64 = estimateDiv128To64( aSig64, 0, bSig64 );
@@ -1990,12 +2118,12 @@
         alternateASig = aSig;
         ++q;
         aSig -= bSig;
-    } while ( 0 <= (sbits32) aSig );
+    } while ( 0 <= (int32_t) aSig );
     sigMean = aSig + alternateASig;
     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
         aSig = alternateASig;
     }
-    zSign = ( (sbits32) aSig < 0 );
+    zSign = ( (int32_t) aSig < 0 );
     if ( zSign ) aSig = - aSig;
     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
 
@@ -2011,8 +2139,9 @@
 {
     flag aSign;
     int16 aExp, zExp;
-    bits32 aSig, zSig;
-    bits64 rem, term;
+    uint32_t aSig, zSig;
+    uint64_t rem, term;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -2041,11 +2170,11 @@
             goto roundAndPack;
         }
         aSig >>= aExp & 1;
-        term = ( (bits64) zSig ) * zSig;
-        rem = ( ( (bits64) aSig )<<32 ) - term;
-        while ( (sbits64) rem < 0 ) {
+        term = ( (uint64_t) zSig ) * zSig;
+        rem = ( ( (uint64_t) aSig )<<32 ) - term;
+        while ( (int64_t) rem < 0 ) {
             --zSig;
-            rem += ( ( (bits64) zSig )<<1 ) | 1;
+            rem += ( ( (uint64_t) zSig )<<1 ) | 1;
         }
         zSig |= ( rem != 0 );
     }
@@ -2075,30 +2204,31 @@
 
 static const float64 float32_exp2_coefficients[15] =
 {
-    make_float64( 0x3ff0000000000000ll ), /*  1 */
-    make_float64( 0x3fe0000000000000ll ), /*  2 */
-    make_float64( 0x3fc5555555555555ll ), /*  3 */
-    make_float64( 0x3fa5555555555555ll ), /*  4 */
-    make_float64( 0x3f81111111111111ll ), /*  5 */
-    make_float64( 0x3f56c16c16c16c17ll ), /*  6 */
-    make_float64( 0x3f2a01a01a01a01all ), /*  7 */
-    make_float64( 0x3efa01a01a01a01all ), /*  8 */
-    make_float64( 0x3ec71de3a556c734ll ), /*  9 */
-    make_float64( 0x3e927e4fb7789f5cll ), /* 10 */
-    make_float64( 0x3e5ae64567f544e4ll ), /* 11 */
-    make_float64( 0x3e21eed8eff8d898ll ), /* 12 */
-    make_float64( 0x3de6124613a86d09ll ), /* 13 */
-    make_float64( 0x3da93974a8c07c9dll ), /* 14 */
-    make_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
+    const_float64( 0x3ff0000000000000ll ), /*  1 */
+    const_float64( 0x3fe0000000000000ll ), /*  2 */
+    const_float64( 0x3fc5555555555555ll ), /*  3 */
+    const_float64( 0x3fa5555555555555ll ), /*  4 */
+    const_float64( 0x3f81111111111111ll ), /*  5 */
+    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
+    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
+    const_float64( 0x3efa01a01a01a01all ), /*  8 */
+    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
+    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
+    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
+    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
+    const_float64( 0x3de6124613a86d09ll ), /* 13 */
+    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
+    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
 };
 
 float32 float32_exp2( float32 a STATUS_PARAM )
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
+    uint32_t aSig;
     float64 r, x, xn;
     int i;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
@@ -2143,8 +2273,9 @@
 {
     flag aSign, zSign;
     int16 aExp;
-    bits32 aSig, zSig, i;
+    uint32_t aSig, zSig, i;
 
+    a = float32_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
@@ -2168,7 +2299,7 @@
     zSig = aExp << 23;
 
     for (i = 1 << 22; i > 0; i >>= 1) {
-        aSig = ( (bits64)aSig * aSig ) >> 23;
+        aSig = ( (uint64_t)aSig * aSig ) >> 23;
         if ( aSig & 0x01000000 ) {
             aSig >>= 1;
             zSig |= i;
@@ -2183,12 +2314,118 @@
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
 int float32_eq( float32 a, float32 b STATUS_PARAM )
 {
+    uint32_t av, bv;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    av = float32_val(a);
+    bv = float32_val(b);
+    return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The invalid
+| exception is raised if either operand is a NaN.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+    uint32_t av, bv;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt( float32 a, float32 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+    uint32_t av, bv;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  The invalid exception is raised if either
+| operand is a NaN.  The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_unordered( float32 a, float32 b STATUS_PARAM )
+{
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 1;
+    }
+    return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
+{
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2199,84 +2436,7 @@
         return 0;
     }
     return ( float32_val(a) == float32_val(b) ) ||
-            ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than
-| or equal to the corresponding value `b', and 0 otherwise.  The comparison
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_le( float32 a, float32 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-    bits32 av, bv;
-
-    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
-         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloat32Sign( a );
-    bSign = extractFloat32Sign( b );
-    av = float32_val(a);
-    bv = float32_val(b);
-    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
-    return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise.  The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_lt( float32 a, float32 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-    bits32 av, bv;
-
-    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
-         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloat32Sign( a );
-    bSign = extractFloat32Sign( b );
-    av = float32_val(a);
-    bv = float32_val(b);
-    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
-    return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise.  The invalid exception is
-| raised if either operand is a NaN.  Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
-{
-    bits32 av, bv;
-
-    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
-         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    av = float32_val(a);
-    bv = float32_val(b);
-    return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
-
+            ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
 }
 
 /*----------------------------------------------------------------------------
@@ -2289,7 +2449,9 @@
 int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
 {
     flag aSign, bSign;
-    bits32 av, bv;
+    uint32_t av, bv;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2303,7 +2465,7 @@
     bSign = extractFloat32Sign( b );
     av = float32_val(a);
     bv = float32_val(b);
-    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+    if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
     return ( av == bv ) || ( aSign ^ ( av < bv ) );
 
 }
@@ -2318,7 +2480,9 @@
 int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
 {
     flag aSign, bSign;
-    bits32 av, bv;
+    uint32_t av, bv;
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2332,12 +2496,35 @@
     bSign = extractFloat32Sign( b );
     av = float32_val(a);
     bv = float32_val(b);
-    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
+    if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
     return ( av != bv ) && ( aSign ^ ( av < bv ) );
 
 }
 
 /*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
+{
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    b = float32_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 1;
+    }
+    return 0;
+}
+
+/*----------------------------------------------------------------------------
 | Returns the result of converting the double-precision floating-point value
 | `a' to the 32-bit two's complement integer format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
@@ -2351,7 +2538,8 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits64 aSig;
+    uint64_t aSig;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -2378,8 +2566,9 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits64 aSig, savedASig;
+    uint64_t aSig, savedASig;
     int32 z;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -2401,7 +2590,7 @@
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
         float_raise( float_flag_invalid STATUS_VAR);
-        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
@@ -2412,6 +2601,57 @@
 
 /*----------------------------------------------------------------------------
 | Returns the result of converting the double-precision floating-point value
+| `a' to the 16-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    uint64_t aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( 0x40E < aExp ) {
+        if ( ( aExp == 0x7FF ) && aSig ) {
+            aSign = 0;
+        }
+        goto invalid;
+    }
+    else if ( aExp < 0x3FF ) {
+        if ( aExp || aSig ) {
+            STATUS(float_exception_flags) |= float_flag_inexact;
+        }
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x433 - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) {
+        z = - z;
+    }
+    if ( ( (int16_t)z < 0 ) ^ aSign ) {
+ invalid:
+        float_raise( float_flag_invalid STATUS_VAR);
+        return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
 | `a' to the 64-bit two's complement integer format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
 | Arithmetic---which means in particular that the conversion is rounded
@@ -2424,7 +2664,8 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits64 aSig, aSigExtra;
+    uint64_t aSig, aSigExtra;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -2440,7 +2681,7 @@
                ) {
                 return LIT64( 0x7FFFFFFFFFFFFFFF );
             }
-            return (sbits64) LIT64( 0x8000000000000000 );
+            return (int64_t) LIT64( 0x8000000000000000 );
         }
         aSigExtra = 0;
         aSig <<= - shiftCount;
@@ -2466,8 +2707,9 @@
 {
     flag aSign;
     int16 aExp, shiftCount;
-    bits64 aSig;
+    uint64_t aSig;
     int64 z;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -2485,7 +2727,7 @@
                     return LIT64( 0x7FFFFFFFFFFFFFFF );
                 }
             }
-            return (sbits64) LIT64( 0x8000000000000000 );
+            return (int64_t) LIT64( 0x8000000000000000 );
         }
         z = aSig<<shiftCount;
     }
@@ -2495,7 +2737,7 @@
             return 0;
         }
         z = aSig>>( - shiftCount );
-        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+        if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
             STATUS(float_exception_flags) |= float_flag_inexact;
         }
     }
@@ -2515,14 +2757,15 @@
 {
     flag aSign;
     int16 aExp;
-    bits64 aSig;
-    bits32 zSig;
+    uint64_t aSig;
+    uint32_t zSig;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
     aSign = extractFloat64Sign( a );
     if ( aExp == 0x7FF ) {
-        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
+        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloat32( aSign, 0xFF, 0 );
     }
     shift64RightJamming( aSig, 22, &aSig );
@@ -2546,29 +2789,28 @@
 | than the desired result exponent whenever `zSig' is a complete, normalized
 | significand.
 *----------------------------------------------------------------------------*/
-static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
+static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
 {
-    return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig;
+    return make_float16(
+        (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
 }
 
 /* Half precision floats come in two formats: standard IEEE and "ARM" format.
    The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
-  
-float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
+
+float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
+    uint32_t aSig;
 
-    aSign = a >> 15;
-    aExp = (a >> 10) & 0x1f;
-    aSig = a & 0x3ff;
+    aSign = extractFloat16Sign(a);
+    aExp = extractFloat16Exp(a);
+    aSig = extractFloat16Frac(a);
 
     if (aExp == 0x1f && ieee) {
         if (aSig) {
-            /* Make sure correct exceptions are raised.  */
-            float32ToCommonNaN(a STATUS_VAR);
-            aSig |= 0x200;
+            return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
         return packFloat32(aSign, 0xff, aSig << 13);
     }
@@ -2586,38 +2828,45 @@
     return packFloat32( aSign, aExp + 0x70, aSig << 13);
 }
 
-bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
+float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
 {
     flag aSign;
     int16 aExp;
-    bits32 aSig;
-    bits32 mask;
-    bits32 increment;
+    uint32_t aSig;
+    uint32_t mask;
+    uint32_t increment;
     int8 roundingMode;
+    a = float32_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
     if ( aExp == 0xFF ) {
         if (aSig) {
-            /* Make sure correct exceptions are raised.  */
-            float32ToCommonNaN(a STATUS_VAR);
-            aSig |= 0x00400000;
+            /* Input is a NaN */
+            float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+            if (!ieee) {
+                return packFloat16(aSign, 0, 0);
+            }
+            return r;
         }
-        return packFloat16(aSign, 0x1f, aSig >> 13);
+        /* Infinity */
+        if (!ieee) {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return packFloat16(aSign, 0x1f, 0x3ff);
+        }
+        return packFloat16(aSign, 0x1f, 0);
     }
-    if (aExp == 0 && aSign == 0) {
+    if (aExp == 0 && aSig == 0) {
         return packFloat16(aSign, 0, 0);
     }
     /* Decimal point between bits 22 and 23.  */
     aSig |= 0x00800000;
     aExp -= 0x7f;
     if (aExp < -14) {
-        mask = 0x007fffff;
-        if (aExp < -24) {
-            aExp = -25;
-        } else {
-            mask >>= 24 + aExp;
+        mask = 0x00ffffff;
+        if (aExp >= -24) {
+            mask >>= 25 + aExp;
         }
     } else {
         mask = 0x00001fff;
@@ -2659,7 +2908,7 @@
         }
     } else {
         if (aExp > 16) {
-            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
             return packFloat16(aSign, 0x1f, 0x3ff);
         }
     }
@@ -2686,13 +2935,14 @@
 {
     flag aSign;
     int16 aExp;
-    bits64 aSig;
+    uint64_t aSig;
 
+    a = float64_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
     aSign = extractFloat64Sign( a );
     if ( aExp == 0x7FF ) {
-        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
+        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
     if ( aExp == 0 ) {
@@ -2720,13 +2970,14 @@
 {
     flag aSign;
     int16 aExp;
-    bits64 aSig, zSig0, zSig1;
+    uint64_t aSig, zSig0, zSig1;
 
+    a = float64_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
     aSign = extractFloat64Sign( a );
     if ( aExp == 0x7FF ) {
-        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
+        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         return packFloat128( aSign, 0x7FFF, 0, 0 );
     }
     if ( aExp == 0 ) {
@@ -2752,9 +3003,10 @@
 {
     flag aSign;
     int16 aExp;
-    bits64 lastBitMask, roundBitsMask;
+    uint64_t lastBitMask, roundBitsMask;
     int8 roundingMode;
-    bits64 z;
+    uint64_t z;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aExp = extractFloat64Exp( a );
     if ( 0x433 <= aExp ) {
@@ -2764,7 +3016,7 @@
         return a;
     }
     if ( aExp < 0x3FF ) {
-        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
+        if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
         STATUS(float_exception_flags) |= float_flag_inexact;
         aSign = extractFloat64Sign( a );
         switch ( STATUS(float_rounding_mode) ) {
@@ -2824,7 +3076,7 @@
 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
 {
     int16 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig;
+    uint64_t aSig, bSig, zSig;
     int16 expDiff;
 
     aSig = extractFloat64Frac( a );
@@ -2878,7 +3130,7 @@
     aSig |= LIT64( 0x2000000000000000 );
     zSig = ( aSig + bSig )<<1;
     --zExp;
-    if ( (sbits64) zSig < 0 ) {
+    if ( (int64_t) zSig < 0 ) {
         zSig = aSig + bSig;
         ++zExp;
     }
@@ -2898,7 +3150,7 @@
 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
 {
     int16 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig;
+    uint64_t aSig, bSig, zSig;
     int16 expDiff;
 
     aSig = extractFloat64Frac( a );
@@ -2971,6 +3223,8 @@
 float64 float64_add( float64 a, float64 b STATUS_PARAM )
 {
     flag aSign, bSign;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     aSign = extractFloat64Sign( a );
     bSign = extractFloat64Sign( b );
@@ -2992,6 +3246,8 @@
 float64 float64_sub( float64 a, float64 b STATUS_PARAM )
 {
     flag aSign, bSign;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     aSign = extractFloat64Sign( a );
     bSign = extractFloat64Sign( b );
@@ -3014,7 +3270,10 @@
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig0, zSig1;
+    uint64_t aSig, bSig, zSig0, zSig1;
+
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -3054,7 +3313,7 @@
     bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
     mul64To128( aSig, bSig, &zSig0, &zSig1 );
     zSig0 |= ( zSig1 != 0 );
-    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+    if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
         zSig0 <<= 1;
         --zExp;
     }
@@ -3072,9 +3331,11 @@
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig;
-    bits64 rem0, rem1;
-    bits64 term0, term1;
+    uint64_t aSig, bSig, zSig;
+    uint64_t rem0, rem1;
+    uint64_t term0, term1;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -3122,7 +3383,7 @@
     if ( ( zSig & 0x1FF ) <= 2 ) {
         mul64To128( bSig, zSig, &term0, &term1 );
         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
-        while ( (sbits64) rem0 < 0 ) {
+        while ( (int64_t) rem0 < 0 ) {
             --zSig;
             add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
         }
@@ -3142,10 +3403,12 @@
 {
     flag aSign, zSign;
     int16 aExp, bExp, expDiff;
-    bits64 aSig, bSig;
-    bits64 q, alternateASig;
-    sbits64 sigMean;
+    uint64_t aSig, bSig;
+    uint64_t q, alternateASig;
+    int64_t sigMean;
 
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
     aSign = extractFloat64Sign( a );
@@ -3205,12 +3468,12 @@
         alternateASig = aSig;
         ++q;
         aSig -= bSig;
-    } while ( 0 <= (sbits64) aSig );
+    } while ( 0 <= (int64_t) aSig );
     sigMean = aSig + alternateASig;
     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
         aSig = alternateASig;
     }
-    zSign = ( (sbits64) aSig < 0 );
+    zSign = ( (int64_t) aSig < 0 );
     if ( zSign ) aSig = - aSig;
     return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
 
@@ -3226,8 +3489,9 @@
 {
     flag aSign;
     int16 aExp, zExp;
-    bits64 aSig, zSig, doubleZSig;
-    bits64 rem0, rem1, term0, term1;
+    uint64_t aSig, zSig, doubleZSig;
+    uint64_t rem0, rem1, term0, term1;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -3256,7 +3520,7 @@
         doubleZSig = zSig<<1;
         mul64To128( zSig, zSig, &term0, &term1 );
         sub128( aSig, 0, term0, term1, &rem0, &rem1 );
-        while ( (sbits64) rem0 < 0 ) {
+        while ( (int64_t) rem0 < 0 ) {
             --zSig;
             doubleZSig -= 2;
             add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
@@ -3276,7 +3540,8 @@
 {
     flag aSign, zSign;
     int16 aExp;
-    bits64 aSig, aSig0, aSig1, zSig, i;
+    uint64_t aSig, aSig0, aSig1, zSig, i;
+    a = float64_squash_input_denormal(a STATUS_VAR);
 
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
@@ -3298,7 +3563,7 @@
     aExp -= 0x3FF;
     aSig |= LIT64( 0x0010000000000000 );
     zSign = aExp < 0;
-    zSig = (bits64)aExp << 52;
+    zSig = (uint64_t)aExp << 52;
     for (i = 1LL << 51; i > 0; i >>= 1) {
         mul64To128( aSig, aSig, &aSig0, &aSig1 );
         aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
@@ -3315,13 +3580,120 @@
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the double-precision floating-point value `a' is equal to the
-| corresponding value `b', and 0 otherwise.  The comparison is performed
+| corresponding value `b', and 0 otherwise.  The invalid exception is raised
+| if either operand is a NaN.  Otherwise, the comparison is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
 int float64_eq( float64 a, float64 b STATUS_PARAM )
 {
-    bits64 av, bv;
+    uint64_t av, bv;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    av = float64_val(a);
+    bv = float64_val(b);
+    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  The invalid
+| exception is raised if either operand is a NaN.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+    uint64_t av, bv;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt( float64 a, float64 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+    uint64_t av, bv;
+
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  The invalid exception is raised if either
+| operand is a NaN.  The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_unordered( float64 a, float64 b STATUS_PARAM )
+{
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 1;
+    }
+    return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
+{
+    uint64_t av, bv;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3333,83 +3705,7 @@
     }
     av = float64_val(a);
     bv = float64_val(b);
-    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than or
-| equal to the corresponding value `b', and 0 otherwise.  The comparison is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_le( float64 a, float64 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-    bits64 av, bv;
-
-    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
-         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    av = float64_val(a);
-    bv = float64_val(b);
-    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
-    return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise.  The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_lt( float64 a, float64 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-    bits64 av, bv;
-
-    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
-         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    av = float64_val(a);
-    bv = float64_val(b);
-    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
-    return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is equal to the
-| corresponding value `b', and 0 otherwise.  The invalid exception is raised
-| if either operand is a NaN.  Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
-{
-    bits64 av, bv;
-
-    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
-         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    av = float64_val(a);
-    bv = float64_val(b);
-    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
 
 }
 
@@ -3423,7 +3719,9 @@
 int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
 {
     flag aSign, bSign;
-    bits64 av, bv;
+    uint64_t av, bv;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3437,7 +3735,7 @@
     bSign = extractFloat64Sign( b );
     av = float64_val(a);
     bv = float64_val(b);
-    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+    if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
     return ( av == bv ) || ( aSign ^ ( av < bv ) );
 
 }
@@ -3452,7 +3750,9 @@
 int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
 {
     flag aSign, bSign;
-    bits64 av, bv;
+    uint64_t av, bv;
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
 
     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3466,11 +3766,34 @@
     bSign = extractFloat64Sign( b );
     av = float64_val(a);
     bv = float64_val(b);
-    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
+    if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
     return ( av != bv ) && ( aSign ^ ( av < bv ) );
 
 }
 
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
+{
+    a = float64_squash_input_denormal(a STATUS_VAR);
+    b = float64_squash_input_denormal(b STATUS_VAR);
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 1;
+    }
+    return 0;
+}
+
 #ifdef FLOATX80
 
 /*----------------------------------------------------------------------------
@@ -3487,12 +3810,12 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig;
+    uint64_t aSig;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
-    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
     shiftCount = 0x4037 - aExp;
     if ( shiftCount <= 0 ) shiftCount = 1;
     shift64RightJamming( aSig, shiftCount, &aSig );
@@ -3514,14 +3837,14 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig, savedASig;
+    uint64_t aSig, savedASig;
     int32 z;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
     if ( 0x401E < aExp ) {
-        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
         goto invalid;
     }
     else if ( aExp < 0x3FFF ) {
@@ -3536,7 +3859,7 @@
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
         float_raise( float_flag_invalid STATUS_VAR);
-        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
@@ -3559,7 +3882,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig, aSigExtra;
+    uint64_t aSig, aSigExtra;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
@@ -3574,7 +3897,7 @@
                ) {
                 return LIT64( 0x7FFFFFFFFFFFFFFF );
             }
-            return (sbits64) LIT64( 0x8000000000000000 );
+            return (int64_t) LIT64( 0x8000000000000000 );
         }
         aSigExtra = 0;
     }
@@ -3599,7 +3922,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig;
+    uint64_t aSig;
     int64 z;
 
     aSig = extractFloatx80Frac( a );
@@ -3614,14 +3937,14 @@
                 return LIT64( 0x7FFFFFFFFFFFFFFF );
             }
         }
-        return (sbits64) LIT64( 0x8000000000000000 );
+        return (int64_t) LIT64( 0x8000000000000000 );
     }
     else if ( aExp < 0x3FFF ) {
         if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
         return 0;
     }
     z = aSig>>( - shiftCount );
-    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
     }
     if ( aSign ) z = - z;
@@ -3640,14 +3963,14 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 aSig;
+    uint64_t aSig;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( aSig<<1 ) ) {
-            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
+        if ( (uint64_t) ( aSig<<1 ) ) {
+            return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         }
         return packFloat32( aSign, 0xFF, 0 );
     }
@@ -3668,14 +3991,14 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 aSig, zSig;
+    uint64_t aSig, zSig;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( aSig<<1 ) ) {
-            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
+        if ( (uint64_t) ( aSig<<1 ) ) {
+            return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         }
         return packFloat64( aSign, 0x7FF, 0 );
     }
@@ -3698,13 +4021,13 @@
 {
     flag aSign;
     int16 aExp;
-    bits64 aSig, zSig0, zSig1;
+    uint64_t aSig, zSig0, zSig1;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
-    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
-        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
+    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
+        return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
     }
     shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
     return packFloat128( aSign, aExp, zSig0, zSig1 );
@@ -3724,27 +4047,27 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 lastBitMask, roundBitsMask;
+    uint64_t lastBitMask, roundBitsMask;
     int8 roundingMode;
     floatx80 z;
 
     aExp = extractFloatx80Exp( a );
     if ( 0x403E <= aExp ) {
-        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
             return propagateFloatx80NaN( a, a STATUS_VAR );
         }
         return a;
     }
     if ( aExp < 0x3FFF ) {
         if (    ( aExp == 0 )
-             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+             && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
             return a;
         }
         STATUS(float_exception_flags) |= float_flag_inexact;
         aSign = extractFloatx80Sign( a );
         switch ( STATUS(float_rounding_mode) ) {
          case float_round_nearest_even:
-            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
                ) {
                 return
                     packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
@@ -3797,7 +4120,7 @@
 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
 {
     int32 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig0, zSig1;
+    uint64_t aSig, bSig, zSig0, zSig1;
     int32 expDiff;
 
     aSig = extractFloatx80Frac( a );
@@ -3807,7 +4130,7 @@
     expDiff = aExp - bExp;
     if ( 0 < expDiff ) {
         if ( aExp == 0x7FFF ) {
-            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
             return a;
         }
         if ( bExp == 0 ) --expDiff;
@@ -3816,7 +4139,7 @@
     }
     else if ( expDiff < 0 ) {
         if ( bExp == 0x7FFF ) {
-            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
         }
         if ( aExp == 0 ) ++expDiff;
@@ -3825,7 +4148,7 @@
     }
     else {
         if ( aExp == 0x7FFF ) {
-            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                 return propagateFloatx80NaN( a, b STATUS_VAR );
             }
             return a;
@@ -3840,7 +4163,7 @@
         goto shiftRight1;
     }
     zSig0 = aSig + bSig;
-    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
  shiftRight1:
     shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
     zSig0 |= LIT64( 0x8000000000000000 );
@@ -3863,7 +4186,7 @@
 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
 {
     int32 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig0, zSig1;
+    uint64_t aSig, bSig, zSig0, zSig1;
     int32 expDiff;
     floatx80 z;
 
@@ -3875,7 +4198,7 @@
     if ( 0 < expDiff ) goto aExpBigger;
     if ( expDiff < 0 ) goto bExpBigger;
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
             return propagateFloatx80NaN( a, b STATUS_VAR );
         }
         float_raise( float_flag_invalid STATUS_VAR);
@@ -3893,7 +4216,7 @@
     return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
  bExpBigger:
     if ( bExp == 0x7FFF ) {
-        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
     if ( aExp == 0 ) ++expDiff;
@@ -3905,7 +4228,7 @@
     goto normalizeRoundAndPack;
  aExpBigger:
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         return a;
     }
     if ( bExp == 0 ) --expDiff;
@@ -3972,7 +4295,7 @@
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig0, zSig1;
+    uint64_t aSig, bSig, zSig0, zSig1;
     floatx80 z;
 
     aSig = extractFloatx80Frac( a );
@@ -3983,15 +4306,15 @@
     bSign = extractFloatx80Sign( b );
     zSign = aSign ^ bSign;
     if ( aExp == 0x7FFF ) {
-        if (    (bits64) ( aSig<<1 )
-             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+        if (    (uint64_t) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
             return propagateFloatx80NaN( a, b STATUS_VAR );
         }
         if ( ( bExp | bSig ) == 0 ) goto invalid;
         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
     if ( bExp == 0x7FFF ) {
-        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         if ( ( aExp | aSig ) == 0 ) {
  invalid:
             float_raise( float_flag_invalid STATUS_VAR);
@@ -4011,7 +4334,7 @@
     }
     zExp = aExp + bExp - 0x3FFE;
     mul64To128( aSig, bSig, &zSig0, &zSig1 );
-    if ( 0 < (sbits64) zSig0 ) {
+    if ( 0 < (int64_t) zSig0 ) {
         shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
         --zExp;
     }
@@ -4031,8 +4354,8 @@
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
-    bits64 aSig, bSig, zSig0, zSig1;
-    bits64 rem0, rem1, rem2, term0, term1, term2;
+    uint64_t aSig, bSig, zSig0, zSig1;
+    uint64_t rem0, rem1, rem2, term0, term1, term2;
     floatx80 z;
 
     aSig = extractFloatx80Frac( a );
@@ -4043,15 +4366,15 @@
     bSign = extractFloatx80Sign( b );
     zSign = aSign ^ bSign;
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         if ( bExp == 0x7FFF ) {
-            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+            if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
             goto invalid;
         }
         return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
     if ( bExp == 0x7FFF ) {
-        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         return packFloatx80( zSign, 0, 0 );
     }
     if ( bExp == 0 ) {
@@ -4081,15 +4404,15 @@
     zSig0 = estimateDiv128To64( aSig, rem1, bSig );
     mul64To128( bSig, zSig0, &term0, &term1 );
     sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
-    while ( (sbits64) rem0 < 0 ) {
+    while ( (int64_t) rem0 < 0 ) {
         --zSig0;
         add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
     }
     zSig1 = estimateDiv128To64( rem1, 0, bSig );
-    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
         mul64To128( bSig, zSig1, &term1, &term2 );
         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
-        while ( (sbits64) rem1 < 0 ) {
+        while ( (int64_t) rem1 < 0 ) {
             --zSig1;
             add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
         }
@@ -4111,8 +4434,8 @@
 {
     flag aSign, zSign;
     int32 aExp, bExp, expDiff;
-    bits64 aSig0, aSig1, bSig;
-    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    uint64_t aSig0, aSig1, bSig;
+    uint64_t q, term0, term1, alternateASig0, alternateASig1;
     floatx80 z;
 
     aSig0 = extractFloatx80Frac( a );
@@ -4121,14 +4444,14 @@
     bSig = extractFloatx80Frac( b );
     bExp = extractFloatx80Exp( b );
     if ( aExp == 0x7FFF ) {
-        if (    (bits64) ( aSig0<<1 )
-             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+        if (    (uint64_t) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
             return propagateFloatx80NaN( a, b STATUS_VAR );
         }
         goto invalid;
     }
     if ( bExp == 0x7FFF ) {
-        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+        if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
         return a;
     }
     if ( bExp == 0 ) {
@@ -4142,7 +4465,7 @@
         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
     }
     if ( aExp == 0 ) {
-        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
         normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
     }
     bSig |= LIT64( 0x8000000000000000 );
@@ -4207,15 +4530,15 @@
 {
     flag aSign;
     int32 aExp, zExp;
-    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
-    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
     floatx80 z;
 
     aSig0 = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
     if ( aExp == 0x7FFF ) {
-        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
+        if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
         if ( ! aSign ) return a;
         goto invalid;
     }
@@ -4238,7 +4561,7 @@
     doubleZSig0 = zSig0<<1;
     mul64To128( zSig0, zSig0, &term0, &term1 );
     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
-    while ( (sbits64) rem0 < 0 ) {
+    while ( (int64_t) rem0 < 0 ) {
         --zSig0;
         doubleZSig0 -= 2;
         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
@@ -4250,7 +4573,7 @@
         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
         mul64To128( zSig1, zSig1, &term2, &term3 );
         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
-        while ( (sbits64) rem1 < 0 ) {
+        while ( (int64_t) rem1 < 0 ) {
             --zSig1;
             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
             term3 |= 1;
@@ -4268,19 +4591,132 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| equal to the corresponding value `b', and 0 otherwise.  The comparison is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
 int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
 {
 
     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| invalid exception is raised if either operand is a NaN.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The invalid
+| exception is raised if either operand is a NaN.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point values `a' and `b'
+| cannot be compared, and 0 otherwise.  The invalid exception is raised if
+| either operand is a NaN.   The comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 1;
+    }
+    return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
        ) {
         if (    floatx80_is_signaling_nan( a )
              || floatx80_is_signaling_nan( b ) ) {
@@ -4292,100 +4728,7 @@
            ( a.low == b.low )
         && (    ( a.high == b.high )
              || (    ( a.low == 0 )
-                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
-           );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than or equal to the corresponding value `b', and 0 otherwise.  The
-| comparison is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-
-    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
-         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloatx80Sign( a );
-    bSign = extractFloatx80Sign( b );
-    if ( aSign != bSign ) {
-        return
-               aSign
-            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
-                 == 0 );
-    }
-    return
-          aSign ? le128( b.high, b.low, a.high, a.low )
-        : le128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than the corresponding value `b', and 0 otherwise.  The comparison
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
-{
-    flag aSign, bSign;
-
-    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
-         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    aSign = extractFloatx80Sign( a );
-    bSign = extractFloatx80Sign( b );
-    if ( aSign != bSign ) {
-        return
-               aSign
-            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
-                 != 0 );
-    }
-    return
-          aSign ? lt128( b.high, b.low, a.high, a.low )
-        : lt128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is equal
-| to the corresponding value `b', and 0 otherwise.  The invalid exception is
-| raised if either operand is a NaN.  Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
-{
-
-    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
-         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
-       ) {
-        float_raise( float_flag_invalid STATUS_VAR);
-        return 0;
-    }
-    return
-           ( a.low == b.low )
-        && (    ( a.high == b.high )
-             || (    ( a.low == 0 )
-                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+                  && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
            );
 
 }
@@ -4402,9 +4745,9 @@
     flag aSign, bSign;
 
     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
        ) {
         if (    floatx80_is_signaling_nan( a )
              || floatx80_is_signaling_nan( b ) ) {
@@ -4417,7 +4760,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            || (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  == 0 );
     }
     return
@@ -4438,9 +4781,9 @@
     flag aSign, bSign;
 
     if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
-              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
        ) {
         if (    floatx80_is_signaling_nan( a )
              || floatx80_is_signaling_nan( b ) ) {
@@ -4453,7 +4796,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            && (    ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  != 0 );
     }
     return
@@ -4462,6 +4805,28 @@
 
 }
 
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point values `a' and `b'
+| cannot be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.
+| The comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 1;
+    }
+    return 0;
+}
+
 #endif
 
 #ifdef FLOAT128
@@ -4480,7 +4845,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig0, aSig1;
+    uint64_t aSig0, aSig1;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
@@ -4509,7 +4874,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig0, aSig1, savedASig;
+    uint64_t aSig0, aSig1, savedASig;
     int32 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -4534,7 +4899,7 @@
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
         float_raise( float_flag_invalid STATUS_VAR);
-        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig0<<shiftCount ) != savedASig ) {
         STATUS(float_exception_flags) |= float_flag_inexact;
@@ -4557,7 +4922,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig0, aSig1;
+    uint64_t aSig0, aSig1;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
@@ -4575,7 +4940,7 @@
                ) {
                 return LIT64( 0x7FFFFFFFFFFFFFFF );
             }
-            return (sbits64) LIT64( 0x8000000000000000 );
+            return (int64_t) LIT64( 0x8000000000000000 );
         }
         shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
     }
@@ -4600,7 +4965,7 @@
 {
     flag aSign;
     int32 aExp, shiftCount;
-    bits64 aSig0, aSig1;
+    uint64_t aSig0, aSig1;
     int64 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -4622,10 +4987,10 @@
                     return LIT64( 0x7FFFFFFFFFFFFFFF );
                 }
             }
-            return (sbits64) LIT64( 0x8000000000000000 );
+            return (int64_t) LIT64( 0x8000000000000000 );
         }
         z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
-        if ( (bits64) ( aSig1<<shiftCount ) ) {
+        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
             STATUS(float_exception_flags) |= float_flag_inexact;
         }
     }
@@ -4638,7 +5003,7 @@
         }
         z = aSig0>>( - shiftCount );
         if (    aSig1
-             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
             STATUS(float_exception_flags) |= float_flag_inexact;
         }
     }
@@ -4658,8 +5023,8 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 aSig0, aSig1;
-    bits32 zSig;
+    uint64_t aSig0, aSig1;
+    uint32_t zSig;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
@@ -4667,7 +5032,7 @@
     aSign = extractFloat128Sign( a );
     if ( aExp == 0x7FFF ) {
         if ( aSig0 | aSig1 ) {
-            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
+            return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         }
         return packFloat32( aSign, 0xFF, 0 );
     }
@@ -4693,7 +5058,7 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 aSig0, aSig1;
+    uint64_t aSig0, aSig1;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
@@ -4701,7 +5066,7 @@
     aSign = extractFloat128Sign( a );
     if ( aExp == 0x7FFF ) {
         if ( aSig0 | aSig1 ) {
-            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
+            return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         }
         return packFloat64( aSign, 0x7FF, 0 );
     }
@@ -4728,7 +5093,7 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 aSig0, aSig1;
+    uint64_t aSig0, aSig1;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
@@ -4736,7 +5101,7 @@
     aSign = extractFloat128Sign( a );
     if ( aExp == 0x7FFF ) {
         if ( aSig0 | aSig1 ) {
-            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
+            return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
         }
         return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
     }
@@ -4765,7 +5130,7 @@
 {
     flag aSign;
     int32 aExp;
-    bits64 lastBitMask, roundBitsMask;
+    uint64_t lastBitMask, roundBitsMask;
     int8 roundingMode;
     float128 z;
 
@@ -4790,9 +5155,9 @@
                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
             }
             else {
-                if ( (sbits64) z.low < 0 ) {
+                if ( (int64_t) z.low < 0 ) {
                     ++z.high;
-                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
                 }
             }
         }
@@ -4806,7 +5171,7 @@
     }
     else {
         if ( aExp < 0x3FFF ) {
-            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
             STATUS(float_exception_flags) |= float_flag_inexact;
             aSign = extractFloat128Sign( a );
             switch ( STATUS(float_rounding_mode) ) {
@@ -4868,7 +5233,7 @@
 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
 {
     int32 aExp, bExp, zExp;
-    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
     int32 expDiff;
 
     aSig1 = extractFloat128Frac1( a );
@@ -4949,7 +5314,7 @@
 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
 {
     int32 aExp, bExp, zExp;
-    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
     int32 expDiff;
     float128 z;
 
@@ -5074,7 +5439,7 @@
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
-    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
     float128 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -5138,8 +5503,8 @@
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
-    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
-    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
     float128 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -5193,7 +5558,7 @@
     zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
     mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
     sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
-    while ( (sbits64) rem0 < 0 ) {
+    while ( (int64_t) rem0 < 0 ) {
         --zSig0;
         add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
     }
@@ -5201,7 +5566,7 @@
     if ( ( zSig1 & 0x3FFF ) <= 4 ) {
         mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
         sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
-        while ( (sbits64) rem1 < 0 ) {
+        while ( (int64_t) rem1 < 0 ) {
             --zSig1;
             add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
         }
@@ -5222,9 +5587,9 @@
 {
     flag aSign, zSign;
     int32 aExp, bExp, expDiff;
-    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
-    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
-    sbits64 sigMean0;
+    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
+    int64_t sigMean0;
     float128 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -5306,15 +5671,15 @@
         alternateASig1 = aSig1;
         ++q;
         sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
-    } while ( 0 <= (sbits64) aSig0 );
+    } while ( 0 <= (int64_t) aSig0 );
     add128(
-        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
+        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
     if (    ( sigMean0 < 0 )
          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
         aSig0 = alternateASig0;
         aSig1 = alternateASig1;
     }
-    zSign = ( (sbits64) aSig0 < 0 );
+    zSign = ( (int64_t) aSig0 < 0 );
     if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
     return
         normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
@@ -5331,8 +5696,8 @@
 {
     flag aSign;
     int32 aExp, zExp;
-    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
-    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
     float128 z;
 
     aSig1 = extractFloat128Frac1( a );
@@ -5364,7 +5729,7 @@
     doubleZSig0 = zSig0<<1;
     mul64To128( zSig0, zSig0, &term0, &term1 );
     sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
-    while ( (sbits64) rem0 < 0 ) {
+    while ( (int64_t) rem0 < 0 ) {
         --zSig0;
         doubleZSig0 -= 2;
         add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
@@ -5376,7 +5741,7 @@
         sub128( rem1, 0, term1, term2, &rem1, &rem2 );
         mul64To128( zSig1, zSig1, &term2, &term3 );
         sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
-        while ( (sbits64) rem1 < 0 ) {
+        while ( (int64_t) rem1 < 0 ) {
             --zSig1;
             shortShift128Left( 0, zSig1, 1, &term2, &term3 );
             term3 |= 1;
@@ -5392,7 +5757,8 @@
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
@@ -5404,26 +5770,23 @@
          || (    ( extractFloat128Exp( b ) == 0x7FFF )
               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        ) {
-        if (    float128_is_signaling_nan( a )
-             || float128_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid STATUS_VAR);
-        }
+        float_raise( float_flag_invalid STATUS_VAR);
         return 0;
     }
     return
            ( a.low == b.low )
         && (    ( a.high == b.high )
              || (    ( a.low == 0 )
-                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
            );
 
 }
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is less than
-| or equal to the corresponding value `b', and 0 otherwise.  The comparison
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
+| or equal to the corresponding value `b', and 0 otherwise.  The invalid
+| exception is raised if either operand is a NaN.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
 int float128_le( float128 a, float128 b STATUS_PARAM )
@@ -5443,7 +5806,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  == 0 );
     }
     return
@@ -5454,8 +5817,9 @@
 
 /*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise.  The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
 int float128_lt( float128 a, float128 b STATUS_PARAM )
@@ -5475,7 +5839,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  != 0 );
     }
     return
@@ -5485,13 +5849,33 @@
 }
 
 /*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise.  The invalid exception is
-| raised if either operand is a NaN.  Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  The invalid exception is raised if either
+| operand is a NaN. The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/
 
-int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
+int float128_unordered( float128 a, float128 b STATUS_PARAM )
+{
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return 1;
+    }
+    return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
 {
 
     if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
@@ -5499,14 +5883,17 @@
          || (    ( extractFloat128Exp( b ) == 0x7FFF )
               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
        ) {
-        float_raise( float_flag_invalid STATUS_VAR);
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
         return 0;
     }
     return
            ( a.low == b.low )
         && (    ( a.high == b.high )
              || (    ( a.low == 0 )
-                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+                  && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
            );
 
 }
@@ -5538,7 +5925,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            || (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  == 0 );
     }
     return
@@ -5574,7 +5961,7 @@
     if ( aSign != bSign ) {
         return
                aSign
-            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+            && (    ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
                  != 0 );
     }
     return
@@ -5583,6 +5970,29 @@
 
 }
 
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise.  Quiet NaNs do not cause an exception.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
+{
+    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+         || (    ( extractFloat128Exp( b ) == 0x7FFF )
+              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+       ) {
+        if (    float128_is_signaling_nan( a )
+             || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return 1;
+    }
+    return 0;
+}
+
 #endif
 
 /* misc functions */
@@ -5632,6 +6042,24 @@
     return res;
 }
 
+unsigned int float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float32_to_int64_round_to_zero(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffff) {
+        res = 0xffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
+
 unsigned int float64_to_uint32( float64 a STATUS_PARAM )
 {
     int64_t v;
@@ -5668,6 +6096,24 @@
     return res;
 }
 
+unsigned int float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
+{
+    int64_t v;
+    unsigned int res;
+
+    v = float64_to_int64_round_to_zero(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffff) {
+        res = 0xffff;
+        float_raise( float_flag_invalid STATUS_VAR);
+    } else {
+        res = v;
+    }
+    return res;
+}
+
 /* FIXME: This looks broken.  */
 uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
 {
@@ -5696,7 +6142,9 @@
                                       int is_quiet STATUS_PARAM )            \
 {                                                                            \
     flag aSign, bSign;                                                       \
-    bits ## s av, bv;                                                        \
+    uint ## s ## _t av, bv;                                                  \
+    a = float ## s ## _squash_input_denormal(a STATUS_VAR);                  \
+    b = float ## s ## _squash_input_denormal(b STATUS_VAR);                  \
                                                                              \
     if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
          extractFloat ## s ## Frac( a ) ) ||                                 \
@@ -5714,7 +6162,7 @@
     av = float ## s ## _val(a);                                              \
     bv = float ## s ## _val(b);                                              \
     if ( aSign != bSign ) {                                                  \
-        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
+        if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) {                   \
             /* zero case */                                                  \
             return float_relation_equal;                                     \
         } else {                                                             \
@@ -5742,6 +6190,52 @@
 COMPARE(32, 0xff)
 COMPARE(64, 0x7ff)
 
+INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
+                                      int is_quiet STATUS_PARAM )
+{
+    flag aSign, bSign;
+
+    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
+          ( extractFloatx80Frac( a )<<1 ) ) ||
+        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
+          ( extractFloatx80Frac( b )<<1 ) )) {
+        if (!is_quiet ||
+            floatx80_is_signaling_nan( a ) ||
+            floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return float_relation_unordered;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+
+        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
+             ( ( a.low | b.low ) == 0 ) ) {
+            /* zero case */
+            return float_relation_equal;
+        } else {
+            return 1 - (2 * aSign);
+        }
+    } else {
+        if (a.low == b.low && a.high == b.high) {
+            return float_relation_equal;
+        } else {
+            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
+        }
+    }
+}
+
+int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    return floatx80_compare_internal(a, b, 0 STATUS_VAR);
+}
+
+int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+    return floatx80_compare_internal(a, b, 1 STATUS_VAR);
+}
+
 INLINE int float128_compare_internal( float128 a, float128 b,
                                       int is_quiet STATUS_PARAM )
 {
@@ -5786,18 +6280,71 @@
     return float128_compare_internal(a, b, 1 STATUS_VAR);
 }
 
+/* min() and max() functions. These can't be implemented as
+ * 'compare and pick one input' because that would mishandle
+ * NaNs and +0 vs -0.
+ */
+#define MINMAX(s, nan_exp)                                              \
+INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
+                                        int ismin STATUS_PARAM )        \
+{                                                                       \
+    flag aSign, bSign;                                                  \
+    uint ## s ## _t av, bv;                                             \
+    a = float ## s ## _squash_input_denormal(a STATUS_VAR);             \
+    b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
+    if (float ## s ## _is_any_nan(a) ||                                 \
+        float ## s ## _is_any_nan(b)) {                                 \
+        return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
+    }                                                                   \
+    aSign = extractFloat ## s ## Sign(a);                               \
+    bSign = extractFloat ## s ## Sign(b);                               \
+    av = float ## s ## _val(a);                                         \
+    bv = float ## s ## _val(b);                                         \
+    if (aSign != bSign) {                                               \
+        if (ismin) {                                                    \
+            return aSign ? a : b;                                       \
+        } else {                                                        \
+            return aSign ? b : a;                                       \
+        }                                                               \
+    } else {                                                            \
+        if (ismin) {                                                    \
+            return (aSign ^ (av < bv)) ? a : b;                         \
+        } else {                                                        \
+            return (aSign ^ (av < bv)) ? b : a;                         \
+        }                                                               \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
+}
+
+MINMAX(32, 0xff)
+MINMAX(64, 0x7ff)
+
+
 /* Multiply A by 2 raised to the power N.  */
 float32 float32_scalbn( float32 a, int n STATUS_PARAM )
 {
     flag aSign;
-    int16 aExp;
-    bits32 aSig;
+    int16_t aExp;
+    uint32_t aSig;
 
+    a = float32_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat32Frac( a );
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
 
     if ( aExp == 0xFF ) {
+        if ( aSig ) {
+            return propagateFloat32NaN( a, a STATUS_VAR );
+        }
         return a;
     }
     if ( aExp != 0 )
@@ -5805,6 +6352,12 @@
     else if ( aSig == 0 )
         return a;
 
+    if (n > 0x200) {
+        n = 0x200;
+    } else if (n < -0x200) {
+        n = -0x200;
+    }
+
     aExp += n - 1;
     aSig <<= 7;
     return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
@@ -5813,14 +6366,18 @@
 float64 float64_scalbn( float64 a, int n STATUS_PARAM )
 {
     flag aSign;
-    int16 aExp;
-    bits64 aSig;
+    int16_t aExp;
+    uint64_t aSig;
 
+    a = float64_squash_input_denormal(a STATUS_VAR);
     aSig = extractFloat64Frac( a );
     aExp = extractFloat64Exp( a );
     aSign = extractFloat64Sign( a );
 
     if ( aExp == 0x7FF ) {
+        if ( aSig ) {
+            return propagateFloat64NaN( a, a STATUS_VAR );
+        }
         return a;
     }
     if ( aExp != 0 )
@@ -5828,6 +6385,12 @@
     else if ( aSig == 0 )
         return a;
 
+    if (n > 0x1000) {
+        n = 0x1000;
+    } else if (n < -0x1000) {
+        n = -0x1000;
+    }
+
     aExp += n - 1;
     aSig <<= 10;
     return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
@@ -5837,19 +6400,29 @@
 floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
 {
     flag aSign;
-    int16 aExp;
-    bits64 aSig;
+    int32_t aExp;
+    uint64_t aSig;
 
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
     aSign = extractFloatx80Sign( a );
 
-    if ( aExp == 0x7FF ) {
+    if ( aExp == 0x7FFF ) {
+        if ( aSig<<1 ) {
+            return propagateFloatx80NaN( a, a STATUS_VAR );
+        }
         return a;
     }
+
     if (aExp == 0 && aSig == 0)
         return a;
 
+    if (n > 0x10000) {
+        n = 0x10000;
+    } else if (n < -0x10000) {
+        n = -0x10000;
+    }
+
     aExp += n;
     return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
                                           aSign, aExp, aSig, 0 STATUS_VAR );
@@ -5860,14 +6433,17 @@
 float128 float128_scalbn( float128 a, int n STATUS_PARAM )
 {
     flag aSign;
-    int32 aExp;
-    bits64 aSig0, aSig1;
+    int32_t aExp;
+    uint64_t aSig0, aSig1;
 
     aSig1 = extractFloat128Frac1( a );
     aSig0 = extractFloat128Frac0( a );
     aExp = extractFloat128Exp( a );
     aSign = extractFloat128Sign( a );
     if ( aExp == 0x7FFF ) {
+        if ( aSig0 | aSig1 ) {
+            return propagateFloat128NaN( a, a STATUS_VAR );
+        }
         return a;
     }
     if ( aExp != 0 )
@@ -5875,6 +6451,12 @@
     else if ( aSig0 == 0 && aSig1 == 0 )
         return a;
 
+    if (n > 0x10000) {
+        n = 0x10000;
+    } else if (n < -0x10000) {
+        n = -0x10000;
+    }
+
     aExp += n - 1;
     return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                           STATUS_VAR );
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index 9528825..5eff085 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -1,3 +1,9 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
 /*============================================================================
 
 This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
@@ -59,24 +65,15 @@
 typedef uint64_t uint64;
 typedef int64_t int64;
 
-/*----------------------------------------------------------------------------
-| Each of the following `typedef's defines a type that holds integers
-| of _exactly_ the number of bits specified.  For instance, for most
-| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
-| `unsigned short int' and `signed short int' (or `short int'), respectively.
-*----------------------------------------------------------------------------*/
-typedef uint8_t bits8;
-typedef int8_t sbits8;
-typedef uint16_t bits16;
-typedef int16_t sbits16;
-typedef uint32_t bits32;
-typedef int32_t sbits32;
-typedef uint64_t bits64;
-typedef int64_t sbits64;
-
 #define LIT64( a ) a##LL
 #define INLINE static inline
 
+#if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+#define SNAN_BIT_IS_ONE		1
+#else
+#define SNAN_BIT_IS_ONE		0
+#endif
+
 /*----------------------------------------------------------------------------
 | The macro `FLOATX80' must be defined to enable the extended double-precision
 | floating-point format `floatx80'.  If this macro is not defined, the
@@ -120,29 +117,44 @@
 //#define USE_SOFTFLOAT_STRUCT_TYPES
 #ifdef USE_SOFTFLOAT_STRUCT_TYPES
 typedef struct {
+    uint16_t v;
+} float16;
+#define float16_val(x) (((float16)(x)).v)
+#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
+#define const_float16(x) { x }
+typedef struct {
     uint32_t v;
 } float32;
 /* The cast ensures an error if the wrong type is passed.  */
 #define float32_val(x) (((float32)(x)).v)
 #define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+#define const_float32(x) { x }
 typedef struct {
     uint64_t v;
 } float64;
 #define float64_val(x) (((float64)(x)).v)
 #define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#define const_float64(x) { x }
 #else
+typedef uint16_t float16;
 typedef uint32_t float32;
 typedef uint64_t float64;
+#define float16_val(x) (x)
 #define float32_val(x) (x)
 #define float64_val(x) (x)
+#define make_float16(x) (x)
 #define make_float32(x) (x)
 #define make_float64(x) (x)
+#define const_float16(x) (x)
+#define const_float32(x) (x)
+#define const_float64(x) (x)
 #endif
 #ifdef FLOATX80
 typedef struct {
     uint64_t low;
     uint16_t high;
 } floatx80;
+#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
 #endif
 #ifdef FLOAT128
 typedef struct {
@@ -180,7 +192,8 @@
     float_flag_divbyzero =  4,
     float_flag_overflow  =  8,
     float_flag_underflow = 16,
-    float_flag_inexact   = 32
+    float_flag_inexact   = 32,
+    float_flag_input_denormal = 64
 };
 
 typedef struct float_status {
@@ -190,16 +203,27 @@
 #ifdef FLOATX80
     signed char floatx80_rounding_precision;
 #endif
+    /* should denormalised results go to zero and set the inexact flag? */
     flag flush_to_zero;
+    /* should denormalised inputs go to zero and set the input_denormal flag? */
+    flag flush_inputs_to_zero;
     flag default_nan_mode;
 } float_status;
 
 void set_float_rounding_mode(int val STATUS_PARAM);
 void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_float_detect_tininess(int val STATUS_PARAM)
+{
+    STATUS(float_detect_tininess) = val;
+}
 INLINE void set_flush_to_zero(flag val STATUS_PARAM)
 {
     STATUS(flush_to_zero) = val;
 }
+INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
+{
+    STATUS(flush_inputs_to_zero) = val;
+}
 INLINE void set_default_nan_mode(flag val STATUS_PARAM)
 {
     STATUS(default_nan_mode) = val;
@@ -221,42 +245,62 @@
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE integer-to-floating-point conversion routines.
 *----------------------------------------------------------------------------*/
-float32 int32_to_float32( int STATUS_PARAM );
-float64 int32_to_float64( int STATUS_PARAM );
+float32 int32_to_float32( int32 STATUS_PARAM );
+float64 int32_to_float64( int32 STATUS_PARAM );
 float32 uint32_to_float32( unsigned int STATUS_PARAM );
 float64 uint32_to_float64( unsigned int STATUS_PARAM );
 #ifdef FLOATX80
-floatx80 int32_to_floatx80( int STATUS_PARAM );
+floatx80 int32_to_floatx80( int32 STATUS_PARAM );
 #endif
 #ifdef FLOAT128
-float128 int32_to_float128( int STATUS_PARAM );
+float128 int32_to_float128( int32 STATUS_PARAM );
 #endif
-float32 int64_to_float32( int64_t STATUS_PARAM );
-float32 uint64_to_float32( uint64_t STATUS_PARAM );
-float64 int64_to_float64( int64_t STATUS_PARAM );
-float64 uint64_to_float64( uint64_t STATUS_PARAM );
+float32 int64_to_float32( int64 STATUS_PARAM );
+float32 uint64_to_float32( uint64 STATUS_PARAM );
+float64 int64_to_float64( int64 STATUS_PARAM );
+float64 uint64_to_float64( uint64 STATUS_PARAM );
 #ifdef FLOATX80
-floatx80 int64_to_floatx80( int64_t STATUS_PARAM );
+floatx80 int64_to_floatx80( int64 STATUS_PARAM );
 #endif
 #ifdef FLOAT128
-float128 int64_to_float128( int64_t STATUS_PARAM );
+float128 int64_to_float128( int64 STATUS_PARAM );
 #endif
 
 /*----------------------------------------------------------------------------
 | Software half-precision conversion routines.
 *----------------------------------------------------------------------------*/
-bits16 float32_to_float16( float32, flag STATUS_PARAM );
-float32 float16_to_float32( bits16, flag STATUS_PARAM );
+float16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( float16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision operations.
+*----------------------------------------------------------------------------*/
+int float16_is_quiet_nan( float16 );
+int float16_is_signaling_nan( float16 );
+float16 float16_maybe_silence_nan( float16 );
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_ARM)
+#define float16_default_nan make_float16(0x7E00)
+#elif SNAN_BIT_IS_ONE
+#define float16_default_nan make_float16(0x7DFF)
+#else
+#define float16_default_nan make_float16(0xFE00)
+#endif
 
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE single-precision conversion routines.
 *----------------------------------------------------------------------------*/
-int float32_to_int32( float32 STATUS_PARAM );
-int float32_to_int32_round_to_zero( float32 STATUS_PARAM );
-unsigned int float32_to_uint32( float32 STATUS_PARAM );
-unsigned int float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
-int64_t float32_to_int64( float32 STATUS_PARAM );
-int64_t float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+int16 float32_to_int16_round_to_zero( float32 STATUS_PARAM );
+unsigned int float32_to_uint16_round_to_zero( float32 STATUS_PARAM );
+int32 float32_to_int32( float32 STATUS_PARAM );
+int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+uint32 float32_to_uint32( float32 STATUS_PARAM );
+uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64 float32_to_int64( float32 STATUS_PARAM );
+int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
 float64 float32_to_float64( float32 STATUS_PARAM );
 #ifdef FLOATX80
 floatx80 float32_to_floatx80( float32 STATUS_PARAM );
@@ -280,22 +324,33 @@
 int float32_eq( float32, float32 STATUS_PARAM );
 int float32_le( float32, float32 STATUS_PARAM );
 int float32_lt( float32, float32 STATUS_PARAM );
-int float32_eq_signaling( float32, float32 STATUS_PARAM );
+int float32_unordered( float32, float32 STATUS_PARAM );
+int float32_eq_quiet( float32, float32 STATUS_PARAM );
 int float32_le_quiet( float32, float32 STATUS_PARAM );
 int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_unordered_quiet( float32, float32 STATUS_PARAM );
 int float32_compare( float32, float32 STATUS_PARAM );
 int float32_compare_quiet( float32, float32 STATUS_PARAM );
-int float32_is_nan( float32 );
+float32 float32_min(float32, float32 STATUS_PARAM);
+float32 float32_max(float32, float32 STATUS_PARAM);
+int float32_is_quiet_nan( float32 );
 int float32_is_signaling_nan( float32 );
+float32 float32_maybe_silence_nan( float32 );
 float32 float32_scalbn( float32, int STATUS_PARAM );
 
 INLINE float32 float32_abs(float32 a)
 {
+    /* Note that abs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
     return make_float32(float32_val(a) & 0x7fffffff);
 }
 
 INLINE float32 float32_chs(float32 a)
 {
+    /* Note that chs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
     return make_float32(float32_val(a) ^ 0x80000000);
 }
 
@@ -314,21 +369,55 @@
     return (float32_val(a) & 0x7fffffff) == 0;
 }
 
+INLINE int float32_is_any_nan(float32 a)
+{
+    return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
+}
+
+INLINE int float32_is_zero_or_denormal(float32 a)
+{
+    return (float32_val(a) & 0x7f800000) == 0;
+}
+
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+    return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
+}
+
 #define float32_zero make_float32(0)
 #define float32_one make_float32(0x3f800000)
 #define float32_ln2 make_float32(0x3f317218)
+#define float32_pi make_float32(0x40490fdb)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
+
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float32_default_nan make_float32(0x7FFFFFFF)
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float32_default_nan make_float32(0x7FC00000)
+#elif SNAN_BIT_IS_ONE
+#define float32_default_nan make_float32(0x7FBFFFFF)
+#else
+#define float32_default_nan make_float32(0xFFC00000)
+#endif
 
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE double-precision conversion routines.
 *----------------------------------------------------------------------------*/
-int float64_to_int32( float64 STATUS_PARAM );
-int float64_to_int32_round_to_zero( float64 STATUS_PARAM );
-unsigned int float64_to_uint32( float64 STATUS_PARAM );
-unsigned int float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
-int64_t float64_to_int64( float64 STATUS_PARAM );
-int64_t float64_to_int64_round_to_zero( float64 STATUS_PARAM );
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM);
-uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+int16 float64_to_int16_round_to_zero( float64 STATUS_PARAM );
+unsigned int float64_to_uint16_round_to_zero( float64 STATUS_PARAM );
+int32 float64_to_int32( float64 STATUS_PARAM );
+int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+uint32 float64_to_uint32( float64 STATUS_PARAM );
+uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64 float64_to_int64( float64 STATUS_PARAM );
+int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64 float64_to_uint64 (float64 a STATUS_PARAM);
+uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
 float32 float64_to_float32( float64 STATUS_PARAM );
 #ifdef FLOATX80
 floatx80 float64_to_floatx80( float64 STATUS_PARAM );
@@ -352,22 +441,33 @@
 int float64_eq( float64, float64 STATUS_PARAM );
 int float64_le( float64, float64 STATUS_PARAM );
 int float64_lt( float64, float64 STATUS_PARAM );
-int float64_eq_signaling( float64, float64 STATUS_PARAM );
+int float64_unordered( float64, float64 STATUS_PARAM );
+int float64_eq_quiet( float64, float64 STATUS_PARAM );
 int float64_le_quiet( float64, float64 STATUS_PARAM );
 int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_unordered_quiet( float64, float64 STATUS_PARAM );
 int float64_compare( float64, float64 STATUS_PARAM );
 int float64_compare_quiet( float64, float64 STATUS_PARAM );
-int float64_is_nan( float64 a );
+float64 float64_min(float64, float64 STATUS_PARAM);
+float64 float64_max(float64, float64 STATUS_PARAM);
+int float64_is_quiet_nan( float64 a );
 int float64_is_signaling_nan( float64 );
+float64 float64_maybe_silence_nan( float64 );
 float64 float64_scalbn( float64, int STATUS_PARAM );
 
 INLINE float64 float64_abs(float64 a)
 {
+    /* Note that abs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
     return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
 }
 
 INLINE float64 float64_chs(float64 a)
 {
+    /* Note that chs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
     return make_float64(float64_val(a) ^ 0x8000000000000000LL);
 }
 
@@ -386,19 +486,46 @@
     return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
 }
 
+INLINE int float64_is_any_nan(float64 a)
+{
+    return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
+}
+
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+    return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+                        | ((int64_t)sign << 63));
+}
+
 #define float64_zero make_float64(0)
 #define float64_one make_float64(0x3ff0000000000000LL)
 #define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_pi make_float64(0x400921fb54442d18LL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF ))
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 ))
+#elif SNAN_BIT_IS_ONE
+#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF ))
+#else
+#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 ))
+#endif
 
 #ifdef FLOATX80
 
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision conversion routines.
 *----------------------------------------------------------------------------*/
-int floatx80_to_int32( floatx80 STATUS_PARAM );
-int floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
-int64_t floatx80_to_int64( floatx80 STATUS_PARAM );
-int64_t floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
 float32 floatx80_to_float32( floatx80 STATUS_PARAM );
 float64 floatx80_to_float64( floatx80 STATUS_PARAM );
 #ifdef FLOAT128
@@ -418,11 +545,16 @@
 int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
 int floatx80_le( floatx80, floatx80 STATUS_PARAM );
 int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
-int floatx80_eq_signaling( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
 int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
 int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
-int floatx80_is_nan( floatx80 );
+int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_quiet_nan( floatx80 );
 int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_maybe_silence_nan( floatx80 );
 floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
 
 INLINE floatx80 floatx80_abs(floatx80 a)
@@ -439,7 +571,7 @@
 
 INLINE int floatx80_is_infinity(floatx80 a)
 {
-    return (a.high & 0x7fff) == 0x7fff && a.low == 0;
+    return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
 }
 
 INLINE int floatx80_is_neg(floatx80 a)
@@ -452,6 +584,31 @@
     return (a.high & 0x7fff) == 0 && a.low == 0;
 }
 
+INLINE int floatx80_is_any_nan(floatx80 a)
+{
+    return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
+}
+
+#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
+#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
+#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
+#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
+#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
+#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define floatx80_default_nan_high 0x7FFF
+#define floatx80_default_nan_low  LIT64( 0xBFFFFFFFFFFFFFFF )
+#else
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
+#endif
+
 #endif
 
 #ifdef FLOAT128
@@ -459,10 +616,10 @@
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE quadruple-precision conversion routines.
 *----------------------------------------------------------------------------*/
-int float128_to_int32( float128 STATUS_PARAM );
-int float128_to_int32_round_to_zero( float128 STATUS_PARAM );
-int64_t float128_to_int64( float128 STATUS_PARAM );
-int64_t float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+int32 float128_to_int32( float128 STATUS_PARAM );
+int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64 float128_to_int64( float128 STATUS_PARAM );
+int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
 float32 float128_to_float32( float128 STATUS_PARAM );
 float64 float128_to_float64( float128 STATUS_PARAM );
 #ifdef FLOATX80
@@ -482,13 +639,16 @@
 int float128_eq( float128, float128 STATUS_PARAM );
 int float128_le( float128, float128 STATUS_PARAM );
 int float128_lt( float128, float128 STATUS_PARAM );
-int float128_eq_signaling( float128, float128 STATUS_PARAM );
+int float128_unordered( float128, float128 STATUS_PARAM );
+int float128_eq_quiet( float128, float128 STATUS_PARAM );
 int float128_le_quiet( float128, float128 STATUS_PARAM );
 int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_unordered_quiet( float128, float128 STATUS_PARAM );
 int float128_compare( float128, float128 STATUS_PARAM );
 int float128_compare_quiet( float128, float128 STATUS_PARAM );
-int float128_is_nan( float128 );
+int float128_is_quiet_nan( float128 );
 int float128_is_signaling_nan( float128 );
+float128 float128_maybe_silence_nan( float128 );
 float128 float128_scalbn( float128, int STATUS_PARAM );
 
 INLINE float128 float128_abs(float128 a)
@@ -518,6 +678,24 @@
     return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
 }
 
+INLINE int float128_is_any_nan(float128 a)
+{
+    return ((a.high >> 48) & 0x7fff) == 0x7fff &&
+        ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+#else
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
+#define float128_default_nan_low  LIT64( 0x0000000000000000 )
+#endif
+
 #endif
 
 #else /* CONFIG_SOFTFLOAT */
diff --git a/gen-icount.h b/gen-icount.h
index 8879da6..5fb3829 100644
--- a/gen-icount.h
+++ b/gen-icount.h
@@ -29,7 +29,7 @@
     if (use_icount) {
         *icount_arg = num_insns;
         gen_set_label(icount_label);
-        tcg_gen_exit_tb((long)tb + 2);
+        tcg_gen_exit_tb((tcg_target_long)tb + 2);
     }
 }
 
diff --git a/hw/android_arm.c b/hw/android_arm.c
index 3b9dc6d..188051b 100644
--- a/hw/android_arm.c
+++ b/hw/android_arm.c
@@ -79,7 +79,7 @@
     env = cpu_init(cpu_model);
     register_savevm( "cpu", 0, ARM_CPU_SAVE_VERSION, cpu_save, cpu_load, env );
 
-    ram_offset = qemu_ram_alloc(ram_size);
+    ram_offset = qemu_ram_alloc(NULL,"android_arm",ram_size);
     cpu_register_physical_memory(0, ram_size, ram_offset | IO_MEM_RAM);
 
     cpu_pic = arm_pic_init_cpu(env);
diff --git a/hw/apic.c b/hw/apic.c
index b059185..7814ce6 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -599,7 +599,7 @@
 {
     int64_t d;
     uint32_t val;
-    d = (qemu_get_clock(vm_clock) - s->initial_count_load_time) >>
+    d = (qemu_get_clock_ns(vm_clock) - s->initial_count_load_time) >>
         s->count_shift;
     if (s->lvt[APIC_LVT_TIMER] & APIC_LVT_TIMER_PERIODIC) {
         /* periodic */
@@ -806,12 +806,12 @@
             int n = index - 0x32;
             s->lvt[n] = val;
             if (n == APIC_LVT_TIMER)
-                apic_timer_update(s, qemu_get_clock(vm_clock));
+                apic_timer_update(s, qemu_get_clock_ns(vm_clock));
         }
         break;
     case 0x38:
         s->initial_count = val;
-        s->initial_count_load_time = qemu_get_clock(vm_clock);
+        s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
         apic_timer_update(s, s->initial_count_load_time);
         break;
     case 0x39:
@@ -956,7 +956,7 @@
         cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
                                      apic_io_memory);
     }
-    s->timer = qemu_new_timer(vm_clock, apic_timer, s);
+    s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
 
     register_savevm("apic", s->idx, 2, apic_save, apic_load, s);
     qemu_register_reset(apic_reset, 0, s);
diff --git a/hw/armv7m.c b/hw/armv7m.c
index 297a3e1..f7636db 100644
--- a/hw/armv7m.c
+++ b/hw/armv7m.c
@@ -105,13 +105,13 @@
     cpu_physical_memory_write(addr, (uint8_t *)&v, 4);
 }
 
-static CPUReadMemoryFunc *bitband_readfn[] = {
+static CPUReadMemoryFunc * const bitband_readfn[] = {
    bitband_readb,
    bitband_readw,
    bitband_readl
 };
 
-static CPUWriteMemoryFunc *bitband_writefn[] = {
+static CPUWriteMemoryFunc * const bitband_writefn[] = {
    bitband_writeb,
    bitband_writew,
    bitband_writel
@@ -192,9 +192,11 @@
 
     /* Flash programming is done via the SCU, so pretend it is ROM.  */
     cpu_register_physical_memory(0, flash_size,
-                                 qemu_ram_alloc(flash_size) | IO_MEM_ROM);
+                                 qemu_ram_alloc(NULL, "armv7m.flash",
+                                                flash_size) | IO_MEM_ROM);
     cpu_register_physical_memory(0x20000000, sram_size,
-                                 qemu_ram_alloc(sram_size) | IO_MEM_RAM);
+                                 qemu_ram_alloc(NULL, "armv7m.sram",
+                                                sram_size) | IO_MEM_RAM);
     armv7m_bitband_init();
 
     nvic = qdev_create(NULL, "armv7m_nvic");
@@ -233,8 +235,8 @@
        space.  This stops qemu complaining about executing code outside RAM
        when returning from an exception.  */
     cpu_register_physical_memory(0xfffff000, 0x1000,
-                                 qemu_ram_alloc(0x1000) | IO_MEM_RAM);
-
+                                 qemu_ram_alloc(NULL, "armv7m.hack", 
+                                                0x1000) | IO_MEM_RAM);
     return pic;
 }
 
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index f789c78..f732011 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -64,7 +64,7 @@
 static void systick_reload(nvic_state *s, int reset)
 {
     if (reset)
-        s->systick.tick = qemu_get_clock(vm_clock);
+        s->systick.tick = qemu_get_clock_ns(vm_clock);
     s->systick.tick += (s->systick.reload + 1) * systick_scale(s);
     qemu_mod_timer(s->systick.timer, s->systick.tick);
 }
@@ -396,7 +396,7 @@
 
     gic_init(&s->gic);
     cpu_register_physical_memory(0xe000e000, 0x1000, s->gic.iomemtype);
-    s->systick.timer = qemu_new_timer(vm_clock, systick_timer_tick, s);
+    s->systick.timer = qemu_new_timer_ns(vm_clock, systick_timer_tick, s);
     register_savevm("armv7m_nvic", -1, 1, nvic_save, nvic_load, s);
 }
 
diff --git a/hw/bt-hci-csr.c b/hw/bt-hci-csr.c
index 982577d..65ffa37 100644
--- a/hw/bt-hci-csr.c
+++ b/hw/bt-hci-csr.c
@@ -88,7 +88,7 @@
     }
 
     if (s->out_len)
-        qemu_mod_timer(s->out_tm, qemu_get_clock(vm_clock) + s->baud_delay);
+        qemu_mod_timer(s->out_tm, qemu_get_clock_ns(vm_clock) + s->baud_delay);
 }
 
 #define csrhci_out_packetz(s, len) memset(csrhci_out_packet(s, len), 0, len)
@@ -446,7 +446,7 @@
     s->hci->evt_recv = csrhci_out_hci_packet_event;
     s->hci->acl_recv = csrhci_out_hci_packet_acl;
 
-    s->out_tm = qemu_new_timer(vm_clock, csrhci_out_tick, s);
+    s->out_tm = qemu_new_timer_ns(vm_clock, csrhci_out_tick, s);
     s->pins = qemu_allocate_irqs(csrhci_pins, s, __csrhci_pins);
     csrhci_reset(s);
 
diff --git a/hw/bt-hci.c b/hw/bt-hci.c
index f1ee92c..41df24c 100644
--- a/hw/bt-hci.c
+++ b/hw/bt-hci.c
@@ -576,7 +576,7 @@
 
 static void bt_hci_mod_timer_1280ms(QEMUTimer *timer, int period)
 {
-    qemu_mod_timer(timer, qemu_get_clock(vm_clock) +
+    qemu_mod_timer(timer, qemu_get_clock_ns(vm_clock) +
                    muldiv64(period << 7, get_ticks_per_sec(), 100));
 }
 
@@ -657,7 +657,7 @@
     if (master) {
         link->acl_mode = acl_active;
         hci->lm.handle[hci->lm.last_handle].acl_mode_timer =
-                qemu_new_timer(vm_clock, bt_hci_mode_tick, link);
+                qemu_new_timer_ns(vm_clock, bt_hci_mode_tick, link);
     }
 }
 
@@ -1084,7 +1084,7 @@
 
     bt_hci_event_status(hci, HCI_SUCCESS);
 
-    qemu_mod_timer(link->acl_mode_timer, qemu_get_clock(vm_clock) +
+    qemu_mod_timer(link->acl_mode_timer, qemu_get_clock_ns(vm_clock) +
                    muldiv64(interval * 625, get_ticks_per_sec(), 1000000));
     bt_hci_lmp_mode_change_master(hci, link->link, mode, interval);
 
@@ -2145,10 +2145,10 @@
 {
     struct bt_hci_s *s = qemu_mallocz(sizeof(struct bt_hci_s));
 
-    s->lm.inquiry_done = qemu_new_timer(vm_clock, bt_hci_inquiry_done, s);
-    s->lm.inquiry_next = qemu_new_timer(vm_clock, bt_hci_inquiry_next, s);
+    s->lm.inquiry_done = qemu_new_timer_ns(vm_clock, bt_hci_inquiry_done, s);
+    s->lm.inquiry_next = qemu_new_timer_ns(vm_clock, bt_hci_inquiry_next, s);
     s->conn_accept_timer =
-            qemu_new_timer(vm_clock, bt_hci_conn_accept_timeout, s);
+            qemu_new_timer_ns(vm_clock, bt_hci_conn_accept_timeout, s);
 
     s->evt_packet = bt_hci_evt_packet;
     s->evt_submit = bt_hci_evt_submit;
diff --git a/hw/goldfish_pipe.c b/hw/goldfish_pipe.c
index 998ba49..fd31a2b 100644
--- a/hw/goldfish_pipe.c
+++ b/hw/goldfish_pipe.c
@@ -655,7 +655,7 @@
 
     ANEW0(pipe);
     pingPongPipe_init0(&pipe->pingpong, hwpipe, svcOpaque);
-    pipe->timer = qemu_new_timer(vm_clock, throttlePipe_timerFunc, pipe);
+    pipe->timer = qemu_new_timer_ns(vm_clock, throttlePipe_timerFunc, pipe);
     /* For now, limit to 500 KB/s in both directions */
     pipe->sendRate = 1e9 / (500*1024*8);
     pipe->recvRate = pipe->sendRate;
diff --git a/hw/goldfish_timer.c b/hw/goldfish_timer.c
index a714b22..567ce2e 100644
--- a/hw/goldfish_timer.c
+++ b/hw/goldfish_timer.c
@@ -34,16 +34,6 @@
     QEMUTimer *timer;
 };
 
-/* Converts nanoseconds into ticks */
-static int64_t ns2tks(int64_t ns) {
-    return muldiv64(ns, get_ticks_per_sec(), 1000000000);
-}
-
-/* Converts ticks into nanoseconds */
-static int64_t tks2ns(int64_t tks) {
-    return muldiv64(tks, 1000000000, get_ticks_per_sec());
-}
-
 #define  GOLDFISH_TIMER_SAVE_VERSION  1
 
 static void  goldfish_timer_save(QEMUFile*  f, void*  opaque)
@@ -53,9 +43,9 @@
     qemu_put_be64(f, s->now_ns);  /* in case the kernel is in the middle of a timer read */
     qemu_put_byte(f, s->armed);
     if (s->armed) {
-        int64_t  now_tks   = qemu_get_clock(vm_clock);
-        int64_t  alarm_tks = ns2tks(s->alarm_low_ns | (int64_t)s->alarm_high_ns << 32);
-        qemu_put_be64(f, alarm_tks - now_tks);
+        int64_t  now_ns   = qemu_get_clock_ns(vm_clock);
+        int64_t  alarm_ns = (s->alarm_low_ns | (int64_t)s->alarm_high_ns << 32);
+        qemu_put_be64(f, alarm_ns - now_ns);
     }
 }
 
@@ -88,7 +78,7 @@
     struct timer_state *s = (struct timer_state *)opaque;
     switch(offset) {
         case TIMER_TIME_LOW:
-            s->now_ns = tks2ns(qemu_get_clock(vm_clock));
+            s->now_ns = qemu_get_clock_ns(vm_clock);
             return s->now_ns;
         case TIMER_TIME_HIGH:
             return s->now_ns >> 32;
@@ -101,16 +91,16 @@
 static void goldfish_timer_write(void *opaque, target_phys_addr_t offset, uint32_t value_ns)
 {
     struct timer_state *s = (struct timer_state *)opaque;
-    int64_t alarm_tks, now_tks;
+    int64_t alarm_ns, now_ns;
     switch(offset) {
         case TIMER_ALARM_LOW:
             s->alarm_low_ns = value_ns;
-            alarm_tks = ns2tks(s->alarm_low_ns | (int64_t)s->alarm_high_ns << 32);
-            now_tks   = qemu_get_clock(vm_clock);
-            if (alarm_tks <= now_tks) {
+            alarm_ns = (s->alarm_low_ns | (int64_t)s->alarm_high_ns << 32);
+            now_ns   = qemu_get_clock_ns(vm_clock);
+            if (alarm_ns <= now_ns) {
                 goldfish_device_set_irq(&s->dev, 0, 1);
             } else {
-                qemu_mod_timer(s->timer, alarm_tks);
+                qemu_mod_timer(s->timer, alarm_ns);
                 s->armed = 1;
             }
             break;
@@ -251,7 +241,7 @@
 {
     timer_state.dev.base = timerbase;
     timer_state.dev.irq = timerirq;
-    timer_state.timer = qemu_new_timer(vm_clock, goldfish_timer_tick, &timer_state);
+    timer_state.timer = qemu_new_timer_ns(vm_clock, goldfish_timer_tick, &timer_state);
     goldfish_device_add(&timer_state.dev, goldfish_timer_readfn, goldfish_timer_writefn, &timer_state);
     register_savevm( "goldfish_timer", 0, GOLDFISH_TIMER_SAVE_VERSION,
                      goldfish_timer_save, goldfish_timer_load, &timer_state);
diff --git a/hw/goldfish_trace.c b/hw/goldfish_trace.c
index 02f9b8d..9eeb2a4 100644
--- a/hw/goldfish_trace.c
+++ b/hw/goldfish_trace.c
@@ -16,7 +16,7 @@
 #include "qemu_file.h"
 #include "goldfish_trace.h"
 #include "sysemu.h"
-#include "trace.h"
+#include "android-trace.h"
 #ifdef CONFIG_MEMCHECK
 #include "memcheck/memcheck.h"
 #include "memcheck/memcheck_util.h"
diff --git a/hw/hw.h b/hw/hw.h
index efcbe1e..d230448 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -37,8 +37,8 @@
  * the new actual bandwidth. It should be new_rate if everything goes ok, and
  * the old rate otherwise
  */
-typedef size_t (QEMUFileSetRateLimit)(void *opaque, size_t new_rate);
-typedef size_t (QEMUFileGetRateLimit)(void *opaque);
+typedef int64_t (QEMUFileSetRateLimit)(void *opaque, int64_t new_rate);
+typedef int64_t (QEMUFileGetRateLimit)(void *opaque);
 
 QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer,
                          QEMUFileGetBufferFunc *get_buffer,
@@ -47,10 +47,11 @@
                          QEMUFileSetRateLimit *set_rate_limit,
 			 QEMUFileGetRateLimit *get_rate_limit);
 QEMUFile *qemu_fopen(const char *filename, const char *mode);
+QEMUFile *qemu_fdopen(int fd, const char *mode);
 QEMUFile *qemu_fopen_socket(int fd);
 QEMUFile *qemu_popen(FILE *popen_file, const char *mode);
 QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
-int qemu_popen_fd(QEMUFile *f);
+int qemu_stdio_fd(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
@@ -66,7 +67,9 @@
 void qemu_put_be16(QEMUFile *f, unsigned int v);
 void qemu_put_be32(QEMUFile *f, unsigned int v);
 void qemu_put_be64(QEMUFile *f, uint64_t v);
+#ifdef CONFIG_ANDROID
 void qemu_put_float(QEMUFile *f, float v);
+#endif
 int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
 int qemu_get_byte(QEMUFile *f);
 
@@ -80,10 +83,12 @@
 unsigned int qemu_get_be16(QEMUFile *f);
 unsigned int qemu_get_be32(QEMUFile *f);
 uint64_t qemu_get_be64(QEMUFile *f);
+#ifdef CONFIG_ANDROID
 float qemu_get_float(QEMUFile *f);
+#endif
 int qemu_file_rate_limit(QEMUFile *f);
-size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate);
-size_t qemu_file_get_rate_limit(QEMUFile *f);
+int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
+int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_has_error(QEMUFile *f);
 void qemu_file_set_error(QEMUFile *f);
 
diff --git a/hw/i8254.c b/hw/i8254.c
index c202c9c..a553ec2 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -66,7 +66,7 @@
     uint64_t d;
     int counter;
 
-    d = muldiv64(qemu_get_clock(vm_clock) - s->count_load_time, PIT_FREQ, get_ticks_per_sec());
+    d = muldiv64(qemu_get_clock_ns(vm_clock) - s->count_load_time, PIT_FREQ, get_ticks_per_sec());
     switch(s->mode) {
     case 0:
     case 1:
@@ -189,7 +189,7 @@
     case 5:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = qemu_get_clock(vm_clock);
+            s->count_load_time = qemu_get_clock_ns(vm_clock);
             pit_irq_timer_update(s, s->count_load_time);
         }
         break;
@@ -197,7 +197,7 @@
     case 3:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = qemu_get_clock(vm_clock);
+            s->count_load_time = qemu_get_clock_ns(vm_clock);
             pit_irq_timer_update(s, s->count_load_time);
         }
         /* XXX: disable/enable counting */
@@ -228,7 +228,7 @@
 {
     if (val == 0)
         val = 0x10000;
-    s->count_load_time = qemu_get_clock(vm_clock);
+    s->count_load_time = qemu_get_clock_ns(vm_clock);
     s->count = val;
     pit_irq_timer_update(s, s->count_load_time);
 }
@@ -262,7 +262,7 @@
                     if (!(val & 0x10) && !s->status_latched) {
                         /* status latch */
                         /* XXX: add BCD and null count */
-                        s->status =  (pit_get_out1(s, qemu_get_clock(vm_clock)) << 7) |
+                        s->status =  (pit_get_out1(s, qemu_get_clock_ns(vm_clock)) << 7) |
                             (s->rw_mode << 4) |
                             (s->mode << 1) |
                             s->bcd;
@@ -492,7 +492,7 @@
 
     s = &pit->channels[0];
     /* the timer 0 is connected to an IRQ */
-    s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
+    s->irq_timer = qemu_new_timer_ns(vm_clock, pit_irq_timer, s);
     s->irq = irq;
 
     register_savevm("i8254", base, 1, pit_save, pit_load, pit);
diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index f93a3cb..9cf880d 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -108,8 +108,8 @@
         qemu_del_timer(s->coalesced_timer);
     } else {
         /* divide each RTC interval to 2 - 8 smaller intervals */
-        int c = MIN(s->irq_coalesced, 7) + 1; 
-        int64_t next_clock = qemu_get_clock(vm_clock) +
+        int c = MIN(s->irq_coalesced, 7) + 1;
+        int64_t next_clock = qemu_get_clock_ns(vm_clock) +
 		muldiv64(s->period / c, get_ticks_per_sec(), 32768);
         qemu_mod_timer(s->coalesced_timer, next_clock);
     }
@@ -150,7 +150,7 @@
 #endif
 #endif
     enable_pie = 1;
-	
+
     if (period_code != 0
         && (((s->cmos_data[RTC_REG_B] & REG_B_PIE) && enable_pie)
             || ((s->cmos_data[RTC_REG_B] & REG_B_SQWE) && s->sqw_irq))) {
@@ -237,7 +237,7 @@
             /* UIP bit is read only */
             s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
                 (s->cmos_data[RTC_REG_A] & REG_A_UIP);
-            rtc_timer_update(s, qemu_get_clock(vm_clock));
+            rtc_timer_update(s, qemu_get_clock_ns(vm_clock));
             break;
         case RTC_REG_B:
             if (data & REG_B_SET) {
@@ -251,7 +251,7 @@
                 }
             }
             s->cmos_data[RTC_REG_B] = data;
-            rtc_timer_update(s, qemu_get_clock(vm_clock));
+            rtc_timer_update(s, qemu_get_clock_ns(vm_clock));
             break;
         case RTC_REG_C:
         case RTC_REG_D:
@@ -610,18 +610,18 @@
     s->base_year = base_year;
     rtc_set_date_from_host(s);
 
-    s->periodic_timer = qemu_new_timer(vm_clock,
+    s->periodic_timer = qemu_new_timer_ns(vm_clock,
                                        rtc_periodic_timer, s);
 #ifdef TARGET_I386
     if (rtc_td_hack)
-        s->coalesced_timer = qemu_new_timer(vm_clock, rtc_coalesced_timer, s);
+        s->coalesced_timer = qemu_new_timer_ns(vm_clock, rtc_coalesced_timer, s);
 #endif
-    s->second_timer = qemu_new_timer(vm_clock,
+    s->second_timer = qemu_new_timer_ns(vm_clock,
                                      rtc_update_second, s);
-    s->second_timer2 = qemu_new_timer(vm_clock,
+    s->second_timer2 = qemu_new_timer_ns(vm_clock,
                                       rtc_update_second2, s);
 
-    s->next_second_time = qemu_get_clock(vm_clock) + (get_ticks_per_sec() * 99) / 100;
+    s->next_second_time = qemu_get_clock_ns(vm_clock) + (get_ticks_per_sec() * 99) / 100;
     qemu_mod_timer(s->second_timer2, s->next_second_time);
 
     register_ioport_write(base, 2, 1, cmos_ioport_write, s);
@@ -731,14 +731,14 @@
     s->base_year = base_year;
     rtc_set_date_from_host(s);
 
-    s->periodic_timer = qemu_new_timer(vm_clock,
+    s->periodic_timer = qemu_new_timer_ns(vm_clock,
                                        rtc_periodic_timer, s);
-    s->second_timer = qemu_new_timer(vm_clock,
+    s->second_timer = qemu_new_timer_ns(vm_clock,
                                      rtc_update_second, s);
-    s->second_timer2 = qemu_new_timer(vm_clock,
+    s->second_timer2 = qemu_new_timer_ns(vm_clock,
                                       rtc_update_second2, s);
 
-    s->next_second_time = qemu_get_clock(vm_clock) + (get_ticks_per_sec() * 99) / 100;
+    s->next_second_time = qemu_get_clock_ns(vm_clock) + (get_ticks_per_sec() * 99) / 100;
     qemu_mod_timer(s->second_timer2, s->next_second_time);
 
     io_memory = cpu_register_io_memory(rtc_mm_read, rtc_mm_write, s);
diff --git a/hw/msmouse.c b/hw/msmouse.c
index 69356a5..05f893c 100644
--- a/hw/msmouse.c
+++ b/hw/msmouse.c
@@ -64,7 +64,7 @@
     qemu_free (chr);
 }
 
-CharDriverState *qemu_chr_open_msmouse(void)
+CharDriverState *qemu_chr_open_msmouse(QemuOpts *opts)
 {
     CharDriverState *chr;
 
diff --git a/hw/msmouse.h b/hw/msmouse.h
index 947afd9..456cb21 100644
--- a/hw/msmouse.h
+++ b/hw/msmouse.h
@@ -1,2 +1,2 @@
 /* msmouse.c */
-CharDriverState *qemu_chr_open_msmouse(void);
+CharDriverState *qemu_chr_open_msmouse(QemuOpts *opts);
diff --git a/hw/pc.c b/hw/pc.c
index ff7670e..0114ff5 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -550,7 +550,7 @@
     *p++ = 0x1f;		/* pop ds */
     *p++ = 0x58;		/* pop ax */
     *p++ = 0xcb;		/* lret */
-    
+
     /* Actual code */
     *reloc = (p - rom);
 
@@ -910,7 +910,7 @@
         cpu_model = "qemu32";
 #endif
     }
-    
+
     for(i = 0; i < smp_cpus; i++) {
         env = cpu_init(cpu_model);
         if (!env) {
@@ -927,25 +927,23 @@
     vmport_init();
 
     /* allocate RAM */
-    ram_addr = qemu_ram_alloc(0xa0000);
+    ram_addr = qemu_ram_alloc(NULL, "pc.ram",
+                              below_4g_mem_size + above_4g_mem_size);
     cpu_register_physical_memory(0, 0xa0000, ram_addr);
-
-    /* Allocate, even though we won't register, so we don't break the
-     * phys_ram_base + PA assumption. This range includes vga (0xa0000 - 0xc0000),
-     * and some bios areas, which will be registered later
-     */
-    ram_addr = qemu_ram_alloc(0x100000 - 0xa0000);
-    ram_addr = qemu_ram_alloc(below_4g_mem_size - 0x100000);
     cpu_register_physical_memory(0x100000,
                  below_4g_mem_size - 0x100000,
-                 ram_addr);
+                 ram_addr + 0x100000);
+    if (above_4g_mem_size > 0) {
+        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
+                                     ram_addr + below_4g_mem_size);
+    }
 #else
     /*
      * Allocate a single contiguous RAM so that the goldfish
      * framebuffer can work well especially when the frame buffer is
      * large.
      */
-    ram_addr = qemu_ram_alloc(below_4g_mem_size);
+    ram_addr = qemu_ram_alloc(NULL, "pc.ram", below_4g_mem_size);
     cpu_register_physical_memory(0, below_4g_mem_size, ram_addr);
 #endif
 
@@ -975,7 +973,7 @@
         (bios_size % 65536) != 0) {
         goto bios_error;
     }
-    bios_offset = qemu_ram_alloc(bios_size);
+    bios_offset = qemu_ram_alloc(NULL, "bios.bin", bios_size);
     ret = load_image(filename, qemu_get_ram_ptr(bios_offset));
     if (ret != bios_size) {
     bios_error:
@@ -995,7 +993,7 @@
 
 
 
-    option_rom_offset = qemu_ram_alloc(0x20000);
+    option_rom_offset = qemu_ram_alloc(NULL, "pc.rom", 0x20000);
     oprom_area_size = 0;
     cpu_register_physical_memory(0xc0000, 0x20000, option_rom_offset);
 
diff --git a/hw/usb-ohci.c b/hw/usb-ohci.c
index 7dd8ed3..d34b85d 100644
--- a/hw/usb-ohci.c
+++ b/hw/usb-ohci.c
@@ -626,7 +626,7 @@
 
     starting_frame = OHCI_BM(iso_td.flags, TD_SF);
     frame_count = OHCI_BM(iso_td.flags, TD_FC);
-    relative_frame_number = USUB(ohci->frame_number, starting_frame); 
+    relative_frame_number = USUB(ohci->frame_number, starting_frame);
 
 #ifdef DEBUG_ISOCH
     printf("--- ISO_TD ED head 0x%.8x tailp 0x%.8x\n"
@@ -640,8 +640,8 @@
            iso_td.flags, iso_td.bp, iso_td.next, iso_td.be,
            iso_td.offset[0], iso_td.offset[1], iso_td.offset[2], iso_td.offset[3],
            iso_td.offset[4], iso_td.offset[5], iso_td.offset[6], iso_td.offset[7],
-           ohci->frame_number, starting_frame, 
-           frame_count, relative_frame_number,         
+           ohci->frame_number, starting_frame,
+           frame_count, relative_frame_number,
            OHCI_BM(iso_td.flags, TD_DI), OHCI_BM(iso_td.flags, TD_CC));
 #endif
 
@@ -651,7 +651,7 @@
     } else if (relative_frame_number > frame_count) {
         /* ISO TD expired - retire the TD to the Done Queue and continue with
            the next ISO TD of the same ED */
-        dprintf("usb-ohci: ISO_TD R=%d > FC=%d\n", relative_frame_number, 
+        dprintf("usb-ohci: ISO_TD R=%d > FC=%d\n", relative_frame_number,
                frame_count);
         OHCI_SET_BM(iso_td.flags, TD_CC, OHCI_CC_DATAOVERRUN);
         ed->head &= ~OHCI_DPTR_MASK;
@@ -692,8 +692,8 @@
     start_offset = iso_td.offset[relative_frame_number];
     next_offset = iso_td.offset[relative_frame_number + 1];
 
-    if (!(OHCI_BM(start_offset, TD_PSW_CC) & 0xe) || 
-        ((relative_frame_number < frame_count) && 
+    if (!(OHCI_BM(start_offset, TD_PSW_CC) & 0xe) ||
+        ((relative_frame_number < frame_count) &&
          !(OHCI_BM(next_offset, TD_PSW_CC) & 0xe))) {
         printf("usb-ohci: ISO_TD cc != not accessed 0x%.8x 0x%.8x\n",
                start_offset, next_offset);
@@ -758,7 +758,7 @@
             if (ret != USB_RET_NODEV)
                 break;
         }
-    
+
         if (ret == USB_RET_ASYNC) {
             return 1;
         }
@@ -1095,7 +1095,7 @@
 /* Generate a SOF event, and set a timer for EOF */
 static void ohci_sof(OHCIState *ohci)
 {
-    ohci->sof_time = qemu_get_clock(vm_clock);
+    ohci->sof_time = qemu_get_clock_ns(vm_clock);
     qemu_mod_timer(ohci->eof_timer, ohci->sof_time + usb_frame_time);
     ohci_set_interrupt(ohci, OHCI_INTR_SF);
 }
@@ -1179,12 +1179,12 @@
  */
 static int ohci_bus_start(OHCIState *ohci)
 {
-    ohci->eof_timer = qemu_new_timer(vm_clock,
+    ohci->eof_timer = qemu_new_timer_ns(vm_clock,
                     ohci_frame_boundary,
                     ohci);
 
     if (ohci->eof_timer == NULL) {
-        fprintf(stderr, "usb-ohci: %s: qemu_new_timer failed\n", ohci->name);
+        fprintf(stderr, "usb-ohci: %s: qemu_new_timer_ns failed\n", ohci->name);
         /* TODO: Signal unrecoverable error */
         return 0;
     }
@@ -1304,7 +1304,7 @@
     /* Being in USB operational state guarnatees sof_time was
      * set already.
      */
-    tks = qemu_get_clock(vm_clock) - ohci->sof_time;
+    tks = qemu_get_clock_ns(vm_clock) - ohci->sof_time;
 
     /* avoid muldiv if possible */
     if (tks >= usb_frame_time)
diff --git a/input.c b/input.c
index ec05548..d20347f 100644
--- a/input.c
+++ b/input.c
@@ -28,24 +28,45 @@
 #include "console.h"
 #include "qjson.h"
 
-static QEMUPutKBDEvent *qemu_put_kbd_event;
-static void *qemu_put_kbd_event_opaque;
+#ifdef CONFIG_SKINNING
+QEMUPutMouseEntry *original_qemu_add_mouse_event_handler(QEMUPutMouseEvent *func,
+                                                         void *opaque, int absolute,
+                                                         const char *name);
+#undef qemu_add_mouse_event_handler
+#define qemu_add_mouse_event_handler original_qemu_add_mouse_event_handler
+#endif
+static QTAILQ_HEAD(, QEMUPutKBDEntry) kbd_handlers =
+    QTAILQ_HEAD_INITIALIZER(kbd_handlers);
 static QTAILQ_HEAD(, QEMUPutLEDEntry) led_handlers = QTAILQ_HEAD_INITIALIZER(led_handlers);
 static QTAILQ_HEAD(, QEMUPutMouseEntry) mouse_handlers =
     QTAILQ_HEAD_INITIALIZER(mouse_handlers);
-static NotifierList mouse_mode_notifiers = 
+static NotifierList mouse_mode_notifiers =
     NOTIFIER_LIST_INITIALIZER(mouse_mode_notifiers);
 
 void qemu_add_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque)
 {
-    qemu_put_kbd_event_opaque = opaque;
-    qemu_put_kbd_event = func;
+    QEMUPutKBDEntry *s;
+
+    if (func != NULL) {
+        s = qemu_mallocz(sizeof(QEMUPutKBDEntry));
+
+        s->put_kbd_event = func;
+        s->opaque = opaque;
+
+        QTAILQ_INSERT_TAIL(&kbd_handlers, s, next);
+    }
 }
 
-void qemu_remove_kbd_event_handler(void)
+void qemu_remove_kbd_event_handler(QEMUPutKBDEvent *func, void *opaque)
 {
-    qemu_put_kbd_event_opaque = NULL;
-    qemu_put_kbd_event = NULL;
+    QEMUPutKBDEntry *cursor, *cursor_next;
+    if (func != NULL) {
+        QTAILQ_FOREACH_SAFE(cursor, &kbd_handlers, next, cursor_next) {
+            if (cursor->put_kbd_event == func && cursor->opaque == opaque) {
+                QTAILQ_REMOVE(&kbd_handlers, cursor, next);
+            }
+        }
+    }
 }
 
 static void check_mode_change(void)
@@ -129,8 +150,9 @@
 
 void kbd_put_keycode(int keycode)
 {
-    if (qemu_put_kbd_event) {
-        qemu_put_kbd_event(qemu_put_kbd_event_opaque, keycode);
+    QEMUPutKBDEntry *cursor;
+    QTAILQ_FOREACH(cursor, &kbd_handlers, next) {
+        cursor->put_kbd_event(cursor->opaque, keycode);
     }
 }
 
@@ -148,7 +170,9 @@
     QEMUPutMouseEntry *entry;
     QEMUPutMouseEvent *mouse_event;
     void *mouse_event_opaque;
+#ifndef CONFIG_SKINNING
     int width;
+#endif
 
     if (QTAILQ_EMPTY(&mouse_handlers)) {
         return;
@@ -160,16 +184,20 @@
     mouse_event_opaque = entry->qemu_put_mouse_event_opaque;
 
     if (mouse_event) {
+#ifndef CONFIG_SKINNING
         if (graphic_rotate) {
-            if (entry->qemu_put_mouse_event_absolute)
+            if (entry->qemu_put_mouse_event_absolute) {
                 width = 0x7fff;
-            else
+            } else {
                 width = graphic_width - 1;
-            mouse_event(mouse_event_opaque,
-                        width - dy, dx, dz, buttons_state);
-        } else
-            mouse_event(mouse_event_opaque,
-                        dx, dy, dz, buttons_state);
+            }
+            mouse_event(mouse_event_opaque, width - dy, dx, dz, buttons_state);
+        } else {
+            mouse_event(mouse_event_opaque, dx, dy, dz, buttons_state);
+        }
+#else
+        mouse_event(mouse_event_opaque, dx, dy, dz, buttons_state);
+#endif
     }
 }
 
diff --git a/iohandler.c b/iohandler.c
new file mode 100644
index 0000000..2b82421
--- /dev/null
+++ b/iohandler.c
@@ -0,0 +1,193 @@
+/*
+ * QEMU System Emulator - managing I/O handler
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "qemu-char.h"
+#include "qemu-queue.h"
+
+#ifndef _WIN32
+#include <sys/wait.h>
+#endif
+
+typedef struct IOHandlerRecord {
+    int fd;
+    IOCanReadHandler *fd_read_poll;
+    IOHandler *fd_read;
+    IOHandler *fd_write;
+    int deleted;
+    void *opaque;
+    QLIST_ENTRY(IOHandlerRecord) next;
+} IOHandlerRecord;
+
+static QLIST_HEAD(, IOHandlerRecord) io_handlers =
+    QLIST_HEAD_INITIALIZER(io_handlers);
+
+
+/* XXX: fd_read_poll should be suppressed, but an API change is
+   necessary in the character devices to suppress fd_can_read(). */
+int qemu_set_fd_handler2(int fd,
+                         IOCanReadHandler *fd_read_poll,
+                         IOHandler *fd_read,
+                         IOHandler *fd_write,
+                         void *opaque)
+{
+    IOHandlerRecord *ioh;
+
+    if (!fd_read && !fd_write) {
+        QLIST_FOREACH(ioh, &io_handlers, next) {
+            if (ioh->fd == fd) {
+                ioh->deleted = 1;
+                break;
+            }
+        }
+    } else {
+        QLIST_FOREACH(ioh, &io_handlers, next) {
+            if (ioh->fd == fd)
+                goto found;
+        }
+        ioh = qemu_mallocz(sizeof(IOHandlerRecord));
+        QLIST_INSERT_HEAD(&io_handlers, ioh, next);
+    found:
+        ioh->fd = fd;
+        ioh->fd_read_poll = fd_read_poll;
+        ioh->fd_read = fd_read;
+        ioh->fd_write = fd_write;
+        ioh->opaque = opaque;
+        ioh->deleted = 0;
+    }
+    return 0;
+}
+
+int qemu_set_fd_handler(int fd,
+                        IOHandler *fd_read,
+                        IOHandler *fd_write,
+                        void *opaque)
+{
+    return qemu_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque);
+}
+
+void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds)
+{
+    IOHandlerRecord *ioh;
+
+    QLIST_FOREACH(ioh, &io_handlers, next) {
+        if (ioh->deleted)
+            continue;
+        if (ioh->fd_read &&
+            (!ioh->fd_read_poll ||
+             ioh->fd_read_poll(ioh->opaque) != 0)) {
+            FD_SET(ioh->fd, readfds);
+            if (ioh->fd > *pnfds)
+                *pnfds = ioh->fd;
+        }
+        if (ioh->fd_write) {
+            FD_SET(ioh->fd, writefds);
+            if (ioh->fd > *pnfds)
+                *pnfds = ioh->fd;
+        }
+    }
+}
+
+void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int ret)
+{
+    if (ret > 0) {
+        IOHandlerRecord *pioh, *ioh;
+
+        QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) {
+            if (!ioh->deleted && ioh->fd_read && FD_ISSET(ioh->fd, readfds)) {
+                ioh->fd_read(ioh->opaque);
+            }
+            if (!ioh->deleted && ioh->fd_write && FD_ISSET(ioh->fd, writefds)) {
+                ioh->fd_write(ioh->opaque);
+            }
+
+            /* Do this last in case read/write handlers marked it for deletion */
+            if (ioh->deleted) {
+                QLIST_REMOVE(ioh, next);
+                qemu_free(ioh);
+            }
+        }
+    }
+}
+
+/* reaping of zombies.  right now we're not passing the status to
+   anyone, but it would be possible to add a callback.  */
+#ifndef _WIN32
+typedef struct ChildProcessRecord {
+    int pid;
+    QLIST_ENTRY(ChildProcessRecord) next;
+} ChildProcessRecord;
+
+static QLIST_HEAD(, ChildProcessRecord) child_watches =
+    QLIST_HEAD_INITIALIZER(child_watches);
+
+static QEMUBH *sigchld_bh;
+
+static void sigchld_handler(int signal)
+{
+    qemu_bh_schedule(sigchld_bh);
+}
+
+static void sigchld_bh_handler(void *opaque)
+{
+    ChildProcessRecord *rec, *next;
+
+    QLIST_FOREACH_SAFE(rec, &child_watches, next, next) {
+        if (waitpid(rec->pid, NULL, WNOHANG) == rec->pid) {
+            QLIST_REMOVE(rec, next);
+            qemu_free(rec);
+        }
+    }
+}
+
+static void qemu_init_child_watch(void)
+{
+    struct sigaction act;
+    sigchld_bh = qemu_bh_new(sigchld_bh_handler, NULL);
+
+    act.sa_handler = sigchld_handler;
+    act.sa_flags = SA_NOCLDSTOP;
+    sigaction(SIGCHLD, &act, NULL);
+}
+
+int qemu_add_child_watch(pid_t pid)
+{
+    ChildProcessRecord *rec;
+
+    if (!sigchld_bh) {
+        qemu_init_child_watch();
+    }
+
+    QLIST_FOREACH(rec, &child_watches, next) {
+        if (rec->pid == pid) {
+            return 1;
+        }
+    }
+    rec = qemu_mallocz(sizeof(ChildProcessRecord));
+    rec->pid = pid;
+    QLIST_INSERT_HEAD(&child_watches, rec, next);
+    return 0;
+}
+#endif
diff --git a/json-lexer.c b/json-lexer.c
index 534fcf7..65c9720 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -27,12 +27,8 @@
  *
  */
 
-/* Building with mingw results in an error because ERROR is defined as a
- * macro in this environment. Undefined it */
-#undef ERROR
-
 enum json_lexer_state {
-    ERROR = 0,
+    IN_ERROR = 0,
     IN_DQ_UCODE3,
     IN_DQ_UCODE2,
     IN_DQ_UCODE1,
@@ -154,7 +150,7 @@
     /* Zero */
     [IN_ZERO] = {
         TERMINAL(JSON_INTEGER),
-        ['0' ... '9'] = ERROR,
+        ['0' ... '9'] = IN_ERROR,
         ['.'] = IN_MANTISSA,
     },
 
@@ -306,7 +302,7 @@
             lexer->token = qstring_new();
             new_state = IN_START;
             break;
-        case ERROR:
+        case IN_ERROR:
             return -EINVAL;
         default:
             break;
diff --git a/json-parser.c b/json-parser.c
index 70b9b6f..6c06ef9 100644
--- a/json-parser.c
+++ b/json-parser.c
@@ -91,7 +91,8 @@
 /**
  * Error handler
  */
-static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
+static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
+                                           QObject *token, const char *msg, ...)
 {
     va_list ap;
     va_start(ap, msg);
diff --git a/migration-exec.c b/migration-exec.c
index 0dd5aff..ab1dea2 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -20,14 +20,16 @@
 #include "sysemu.h"
 #include "buffered_file.h"
 #include "block.h"
+#include <sys/types.h>
+#include <sys/wait.h>
 
 //#define DEBUG_MIGRATION_EXEC
 
 #ifdef DEBUG_MIGRATION_EXEC
-#define dprintf(fmt, ...) \
+#define DPRINTF(fmt, ...) \
     do { printf("migration-exec: " fmt, ## __VA_ARGS__); } while (0)
 #else
-#define dprintf(fmt, ...) \
+#define DPRINTF(fmt, ...) \
     do { } while (0)
 #endif
 
@@ -43,13 +45,21 @@
 
 static int exec_close(FdMigrationState *s)
 {
-    dprintf("exec_close\n");
+    int ret = 0;
+    DPRINTF("exec_close\n");
     if (s->opaque) {
-        qemu_fclose(s->opaque);
+        ret = qemu_fclose(s->opaque);
         s->opaque = NULL;
         s->fd = -1;
+        if (ret != -1 &&
+            WIFEXITED(ret)
+            && WEXITSTATUS(ret) == 0) {
+            ret = 0;
+        } else {
+            ret = -1;
     }
-    return 0;
+    }
+    return ret;
 }
 
 MigrationState *exec_start_outgoing_migration(const char *command,
@@ -63,18 +73,18 @@
 
     f = popen(command, "w");
     if (f == NULL) {
-        dprintf("Unable to popen exec target\n");
+        DPRINTF("Unable to popen exec target\n");
         goto err_after_alloc;
     }
 
     s->fd = fileno(f);
     if (s->fd == -1) {
-        dprintf("Unable to retrieve file descriptor for popen'd handle\n");
+        DPRINTF("Unable to retrieve file descriptor for popen'd handle\n");
         goto err_after_open;
     }
 
     if (fcntl(s->fd, F_SETFD, O_NONBLOCK) == -1) {
-        dprintf("Unable to set nonblocking mode on file descriptor\n");
+        DPRINTF("Unable to set nonblocking mode on file descriptor\n");
         goto err_after_open;
     }
 
@@ -116,9 +126,9 @@
         goto err;
     }
     qemu_announce_self();
-    dprintf("successfully loaded vm state\n");
+    DPRINTF("successfully loaded vm state\n");
     /* we've successfully migrated, close the fd */
-    qemu_set_fd_handler2(qemu_popen_fd(f), NULL, NULL, NULL, NULL);
+    qemu_set_fd_handler2(qemu_stdio_fd(f), NULL, NULL, NULL, NULL);
     vm_start();
 
 err:
@@ -129,14 +139,14 @@
 {
     QEMUFile *f;
 
-    dprintf("Attempting to start an incoming migration\n");
+    DPRINTF("Attempting to start an incoming migration\n");
     f = qemu_popen_cmd(command, "r");
     if(f == NULL) {
-        dprintf("Unable to apply qemu wrapper to popen file\n");
+        DPRINTF("Unable to apply qemu wrapper to popen file\n");
         return -errno;
     }
 
-    qemu_set_fd_handler2(qemu_popen_fd(f), NULL,
+    qemu_set_fd_handler2(qemu_stdio_fd(f), NULL,
 			 exec_accept_incoming_migration, NULL,
 			 (void *)(unsigned long)f);
 
diff --git a/migration.h b/migration.h
index 37c7f8e..9d145b0 100644
--- a/migration.h
+++ b/migration.h
@@ -102,4 +102,11 @@
     return container_of(mig_state, FdMigrationState, mig_state);
 }
 
+uint64_t ram_bytes_remaining(void);
+uint64_t ram_bytes_transferred(void);
+uint64_t ram_bytes_total(void);
+
+int ram_save_live(QEMUFile *f, int stage, void *opaque);
+int ram_load(QEMUFile *f, void *opaque, int version_id);
+
 #endif
diff --git a/monitor-android.h b/monitor-android.h
new file mode 100644
index 0000000..b9b0b37
--- /dev/null
+++ b/monitor-android.h
@@ -0,0 +1,49 @@
+/* This file is included from monitor.c, it's purpose is to hold as much
+ * Android-specific stuff as possible to ease upstream integrations.
+ */
+
+Monitor*
+monitor_fake_new(void* opaque, MonitorFakeFunc cb)
+{
+    Monitor* mon;
+
+    assert(cb != NULL);
+    mon = qemu_mallocz(sizeof(*mon));
+    mon->fake_opaque = opaque;
+    mon->fake_func   = cb;
+    mon->fake_count  = 0;
+
+    return mon;
+}
+
+int
+monitor_fake_get_bytes(Monitor* mon)
+{
+    assert(mon->fake_func != NULL);
+    return mon->fake_count;
+}
+
+void
+monitor_fake_free(Monitor* mon)
+{
+    assert(mon->fake_func != NULL);
+    free(mon);
+}
+
+/* This replaces the definition in monitor.c which is in a
+ * #ifndef CONFIG_ANDROID .. #endif block.
+ */
+void monitor_flush(Monitor *mon)
+{
+    if (!mon)
+        return;
+
+    if (mon->fake_func != NULL) {
+        mon->fake_func(mon->fake_opaque, (void*)mon->outbuf, mon->outbuf_index);
+        mon->outbuf_index = 0;
+        mon->fake_count += mon->outbuf_index;
+    } else if (!mon->mux_out) {
+        qemu_chr_write(mon->chr, mon->outbuf, mon->outbuf_index);
+        mon->outbuf_index = 0;
+    }
+}
diff --git a/monitor.c b/monitor.c
index 2d0a071..95e81aa 100644
--- a/monitor.c
+++ b/monitor.c
@@ -86,8 +86,18 @@
     void *password_opaque;
     QLIST_ENTRY(Monitor) entry;
     int has_quit;
+#ifdef CONFIG_ANDROID
+    void*            fake_opaque;
+    MonitorFakeFunc  fake_func;
+    int64_t          fake_count;
+
+#endif
 };
 
+#ifdef CONFIG_ANDROID
+#include "monitor-android.h"
+#endif
+
 static QLIST_HEAD(mon_list, Monitor) mon_list;
 
 #if defined(TARGET_I386)
@@ -158,6 +168,7 @@
     }
 }
 
+#ifndef CONFIG_ANDROID /* See monitor-android.h */
 void monitor_flush(Monitor *mon)
 {
     if (mon && mon->outbuf_index != 0 && !mon->mux_out) {
@@ -165,6 +176,7 @@
         mon->outbuf_index = 0;
     }
 }
+#endif
 
 /* flush at every end of line or if the buffer is full */
 static void monitor_puts(Monitor *mon, const char *str)
@@ -187,13 +199,11 @@
     }
 }
 
-int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 {
     char buf[4096];
-    int ret = vsnprintf(buf, sizeof(buf), fmt, ap);
+    vsnprintf(buf, sizeof(buf), fmt, ap);
     monitor_puts(mon, buf);
-
-    return ret;
 }
 
 void monitor_printf(Monitor *mon, const char *fmt, ...)
@@ -231,7 +241,8 @@
     }
 }
 
-static int monitor_fprintf(FILE *stream, const char *fmt, ...)
+static int GCC_FMT_ATTR(2, 3) monitor_fprintf(FILE *stream,
+                                              const char *fmt, ...)
 {
     va_list ap;
     va_start(ap, fmt);
@@ -1120,8 +1131,7 @@
         kbd_put_keycode(keycode & 0x7f);
     }
     /* delayed key up events */
-    qemu_mod_timer(key_timer, qemu_get_clock(vm_clock) +
-                   muldiv64(get_ticks_per_sec(), hold_time, 1000));
+    qemu_mod_timer(key_timer, qemu_get_clock_ms(vm_clock) + hold_time);
 }
 
 static int mouse_button_state;
@@ -3072,7 +3082,7 @@
     Monitor *mon;
 
     if (is_first_init) {
-        key_timer = qemu_new_timer(vm_clock, release_keys, NULL);
+        key_timer = qemu_new_timer_ms(vm_clock, release_keys, NULL);
         is_first_init = 0;
     }
 
diff --git a/monitor.h b/monitor.h
index c94a3c8..6f2a3e2 100644
--- a/monitor.h
+++ b/monitor.h
@@ -14,7 +14,8 @@
 #define MONITOR_IS_DEFAULT    0x01
 #define MONITOR_USE_READLINE  0x02
 #define MONITOR_USE_CONTROL   0x04
-#define MONITOR_QUIT_DOESNT_EXIT  0x08  /* prevent 'quit' from exiting the emulator */
+#define MONITOR_USE_PRETTY    0x08
+#define MONITOR_QUIT_DOESNT_EXIT  0x80  /* prevent 'quit' from exiting the emulator */
 
 /* flags for monitor commands */
 #define MONITOR_CMD_ASYNC       0x0001
@@ -33,6 +34,9 @@
     QEVENT_BLOCK_IO_ERROR,
     QEVENT_RTC_CHANGE,
     QEVENT_WATCHDOG,
+    QEVENT_SPICE_CONNECTED,
+    QEVENT_SPICE_INITIALIZED,
+    QEVENT_SPICE_DISCONNECTED,
     QEVENT_MAX,
 } MonitorEvent;
 
@@ -50,9 +54,9 @@
 
 int monitor_get_fd(Monitor *mon, const char *fdname);
 
-int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap);
-void monitor_printf(Monitor *mon, const char *fmt, ...)
-    __attribute__ ((__format__ (__printf__, 2, 3)));
+void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+    GCC_FMT_ATTR(2, 0);
+void monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void monitor_print_filename(Monitor *mon, const char *filename);
 void monitor_flush(Monitor *mon);
 
@@ -60,4 +64,17 @@
 
 void monitor_set_error(Monitor *mon, QError *qerror);
 
+#ifdef CONFIG_ANDROID
+typedef int (*MonitorFakeFunc)(void* opaque, const char* str, int strsize);
+
+/* Create a new fake Monitor object to send all output to an internal
+ * buffer. This is used to send snapshot save/load errors (produced in
+ * savevm.c) to the Android console when 'avd snapshot save' or
+ * 'avd snapshot load' are used.
+ */
+Monitor* monitor_fake_new(void* opaque, MonitorFakeFunc cb);
+int      monitor_fake_get_bytes(Monitor* mon);
+void     monitor_fake_free(Monitor* mon);
+#endif
+
 #endif /* !MONITOR_H */
diff --git a/os-posix.c b/os-posix.c
new file mode 100644
index 0000000..324bf13
--- /dev/null
+++ b/os-posix.c
@@ -0,0 +1,398 @@
+/*
+ * os-posix.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+/*needed for MAP_POPULATE before including qemu-options.h */
+#include <sys/mman.h>
+#include <pwd.h>
+#include <libgen.h>
+
+/* Needed early for CONFIG_BSD etc. */
+#include "config-host.h"
+#include "sysemu.h"
+#include "net.h"
+#include "qemu-options.h"
+
+#ifdef CONFIG_LINUX
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#endif
+
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+static struct passwd *user_pwd;
+static const char *chroot_dir;
+static int daemonize;
+static int fds[2];
+
+void os_setup_early_signal_handling(void)
+{
+    struct sigaction act;
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+    act.sa_handler = SIG_IGN;
+    sigaction(SIGPIPE, &act, NULL);
+}
+
+static void termsig_handler(int signal)
+{
+    qemu_system_shutdown_request();
+}
+
+static void sigchld_handler(int signal)
+{
+    waitpid(-1, NULL, WNOHANG);
+}
+
+void os_setup_signal_handling(void)
+{
+    struct sigaction act;
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = termsig_handler;
+    sigaction(SIGINT,  &act, NULL);
+    sigaction(SIGHUP,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+
+    act.sa_handler = sigchld_handler;
+    act.sa_flags = SA_NOCLDSTOP;
+    sigaction(SIGCHLD, &act, NULL);
+}
+
+/* Find a likely location for support files using the location of the binary.
+   For installed binaries this will be "$bindir/../share/qemu".  When
+   running from the build tree this will be "$bindir/../pc-bios".  */
+#define SHARE_SUFFIX "/share/qemu"
+#define BUILD_SUFFIX "/pc-bios"
+char *os_find_datadir(const char *argv0)
+{
+    char *dir;
+    char *p = NULL;
+    char *res;
+    char buf[PATH_MAX];
+    size_t max_len;
+
+#if defined(__linux__)
+    {
+        int len;
+        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+        if (len > 0) {
+            buf[len] = 0;
+            p = buf;
+        }
+    }
+#elif defined(__FreeBSD__)
+    {
+        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+        size_t len = sizeof(buf) - 1;
+
+        *buf = '\0';
+        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
+            *buf) {
+            buf[sizeof(buf) - 1] = '\0';
+            p = buf;
+        }
+    }
+#endif
+    /* If we don't have any way of figuring out the actual executable
+       location then try argv[0].  */
+    if (!p) {
+        p = realpath(argv0, buf);
+        if (!p) {
+            return NULL;
+        }
+    }
+    dir = dirname(p);
+    dir = dirname(dir);
+
+    max_len = strlen(dir) +
+        MAX(strlen(SHARE_SUFFIX), strlen(BUILD_SUFFIX)) + 1;
+    res = qemu_mallocz(max_len);
+    snprintf(res, max_len, "%s%s", dir, SHARE_SUFFIX);
+    if (access(res, R_OK)) {
+        snprintf(res, max_len, "%s%s", dir, BUILD_SUFFIX);
+        if (access(res, R_OK)) {
+            qemu_free(res);
+            res = NULL;
+        }
+    }
+
+    return res;
+}
+#undef SHARE_SUFFIX
+#undef BUILD_SUFFIX
+
+void os_set_proc_name(const char *s)
+{
+#if defined(PR_SET_NAME)
+    char name[16];
+    if (!s)
+        return;
+    name[sizeof(name) - 1] = 0;
+    strncpy(name, s, sizeof(name));
+    /* Could rewrite argv[0] too, but that's a bit more complicated.
+       This simple way is enough for `top'. */
+    if (prctl(PR_SET_NAME, name)) {
+        perror("unable to change process name");
+        exit(1);
+    }
+#else
+    fprintf(stderr, "Change of process name not supported by your OS\n");
+    exit(1);
+#endif
+}
+
+/*
+ * Parse OS specific command line options.
+ * return 0 if option handled, -1 otherwise
+ */
+void os_parse_cmd_args(int index, const char *optarg)
+{
+    switch (index) {
+#ifdef CONFIG_SLIRP
+    case QEMU_OPTION_smb:
+#if 1
+        net_slirp_smb(optarg);
+#else
+        if (net_slirp_smb(optarg) < 0)
+            exit(1);
+#endif
+        break;
+#endif
+    case QEMU_OPTION_runas:
+        user_pwd = getpwnam(optarg);
+        if (!user_pwd) {
+            fprintf(stderr, "User \"%s\" doesn't exist\n", optarg);
+            exit(1);
+        }
+        break;
+    case QEMU_OPTION_chroot:
+        chroot_dir = optarg;
+        break;
+    case QEMU_OPTION_daemonize:
+        daemonize = 1;
+        break;
+    }
+    return;
+}
+
+static void change_process_uid(void)
+{
+    if (user_pwd) {
+        if (setgid(user_pwd->pw_gid) < 0) {
+            fprintf(stderr, "Failed to setgid(%d)\n", user_pwd->pw_gid);
+            exit(1);
+        }
+        if (setuid(user_pwd->pw_uid) < 0) {
+            fprintf(stderr, "Failed to setuid(%d)\n", user_pwd->pw_uid);
+            exit(1);
+        }
+        if (setuid(0) != -1) {
+            fprintf(stderr, "Dropping privileges failed\n");
+            exit(1);
+        }
+    }
+}
+
+static void change_root(void)
+{
+    if (chroot_dir) {
+        if (chroot(chroot_dir) < 0) {
+            fprintf(stderr, "chroot failed\n");
+            exit(1);
+        }
+        if (chdir("/")) {
+            perror("not able to chdir to /");
+            exit(1);
+        }
+    }
+
+}
+
+void os_daemonize(void)
+{
+    if (daemonize) {
+	pid_t pid;
+
+	if (pipe(fds) == -1)
+	    exit(1);
+
+	pid = fork();
+	if (pid > 0) {
+	    uint8_t status;
+	    ssize_t len;
+
+	    close(fds[1]);
+
+	again:
+            len = read(fds[0], &status, 1);
+            if (len == -1 && (errno == EINTR))
+                goto again;
+
+            if (len != 1)
+                exit(1);
+            else if (status == 1) {
+                fprintf(stderr, "Could not acquire pidfile: %s\n", strerror(errno));
+                exit(1);
+            } else
+                exit(0);
+	} else if (pid < 0)
+            exit(1);
+
+	close(fds[0]);
+	qemu_set_cloexec(fds[1]);
+
+	setsid();
+
+	pid = fork();
+	if (pid > 0)
+	    exit(0);
+	else if (pid < 0)
+	    exit(1);
+
+	umask(027);
+
+        signal(SIGTSTP, SIG_IGN);
+        signal(SIGTTOU, SIG_IGN);
+        signal(SIGTTIN, SIG_IGN);
+    }
+}
+
+void os_setup_post(void)
+{
+    int fd = 0;
+
+    if (daemonize) {
+	uint8_t status = 0;
+	ssize_t len;
+
+    again1:
+	len = write(fds[1], &status, 1);
+	if (len == -1 && (errno == EINTR))
+	    goto again1;
+
+	if (len != 1)
+	    exit(1);
+
+        if (chdir("/")) {
+            perror("not able to chdir to /");
+            exit(1);
+        }
+	TFR(fd = qemu_open("/dev/null", O_RDWR));
+	if (fd == -1)
+	    exit(1);
+    }
+
+    change_root();
+    change_process_uid();
+
+    if (daemonize) {
+        dup2(fd, 0);
+        dup2(fd, 1);
+        dup2(fd, 2);
+
+        close(fd);
+    }
+}
+
+void os_pidfile_error(void)
+{
+    if (daemonize) {
+        uint8_t status = 1;
+        if (write(fds[1], &status, 1) != 1) {
+            perror("daemonize. Writing to pipe\n");
+        }
+    } else
+        fprintf(stderr, "Could not acquire pid file: %s\n", strerror(errno));
+}
+
+void os_set_line_buffering(void)
+{
+    setvbuf(stdout, NULL, _IOLBF, 0);
+}
+
+/*
+ * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
+ */
+int qemu_eventfd(int fds[2])
+{
+#ifdef CONFIG_EVENTFD
+    int ret;
+
+    ret = eventfd(0, 0);
+    if (ret >= 0) {
+        fds[0] = ret;
+        qemu_set_cloexec(ret);
+        if ((fds[1] = dup(ret)) == -1) {
+            close(ret);
+            return -1;
+        }
+        qemu_set_cloexec(fds[1]);
+        return 0;
+    }
+
+    if (errno != ENOSYS) {
+        return -1;
+    }
+#endif
+
+    return qemu_pipe(fds);
+}
+
+int qemu_create_pidfile(const char *filename)
+{
+    char buffer[128];
+    int len;
+    int fd;
+
+    fd = qemu_open(filename, O_RDWR | O_CREAT, 0600);
+    if (fd == -1) {
+        return -1;
+    }
+    if (lockf(fd, F_TLOCK, 0) == -1) {
+        return -1;
+    }
+    len = snprintf(buffer, sizeof(buffer), "%ld\n", (long)getpid());
+    if (write(fd, buffer, len) != len) {
+        return -1;
+    }
+
+    return 0;
+}
+
+int qemu_get_thread_id(void)
+{
+#if defined (__linux__)
+    return syscall(SYS_gettid);
+#else
+    return getpid();
+#endif
+}
diff --git a/os-win32.c b/os-win32.c
new file mode 100644
index 0000000..d6d54c6
--- /dev/null
+++ b/os-win32.c
@@ -0,0 +1,273 @@
+/*
+ * os-win32.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <windows.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#include <sys/time.h>
+#include "config-host.h"
+#include "sysemu.h"
+#include "qemu-options.h"
+
+/***********************************************************/
+/* Functions missing in mingw */
+
+int setenv(const char *name, const char *value, int overwrite)
+{
+    int result = 0;
+    if (overwrite || !getenv(name)) {
+        size_t length = strlen(name) + strlen(value) + 2;
+        char *string = qemu_malloc(length);
+        snprintf(string, length, "%s=%s", name, value);
+        result = putenv(string);
+    }
+    return result;
+}
+
+/***********************************************************/
+/* Polling handling */
+
+typedef struct PollingEntry {
+    PollingFunc *func;
+    void *opaque;
+    struct PollingEntry *next;
+} PollingEntry;
+
+static PollingEntry *first_polling_entry;
+
+int qemu_add_polling_cb(PollingFunc *func, void *opaque)
+{
+    PollingEntry **ppe, *pe;
+    pe = qemu_mallocz(sizeof(PollingEntry));
+    pe->func = func;
+    pe->opaque = opaque;
+    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
+    *ppe = pe;
+    return 0;
+}
+
+void qemu_del_polling_cb(PollingFunc *func, void *opaque)
+{
+    PollingEntry **ppe, *pe;
+    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
+        pe = *ppe;
+        if (pe->func == func && pe->opaque == opaque) {
+            *ppe = pe->next;
+            qemu_free(pe);
+            break;
+        }
+    }
+}
+
+/***********************************************************/
+/* Wait objects support */
+typedef struct WaitObjects {
+    int num;
+    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
+    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
+    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
+} WaitObjects;
+
+static WaitObjects wait_objects = {0};
+
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
+{
+    WaitObjects *w = &wait_objects;
+
+    if (w->num >= MAXIMUM_WAIT_OBJECTS)
+        return -1;
+    w->events[w->num] = handle;
+    w->func[w->num] = func;
+    w->opaque[w->num] = opaque;
+    w->num++;
+    return 0;
+}
+
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
+{
+    int i, found;
+    WaitObjects *w = &wait_objects;
+
+    found = 0;
+    for (i = 0; i < w->num; i++) {
+        if (w->events[i] == handle)
+            found = 1;
+        if (found) {
+            w->events[i] = w->events[i + 1];
+            w->func[i] = w->func[i + 1];
+            w->opaque[i] = w->opaque[i + 1];
+        }
+    }
+    if (found)
+        w->num--;
+}
+
+void os_host_main_loop_wait(int *timeout)
+{
+    int ret, ret2, i;
+    PollingEntry *pe;
+
+    /* XXX: need to suppress polling by better using win32 events */
+    ret = 0;
+    for(pe = first_polling_entry; pe != NULL; pe = pe->next) {
+        ret |= pe->func(pe->opaque);
+    }
+    if (ret == 0) {
+        int err;
+        WaitObjects *w = &wait_objects;
+
+        qemu_mutex_unlock_iothread();
+        ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout);
+        qemu_mutex_lock_iothread();
+        if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) {
+            if (w->func[ret - WAIT_OBJECT_0])
+                w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]);
+
+            /* Check for additional signaled events */
+            for(i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) {
+
+                /* Check if event is signaled */
+                ret2 = WaitForSingleObject(w->events[i], 0);
+                if(ret2 == WAIT_OBJECT_0) {
+                    if (w->func[i])
+                        w->func[i](w->opaque[i]);
+                } else if (ret2 == WAIT_TIMEOUT) {
+                } else {
+                    err = GetLastError();
+                    fprintf(stderr, "WaitForSingleObject error %d %d\n", i, err);
+                }
+            }
+        } else if (ret == WAIT_TIMEOUT) {
+        } else {
+            err = GetLastError();
+            fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err);
+        }
+    }
+
+    *timeout = 0;
+}
+
+static BOOL WINAPI qemu_ctrl_handler(DWORD type)
+{
+    exit(STATUS_CONTROL_C_EXIT);
+    return TRUE;
+}
+
+void os_setup_early_signal_handling(void)
+{
+    /* Note: cpu_interrupt() is currently not SMP safe, so we force
+       QEMU to run on a single CPU */
+    HANDLE h;
+    DWORD_PTR mask, smask;
+    int i;
+
+    SetConsoleCtrlHandler(qemu_ctrl_handler, TRUE);
+
+    h = GetCurrentProcess();
+    if (GetProcessAffinityMask(h, &mask, &smask)) {
+        for(i = 0; i < 32; i++) {
+            if (mask & (1 << i))
+                break;
+        }
+        if (i != 32) {
+            mask = 1 << i;
+            SetProcessAffinityMask(h, mask);
+        }
+    }
+}
+
+/* Look for support files in the same directory as the executable.  */
+char *os_find_datadir(const char *argv0)
+{
+    char *p;
+    char buf[MAX_PATH];
+    DWORD len;
+
+    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
+    if (len == 0) {
+        return NULL;
+    }
+
+    buf[len] = 0;
+    p = buf + len - 1;
+    while (p != buf && *p != '\\')
+        p--;
+    *p = 0;
+    if (access(buf, R_OK) == 0) {
+        return qemu_strdup(buf);
+    }
+    return NULL;
+}
+
+void os_set_line_buffering(void)
+{
+    setbuf(stdout, NULL);
+    setbuf(stderr, NULL);
+}
+
+/*
+ * Parse OS specific command line options.
+ * return 0 if option handled, -1 otherwise
+ */
+void os_parse_cmd_args(int index, const char *optarg)
+{
+    return;
+}
+
+void os_pidfile_error(void)
+{
+    fprintf(stderr, "Could not acquire pid file: %s\n", strerror(errno));
+}
+
+int qemu_create_pidfile(const char *filename)
+{
+    char buffer[128];
+    int len;
+    HANDLE file;
+    OVERLAPPED overlap;
+    BOOL ret;
+    memset(&overlap, 0, sizeof(overlap));
+
+    file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
+		      OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        return -1;
+    }
+    len = snprintf(buffer, sizeof(buffer), "%ld\n", (long)getpid());
+    ret = WriteFileEx(file, (LPCVOID)buffer, (DWORD)len,
+		      &overlap, NULL);
+    if (ret == 0) {
+        return -1;
+    }
+    return 0;
+}
+
+int qemu_get_thread_id(void)
+{
+    return GetCurrentThreadId();
+}
diff --git a/osdep.c b/osdep.c
index 46c779d..6c402d9 100644
--- a/osdep.c
+++ b/osdep.c
@@ -32,13 +32,16 @@
 /* Needed early for CONFIG_BSD etc. */
 #include "config-host.h"
 
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
 #ifdef CONFIG_SOLARIS
 #include <sys/types.h>
 #include <sys/statvfs.h>
-#endif
-
-#ifdef CONFIG_EVENTFD
-#include <sys/eventfd.h>
+/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
+   discussion about Solaris header problems */
+extern int madvise(caddr_t, size_t, int);
 #endif
 
 #ifdef _WIN32
@@ -63,191 +66,22 @@
 #include "sysemu.h"
 #include "qemu_socket.h"
 
-#if !defined(_POSIX_C_SOURCE) || defined(_WIN32) || defined(__sun__) || defined(__APPLE__)
-static void *oom_check(void *ptr)
+int qemu_madvise(void *addr, size_t len, int advice)
 {
-    if (ptr == NULL) {
-#if defined(_WIN32)
-        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
-#else
-        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
-#endif
-        abort();
-    }
-    return ptr;
-}
-#endif
-
-#if defined(_WIN32)
-void *qemu_memalign(size_t alignment, size_t size)
-{
-    if (!size) {
-        abort();
-    }
-    return oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
-}
-
-void *qemu_vmalloc(size_t size)
-{
-    /* FIXME: this is not exactly optimal solution since VirtualAlloc
-       has 64Kb granularity, but at least it guarantees us that the
-       memory is page aligned. */
-    if (!size) {
-        abort();
-    }
-    return oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
-}
-
-void qemu_vfree(void *ptr)
-{
-    VirtualFree(ptr, 0, MEM_RELEASE);
-}
-
-#else
-
-void *qemu_memalign(size_t alignment, size_t size)
-{
-#if defined(_POSIX_C_SOURCE) && !defined(__sun__) && !defined(__APPLE__)
-    int ret;
-    void *ptr;
-    ret = posix_memalign(&ptr, alignment, size);
-    if (ret != 0) {
-        fprintf(stderr, "Failed to allocate %zu B: %s\n",
-                size, strerror(ret));
-        abort();
-    }
-    return ptr;
-#elif defined(CONFIG_BSD)
-    return oom_check(valloc(size));
-#else
-    return oom_check(memalign(alignment, size));
-#endif
-}
-
-/* alloc shared memory pages */
-void *qemu_vmalloc(size_t size)
-{
-    return qemu_memalign(getpagesize(), size);
-}
-
-void qemu_vfree(void *ptr)
-{
-    free(ptr);
-}
-
-#endif
-
-int qemu_create_pidfile(const char *filename)
-{
-    char buffer[128];
-    int len;
-#ifndef _WIN32
-    int fd;
-
-    fd = qemu_open(filename, O_RDWR | O_CREAT, 0600);
-    if (fd == -1)
+    if (advice == QEMU_MADV_INVALID) {
+        errno = EINVAL;
         return -1;
-
-    if (lockf(fd, F_TLOCK, 0) == -1)
-        return -1;
-
-    len = snprintf(buffer, sizeof(buffer), "%ld\n", (long)getpid());
-    if (write(fd, buffer, len) != len)
-        return -1;
-#else
-    HANDLE file;
-    OVERLAPPED overlap;
-    BOOL ret;
-    memset(&overlap, 0, sizeof(overlap));
-
-    file = CreateFile(filename, GENERIC_WRITE, FILE_SHARE_READ, NULL,
-		      OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
-
-    if (file == INVALID_HANDLE_VALUE)
-      return -1;
-
-    len = snprintf(buffer, sizeof(buffer), "%ld\n", (long)getpid());
-    ret = WriteFileEx(file, (LPCVOID)buffer, (DWORD)len,
-		      &overlap, NULL);
-    if (ret == 0)
-      return -1;
-#endif
-    return 0;
-}
-
-#ifdef _WIN32
-
-/* mingw32 needs ffs for compilations without optimization. */
-int ffs(int i)
-{
-    /* Use gcc's builtin ffs. */
-    return __builtin_ffs(i);
-}
-
-/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
-#define _W32_FT_OFFSET (116444736000000000ULL)
-
-int qemu_gettimeofday(qemu_timeval *tp)
-{
-  union {
-    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
-    FILETIME ft;
-  }  _now;
-
-  if(tp)
-    {
-      GetSystemTimeAsFileTime (&_now.ft);
-      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
-      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
     }
-  /* Always return 0 as per Open Group Base Specifications Issue 6.
-     Do not set errno on error.  */
-  return 0;
-}
-#endif /* _WIN32 */
-
-
-#ifdef _WIN32
-#ifndef CONFIG_ANDROID
-void socket_set_nonblock(int fd)
-{
-    unsigned long opt = 1;
-    ioctlsocket(fd, FIONBIO, &opt);
-}
-#endif
-
-int inet_aton(const char *cp, struct in_addr *ia)
-{
-    uint32_t addr = inet_addr(cp);
-    if (addr == 0xffffffff)
-	return 0;
-    ia->s_addr = addr;
-    return 1;
-}
-
-void qemu_set_cloexec(int fd)
-{
-}
-
+#if defined(CONFIG_MADVISE)
+    return madvise(addr, len, advice);
+#elif defined(CONFIG_POSIX_MADVISE)
+    return posix_madvise(addr, len, advice);
 #else
-
-#ifndef CONFIG_ANDROID
-void socket_set_nonblock(int fd)
-{
-    int f;
-    f = fcntl(fd, F_GETFL);
-    fcntl(fd, F_SETFL, f | O_NONBLOCK);
-}
+    errno = EINVAL;
+    return -1;
 #endif
-
-void qemu_set_cloexec(int fd)
-{
-    int f;
-    f = fcntl(fd, F_GETFD);
-    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
 }
 
-#endif
 
 /*
  * Opens a file with FD_CLOEXEC set
@@ -309,58 +143,6 @@
     return total;
 }
 
-#ifndef _WIN32
-/*
- * Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
- */
-int qemu_eventfd(int fds[2])
-{
-#ifdef CONFIG_EVENTFD
-    int ret;
-
-    ret = eventfd(0, 0);
-    if (ret >= 0) {
-        fds[0] = ret;
-        qemu_set_cloexec(ret);
-        if ((fds[1] = dup(ret)) == -1) {
-            close(ret);
-            return -1;
-        }
-        qemu_set_cloexec(fds[1]);
-        return 0;
-    }
-
-    if (errno != ENOSYS) {
-        return -1;
-    }
-#endif
-
-    return qemu_pipe(fds);
-}
-
-/*
- * Creates a pipe with FD_CLOEXEC set on both file descriptors
- */
-int qemu_pipe(int pipefd[2])
-{
-    int ret;
-
-#ifdef CONFIG_PIPE2
-    ret = pipe2(pipefd, O_CLOEXEC);
-    if (ret != -1 || errno != ENOSYS) {
-        return ret;
-    }
-#endif
-    ret = pipe(pipefd);
-    if (ret == 0) {
-        qemu_set_cloexec(pipefd[0]);
-        qemu_set_cloexec(pipefd[1]);
-    }
-
-    return ret;
-}
-#endif
-
 /*
  * Opens a socket with FD_CLOEXEC set
  */
diff --git a/osdep.h b/osdep.h
index b7e8d51..d6d8110 100644
--- a/osdep.h
+++ b/osdep.h
@@ -8,9 +8,7 @@
 #include <sys/signal.h>
 #endif
 
-#ifndef _WIN32
 #include <sys/time.h>
-#endif
 
 #ifndef glue
 #define xglue(x, y) x ## y
@@ -57,6 +55,10 @@
 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
 #endif
 
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
@@ -90,23 +92,60 @@
 void *qemu_vmalloc(size_t size);
 void qemu_vfree(void *ptr);
 
+#define QEMU_MADV_INVALID -1
+
+#if defined(CONFIG_MADVISE)
+
+#define QEMU_MADV_WILLNEED  MADV_WILLNEED
+#define QEMU_MADV_DONTNEED  MADV_DONTNEED
+#ifdef MADV_DONTFORK
+#define QEMU_MADV_DONTFORK  MADV_DONTFORK
+#else
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#endif
+#ifdef MADV_MERGEABLE
+#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
+#else
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#endif
+
+#elif defined(CONFIG_POSIX_MADVISE)
+
+#define QEMU_MADV_WILLNEED  POSIX_MADV_WILLNEED
+#define QEMU_MADV_DONTNEED  POSIX_MADV_DONTNEED
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+
+#else /* no-op */
+
+#define QEMU_MADV_WILLNEED  QEMU_MADV_INVALID
+#define QEMU_MADV_DONTNEED  QEMU_MADV_INVALID
+#define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+
+#endif
+
+int qemu_madvise(void *addr, size_t len, int advice);
+
 int qemu_create_pidfile(const char *filename);
+int qemu_get_thread_id(void);
 
 #ifdef _WIN32
-int ffs(int i);
-
-int setenv(const char *name, const char *value, int overwrite);
-int asprintf(char **sptr, char *fmt, ...);
-int vasprintf(char **sptr, char *fmt, va_list args);
-
-typedef struct {
-    long tv_sec;
-    long tv_usec;
-} qemu_timeval;
-int qemu_gettimeofday(qemu_timeval *tp);
+static inline void qemu_timersub(const struct timeval *val1,
+                                 const struct timeval *val2,
+                                 struct timeval *res)
+{
+    res->tv_sec = val1->tv_sec - val2->tv_sec;
+    if (val1->tv_usec < val2->tv_usec) {
+        res->tv_sec--;
+        res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000;
+    } else {
+        res->tv_usec = val1->tv_usec - val2->tv_usec;
+    }
+}
 #else
-typedef struct timeval qemu_timeval;
-#define qemu_gettimeofday(tp) gettimeofday(tp, NULL);
-#endif /* !_WIN32 */
+#define qemu_timersub timersub
+#define ffs __builtin_ffs
+#endif
 
 #endif
diff --git a/oslib-posix.c b/oslib-posix.c
new file mode 100644
index 0000000..ba6e559
--- /dev/null
+++ b/oslib-posix.c
@@ -0,0 +1,159 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "sysemu.h"
+#include "trace.h"
+#include "qemu_socket.h"
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
+    int ret;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0) {
+        fprintf(stderr, "Failed to allocate %zu B: %s\n",
+                size, strerror(ret));
+        abort();
+    }
+#elif defined(CONFIG_BSD)
+    ptr = qemu_oom_check(valloc(size));
+#else
+    ptr = qemu_oom_check(memalign(alignment, size));
+#endif
+    //trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+/* alloc shared memory pages */
+void *qemu_vmalloc(size_t size)
+{
+    return qemu_memalign(getpagesize(), size);
+}
+
+void qemu_vfree(void *ptr)
+{
+    //trace_qemu_vfree(ptr);
+    free(ptr);
+}
+
+#if 0 /* in sockets.c */
+void socket_set_nonblock(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+#endif
+
+void qemu_set_cloexec(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFD);
+    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors
+ */
+int qemu_pipe(int pipefd[2])
+{
+    int ret;
+
+#ifdef CONFIG_PIPE2
+    ret = pipe2(pipefd, O_CLOEXEC);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    ret = pipe(pipefd);
+    if (ret == 0) {
+        qemu_set_cloexec(pipefd[0]);
+        qemu_set_cloexec(pipefd[1]);
+    }
+
+    return ret;
+}
+
+int qemu_utimensat(int dirfd, const char *path, const struct timespec *times,
+                   int flags)
+{
+    struct timeval tv[2], tv_now;
+    struct stat st;
+    int i;
+#ifdef CONFIG_UTIMENSAT
+    int ret;
+
+    ret = utimensat(dirfd, path, times, flags);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    /* Fallback: use utimes() instead of utimensat() */
+
+    /* happy if special cases */
+    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
+        return 0;
+    }
+    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
+        return utimes(path, NULL);
+    }
+
+    /* prepare for hard cases */
+    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
+        gettimeofday(&tv_now, NULL);
+    }
+    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
+        stat(path, &st);
+    }
+
+    for (i = 0; i < 2; i++) {
+        if (times[i].tv_nsec == UTIME_NOW) {
+            tv[i].tv_sec = tv_now.tv_sec;
+            tv[i].tv_usec = tv_now.tv_usec;
+        } else if (times[i].tv_nsec == UTIME_OMIT) {
+            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
+            tv[i].tv_usec = 0;
+        } else {
+            tv[i].tv_sec = times[i].tv_sec;
+            tv[i].tv_usec = times[i].tv_nsec / 1000;
+        }
+    }
+
+    return utimes(path, &tv[0]);
+}
diff --git a/oslib-win32.c b/oslib-win32.c
new file mode 100644
index 0000000..9d82abd
--- /dev/null
+++ b/oslib-win32.c
@@ -0,0 +1,116 @@
+/*
+ * os-win32.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions for win32 which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <windows.h>
+#include "config-host.h"
+#include "sysemu.h"
+#include "trace.h"
+#include "qemu_socket.h"
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (!size) {
+        abort();
+    }
+    ptr = qemu_oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
+    //trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_vmalloc(size_t size)
+{
+    void *ptr;
+
+    /* FIXME: this is not exactly optimal solution since VirtualAlloc
+       has 64Kb granularity, but at least it guarantees us that the
+       memory is page aligned. */
+    if (!size) {
+        abort();
+    }
+    ptr = qemu_oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
+    //trace_qemu_vmalloc(size, ptr);
+    return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+    //trace_qemu_vfree(ptr);
+    VirtualFree(ptr, 0, MEM_RELEASE);
+}
+
+#if 0 /* in sockets.c */
+void socket_set_nonblock(int fd)
+{
+    unsigned long opt = 1;
+    ioctlsocket(fd, FIONBIO, &opt);
+}
+#endif
+
+int inet_aton(const char *cp, struct in_addr *ia)
+{
+    uint32_t addr = inet_addr(cp);
+    if (addr == 0xffffffff) {
+	return 0;
+    }
+    ia->s_addr = addr;
+    return 1;
+}
+
+void qemu_set_cloexec(int fd)
+{
+}
+
+/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
+#define _W32_FT_OFFSET (116444736000000000ULL)
+
+int qemu_gettimeofday(qemu_timeval *tp)
+{
+  union {
+    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
+    FILETIME ft;
+  }  _now;
+
+  if(tp) {
+      GetSystemTimeAsFileTime (&_now.ft);
+      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
+      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
+  }
+  /* Always return 0 as per Open Group Base Specifications Issue 6.
+     Do not set errno on error.  */
+  return 0;
+}
diff --git a/outputchannel.c b/outputchannel.c
deleted file mode 100644
index 192b488..0000000
--- a/outputchannel.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "outputchannel.h"
-#include "qemu-common.h"
-
-struct OutputChannel {
-    void*               opaque;   /* caller-specific information */
-    OutputChannelPrintf printf;   /* callback function to do the printing */
-    unsigned int        written;  /* number of bytes written to the channel */
-};
-
-OutputChannel* output_channel_alloc(void* opaque, OutputChannelPrintf cb)
-{
-    OutputChannel* oc = qemu_mallocz(sizeof(*oc));
-    oc->printf  = cb;
-    oc->opaque  = opaque;
-    oc->written = 0;
-
-    return oc;
-}
-
-int output_channel_printf(OutputChannel* oc, const char* fmt, ...)
-{
-    int ret;
-    va_list ap;
-    va_start(ap, fmt);
-    ret = oc->printf(oc->opaque, fmt, ap);
-    va_end(ap);
-
-    /* Don't count errors and no-ops towards number of bytes written */
-    if (ret > 0) {
-        oc->written += ret;
-    }
-
-    return ret;
-}
-
-void output_channel_free(OutputChannel* oc)
-{
-    free(oc);
-}
-
-unsigned int output_channel_written(OutputChannel* oc)
-{
-    return oc->written;
-}
diff --git a/outputchannel.h b/outputchannel.h
deleted file mode 100644
index 4dece8d..0000000
--- a/outputchannel.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef _OUTPUTCHANNEL_H
-#define _OUTPUTCHANNEL_H
-
-#include <stdarg.h>
-
-/* Callback function to print a printf-formatted string to a channel */
-typedef int (*OutputChannelPrintf) (void* opaque, const char* fmt, va_list ap);
-
-typedef struct OutputChannel OutputChannel;
-
-/* Allocates a new output channel */
-OutputChannel* output_channel_alloc(void* opaque, OutputChannelPrintf cb);
-
-/* Prints a printf-formatted string to the output channel */
-int output_channel_printf(OutputChannel* oc, const char* fmt, ...);
-
-/* Frees an output channel */
-void output_channel_free(OutputChannel* oc);
-
-/* Returns the number of bytes written to the channel */
-unsigned int output_channel_written(OutputChannel* oc);
-
-#endif /* _OUTPUTCHANNEL_H */
diff --git a/path.c b/path.c
index 4fbc210..f0d703f 100644
--- a/path.c
+++ b/path.c
@@ -41,7 +41,8 @@
     return s2[i] == 0;
 }
 
-static struct pathelem *add_entry(struct pathelem *root, const char *name);
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+                                  unsigned char type);
 
 static struct pathelem *new_entry(const char *root,
                                   struct pathelem *parent,
@@ -59,6 +60,15 @@
 
 #define streq(a,b) (strcmp((a), (b)) == 0)
 
+/* Not all systems provide this feature */
+#if defined(DT_DIR) && defined(DT_UNKNOWN)
+# define dirent_type(dirent) ((dirent)->d_type)
+# define is_dir_maybe(type)  ((type) == DT_DIR || (type) == DT_UNKNOWN)
+#else
+# define dirent_type(dirent) (1)
+# define is_dir_maybe(type)  (type)
+#endif
+
 static struct pathelem *add_dir_maybe(struct pathelem *path)
 {
     DIR *dir;
@@ -68,7 +78,7 @@
 
         while ((dirent = readdir(dir)) != NULL) {
             if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")){
-                path = add_entry(path, dirent->d_name);
+                path = add_entry(path, dirent->d_name, dirent_type(dirent));
             }
         }
         closedir(dir);
@@ -76,16 +86,22 @@
     return path;
 }
 
-static struct pathelem *add_entry(struct pathelem *root, const char *name)
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+                                  unsigned char type)
 {
+    struct pathelem **e;
+
     root->num_entries++;
 
     root = realloc(root, sizeof(*root)
                    + sizeof(root->entries[0])*root->num_entries);
+    e = &root->entries[root->num_entries-1];
 
-    root->entries[root->num_entries-1] = new_entry(root->pathname, root, name);
-    root->entries[root->num_entries-1]
-        = add_dir_maybe(root->entries[root->num_entries-1]);
+    *e = new_entry(root->pathname, root, name);
+    if (is_dir_maybe(type)) {
+        *e = add_dir_maybe(*e);
+    }
+
     return root;
 }
 
diff --git a/poison.h b/poison.h
index d7db7f4..8fa3ee6 100644
--- a/poison.h
+++ b/poison.h
@@ -10,6 +10,7 @@
 #pragma GCC poison TARGET_ALPHA
 #pragma GCC poison TARGET_ARM
 #pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_LM32
 #pragma GCC poison TARGET_M68K
 #pragma GCC poison TARGET_MIPS
 #pragma GCC poison TARGET_MIPS64
@@ -45,6 +46,14 @@
 #pragma GCC poison CPU_INTERRUPT_DEBUG
 #pragma GCC poison CPU_INTERRUPT_VIRQ
 #pragma GCC poison CPU_INTERRUPT_NMI
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
 
 #endif
 #endif
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index a67ffe3..e1c58d0 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -24,6 +24,7 @@
 
 #include "qemu-queue.h"
 #include "osdep.h"
+#include "sysemu.h"
 #include "qemu-common.h"
 #include "block_int.h"
 
@@ -127,7 +128,7 @@
 
     /*
      * This looks weird, but the aio code only consideres a request
-     * successfull if it has written the number full number of bytes.
+     * successful if it has written the number full number of bytes.
      *
      * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
      * so in fact we return the ioctl command here to make posix_aio_read()
@@ -269,7 +270,7 @@
      * Ok, we have to do it the hard way, copy all segments into
      * a single aligned buffer.
      */
-    buf = qemu_memalign(512, aiocb->aio_nbytes);
+    buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
     if (aiocb->aio_type & QEMU_AIO_WRITE) {
         char *p = buf;
         int i;
@@ -453,6 +454,9 @@
                 } else {
                     ret = -ret;
                 }
+
+                //trace_paio_complete(acb, acb->common.opaque, ret);
+
                 /* remove the request */
                 *pacb = acb->next;
                 /* call the callback */
@@ -535,6 +539,8 @@
     struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb;
     int active = 0;
 
+    //trace_paio_cancel(acb, acb->common.opaque);
+
     mutex_lock(&lock);
     if (!acb->active) {
         QTAILQ_REMOVE(&request_list, acb, node);
@@ -583,6 +589,7 @@
     acb->next = posix_aio_state->first_aio;
     posix_aio_state->first_aio = acb;
 
+    //trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
     qemu_paio_submit(acb);
     return &acb->common;
 }
@@ -599,6 +606,7 @@
     acb->aio_type = QEMU_AIO_IOCTL;
     acb->aio_fildes = fd;
     acb->ev_signo = SIGUSR2;
+    acb->async_context_id = get_async_context_id();
     acb->aio_offset = 0;
     acb->aio_ioctl_buf = buf;
     acb->aio_ioctl_cmd = req;
diff --git a/qemu-char-android.c b/qemu-char-android.c
deleted file mode 100644
index 0ecf01c..0000000
--- a/qemu-char-android.c
+++ /dev/null
@@ -1,2441 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "sockets.h"
-#include "net.h"
-#include "monitor.h"
-#include "console.h"
-#include "sysemu.h"
-#include "qemu-timer.h"
-#include "qemu-char.h"
-#include "block.h"
-#include "hw/usb.h"
-#include "hw/baum.h"
-#include "hw/msmouse.h"
-
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <zlib.h>
-
-#ifndef _WIN32
-#include <sys/times.h>
-#include <sys/wait.h>
-#include <termios.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <net/if.h>
-#ifdef __NetBSD__
-#include <net/if_tap.h>
-#endif
-#ifdef __linux__
-#include <linux/if_tun.h>
-#endif
-#include <arpa/inet.h>
-#include <dirent.h>
-#include <netdb.h>
-#include <sys/select.h>
-#ifdef CONFIG_BSD
-#include <sys/stat.h>
-#ifdef __FreeBSD__
-#include <libutil.h>
-#include <dev/ppbus/ppi.h>
-#include <dev/ppbus/ppbconf.h>
-#elif defined(__DragonFly__)
-#include <libutil.h>
-#include <dev/misc/ppi/ppi.h>
-#include <bus/ppbus/ppbconf.h>
-#else
-#include <util.h>
-#endif
-#elif defined (__GLIBC__) && defined (__FreeBSD_kernel__)
-#include <freebsd/stdlib.h>
-#else
-#ifdef __linux__
-#include <pty.h>
-
-#include <linux/ppdev.h>
-#include <linux/parport.h>
-#endif
-#ifdef __sun__
-#include <sys/stat.h>
-#include <sys/ethernet.h>
-#include <sys/sockio.h>
-#include <netinet/arp.h>
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> // must come after ip.h
-#include <netinet/udp.h>
-#include <netinet/tcp.h>
-#include <net/if.h>
-#include <syslog.h>
-#include <stropts.h>
-#endif
-#endif
-#endif
-
-#include "qemu_socket.h"
-
-#define READ_BUF_LEN 4096
-
-#ifdef CONFIG_ANDROID
-#include "charpipe.h"
-#include "modem_driver.h"
-#include "android/gps.h"
-#include "android/hw-kmsg.h"
-#include "android/hw-qemud.h"
-#endif /* CONFIG_ANDROID */
-
-/***********************************************************/
-/* character device */
-
-static QTAILQ_HEAD(CharDriverStateHead, CharDriverState) chardevs =
-    QTAILQ_HEAD_INITIALIZER(chardevs);
-static int initial_reset_issued;
-
-static void qemu_chr_event(CharDriverState *s, int event)
-{
-    /* Keep track if the char device is open */
-    switch (event) {
-        case CHR_EVENT_OPENED:
-            s->opened = 1;
-            break;
-        case CHR_EVENT_CLOSED:
-            s->opened = 0;
-            break;
-    }
-
-    if (!s->chr_event)
-        return;
-    s->chr_event(s->handler_opaque, event);
-}
-
-static void qemu_chr_generic_open_bh(void *opaque)
-{
-    CharDriverState *s = opaque;
-    qemu_chr_event(s, CHR_EVENT_OPENED);
-    qemu_bh_delete(s->bh);
-    s->bh = NULL;
-}
-
-void qemu_chr_generic_open(CharDriverState *s)
-{
-    if (s->bh == NULL) {
-	s->bh = qemu_bh_new(qemu_chr_generic_open_bh, s);
-	qemu_bh_schedule(s->bh);
-    }
-}
-
-void qemu_chr_reset(CharDriverState *s)
-{
-    if (s->bh == NULL && initial_reset_issued) {
-	s->bh = qemu_bh_new(qemu_chr_generic_open_bh, s);
-	qemu_bh_schedule(s->bh);
-    }
-}
-
-void qemu_chr_initial_reset(void)
-{
-    CharDriverState *chr;
-
-    initial_reset_issued = 1;
-
-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        qemu_chr_reset(chr);
-    }
-}
-
-int qemu_chr_write(CharDriverState *s, const uint8_t *buf, int len)
-{
-    return s->chr_write(s, buf, len);
-}
-
-int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg)
-{
-    if (!s->chr_ioctl)
-        return -ENOTSUP;
-    return s->chr_ioctl(s, cmd, arg);
-}
-
-int qemu_chr_can_read(CharDriverState *s)
-{
-    if (!s->chr_can_read)
-        return 0;
-    return s->chr_can_read(s->handler_opaque);
-}
-
-void qemu_chr_read(CharDriverState *s, uint8_t *buf, int len)
-{
-    s->chr_read(s->handler_opaque, buf, len);
-}
-
-int qemu_chr_get_msgfd(CharDriverState *s)
-{
-    return s->get_msgfd ? s->get_msgfd(s) : -1;
-}
-
-void qemu_chr_accept_input(CharDriverState *s)
-{
-    if (s->chr_accept_input)
-        s->chr_accept_input(s);
-}
-
-void qemu_chr_printf(CharDriverState *s, const char *fmt, ...)
-{
-    char buf[READ_BUF_LEN];
-    va_list ap;
-    va_start(ap, fmt);
-    vsnprintf(buf, sizeof(buf), fmt, ap);
-    qemu_chr_write(s, (uint8_t *)buf, strlen(buf));
-    va_end(ap);
-}
-
-void qemu_chr_send_event(CharDriverState *s, int event)
-{
-    if (s->chr_send_event)
-        s->chr_send_event(s, event);
-}
-
-void qemu_chr_add_handlers(CharDriverState *s,
-                           IOCanReadHandler *fd_can_read,
-                           IOReadHandler *fd_read,
-                           IOEventHandler *fd_event,
-                           void *opaque)
-{
-    s->chr_can_read = fd_can_read;
-    s->chr_read = fd_read;
-    s->chr_event = fd_event;
-    s->handler_opaque = opaque;
-    if (s->chr_update_read_handler)
-        s->chr_update_read_handler(s);
-
-    /* We're connecting to an already opened device, so let's make sure we
-       also get the open event */
-    if (s->opened) {
-        qemu_chr_generic_open(s);
-    }
-}
-
-static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    return len;
-}
-
-static CharDriverState *qemu_chr_open_null(void)
-{
-    CharDriverState *chr;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    chr->chr_write = null_chr_write;
-    return chr;
-}
-
-/* MUX driver for serial I/O splitting */
-#define MAX_MUX 4
-#define MUX_BUFFER_SIZE 32	/* Must be a power of 2.  */
-#define MUX_BUFFER_MASK (MUX_BUFFER_SIZE - 1)
-typedef struct {
-    IOCanReadHandler *chr_can_read[MAX_MUX];
-    IOReadHandler *chr_read[MAX_MUX];
-    IOEventHandler *chr_event[MAX_MUX];
-    void *ext_opaque[MAX_MUX];
-    CharDriverState *drv;
-    int focus;
-    int mux_cnt;
-    int term_got_escape;
-    int max_size;
-    /* Intermediate input buffer allows to catch escape sequences even if the
-       currently active device is not accepting any input - but only until it
-       is full as well. */
-    unsigned char buffer[MAX_MUX][MUX_BUFFER_SIZE];
-    int prod[MAX_MUX];
-    int cons[MAX_MUX];
-    int timestamps;
-    int linestart;
-    int64_t timestamps_start;
-} MuxDriver;
-
-
-static int mux_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    MuxDriver *d = chr->opaque;
-    int ret;
-    if (!d->timestamps) {
-        ret = d->drv->chr_write(d->drv, buf, len);
-    } else {
-        int i;
-
-        ret = 0;
-        for (i = 0; i < len; i++) {
-            if (d->linestart) {
-                char buf1[64];
-                int64_t ti;
-                int secs;
-
-                ti = qemu_get_clock(rt_clock);
-                if (d->timestamps_start == -1)
-                    d->timestamps_start = ti;
-                ti -= d->timestamps_start;
-                secs = ti / 1000;
-                snprintf(buf1, sizeof(buf1),
-                         "[%02d:%02d:%02d.%03d] ",
-                         secs / 3600,
-                         (secs / 60) % 60,
-                         secs % 60,
-                         (int)(ti % 1000));
-                d->drv->chr_write(d->drv, (uint8_t *)buf1, strlen(buf1));
-                d->linestart = 0;
-            }
-            ret += d->drv->chr_write(d->drv, buf+i, 1);
-            if (buf[i] == '\n') {
-                d->linestart = 1;
-            }
-        }
-    }
-    return ret;
-}
-
-static const char * const mux_help[] = {
-    "% h    print this help\n\r",
-    "% x    exit emulator\n\r",
-    "% s    save disk data back to file (if -snapshot)\n\r",
-    "% t    toggle console timestamps\n\r"
-    "% b    send break (magic sysrq)\n\r",
-    "% c    switch between console and monitor\n\r",
-    "% %  sends %\n\r",
-    NULL
-};
-
-int term_escape_char = 0x01; /* ctrl-a is used for escape */
-static void mux_print_help(CharDriverState *chr)
-{
-    int i, j;
-    char ebuf[15] = "Escape-Char";
-    char cbuf[50] = "\n\r";
-
-    if (term_escape_char > 0 && term_escape_char < 26) {
-        snprintf(cbuf, sizeof(cbuf), "\n\r");
-        snprintf(ebuf, sizeof(ebuf), "C-%c", term_escape_char - 1 + 'a');
-    } else {
-        snprintf(cbuf, sizeof(cbuf),
-                 "\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r",
-                 term_escape_char);
-    }
-    chr->chr_write(chr, (uint8_t *)cbuf, strlen(cbuf));
-    for (i = 0; mux_help[i] != NULL; i++) {
-        for (j=0; mux_help[i][j] != '\0'; j++) {
-            if (mux_help[i][j] == '%')
-                chr->chr_write(chr, (uint8_t *)ebuf, strlen(ebuf));
-            else
-                chr->chr_write(chr, (uint8_t *)&mux_help[i][j], 1);
-        }
-    }
-}
-
-static void mux_chr_send_event(MuxDriver *d, int mux_nr, int event)
-{
-    if (d->chr_event[mux_nr])
-        d->chr_event[mux_nr](d->ext_opaque[mux_nr], event);
-}
-
-static int mux_proc_byte(CharDriverState *chr, MuxDriver *d, int ch)
-{
-    if (d->term_got_escape) {
-        d->term_got_escape = 0;
-        if (ch == term_escape_char)
-            goto send_char;
-        switch(ch) {
-        case '?':
-        case 'h':
-            mux_print_help(chr);
-            break;
-        case 'x':
-            {
-                 const char *term =  "QEMU: Terminated\n\r";
-                 chr->chr_write(chr,(uint8_t *)term,strlen(term));
-                 exit(0);
-                 break;
-            }
-        case 's':
-            bdrv_commit_all();
-            break;
-        case 'b':
-            qemu_chr_event(chr, CHR_EVENT_BREAK);
-            break;
-        case 'c':
-            /* Switch to the next registered device */
-            mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
-            d->focus++;
-            if (d->focus >= d->mux_cnt)
-                d->focus = 0;
-            mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
-            break;
-        case 't':
-            d->timestamps = !d->timestamps;
-            d->timestamps_start = -1;
-            d->linestart = 0;
-            break;
-        }
-    } else if (ch == term_escape_char) {
-        d->term_got_escape = 1;
-    } else {
-    send_char:
-        return 1;
-    }
-    return 0;
-}
-
-static void mux_chr_accept_input(CharDriverState *chr)
-{
-    MuxDriver *d = chr->opaque;
-    int m = d->focus;
-
-    while (d->prod[m] != d->cons[m] &&
-           d->chr_can_read[m] &&
-           d->chr_can_read[m](d->ext_opaque[m])) {
-        d->chr_read[m](d->ext_opaque[m],
-                       &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
-    }
-}
-
-static int mux_chr_can_read(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    MuxDriver *d = chr->opaque;
-    int m = d->focus;
-
-    if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE)
-        return 1;
-    if (d->chr_can_read[m])
-        return d->chr_can_read[m](d->ext_opaque[m]);
-    return 0;
-}
-
-static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
-{
-    CharDriverState *chr = opaque;
-    MuxDriver *d = chr->opaque;
-    int m = d->focus;
-    int i;
-
-    mux_chr_accept_input (opaque);
-
-    for(i = 0; i < size; i++)
-        if (mux_proc_byte(chr, d, buf[i])) {
-            if (d->prod[m] == d->cons[m] &&
-                d->chr_can_read[m] &&
-                d->chr_can_read[m](d->ext_opaque[m]))
-                d->chr_read[m](d->ext_opaque[m], &buf[i], 1);
-            else
-                d->buffer[m][d->prod[m]++ & MUX_BUFFER_MASK] = buf[i];
-        }
-}
-
-static void mux_chr_event(void *opaque, int event)
-{
-    CharDriverState *chr = opaque;
-    MuxDriver *d = chr->opaque;
-    int i;
-
-    /* Send the event to all registered listeners */
-    for (i = 0; i < d->mux_cnt; i++)
-        mux_chr_send_event(d, i, event);
-}
-
-static void mux_chr_update_read_handler(CharDriverState *chr)
-{
-    MuxDriver *d = chr->opaque;
-
-    if (d->mux_cnt >= MAX_MUX) {
-        fprintf(stderr, "Cannot add I/O handlers, MUX array is full\n");
-        return;
-    }
-    d->ext_opaque[d->mux_cnt] = chr->handler_opaque;
-    d->chr_can_read[d->mux_cnt] = chr->chr_can_read;
-    d->chr_read[d->mux_cnt] = chr->chr_read;
-    d->chr_event[d->mux_cnt] = chr->chr_event;
-    /* Fix up the real driver with mux routines */
-    if (d->mux_cnt == 0) {
-        qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read,
-                              mux_chr_event, chr);
-    }
-    if (d->focus != -1) {
-        mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
-    }
-    d->focus = d->mux_cnt;
-    d->mux_cnt++;
-    mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN);
-}
-
-static CharDriverState *qemu_chr_open_mux(CharDriverState *drv)
-{
-    CharDriverState *chr;
-    MuxDriver *d;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    d = qemu_mallocz(sizeof(MuxDriver));
-
-    chr->opaque = d;
-    d->drv = drv;
-    d->focus = -1;
-    chr->chr_write = mux_chr_write;
-    chr->chr_update_read_handler = mux_chr_update_read_handler;
-    chr->chr_accept_input = mux_chr_accept_input;
-
-    /* Muxes are always open on creation */
-    qemu_chr_generic_open(chr);
-
-    return chr;
-}
-
-
-#ifdef _WIN32
-int send_all(int fd, const void *buf, int len1)
-{
-#if 1
-	return socket_send(fd, buf, len1);
-#else
-    int ret, len;
-
-    len = len1;
-    while (len > 0) {
-        ret = send(fd, buf, len, 0);
-        if (ret < 0) {
-            errno = WSAGetLastError();
-            if (errno != WSAEWOULDBLOCK && errno != WSAEAGAIN) {
-                return -1;
-            }
-        } else if (ret == 0) {
-            break;
-        } else {
-            buf += ret;
-            len -= ret;
-        }
-    }
-    return len1 - len;
-#endif
-}
-
-#else
-
-static int unix_write(int fd, const uint8_t *buf, int len1)
-{
-    int ret, len;
-
-    len = len1;
-    while (len > 0) {
-        ret = write(fd, buf, len);
-        if (ret < 0) {
-            if (errno != EINTR && errno != EAGAIN)
-                return -1;
-        } else if (ret == 0) {
-            break;
-        } else {
-            buf += ret;
-            len -= ret;
-        }
-    }
-    return len1 - len;
-}
-
-int send_all(int fd, const void *buf, int len1)
-{
-    return unix_write(fd, buf, len1);
-}
-#endif /* !_WIN32 */
-
-#ifndef _WIN32
-
-typedef struct {
-    int fd_in, fd_out;
-    int max_size;
-} FDCharDriver;
-
-#define STDIO_MAX_CLIENTS 1
-static int stdio_nb_clients = 0;
-
-static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    FDCharDriver *s = chr->opaque;
-    return send_all(s->fd_out, buf, len);
-}
-
-static int fd_chr_read_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    FDCharDriver *s = chr->opaque;
-
-    s->max_size = qemu_chr_can_read(chr);
-    return s->max_size;
-}
-
-static void fd_chr_read(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    FDCharDriver *s = chr->opaque;
-    int size, len;
-    uint8_t buf[READ_BUF_LEN];
-
-    len = sizeof(buf);
-    if (len > s->max_size)
-        len = s->max_size;
-    if (len == 0)
-        return;
-    size = read(s->fd_in, buf, len);
-    if (size == 0) {
-        /* FD has been closed. Remove it from the active list.  */
-        qemu_set_fd_handler2(s->fd_in, NULL, NULL, NULL, NULL);
-        qemu_chr_event(chr, CHR_EVENT_CLOSED);
-        return;
-    }
-    if (size > 0) {
-        qemu_chr_read(chr, buf, size);
-    }
-}
-
-static void fd_chr_update_read_handler(CharDriverState *chr)
-{
-    FDCharDriver *s = chr->opaque;
-
-    if (s->fd_in >= 0) {
-        if (display_type == DT_NOGRAPHIC && s->fd_in == 0) {
-        } else {
-            qemu_set_fd_handler2(s->fd_in, fd_chr_read_poll,
-                                 fd_chr_read, NULL, chr);
-        }
-    }
-}
-
-static void fd_chr_close(struct CharDriverState *chr)
-{
-    FDCharDriver *s = chr->opaque;
-
-    if (s->fd_in >= 0) {
-        if (display_type == DT_NOGRAPHIC && s->fd_in == 0) {
-        } else {
-            qemu_set_fd_handler2(s->fd_in, NULL, NULL, NULL, NULL);
-        }
-    }
-
-    qemu_free(s);
-    qemu_chr_event(chr, CHR_EVENT_CLOSED);
-}
-
-/* open a character device to a unix fd */
-static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out)
-{
-    CharDriverState *chr;
-    FDCharDriver *s;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(FDCharDriver));
-    s->fd_in = fd_in;
-    s->fd_out = fd_out;
-    chr->opaque = s;
-    chr->chr_write = fd_chr_write;
-    chr->chr_update_read_handler = fd_chr_update_read_handler;
-    chr->chr_close = fd_chr_close;
-
-    qemu_chr_generic_open(chr);
-
-    return chr;
-}
-
-static CharDriverState *qemu_chr_open_file_out(const char *file_out)
-{
-    int fd_out;
-
-    TFR(fd_out = open(file_out, O_WRONLY | O_TRUNC | O_CREAT | O_BINARY, 0666));
-    if (fd_out < 0)
-        return NULL;
-    return qemu_chr_open_fd(-1, fd_out);
-}
-
-static CharDriverState *qemu_chr_open_pipe(const char *filename)
-{
-    int fd_in, fd_out;
-    char filename_in[256], filename_out[256];
-
-    snprintf(filename_in, 256, "%s.in", filename);
-    snprintf(filename_out, 256, "%s.out", filename);
-    TFR(fd_in = open(filename_in, O_RDWR | O_BINARY));
-    TFR(fd_out = open(filename_out, O_RDWR | O_BINARY));
-    if (fd_in < 0 || fd_out < 0) {
-	if (fd_in >= 0)
-	    close(fd_in);
-	if (fd_out >= 0)
-	    close(fd_out);
-        TFR(fd_in = fd_out = open(filename, O_RDWR | O_BINARY));
-        if (fd_in < 0)
-            return NULL;
-    }
-    return qemu_chr_open_fd(fd_in, fd_out);
-}
-
-static CharDriverState *qemu_chr_open_fdpair(const char *fd_pair)
-{
-    int fd_in, fd_out;
-    char *endptr;
-
-    /* fd_pair should contain two decimal fd values, separated by
-     * a colon. */
-    endptr = NULL;
-    fd_in = strtol(fd_pair, &endptr, 10);
-    if (endptr == NULL || endptr == fd_pair || *endptr != ':')
-        return NULL;
-    endptr++;   // skip colon
-    fd_pair = endptr;
-    endptr = NULL;
-    fd_out = strtol(fd_pair, &endptr, 10);
-    if (endptr == NULL || endptr == fd_pair || *endptr != '\0')
-        return NULL;
-
-    return qemu_chr_open_fd(fd_in, fd_out);
-}
-
-/* for STDIO, we handle the case where several clients use it
-   (nographic mode) */
-
-#define TERM_FIFO_MAX_SIZE 1
-
-static uint8_t term_fifo[TERM_FIFO_MAX_SIZE];
-static int term_fifo_size;
-
-static int stdio_read_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-
-    /* try to flush the queue if needed */
-    if (term_fifo_size != 0 && qemu_chr_can_read(chr) > 0) {
-        qemu_chr_read(chr, term_fifo, 1);
-        term_fifo_size = 0;
-    }
-    /* see if we can absorb more chars */
-    if (term_fifo_size == 0)
-        return 1;
-    else
-        return 0;
-}
-
-static void stdio_read(void *opaque)
-{
-    int size;
-    uint8_t buf[1];
-    CharDriverState *chr = opaque;
-
-    size = read(0, buf, 1);
-    if (size == 0) {
-        /* stdin has been closed. Remove it from the active list.  */
-        qemu_set_fd_handler2(0, NULL, NULL, NULL, NULL);
-        qemu_chr_event(chr, CHR_EVENT_CLOSED);
-        return;
-    }
-    if (size > 0) {
-        if (qemu_chr_can_read(chr) > 0) {
-            qemu_chr_read(chr, buf, 1);
-        } else if (term_fifo_size == 0) {
-            term_fifo[term_fifo_size++] = buf[0];
-        }
-    }
-}
-
-/* init terminal so that we can grab keys */
-static struct termios oldtty;
-static int old_fd0_flags;
-static int term_atexit_done;
-
-static void term_exit(void)
-{
-    tcsetattr (0, TCSANOW, &oldtty);
-    fcntl(0, F_SETFL, old_fd0_flags);
-}
-
-static void term_init(void)
-{
-    struct termios tty;
-
-    tcgetattr (0, &tty);
-    oldtty = tty;
-    old_fd0_flags = fcntl(0, F_GETFL);
-
-    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
-                          |INLCR|IGNCR|ICRNL|IXON);
-    tty.c_oflag |= OPOST;
-    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
-    /* if graphical mode, we allow Ctrl-C handling */
-    if (display_type == DT_NOGRAPHIC)
-        tty.c_lflag &= ~ISIG;
-    tty.c_cflag &= ~(CSIZE|PARENB);
-    tty.c_cflag |= CS8;
-    tty.c_cc[VMIN] = 1;
-    tty.c_cc[VTIME] = 0;
-
-    tcsetattr (0, TCSANOW, &tty);
-
-    if (!term_atexit_done++)
-        atexit(term_exit);
-
-    fcntl(0, F_SETFL, O_NONBLOCK);
-}
-
-static void qemu_chr_close_stdio(struct CharDriverState *chr)
-{
-    term_exit();
-    stdio_nb_clients--;
-    qemu_set_fd_handler2(0, NULL, NULL, NULL, NULL);
-    fd_chr_close(chr);
-}
-
-static CharDriverState *qemu_chr_open_stdio(void)
-{
-    CharDriverState *chr;
-
-    if (stdio_nb_clients >= STDIO_MAX_CLIENTS)
-        return NULL;
-    chr = qemu_chr_open_fd(0, 1);
-    chr->chr_close = qemu_chr_close_stdio;
-    qemu_set_fd_handler2(0, stdio_read_poll, stdio_read, NULL, chr);
-    stdio_nb_clients++;
-    term_init();
-
-    return chr;
-}
-
-#ifdef __sun__
-/* Once Solaris has openpty(), this is going to be removed. */
-static int openpty(int *amaster, int *aslave, char *name,
-                   struct termios *termp, struct winsize *winp)
-{
-        const char *slave;
-        int mfd = -1, sfd = -1;
-
-        *amaster = *aslave = -1;
-
-        mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
-        if (mfd < 0)
-                goto err;
-
-        if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
-                goto err;
-
-        if ((slave = ptsname(mfd)) == NULL)
-                goto err;
-
-        if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1)
-                goto err;
-
-        if (ioctl(sfd, I_PUSH, "ptem") == -1 ||
-            (termp != NULL && tcgetattr(sfd, termp) < 0))
-                goto err;
-
-        if (amaster)
-                *amaster = mfd;
-        if (aslave)
-                *aslave = sfd;
-        if (winp)
-                ioctl(sfd, TIOCSWINSZ, winp);
-
-        return 0;
-
-err:
-        if (sfd != -1)
-                close(sfd);
-        close(mfd);
-        return -1;
-}
-
-static void cfmakeraw (struct termios *termios_p)
-{
-        termios_p->c_iflag &=
-                ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
-        termios_p->c_oflag &= ~OPOST;
-        termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
-        termios_p->c_cflag &= ~(CSIZE|PARENB);
-        termios_p->c_cflag |= CS8;
-
-        termios_p->c_cc[VMIN] = 0;
-        termios_p->c_cc[VTIME] = 0;
-}
-#endif
-
-#ifndef _WIN32
-
-typedef struct {
-    int fd;
-    int connected;
-    int polling;
-    int read_bytes;
-    QEMUTimer *timer;
-} PtyCharDriver;
-
-static void pty_chr_update_read_handler(CharDriverState *chr);
-static void pty_chr_state(CharDriverState *chr, int connected);
-
-static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    PtyCharDriver *s = chr->opaque;
-
-    if (!s->connected) {
-        /* guest sends data, check for (re-)connect */
-        pty_chr_update_read_handler(chr);
-        return 0;
-    }
-    return send_all(s->fd, buf, len);
-}
-
-static int pty_chr_read_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    PtyCharDriver *s = chr->opaque;
-
-    s->read_bytes = qemu_chr_can_read(chr);
-    return s->read_bytes;
-}
-
-static void pty_chr_read(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    PtyCharDriver *s = chr->opaque;
-    int size, len;
-    uint8_t buf[READ_BUF_LEN];
-
-    len = sizeof(buf);
-    if (len > s->read_bytes)
-        len = s->read_bytes;
-    if (len == 0)
-        return;
-    size = read(s->fd, buf, len);
-    if ((size == -1 && errno == EIO) ||
-        (size == 0)) {
-        pty_chr_state(chr, 0);
-        return;
-    }
-    if (size > 0) {
-        pty_chr_state(chr, 1);
-        qemu_chr_read(chr, buf, size);
-    }
-}
-
-static void pty_chr_update_read_handler(CharDriverState *chr)
-{
-    PtyCharDriver *s = chr->opaque;
-
-    qemu_set_fd_handler2(s->fd, pty_chr_read_poll,
-                         pty_chr_read, NULL, chr);
-    s->polling = 1;
-    /*
-     * Short timeout here: just need wait long enougth that qemu makes
-     * it through the poll loop once.  When reconnected we want a
-     * short timeout so we notice it almost instantly.  Otherwise
-     * read() gives us -EIO instantly, making pty_chr_state() reset the
-     * timeout to the normal (much longer) poll interval before the
-     * timer triggers.
-     */
-    qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 10);
-}
-
-static void pty_chr_state(CharDriverState *chr, int connected)
-{
-    PtyCharDriver *s = chr->opaque;
-
-    if (!connected) {
-        qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-        s->connected = 0;
-        s->polling = 0;
-        /* (re-)connect poll interval for idle guests: once per second.
-         * We check more frequently in case the guests sends data to
-         * the virtual device linked to our pty. */
-        qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 1000);
-    } else {
-        if (!s->connected)
-            qemu_chr_generic_open(chr);
-        s->connected = 1;
-    }
-}
-
-static void pty_chr_timer(void *opaque)
-{
-    struct CharDriverState *chr = opaque;
-    PtyCharDriver *s = chr->opaque;
-
-    if (s->connected)
-        return;
-    if (s->polling) {
-        /* If we arrive here without polling being cleared due
-         * read returning -EIO, then we are (re-)connected */
-        pty_chr_state(chr, 1);
-        return;
-    }
-
-    /* Next poll ... */
-    pty_chr_update_read_handler(chr);
-}
-
-static void pty_chr_close(struct CharDriverState *chr)
-{
-    PtyCharDriver *s = chr->opaque;
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-    close(s->fd);
-    qemu_del_timer(s->timer);
-    qemu_free_timer(s->timer);
-    qemu_free(s);
-    qemu_chr_event(chr, CHR_EVENT_CLOSED);
-}
-
-static CharDriverState *qemu_chr_open_pty(void)
-{
-    CharDriverState *chr;
-    PtyCharDriver *s;
-    struct termios tty;
-    int slave_fd, len;
-#if defined(__OpenBSD__) || defined(__DragonFly__)
-    char pty_name[PATH_MAX];
-#define q_ptsname(x) pty_name
-#else
-    char *pty_name = NULL;
-#define q_ptsname(x) ptsname(x)
-    extern char* ptsname(int);
-#endif
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(PtyCharDriver));
-
-    if (openpty(&s->fd, &slave_fd, pty_name, NULL, NULL) < 0) {
-        return NULL;
-    }
-
-    /* Set raw attributes on the pty. */
-    tcgetattr(slave_fd, &tty);
-    cfmakeraw(&tty);
-    tcsetattr(slave_fd, TCSAFLUSH, &tty);
-    close(slave_fd);
-
-    len = strlen(q_ptsname(s->fd)) + 5;
-    chr->filename = qemu_malloc(len);
-    snprintf(chr->filename, len, "pty:%s", q_ptsname(s->fd));
-    fprintf(stderr, "char device redirected to %s\n", q_ptsname(s->fd));
-
-    chr->opaque = s;
-    chr->chr_write = pty_chr_write;
-    chr->chr_update_read_handler = pty_chr_update_read_handler;
-    chr->chr_close = pty_chr_close;
-
-    s->timer = qemu_new_timer(rt_clock, pty_chr_timer, chr);
-
-    return chr;
-}
-
-static void tty_serial_init(int fd, int speed,
-                            int parity, int data_bits, int stop_bits)
-{
-    struct termios tty;
-    speed_t spd;
-
-#if 0
-    printf("tty_serial_init: speed=%d parity=%c data=%d stop=%d\n",
-           speed, parity, data_bits, stop_bits);
-#endif
-    tcgetattr (fd, &tty);
-    if (!term_atexit_done) {
-        oldtty = tty;
-    }
-
-#define check_speed(val) if (speed <= val) { spd = B##val; break; }
-    speed = speed * 10 / 11;
-    do {
-        check_speed(50);
-        check_speed(75);
-        check_speed(110);
-        check_speed(134);
-        check_speed(150);
-        check_speed(200);
-        check_speed(300);
-        check_speed(600);
-        check_speed(1200);
-        check_speed(1800);
-        check_speed(2400);
-        check_speed(4800);
-        check_speed(9600);
-        check_speed(19200);
-        check_speed(38400);
-        /* Non-Posix values follow. They may be unsupported on some systems. */
-        check_speed(57600);
-        check_speed(115200);
-#ifdef B230400
-        check_speed(230400);
-#endif
-#ifdef B460800
-        check_speed(460800);
-#endif
-#ifdef B500000
-        check_speed(500000);
-#endif
-#ifdef B576000
-        check_speed(576000);
-#endif
-#ifdef B921600
-        check_speed(921600);
-#endif
-#ifdef B1000000
-        check_speed(1000000);
-#endif
-#ifdef B1152000
-        check_speed(1152000);
-#endif
-#ifdef B1500000
-        check_speed(1500000);
-#endif
-#ifdef B2000000
-        check_speed(2000000);
-#endif
-#ifdef B2500000
-        check_speed(2500000);
-#endif
-#ifdef B3000000
-        check_speed(3000000);
-#endif
-#ifdef B3500000
-        check_speed(3500000);
-#endif
-#ifdef B4000000
-        check_speed(4000000);
-#endif
-        spd = B115200;
-    } while (0);
-
-    cfsetispeed(&tty, spd);
-    cfsetospeed(&tty, spd);
-
-    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
-                          |INLCR|IGNCR|ICRNL|IXON);
-    tty.c_oflag |= OPOST;
-    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN|ISIG);
-    tty.c_cflag &= ~(CSIZE|PARENB|PARODD|CRTSCTS|CSTOPB);
-    switch(data_bits) {
-    default:
-    case 8:
-        tty.c_cflag |= CS8;
-        break;
-    case 7:
-        tty.c_cflag |= CS7;
-        break;
-    case 6:
-        tty.c_cflag |= CS6;
-        break;
-    case 5:
-        tty.c_cflag |= CS5;
-        break;
-    }
-    switch(parity) {
-    default:
-    case 'N':
-        break;
-    case 'E':
-        tty.c_cflag |= PARENB;
-        break;
-    case 'O':
-        tty.c_cflag |= PARENB | PARODD;
-        break;
-    }
-    if (stop_bits == 2)
-        tty.c_cflag |= CSTOPB;
-
-    tcsetattr (fd, TCSANOW, &tty);
-}
-
-static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
-{
-    FDCharDriver *s = chr->opaque;
-
-    switch(cmd) {
-    case CHR_IOCTL_SERIAL_SET_PARAMS:
-        {
-            QEMUSerialSetParams *ssp = arg;
-            tty_serial_init(s->fd_in, ssp->speed, ssp->parity,
-                            ssp->data_bits, ssp->stop_bits);
-        }
-        break;
-    case CHR_IOCTL_SERIAL_SET_BREAK:
-        {
-            int enable = *(int *)arg;
-            if (enable)
-                tcsendbreak(s->fd_in, 1);
-        }
-        break;
-    case CHR_IOCTL_SERIAL_GET_TIOCM:
-        {
-            int sarg = 0;
-            int *targ = (int *)arg;
-            ioctl(s->fd_in, TIOCMGET, &sarg);
-            *targ = 0;
-            if (sarg & TIOCM_CTS)
-                *targ |= CHR_TIOCM_CTS;
-            if (sarg & TIOCM_CAR)
-                *targ |= CHR_TIOCM_CAR;
-            if (sarg & TIOCM_DSR)
-                *targ |= CHR_TIOCM_DSR;
-            if (sarg & TIOCM_RI)
-                *targ |= CHR_TIOCM_RI;
-            if (sarg & TIOCM_DTR)
-                *targ |= CHR_TIOCM_DTR;
-            if (sarg & TIOCM_RTS)
-                *targ |= CHR_TIOCM_RTS;
-        }
-        break;
-    case CHR_IOCTL_SERIAL_SET_TIOCM:
-        {
-            int sarg = *(int *)arg;
-            int targ = 0;
-            ioctl(s->fd_in, TIOCMGET, &targ);
-            targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR
-                     | CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS);
-            if (sarg & CHR_TIOCM_CTS)
-                targ |= TIOCM_CTS;
-            if (sarg & CHR_TIOCM_CAR)
-                targ |= TIOCM_CAR;
-            if (sarg & CHR_TIOCM_DSR)
-                targ |= TIOCM_DSR;
-            if (sarg & CHR_TIOCM_RI)
-                targ |= TIOCM_RI;
-            if (sarg & CHR_TIOCM_DTR)
-                targ |= TIOCM_DTR;
-            if (sarg & CHR_TIOCM_RTS)
-                targ |= TIOCM_RTS;
-            ioctl(s->fd_in, TIOCMSET, &targ);
-        }
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static CharDriverState *qemu_chr_open_tty(const char *filename)
-{
-    CharDriverState *chr;
-    int fd;
-
-    TFR(fd = open(filename, O_RDWR | O_NONBLOCK));
-    tty_serial_init(fd, 115200, 'N', 8, 1);
-    chr = qemu_chr_open_fd(fd, fd);
-    if (!chr) {
-        close(fd);
-        return NULL;
-    }
-    chr->chr_ioctl = tty_serial_ioctl;
-    qemu_chr_reset(chr);
-    return chr;
-}
-#else  /* _WIN32 */
-static CharDriverState *qemu_chr_open_pty(void)
-{
-    return NULL;
-}
-#endif /* _WIN32 */
-
-#if defined(__linux__)
-typedef struct {
-    int fd;
-    int mode;
-} ParallelCharDriver;
-
-static int pp_hw_mode(ParallelCharDriver *s, uint16_t mode)
-{
-    if (s->mode != mode) {
-	int m = mode;
-        if (ioctl(s->fd, PPSETMODE, &m) < 0)
-            return 0;
-	s->mode = mode;
-    }
-    return 1;
-}
-
-static int pp_ioctl(CharDriverState *chr, int cmd, void *arg)
-{
-    ParallelCharDriver *drv = chr->opaque;
-    int fd = drv->fd;
-    uint8_t b;
-
-    switch(cmd) {
-    case CHR_IOCTL_PP_READ_DATA:
-        if (ioctl(fd, PPRDATA, &b) < 0)
-            return -ENOTSUP;
-        *(uint8_t *)arg = b;
-        break;
-    case CHR_IOCTL_PP_WRITE_DATA:
-        b = *(uint8_t *)arg;
-        if (ioctl(fd, PPWDATA, &b) < 0)
-            return -ENOTSUP;
-        break;
-    case CHR_IOCTL_PP_READ_CONTROL:
-        if (ioctl(fd, PPRCONTROL, &b) < 0)
-            return -ENOTSUP;
-	/* Linux gives only the lowest bits, and no way to know data
-	   direction! For better compatibility set the fixed upper
-	   bits. */
-        *(uint8_t *)arg = b | 0xc0;
-        break;
-    case CHR_IOCTL_PP_WRITE_CONTROL:
-        b = *(uint8_t *)arg;
-        if (ioctl(fd, PPWCONTROL, &b) < 0)
-            return -ENOTSUP;
-        break;
-    case CHR_IOCTL_PP_READ_STATUS:
-        if (ioctl(fd, PPRSTATUS, &b) < 0)
-            return -ENOTSUP;
-        *(uint8_t *)arg = b;
-        break;
-    case CHR_IOCTL_PP_DATA_DIR:
-        if (ioctl(fd, PPDATADIR, (int *)arg) < 0)
-            return -ENOTSUP;
-        break;
-    case CHR_IOCTL_PP_EPP_READ_ADDR:
-	if (pp_hw_mode(drv, IEEE1284_MODE_EPP|IEEE1284_ADDR)) {
-	    struct ParallelIOArg *parg = arg;
-	    int n = read(fd, parg->buffer, parg->count);
-	    if (n != parg->count) {
-		return -EIO;
-	    }
-	}
-        break;
-    case CHR_IOCTL_PP_EPP_READ:
-	if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
-	    struct ParallelIOArg *parg = arg;
-	    int n = read(fd, parg->buffer, parg->count);
-	    if (n != parg->count) {
-		return -EIO;
-	    }
-	}
-        break;
-    case CHR_IOCTL_PP_EPP_WRITE_ADDR:
-	if (pp_hw_mode(drv, IEEE1284_MODE_EPP|IEEE1284_ADDR)) {
-	    struct ParallelIOArg *parg = arg;
-	    int n = write(fd, parg->buffer, parg->count);
-	    if (n != parg->count) {
-		return -EIO;
-	    }
-	}
-        break;
-    case CHR_IOCTL_PP_EPP_WRITE:
-	if (pp_hw_mode(drv, IEEE1284_MODE_EPP)) {
-	    struct ParallelIOArg *parg = arg;
-	    int n = write(fd, parg->buffer, parg->count);
-	    if (n != parg->count) {
-		return -EIO;
-	    }
-	}
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static void pp_close(CharDriverState *chr)
-{
-    ParallelCharDriver *drv = chr->opaque;
-    int fd = drv->fd;
-
-    pp_hw_mode(drv, IEEE1284_MODE_COMPAT);
-    ioctl(fd, PPRELEASE);
-    close(fd);
-    qemu_free(drv);
-}
-
-static CharDriverState *qemu_chr_open_pp(const char *filename)
-{
-    CharDriverState *chr;
-    ParallelCharDriver *drv;
-    int fd;
-
-    TFR(fd = open(filename, O_RDWR));
-    if (fd < 0)
-        return NULL;
-
-    if (ioctl(fd, PPCLAIM) < 0) {
-        close(fd);
-        return NULL;
-    }
-
-    drv = qemu_mallocz(sizeof(ParallelCharDriver));
-    drv->fd = fd;
-    drv->mode = IEEE1284_MODE_COMPAT;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    chr->chr_write = null_chr_write;
-    chr->chr_ioctl = pp_ioctl;
-    chr->chr_close = pp_close;
-    chr->opaque = drv;
-
-    qemu_chr_reset(chr);
-
-    return chr;
-}
-#endif /* __linux__ */
-
-#if defined(__FreeBSD__) || defined(__DragonFly__)
-static int pp_ioctl(CharDriverState *chr, int cmd, void *arg)
-{
-    int fd = (int)chr->opaque;
-    uint8_t b;
-
-    switch(cmd) {
-    case CHR_IOCTL_PP_READ_DATA:
-        if (ioctl(fd, PPIGDATA, &b) < 0)
-            return -ENOTSUP;
-        *(uint8_t *)arg = b;
-        break;
-    case CHR_IOCTL_PP_WRITE_DATA:
-        b = *(uint8_t *)arg;
-        if (ioctl(fd, PPISDATA, &b) < 0)
-            return -ENOTSUP;
-        break;
-    case CHR_IOCTL_PP_READ_CONTROL:
-        if (ioctl(fd, PPIGCTRL, &b) < 0)
-            return -ENOTSUP;
-        *(uint8_t *)arg = b;
-        break;
-    case CHR_IOCTL_PP_WRITE_CONTROL:
-        b = *(uint8_t *)arg;
-        if (ioctl(fd, PPISCTRL, &b) < 0)
-            return -ENOTSUP;
-        break;
-    case CHR_IOCTL_PP_READ_STATUS:
-        if (ioctl(fd, PPIGSTATUS, &b) < 0)
-            return -ENOTSUP;
-        *(uint8_t *)arg = b;
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static CharDriverState *qemu_chr_open_pp(const char *filename)
-{
-    CharDriverState *chr;
-    int fd;
-
-    fd = open(filename, O_RDWR);
-    if (fd < 0)
-        return NULL;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    chr->opaque = (void *)fd;
-    chr->chr_write = null_chr_write;
-    chr->chr_ioctl = pp_ioctl;
-    return chr;
-}
-#endif
-
-#else /* _WIN32 */
-
-typedef struct {
-    int max_size;
-    HANDLE hcom, hrecv, hsend;
-    OVERLAPPED orecv, osend;
-    BOOL fpipe;
-    DWORD len;
-} WinCharState;
-
-#define NSENDBUF 2048
-#define NRECVBUF 2048
-#define MAXCONNECT 1
-#define NTIMEOUT 5000
-
-static int win_chr_poll(void *opaque);
-static int win_chr_pipe_poll(void *opaque);
-
-static void win_chr_close(CharDriverState *chr)
-{
-    WinCharState *s = chr->opaque;
-
-    if (s->hsend) {
-        CloseHandle(s->hsend);
-        s->hsend = NULL;
-    }
-    if (s->hrecv) {
-        CloseHandle(s->hrecv);
-        s->hrecv = NULL;
-    }
-    if (s->hcom) {
-        CloseHandle(s->hcom);
-        s->hcom = NULL;
-    }
-    if (s->fpipe)
-        qemu_del_polling_cb(win_chr_pipe_poll, chr);
-    else
-        qemu_del_polling_cb(win_chr_poll, chr);
-}
-
-static int win_chr_init(CharDriverState *chr, const char *filename)
-{
-    WinCharState *s = chr->opaque;
-    COMMCONFIG comcfg;
-    COMMTIMEOUTS cto = { 0, 0, 0, 0, 0};
-    COMSTAT comstat;
-    DWORD size;
-    DWORD err;
-
-    s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
-    if (!s->hsend) {
-        fprintf(stderr, "Failed CreateEvent\n");
-        goto fail;
-    }
-    s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
-    if (!s->hrecv) {
-        fprintf(stderr, "Failed CreateEvent\n");
-        goto fail;
-    }
-
-    s->hcom = CreateFile(filename, GENERIC_READ|GENERIC_WRITE, 0, NULL,
-                      OPEN_EXISTING, FILE_FLAG_OVERLAPPED, 0);
-    if (s->hcom == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Failed CreateFile (%lu)\n", GetLastError());
-        s->hcom = NULL;
-        goto fail;
-    }
-
-    if (!SetupComm(s->hcom, NRECVBUF, NSENDBUF)) {
-        fprintf(stderr, "Failed SetupComm\n");
-        goto fail;
-    }
-
-    ZeroMemory(&comcfg, sizeof(COMMCONFIG));
-    size = sizeof(COMMCONFIG);
-    GetDefaultCommConfig(filename, &comcfg, &size);
-    comcfg.dcb.DCBlength = sizeof(DCB);
-    CommConfigDialog(filename, NULL, &comcfg);
-
-    if (!SetCommState(s->hcom, &comcfg.dcb)) {
-        fprintf(stderr, "Failed SetCommState\n");
-        goto fail;
-    }
-
-    if (!SetCommMask(s->hcom, EV_ERR)) {
-        fprintf(stderr, "Failed SetCommMask\n");
-        goto fail;
-    }
-
-    cto.ReadIntervalTimeout = MAXDWORD;
-    if (!SetCommTimeouts(s->hcom, &cto)) {
-        fprintf(stderr, "Failed SetCommTimeouts\n");
-        goto fail;
-    }
-
-    if (!ClearCommError(s->hcom, &err, &comstat)) {
-        fprintf(stderr, "Failed ClearCommError\n");
-        goto fail;
-    }
-    qemu_add_polling_cb(win_chr_poll, chr);
-    return 0;
-
- fail:
-    win_chr_close(chr);
-    return -1;
-}
-
-static int win_chr_write(CharDriverState *chr, const uint8_t *buf, int len1)
-{
-    WinCharState *s = chr->opaque;
-    DWORD len, ret, size, err;
-
-    len = len1;
-    ZeroMemory(&s->osend, sizeof(s->osend));
-    s->osend.hEvent = s->hsend;
-    while (len > 0) {
-        if (s->hsend)
-            ret = WriteFile(s->hcom, buf, len, &size, &s->osend);
-        else
-            ret = WriteFile(s->hcom, buf, len, &size, NULL);
-        if (!ret) {
-            err = GetLastError();
-            if (err == ERROR_IO_PENDING) {
-                ret = GetOverlappedResult(s->hcom, &s->osend, &size, TRUE);
-                if (ret) {
-                    buf += size;
-                    len -= size;
-                } else {
-                    break;
-                }
-            } else {
-                break;
-            }
-        } else {
-            buf += size;
-            len -= size;
-        }
-    }
-    return len1 - len;
-}
-
-static int win_chr_read_poll(CharDriverState *chr)
-{
-    WinCharState *s = chr->opaque;
-
-    s->max_size = qemu_chr_can_read(chr);
-    return s->max_size;
-}
-
-static void win_chr_readfile(CharDriverState *chr)
-{
-    WinCharState *s = chr->opaque;
-    int ret, err;
-    uint8_t buf[1024];
-    DWORD size;
-
-    ZeroMemory(&s->orecv, sizeof(s->orecv));
-    s->orecv.hEvent = s->hrecv;
-    ret = ReadFile(s->hcom, buf, s->len, &size, &s->orecv);
-    if (!ret) {
-        err = GetLastError();
-        if (err == ERROR_IO_PENDING) {
-            ret = GetOverlappedResult(s->hcom, &s->orecv, &size, TRUE);
-        }
-    }
-
-    if (size > 0) {
-        qemu_chr_read(chr, buf, size);
-    }
-}
-
-static void win_chr_read(CharDriverState *chr)
-{
-    WinCharState *s = chr->opaque;
-
-    if (s->len > s->max_size)
-        s->len = s->max_size;
-    if (s->len == 0)
-        return;
-
-    win_chr_readfile(chr);
-}
-
-static int win_chr_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    WinCharState *s = chr->opaque;
-    COMSTAT status;
-    DWORD comerr;
-
-    ClearCommError(s->hcom, &comerr, &status);
-    if (status.cbInQue > 0) {
-        s->len = status.cbInQue;
-        win_chr_read_poll(chr);
-        win_chr_read(chr);
-        return 1;
-    }
-    return 0;
-}
-
-static CharDriverState *qemu_chr_open_win(const char *filename)
-{
-    CharDriverState *chr;
-    WinCharState *s;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(WinCharState));
-    chr->opaque = s;
-    chr->chr_write = win_chr_write;
-    chr->chr_close = win_chr_close;
-
-    if (win_chr_init(chr, filename) < 0) {
-        free(s);
-        free(chr);
-        return NULL;
-    }
-    qemu_chr_reset(chr);
-    return chr;
-}
-
-static int win_chr_pipe_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    WinCharState *s = chr->opaque;
-    DWORD size;
-
-    PeekNamedPipe(s->hcom, NULL, 0, NULL, &size, NULL);
-    if (size > 0) {
-        s->len = size;
-        win_chr_read_poll(chr);
-        win_chr_read(chr);
-        return 1;
-    }
-    return 0;
-}
-
-static int win_chr_pipe_init(CharDriverState *chr, const char *filename)
-{
-    WinCharState *s = chr->opaque;
-    OVERLAPPED ov;
-    int ret;
-    DWORD size;
-    char openname[256];
-
-    s->fpipe = TRUE;
-
-    s->hsend = CreateEvent(NULL, TRUE, FALSE, NULL);
-    if (!s->hsend) {
-        fprintf(stderr, "Failed CreateEvent\n");
-        goto fail;
-    }
-    s->hrecv = CreateEvent(NULL, TRUE, FALSE, NULL);
-    if (!s->hrecv) {
-        fprintf(stderr, "Failed CreateEvent\n");
-        goto fail;
-    }
-
-    snprintf(openname, sizeof(openname), "\\\\.\\pipe\\%s", filename);
-    s->hcom = CreateNamedPipe(openname, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
-                              PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
-                              PIPE_WAIT,
-                              MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
-    if (s->hcom == INVALID_HANDLE_VALUE) {
-        fprintf(stderr, "Failed CreateNamedPipe (%lu)\n", GetLastError());
-        s->hcom = NULL;
-        goto fail;
-    }
-
-    ZeroMemory(&ov, sizeof(ov));
-    ov.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
-    ret = ConnectNamedPipe(s->hcom, &ov);
-    if (ret) {
-        fprintf(stderr, "Failed ConnectNamedPipe\n");
-        goto fail;
-    }
-
-    ret = GetOverlappedResult(s->hcom, &ov, &size, TRUE);
-    if (!ret) {
-        fprintf(stderr, "Failed GetOverlappedResult\n");
-        if (ov.hEvent) {
-            CloseHandle(ov.hEvent);
-            ov.hEvent = NULL;
-        }
-        goto fail;
-    }
-
-    if (ov.hEvent) {
-        CloseHandle(ov.hEvent);
-        ov.hEvent = NULL;
-    }
-    qemu_add_polling_cb(win_chr_pipe_poll, chr);
-    return 0;
-
- fail:
-    win_chr_close(chr);
-    return -1;
-}
-
-
-static CharDriverState *qemu_chr_open_win_pipe(const char *filename)
-{
-    CharDriverState *chr;
-    WinCharState *s;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(WinCharState));
-    chr->opaque = s;
-    chr->chr_write = win_chr_write;
-    chr->chr_close = win_chr_close;
-
-    if (win_chr_pipe_init(chr, filename) < 0) {
-        free(s);
-        free(chr);
-        return NULL;
-    }
-    qemu_chr_reset(chr);
-    return chr;
-}
-
-static CharDriverState *qemu_chr_open_win_file(HANDLE fd_out)
-{
-    CharDriverState *chr;
-    WinCharState *s;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(WinCharState));
-    s->hcom = fd_out;
-    chr->opaque = s;
-    chr->chr_write = win_chr_write;
-    qemu_chr_reset(chr);
-    return chr;
-}
-
-static CharDriverState *qemu_chr_open_win_con(const char *filename)
-{
-    return qemu_chr_open_win_file(GetStdHandle(STD_OUTPUT_HANDLE));
-}
-
-static CharDriverState *qemu_chr_open_win_file_out(const char *file_out)
-{
-    HANDLE fd_out;
-
-    fd_out = CreateFile(file_out, GENERIC_WRITE, FILE_SHARE_READ, NULL,
-                        OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
-    if (fd_out == INVALID_HANDLE_VALUE)
-        return NULL;
-
-    return qemu_chr_open_win_file(fd_out);
-}
-#endif /* !_WIN32 */
-
-/***********************************************************/
-/* UDP Net console */
-
-typedef struct {
-    int fd;
-    SockAddress  daddr;
-    uint8_t buf[1024];
-    int bufcnt;
-    int bufptr;
-    int max_size;
-} NetCharDriver;
-
-static int udp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    NetCharDriver *s = chr->opaque;
-
-    return socket_sendto(s->fd, (const void *)buf, len, &s->daddr);
-}
-
-static int udp_chr_read_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    NetCharDriver *s = chr->opaque;
-
-    s->max_size = qemu_chr_can_read(chr);
-
-    /* If there were any stray characters in the queue process them
-     * first
-     */
-    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
-        qemu_chr_read(chr, &s->buf[s->bufptr], 1);
-        s->bufptr++;
-        s->max_size = qemu_chr_can_read(chr);
-    }
-    return s->max_size;
-}
-
-static void udp_chr_read(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    NetCharDriver *s = chr->opaque;
-
-    if (s->max_size == 0)
-        return;
-    s->bufcnt = socket_recv(s->fd, (void *)s->buf, sizeof(s->buf));
-    s->bufptr = s->bufcnt;
-    if (s->bufcnt <= 0)
-        return;
-
-    s->bufptr = 0;
-    while (s->max_size > 0 && s->bufptr < s->bufcnt) {
-        qemu_chr_read(chr, &s->buf[s->bufptr], 1);
-        s->bufptr++;
-        s->max_size = qemu_chr_can_read(chr);
-    }
-}
-
-static void udp_chr_update_read_handler(CharDriverState *chr)
-{
-    NetCharDriver *s = chr->opaque;
-
-    if (s->fd >= 0) {
-        qemu_set_fd_handler2(s->fd, udp_chr_read_poll,
-                             udp_chr_read, NULL, chr);
-    }
-}
-
-static void udp_chr_close(CharDriverState *chr)
-{
-    NetCharDriver *s = chr->opaque;
-    if (s->fd >= 0) {
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-        socket_close(s->fd);
-    }
-    qemu_free(s);
-}
-
-static CharDriverState *qemu_chr_open_udp(const char *def)
-{
-    CharDriverState *chr = NULL;
-    NetCharDriver *s = NULL;
-    int fd = -1;
-    SockAddress  saddr;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s   = qemu_mallocz(sizeof(NetCharDriver));
-
-    fd = socket_create_inet(SOCKET_DGRAM);
-    if (fd < 0) {
-        perror("socket(PF_INET, SOCK_DGRAM)");
-        goto return_err;
-    }
-
-    if (parse_host_src_port(&s->daddr, &saddr, def) < 0) {
-        printf("Could not parse: %s\n", def);
-        goto return_err;
-    }
-
-    if (socket_bind(fd, &saddr) < 0) {
-        perror("bind");
-        goto return_err;
-    }
-
-    s->fd = fd;
-    s->bufcnt = 0;
-    s->bufptr = 0;
-    chr->opaque = s;
-    chr->chr_write = udp_chr_write;
-    chr->chr_update_read_handler = udp_chr_update_read_handler;
-    chr->chr_close = udp_chr_close;
-    return chr;
-
-return_err:
-    if (chr)
-        free(chr);
-    if (s)
-        free(s);
-    if (fd >= 0)
-        closesocket(fd);
-    return NULL;
-}
-
-/***********************************************************/
-/* TCP Net console */
-
-typedef struct {
-    int fd, listen_fd;
-    int connected;
-    int max_size;
-    int do_telnetopt;
-    int do_nodelay;
-    int is_unix;
-    int msgfd;
-} TCPCharDriver;
-
-static void tcp_chr_accept(void *opaque);
-
-static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
-{
-    TCPCharDriver *s = chr->opaque;
-    if (s->connected) {
-        return send_all(s->fd, buf, len);
-    } else {
-        /* XXX: indicate an error ? */
-        return len;
-    }
-}
-
-static int tcp_chr_read_poll(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    TCPCharDriver *s = chr->opaque;
-    if (!s->connected)
-        return 0;
-    s->max_size = qemu_chr_can_read(chr);
-    return s->max_size;
-}
-
-#define IAC 255
-#define IAC_BREAK 243
-static void tcp_chr_process_IAC_bytes(CharDriverState *chr,
-                                      TCPCharDriver *s,
-                                      uint8_t *buf, int *size)
-{
-    /* Handle any telnet client's basic IAC options to satisfy char by
-     * char mode with no echo.  All IAC options will be removed from
-     * the buf and the do_telnetopt variable will be used to track the
-     * state of the width of the IAC information.
-     *
-     * IAC commands come in sets of 3 bytes with the exception of the
-     * "IAC BREAK" command and the double IAC.
-     */
-
-    int i;
-    int j = 0;
-
-    for (i = 0; i < *size; i++) {
-        if (s->do_telnetopt > 1) {
-            if ((unsigned char)buf[i] == IAC && s->do_telnetopt == 2) {
-                /* Double IAC means send an IAC */
-                if (j != i)
-                    buf[j] = buf[i];
-                j++;
-                s->do_telnetopt = 1;
-            } else {
-                if ((unsigned char)buf[i] == IAC_BREAK && s->do_telnetopt == 2) {
-                    /* Handle IAC break commands by sending a serial break */
-                    qemu_chr_event(chr, CHR_EVENT_BREAK);
-                    s->do_telnetopt++;
-                }
-                s->do_telnetopt++;
-            }
-            if (s->do_telnetopt >= 4) {
-                s->do_telnetopt = 1;
-            }
-        } else {
-            if ((unsigned char)buf[i] == IAC) {
-                s->do_telnetopt = 2;
-            } else {
-                if (j != i)
-                    buf[j] = buf[i];
-                j++;
-            }
-        }
-    }
-    *size = j;
-}
-
-#if 0
-static int tcp_get_msgfd(CharDriverState *chr)
-{
-    TCPCharDriver *s = chr->opaque;
-    int fd = s->msgfd;
-    s->msgfd = -1;
-    return fd;
-}
-#endif
-
-#ifndef _WIN32
-static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
-{
-    TCPCharDriver *s = chr->opaque;
-    struct cmsghdr *cmsg;
-
-    for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
-        int fd;
-
-        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
-            cmsg->cmsg_level != SOL_SOCKET ||
-            cmsg->cmsg_type != SCM_RIGHTS)
-            continue;
-
-        fd = *((int *)CMSG_DATA(cmsg));
-        if (fd < 0)
-            continue;
-
-        if (s->msgfd != -1)
-            close(s->msgfd);
-        s->msgfd = fd;
-    }
-}
-
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
-    TCPCharDriver *s = chr->opaque;
-    struct msghdr msg = { NULL, };
-    struct iovec iov[1];
-    union {
-        struct cmsghdr cmsg;
-        char control[CMSG_SPACE(sizeof(int))];
-    } msg_control;
-    ssize_t ret;
-
-    iov[0].iov_base = buf;
-    iov[0].iov_len = len;
-
-    msg.msg_iov = iov;
-    msg.msg_iovlen = 1;
-    msg.msg_control = &msg_control;
-    msg.msg_controllen = sizeof(msg_control);
-
-    ret = recvmsg(s->fd, &msg, 0);
-    if (ret > 0 && s->is_unix)
-        unix_process_msgfd(chr, &msg);
-
-    return ret;
-}
-#else
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
-    TCPCharDriver *s = chr->opaque;
-    return recv(s->fd, buf, len, 0);
-}
-#endif
-
-static void tcp_chr_read(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    TCPCharDriver *s = chr->opaque;
-    uint8_t buf[READ_BUF_LEN];
-    int len, size;
-
-    if (!s->connected || s->max_size <= 0)
-        return;
-    len = sizeof(buf);
-    if (len > s->max_size)
-        len = s->max_size;
-    size = tcp_chr_recv(chr, (void *)buf, len);
-    if (size == 0) {
-        /* connection closed */
-        s->connected = 0;
-        if (s->listen_fd >= 0) {
-            qemu_set_fd_handler(s->listen_fd, tcp_chr_accept, NULL, chr);
-        }
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-        socket_close(s->fd);
-        s->fd = -1;
-        qemu_chr_event(chr, CHR_EVENT_CLOSED);
-    } else if (size > 0) {
-        if (s->do_telnetopt)
-            tcp_chr_process_IAC_bytes(chr, s, buf, &size);
-        if (size > 0)
-            qemu_chr_read(chr, buf, size);
-    }
-}
-
-#ifndef _WIN32
-CharDriverState *qemu_chr_open_eventfd(int eventfd)
-{
-    return qemu_chr_open_fd(eventfd, eventfd);
-}
-#endif
-
-static void tcp_chr_connect(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    TCPCharDriver *s = chr->opaque;
-
-    s->connected = 1;
-    qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
-                         tcp_chr_read, NULL, chr);
-    qemu_chr_generic_open(chr);
-}
-
-#define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c;
-static void tcp_chr_telnet_init(int fd)
-{
-    char buf[3];
-    /* Send the telnet negotion to put telnet in binary, no echo, single char mode */
-    IACSET(buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
-    socket_send(fd, (char *)buf, 3);
-    IACSET(buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
-    socket_send(fd, (char *)buf, 3);
-    IACSET(buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
-    socket_send(fd, (char *)buf, 3);
-    IACSET(buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
-    socket_send(fd, (char *)buf, 3);
-}
-
-static void tcp_chr_accept(void *opaque)
-{
-    CharDriverState *chr = opaque;
-    TCPCharDriver *s = chr->opaque;
-    int fd;
-
-    for(;;) {
-        fd = socket_accept(s->listen_fd, NULL);
-        if (fd < 0) {
-            return;
-        } else if (fd >= 0) {
-            if (s->do_telnetopt)
-                tcp_chr_telnet_init(fd);
-            break;
-        }
-    }
-    socket_set_nonblock(fd);
-    if (s->do_nodelay)
-        socket_set_nodelay(fd);
-    s->fd = fd;
-    qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
-    tcp_chr_connect(chr);
-}
-
-static void tcp_chr_close(CharDriverState *chr)
-{
-    TCPCharDriver *s = chr->opaque;
-    if (s->fd >= 0) {
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-        closesocket(s->fd);
-    }
-    if (s->listen_fd >= 0) {
-        qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
-        closesocket(s->listen_fd);
-    }
-    qemu_free(s);
-}
-
-static CharDriverState *qemu_chr_open_tcp(const char *host_str,
-                                          int is_telnet,
-					  int is_unix)
-{
-    CharDriverState *chr = NULL;
-    TCPCharDriver *s = NULL;
-    int fd = -1, offset = 0;
-    int is_listen = 0;
-    int is_waitconnect = 1;
-    int do_nodelay = 0;
-    const char *ptr;
-
-    ptr = host_str;
-    while((ptr = strchr(ptr,','))) {
-        ptr++;
-        if (!strncmp(ptr,"server",6)) {
-            is_listen = 1;
-        } else if (!strncmp(ptr,"nowait",6)) {
-            is_waitconnect = 0;
-        } else if (!strncmp(ptr,"nodelay",6)) {
-            do_nodelay = 1;
-        } else if (!strncmp(ptr,"to=",3)) {
-            /* nothing, inet_listen() parses this one */;
-        } else if (!strncmp(ptr,"ipv4",4)) {
-            /* nothing, inet_connect() and inet_listen() parse this one */;
-        } else if (!strncmp(ptr,"ipv6",4)) {
-            /* nothing, inet_connect() and inet_listen() parse this one */;
-        } else {
-            printf("Unknown option: %s\n", ptr);
-            goto fail;
-        }
-    }
-    if (!is_listen)
-        is_waitconnect = 0;
-
-    chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(TCPCharDriver));
-
-    if (is_listen) {
-        chr->filename = qemu_malloc(256);
-        if (is_unix) {
-            pstrcpy(chr->filename, 256, "unix:");
-        } else if (is_telnet) {
-            pstrcpy(chr->filename, 256, "telnet:");
-        } else {
-            pstrcpy(chr->filename, 256, "tcp:");
-        }
-        offset = strlen(chr->filename);
-    }
-    if (is_unix) {
-        if (is_listen) {
-            fd = unix_listen(host_str, chr->filename + offset, 256 - offset);
-        } else {
-            fd = unix_connect(host_str);
-        }
-    } else {
-#ifdef CONFIG_ANDROID
-        if (!strncmp(host_str,"socket=",7)) {
-            char *end;
-            long val = strtol(host_str+7, &end, 10);
-            if (val <= 0 || end == host_str+7) {
-                printf("Invalid socket number: '%s'\n", host_str+7);
-                goto fail;
-            }
-            fd = (int) val;
-        } else
-#endif
-        if (is_listen) {
-            fd = inet_listen(host_str, chr->filename + offset, 256 - offset,
-                             SOCKET_STREAM, 0);
-        } else {
-            fd = inet_connect(host_str, SOCKET_STREAM);
-        }
-    }
-    if (fd < 0)
-        goto fail;
-
-    if (!is_waitconnect)
-        socket_set_nonblock(fd);
-
-    s->connected = 0;
-    s->fd = -1;
-    s->listen_fd = -1;
-    s->is_unix = is_unix;
-    s->do_nodelay = do_nodelay && !is_unix;
-
-    chr->opaque = s;
-    chr->chr_write = tcp_chr_write;
-    chr->chr_close = tcp_chr_close;
-
-    if (is_listen) {
-        s->listen_fd = fd;
-        qemu_set_fd_handler(s->listen_fd, tcp_chr_accept, NULL, chr);
-        if (is_telnet)
-            s->do_telnetopt = 1;
-    } else {
-        s->connected = 1;
-        s->fd = fd;
-        socket_set_nodelay(fd);
-        tcp_chr_connect(chr);
-    }
-
-    if (is_listen && is_waitconnect) {
-        printf("QEMU waiting for connection on: %s\n",
-               chr->filename ? chr->filename : host_str);
-        tcp_chr_accept(chr);
-        socket_set_nonblock(s->listen_fd);
-    }
-
-    return chr;
- fail:
-    if (fd >= 0)
-        closesocket(fd);
-    qemu_free(s);
-    qemu_free(chr);
-    return NULL;
-}
-
-CharDriverState *qemu_chr_open(const char *label, const char *filename, void (*init)(struct CharDriverState *s))
-{
-    const char *p;
-    CharDriverState *chr;
-
-    if (!strcmp(filename, "vc")) {
-        chr = text_console_init_compat(label, NULL);
-    } else
-    if (strstart(filename, "vc:", &p)) {
-        chr = text_console_init_compat(label, p);
-    } else
-    if (!strcmp(filename, "null")) {
-        chr = qemu_chr_open_null();
-    } else
-    if (strstart(filename, "tcp:", &p)) {
-        chr = qemu_chr_open_tcp(p, 0, 0);
-    } else
-    if (strstart(filename, "telnet:", &p)) {
-        chr = qemu_chr_open_tcp(p, 1, 0);
-    } else
-    if (strstart(filename, "udp:", &p)) {
-        chr = qemu_chr_open_udp(p);
-    } else
-    if (strstart(filename, "mon:", &p)) {
-        chr = qemu_chr_open(label, p, NULL);
-        if (chr) {
-            chr = qemu_chr_open_mux(chr);
-            monitor_init(chr, MONITOR_USE_READLINE);
-        } else {
-            printf("Unable to open driver: %s\n", p);
-        }
-    } else if (!strcmp(filename, "msmouse")) {
-        chr = qemu_chr_open_msmouse();
-    } else
-#ifndef _WIN32
-    if (strstart(filename, "unix:", &p)) {
-	chr = qemu_chr_open_tcp(p, 0, 1);
-    } else if (strstart(filename, "file:", &p)) {
-        chr = qemu_chr_open_file_out(p);
-    } else if (strstart(filename, "pipe:", &p)) {
-        chr = qemu_chr_open_pipe(p);
-    } else if (strstart(filename, "fdpair:", &p)) {
-        chr = qemu_chr_open_fdpair(p);
-    } else if (!strcmp(filename, "pty")) {
-        chr = qemu_chr_open_pty();
-    } else if (!strcmp(filename, "stdio")) {
-        chr = qemu_chr_open_stdio();
-    } else
-#if defined(__linux__)
-    if (strstart(filename, "/dev/parport", NULL)) {
-        chr = qemu_chr_open_pp(filename);
-    } else
-#elif defined(__FreeBSD__) || defined(__DragonFly__)
-    if (strstart(filename, "/dev/ppi", NULL)) {
-        chr = qemu_chr_open_pp(filename);
-    } else
-#endif
-    if (strstart(filename, "/dev/", NULL)) {
-        chr = qemu_chr_open_tty(filename);
-    } else
-#else /* !_WIN32 */
-    if (strstart(filename, "COM", NULL)) {
-        chr = qemu_chr_open_win(filename);
-    } else
-    if (strstart(filename, "pipe:", &p)) {
-        chr = qemu_chr_open_win_pipe(p);
-    } else
-    if (strstart(filename, "con:", NULL)) {
-        chr = qemu_chr_open_win_con(filename);
-    } else
-    if (strstart(filename, "file:", &p)) {
-        chr = qemu_chr_open_win_file_out(p);
-    } else
-#endif
-    if (!strcmp(filename, "android-modem")) {
-        CharDriverState*  cs;
-        qemu_chr_open_charpipe( &cs, &android_modem_cs );
-        return cs;
-    } else if (!strcmp(filename, "android-gps")) {
-        CharDriverState*  cs;
-        qemu_chr_open_charpipe( &cs, &android_gps_cs );
-        return cs;
-    } else if (!strcmp(filename, "android-kmsg")) {
-        return android_kmsg_get_cs();
-    } else if (!strcmp(filename, "android-qemud")) {
-        return android_qemud_get_cs();
-    } else
-#ifdef CONFIG_BRLAPI
-    if (!strcmp(filename, "braille")) {
-        chr = chr_baum_init();
-    } else
-#endif
-    {
-        chr = NULL;
-    }
-
-    if (chr) {
-        if (!chr->filename)
-            chr->filename = qemu_strdup(filename);
-        chr->init = init;
-        chr->label = qemu_strdup(label);
-        QTAILQ_INSERT_TAIL(&chardevs, chr, next);
-    }
-    return chr;
-}
-
-void qemu_chr_close(CharDriverState *chr)
-{
-    QTAILQ_REMOVE(&chardevs, chr, next);
-    if (chr->chr_close)
-        chr->chr_close(chr);
-    qemu_free(chr->filename);
-    qemu_free(chr->label);
-    qemu_free(chr);
-}
-
-void qemu_chr_info(Monitor *mon)
-{
-    CharDriverState *chr;
-
-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        monitor_printf(mon, "%s: filename=%s\n", chr->label, chr->filename);
-    }
-}
-
-CharDriverState *qemu_chr_find(const char *name)
-{
-    CharDriverState *chr;
-
-    QTAILQ_FOREACH(chr, &chardevs, next) {
-        if (strcmp(chr->label, name) != 0)
-            continue;
-        return chr;
-    }
-    return NULL;
-}
diff --git a/qemu-char.c b/qemu-char.c
index f1834a6..46c15ce 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu-common.h"
+#include "sockets.h"
 #include "net.h"
 #include "monitor.h"
 #include "console.h"
@@ -52,22 +53,19 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <net/if.h>
-#ifdef __NetBSD__
-#include <net/if_tap.h>
-#endif
-#ifdef __linux__
-#include <linux/if_tun.h>
-#endif
 #include <arpa/inet.h>
 #include <dirent.h>
 #include <netdb.h>
 #include <sys/select.h>
 #ifdef CONFIG_BSD
 #include <sys/stat.h>
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 #include <libutil.h>
 #include <dev/ppbus/ppi.h>
 #include <dev/ppbus/ppbconf.h>
+#if defined(__GLIBC__)
+#include <pty.h>
+#endif
 #elif defined(__DragonFly__)
 #include <libutil.h>
 #include <dev/misc/ppi/ppi.h>
@@ -75,8 +73,6 @@
 #else
 #include <util.h>
 #endif
-#elif defined (__GLIBC__) && defined (__FreeBSD_kernel__)
-#include <freebsd/stdlib.h>
 #else
 #ifdef __linux__
 #include <pty.h>
@@ -106,11 +102,20 @@
 
 #define READ_BUF_LEN 4096
 
+#ifdef CONFIG_ANDROID
+#include "charpipe.h"
+#include "modem_driver.h"
+#include "android/gps.h"
+#include "android/hw-kmsg.h"
+#include "android/hw-qemud.h"
+#endif /* CONFIG_ANDROID */
+
 /***********************************************************/
 /* character device */
 
 static QTAILQ_HEAD(CharDriverStateHead, CharDriverState) chardevs =
     QTAILQ_HEAD_INITIALIZER(chardevs);
+static int initial_reset_issued;
 
 static void qemu_chr_event(CharDriverState *s, int event)
 {
@@ -221,6 +226,10 @@
                            IOEventHandler *fd_event,
                            void *opaque)
 {
+    if (!opaque && !fd_can_read && !fd_read && !fd_event) {
+        /* chr driver being released. */
+        ++s->avail_connections;
+    }
     s->chr_can_read = fd_can_read;
     s->chr_read = fd_read;
     s->chr_event = fd_event;
@@ -291,7 +300,7 @@
                 int64_t ti;
                 int secs;
 
-                ti = qemu_get_clock(rt_clock);
+                ti = qemu_get_clock_ms(rt_clock);
                 if (d->timestamps_start == -1)
                     d->timestamps_start = ti;
                 ti -= d->timestamps_start;
@@ -376,12 +385,7 @@
                  break;
             }
         case 's':
-            {
-                int i;
-                for (i = 0; i < nb_drives; i++) {
-                        bdrv_commit(drives_table[i].bdrv);
-                }
-            }
+            bdrv_commit_all();
             break;
         case 'b':
             qemu_chr_event(chr, CHR_EVENT_BREAK);
@@ -505,6 +509,9 @@
     chr->chr_write = mux_chr_write;
     chr->chr_update_read_handler = mux_chr_update_read_handler;
     chr->chr_accept_input = mux_chr_accept_input;
+    /* Frontend guest-open / -close notification is not support with muxes */
+    chr->chr_guest_open = NULL;
+    chr->chr_guest_close = NULL;
 
     /* Muxes are always open on creation */
     qemu_chr_generic_open(chr);
@@ -516,6 +523,9 @@
 #ifdef _WIN32
 int send_all(int fd, const void *buf, int len1)
 {
+#if 1
+	return socket_send(fd, buf, len1);
+#else
     int ret, len;
 
     len = len1;
@@ -523,7 +533,7 @@
         ret = send(fd, buf, len, 0);
         if (ret < 0) {
             errno = WSAGetLastError();
-            if (errno != WSAEWOULDBLOCK) {
+            if (errno != WSAEWOULDBLOCK && errno != WSAEAGAIN) {
                 return -1;
             }
         } else if (ret == 0) {
@@ -534,13 +544,15 @@
         }
     }
     return len1 - len;
+#endif
 }
 
 #else
 
-static int unix_write(int fd, const uint8_t *buf, int len1)
+int send_all(int fd, const void *_buf, int len1)
 {
     int ret, len;
+    const uint8_t *buf = _buf;
 
     len = len1;
     while (len > 0) {
@@ -557,13 +569,32 @@
     }
     return len1 - len;
 }
-
-int send_all(int fd, const void *buf, int len1)
-{
-    return unix_write(fd, buf, len1);
-}
 #endif /* !_WIN32 */
 
+static CharDriverState *qemu_chr_open_android_modem(QemuOpts* opts)
+{
+    CharDriverState*  cs;
+    qemu_chr_open_charpipe( &cs, &android_modem_cs );
+    return cs;
+}
+static CharDriverState *qemu_chr_open_android_gps(QemuOpts* opts)
+{
+    CharDriverState*  cs;
+    qemu_chr_open_charpipe( &cs, &android_gps_cs );
+    return cs;
+}
+
+static CharDriverState *qemu_chr_open_android_kmsg(QemuOpts* opts)
+{
+    return android_kmsg_get_cs();
+}
+
+static CharDriverState *qemu_chr_open_android_qemud(QemuOpts* opts)
+{
+    return android_qemud_get_cs();
+}
+
+
 #ifndef _WIN32
 
 typedef struct {
@@ -699,6 +730,18 @@
     return qemu_chr_open_fd(fd_in, fd_out);
 }
 
+#ifdef CONFIG_ANDROID
+static CharDriverState *qemu_chr_open_fdpair(QemuOpts* opts)
+{
+    int fd_in  = qemu_opt_get_number(opts, "fdin",-1);
+    int fd_out = qemu_opt_get_number(opts, "fdout",-1);
+
+    if (fd_in < 0 || fd_out < 0)
+        return NULL;
+
+    return qemu_chr_open_fd(fd_in, fd_out);
+}
+#endif /* CONFIG_ANDROID */
 
 /* for STDIO, we handle the case where several clients use it
    (nographic mode) */
@@ -749,7 +792,7 @@
 /* init terminal so that we can grab keys */
 static struct termios oldtty;
 static int old_fd0_flags;
-static int term_atexit_done;
+static bool stdio_allow_signal;
 
 static void term_exit(void)
 {
@@ -757,32 +800,26 @@
     fcntl(0, F_SETFL, old_fd0_flags);
 }
 
-static void term_init(QemuOpts *opts)
+static void qemu_chr_set_echo_stdio(CharDriverState *chr, bool echo)
 {
     struct termios tty;
 
-    tcgetattr (0, &tty);
-    oldtty = tty;
-    old_fd0_flags = fcntl(0, F_GETFL);
-
+    tty = oldtty;
+    if (!echo) {
     tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
                           |INLCR|IGNCR|ICRNL|IXON);
     tty.c_oflag |= OPOST;
     tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
-    /* if graphical mode, we allow Ctrl-C handling */
-    if (!qemu_opt_get_bool(opts, "signal", display_type != DT_NOGRAPHIC))
-        tty.c_lflag &= ~ISIG;
     tty.c_cflag &= ~(CSIZE|PARENB);
     tty.c_cflag |= CS8;
     tty.c_cc[VMIN] = 1;
     tty.c_cc[VTIME] = 0;
+    }
+    /* if graphical mode, we allow Ctrl-C handling */
+    if (!stdio_allow_signal)
+        tty.c_lflag &= ~ISIG;
 
     tcsetattr (0, TCSANOW, &tty);
-
-    if (!term_atexit_done++)
-        atexit(term_exit);
-
-    fcntl(0, F_SETFL, O_NONBLOCK);
 }
 
 static void qemu_chr_close_stdio(struct CharDriverState *chr)
@@ -799,11 +836,21 @@
 
     if (stdio_nb_clients >= STDIO_MAX_CLIENTS)
         return NULL;
+    if (stdio_nb_clients == 0) {
+        old_fd0_flags = fcntl(0, F_GETFL);
+        tcgetattr (0, &oldtty);
+        fcntl(0, F_SETFL, O_NONBLOCK);
+        atexit(term_exit);
+    }
+
     chr = qemu_chr_open_fd(0, 1);
     chr->chr_close = qemu_chr_close_stdio;
+    chr->chr_set_echo = qemu_chr_set_echo_stdio;
     qemu_set_fd_handler2(0, stdio_read_poll, stdio_read, NULL, chr);
     stdio_nb_clients++;
-    term_init(opts);
+    stdio_allow_signal = qemu_opt_get_bool(opts, "signal",
+                                           display_type != DT_NOGRAPHIC);
+    qemu_chr_set_echo(chr, false);
 
     return chr;
 }
@@ -865,9 +912,7 @@
 }
 #endif
 
-#if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
-    || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) \
-    || defined(__GLIBC__)
+#ifndef _WIN32
 
 typedef struct {
     int fd;
@@ -940,7 +985,7 @@
      * timeout to the normal (much longer) poll interval before the
      * timer triggers.
      */
-    qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 10);
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 10);
 }
 
 static void pty_chr_state(CharDriverState *chr, int connected)
@@ -954,7 +999,7 @@
         /* (re-)connect poll interval for idle guests: once per second.
          * We check more frequently in case the guests sends data to
          * the virtual device linked to our pty. */
-        qemu_mod_timer(s->timer, qemu_get_clock(rt_clock) + 1000);
+        qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 1000);
     } else {
         if (!s->connected)
             qemu_chr_generic_open(chr);
@@ -1004,6 +1049,7 @@
 #else
     char *pty_name = NULL;
 #define q_ptsname(x) ptsname(x)
+    //extern char* ptsname(int);
 #endif
 
     chr = qemu_mallocz(sizeof(CharDriverState));
@@ -1030,7 +1076,7 @@
     chr->chr_update_read_handler = pty_chr_update_read_handler;
     chr->chr_close = pty_chr_close;
 
-    s->timer = qemu_new_timer(rt_clock, pty_chr_timer, chr);
+    s->timer = qemu_new_timer_ms(rt_clock, pty_chr_timer, chr);
 
     return chr;
 }
@@ -1046,9 +1092,6 @@
            speed, parity, data_bits, stop_bits);
 #endif
     tcgetattr (fd, &tty);
-    if (!term_atexit_done) {
-        oldtty = tty;
-    }
 
 #define check_speed(val) if (speed <= val) { spd = B##val; break; }
     speed = speed * 10 / 11;
@@ -1220,11 +1263,6 @@
     return 0;
 }
 
-static void tty_exit(void)
-{
-    tcsetattr(0, TCSANOW, &oldtty);
-}
-
 static void qemu_chr_close_tty(CharDriverState *chr)
 {
     FDCharDriver *s = chr->opaque;
@@ -1259,16 +1297,14 @@
     }
     chr->chr_ioctl = tty_serial_ioctl;
     chr->chr_close = qemu_chr_close_tty;
-    if (!term_atexit_done++)
-        atexit(tty_exit);
     return chr;
 }
-#else  /* ! __linux__ && ! __sun__ */
+#else  /* _WIN32 */
 static CharDriverState *qemu_chr_open_pty(QemuOpts *opts)
 {
     return NULL;
 }
-#endif /* __linux__ || __sun__ */
+#endif /* _WIN32 */
 
 #if defined(__linux__)
 typedef struct {
@@ -1415,7 +1451,7 @@
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
 static int pp_ioctl(CharDriverState *chr, int cmd, void *arg)
 {
-    int fd = (int)(long)chr->opaque;
+    int fd = (int)(intptr_t)chr->opaque;
     uint8_t b;
 
     switch(cmd) {
@@ -1461,7 +1497,7 @@
         return NULL;
 
     chr = qemu_mallocz(sizeof(CharDriverState));
-    chr->opaque = (void *)(long)fd;
+    chr->opaque = (void *)(intptr_t)fd;
     chr->chr_write = null_chr_write;
     chr->chr_ioctl = pp_ioctl;
     return chr;
@@ -1829,6 +1865,7 @@
 
 typedef struct {
     int fd;
+    SockAddress  daddr;
     uint8_t buf[READ_BUF_LEN];
     int bufcnt;
     int bufptr;
@@ -1839,7 +1876,7 @@
 {
     NetCharDriver *s = chr->opaque;
 
-    return send(s->fd, (const void *)buf, len, 0);
+    return socket_sendto(s->fd, (const void *)buf, len, &s->daddr);
 }
 
 static int udp_chr_read_poll(void *opaque)
@@ -1867,7 +1904,7 @@
 
     if (s->max_size == 0)
         return;
-    s->bufcnt = recv(s->fd, (void *)s->buf, sizeof(s->buf), 0);
+    s->bufcnt = socket_recv(s->fd, (void *)s->buf, sizeof(s->buf));
     s->bufptr = s->bufcnt;
     if (s->bufcnt <= 0)
         return;
@@ -1895,10 +1932,9 @@
     NetCharDriver *s = chr->opaque;
     if (s->fd >= 0) {
         qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-        closesocket(s->fd);
+        socket_close(s->fd);
     }
     qemu_free(s);
-    qemu_chr_event(chr, CHR_EVENT_CLOSED);
 }
 
 static CharDriverState *qemu_chr_open_udp(QemuOpts *opts)
@@ -1908,7 +1944,7 @@
     int fd = -1;
 
     chr = qemu_mallocz(sizeof(CharDriverState));
-    s = qemu_mallocz(sizeof(NetCharDriver));
+    s   = qemu_mallocz(sizeof(NetCharDriver));
 
     fd = inet_dgram_opts(opts);
     if (fd < 0) {
@@ -2106,7 +2142,7 @@
             qemu_set_fd_handler(s->listen_fd, tcp_chr_accept, NULL, chr);
         }
         qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
-        closesocket(s->fd);
+        socket_close(s->fd);
         s->fd = -1;
         qemu_chr_event(chr, CHR_EVENT_CLOSED);
     } else if (size > 0) {
@@ -2141,46 +2177,24 @@
     char buf[3];
     /* Send the telnet negotion to put telnet in binary, no echo, single char mode */
     IACSET(buf, 0xff, 0xfb, 0x01);  /* IAC WILL ECHO */
-    send(fd, (char *)buf, 3, 0);
+    socket_send(fd, (char *)buf, 3);
     IACSET(buf, 0xff, 0xfb, 0x03);  /* IAC WILL Suppress go ahead */
-    send(fd, (char *)buf, 3, 0);
+    socket_send(fd, (char *)buf, 3);
     IACSET(buf, 0xff, 0xfb, 0x00);  /* IAC WILL Binary */
-    send(fd, (char *)buf, 3, 0);
+    socket_send(fd, (char *)buf, 3);
     IACSET(buf, 0xff, 0xfd, 0x00);  /* IAC DO Binary */
-    send(fd, (char *)buf, 3, 0);
-}
-
-static void socket_set_nodelay(int fd)
-{
-    int val = 1;
-    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
+    socket_send(fd, (char *)buf, 3);
 }
 
 static void tcp_chr_accept(void *opaque)
 {
     CharDriverState *chr = opaque;
     TCPCharDriver *s = chr->opaque;
-    struct sockaddr_in saddr;
-#ifndef _WIN32
-    struct sockaddr_un uaddr;
-#endif
-    struct sockaddr *addr;
-    socklen_t len;
     int fd;
 
     for(;;) {
-#ifndef _WIN32
-	if (s->is_unix) {
-	    len = sizeof(uaddr);
-	    addr = (struct sockaddr *)&uaddr;
-	} else
-#endif
-	{
-	    len = sizeof(saddr);
-	    addr = (struct sockaddr *)&saddr;
-	}
-        fd = qemu_accept(s->listen_fd, addr, &len);
-        if (fd < 0 && errno != EINTR) {
+        fd = socket_accept(s->listen_fd, NULL);
+        if (fd < 0) {
             return;
         } else if (fd >= 0) {
             if (s->do_telnetopt)
@@ -2309,6 +2323,70 @@
     return NULL;
 }
 
+/***********************************************************/
+/* Memory chardev */
+typedef struct {
+    size_t outbuf_size;
+    size_t outbuf_capacity;
+    uint8_t *outbuf;
+} MemoryDriver;
+
+static int mem_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
+{
+    MemoryDriver *d = chr->opaque;
+
+    /* TODO: the QString implementation has the same code, we should
+     * introduce a generic way to do this in cutils.c */
+    if (d->outbuf_capacity < d->outbuf_size + len) {
+        /* grow outbuf */
+        d->outbuf_capacity += len;
+        d->outbuf_capacity *= 2;
+        d->outbuf = qemu_realloc(d->outbuf, d->outbuf_capacity);
+    }
+
+    memcpy(d->outbuf + d->outbuf_size, buf, len);
+    d->outbuf_size += len;
+
+    return len;
+}
+
+void qemu_chr_init_mem(CharDriverState *chr)
+{
+    MemoryDriver *d;
+
+    d = qemu_malloc(sizeof(*d));
+    d->outbuf_size = 0;
+    d->outbuf_capacity = 4096;
+    d->outbuf = qemu_mallocz(d->outbuf_capacity);
+
+    memset(chr, 0, sizeof(*chr));
+    chr->opaque = d;
+    chr->chr_write = mem_chr_write;
+}
+
+QString *qemu_chr_mem_to_qs(CharDriverState *chr)
+{
+    MemoryDriver *d = chr->opaque;
+    return qstring_from_substr((char *) d->outbuf, 0, d->outbuf_size - 1);
+}
+
+/* NOTE: this driver can not be closed with qemu_chr_close()! */
+void qemu_chr_close_mem(CharDriverState *chr)
+{
+    MemoryDriver *d = chr->opaque;
+
+    qemu_free(d->outbuf);
+    qemu_free(chr->opaque);
+    chr->opaque = NULL;
+    chr->chr_write = NULL;
+}
+
+size_t qemu_chr_mem_osize(const CharDriverState *chr)
+{
+    const MemoryDriver *d = chr->opaque;
+    return d->outbuf_size;
+}
+
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
 {
     char host[65], port[33], width[8], height[8];
@@ -2427,6 +2505,40 @@
         qemu_opt_set(opts, "path", filename);
         return opts;
     }
+#ifdef CONFIG_ANDROID
+    if (strstart(filename, "fdpair:", &p)) {
+        int fdin, fdout;
+        char temp[8];
+        qemu_opt_set(opts, "backend", "fdpair");
+        if (sscanf(p, "%d,%d", &fdin, &fdout) != 2) {
+            goto fail;
+        }
+        if (fdin < 0 || fdout < 0) {
+            goto fail;
+        }
+        snprintf(temp, sizeof temp, "%d", fdin);
+        qemu_opt_set(opts, "fdin", temp);
+        snprintf(temp, sizeof temp, "%d", fdout);
+        qemu_opt_set(opts, "fdout", temp);
+        return opts;
+    }
+    if (!strcmp(filename, "android-kmsg")) {
+        qemu_opt_set(opts, "backend", "android-kmsg");
+        return opts;
+    }
+    if (!strcmp(filename, "android-qemud")) {
+        qemu_opt_set(opts, "backend", "android-qemud");
+        return opts;
+    }
+    if (!strcmp(filename, "android-modem")) {
+        qemu_opt_set(opts, "backend", "android-modem");
+        return opts;
+    }
+    if (!strcmp(filename, "android-gps")) {
+        qemu_opt_set(opts, "backend", "android-gps");
+        return opts;
+    }
+#endif /* CONFIG_ANDROID */
 
 fail:
     qemu_opts_del(opts);
@@ -2441,7 +2553,7 @@
     { .name = "socket",    .open = qemu_chr_open_socket },
     { .name = "udp",       .open = qemu_chr_open_udp },
     { .name = "msmouse",   .open = qemu_chr_open_msmouse },
-    { .name = "vc",        .open = text_console_init_compat },
+    { .name = "vc",        .open = text_console_init },
 #ifdef _WIN32
     { .name = "file",      .open = qemu_chr_open_win_file_out },
     { .name = "pipe",      .open = qemu_chr_open_win_pipe },
@@ -2454,8 +2566,14 @@
     { .name = "stdio",     .open = qemu_chr_open_stdio },
 #endif
 #ifdef CONFIG_ANDROID
+#ifndef _WIN32
     { .name = "fdpair",    .open = qemu_chr_open_fdpair },
 #endif
+    { .name = "android-qemud", .open = qemu_chr_open_android_qemud },
+    { .name = "android-kmsg",  .open = qemu_chr_open_android_kmsg },
+    { .name = "android-modem", .open = qemu_chr_open_android_modem },
+    { .name = "android-gps",   .open = qemu_chr_open_android_gps },
+#endif
 #ifdef CONFIG_BRLAPI
     { .name = "braille",   .open = chr_baum_init },
 #endif
@@ -2481,6 +2599,11 @@
         return NULL;
     }
 
+    if (qemu_opt_get(opts, "backend") == NULL) {
+        fprintf(stderr, "chardev: \"%s\" missing backend\n",
+                qemu_opts_id(opts));
+        return NULL;
+    }
     for (i = 0; i < ARRAY_SIZE(backend_table); i++) {
         if (strcmp(backend_table[i].name, qemu_opt_get(opts, "backend")) == 0)
             break;
@@ -2510,7 +2633,10 @@
         snprintf(base->label, len, "%s-base", qemu_opts_id(opts));
         chr = qemu_chr_open_mux(base);
         chr->filename = base->filename;
+        chr->avail_connections = MAX_MUX;
         QTAILQ_INSERT_TAIL(&chardevs, chr, next);
+    } else {
+        chr->avail_connections = 1;
     }
     chr->label = qemu_strdup(qemu_opts_id(opts));
     return chr;
@@ -2534,9 +2660,31 @@
     if (chr && qemu_opt_get_bool(opts, "mux", 0)) {
         monitor_init(chr, MONITOR_USE_READLINE);
     }
+    qemu_opts_del(opts);
     return chr;
 }
 
+void qemu_chr_set_echo(struct CharDriverState *chr, bool echo)
+{
+    if (chr->chr_set_echo) {
+        chr->chr_set_echo(chr, echo);
+    }
+}
+
+void qemu_chr_guest_open(struct CharDriverState *chr)
+{
+    if (chr->chr_guest_open) {
+        chr->chr_guest_open(chr);
+    }
+}
+
+void qemu_chr_guest_close(struct CharDriverState *chr)
+{
+    if (chr->chr_guest_close) {
+        chr->chr_guest_close(chr);
+    }
+}
+
 void qemu_chr_close(CharDriverState *chr)
 {
     QTAILQ_REMOVE(&chardevs, chr, next);
diff --git a/qemu-char.h b/qemu-char.h
index 7671824..30ea925 100644
--- a/qemu-char.h
+++ b/qemu-char.h
@@ -6,6 +6,7 @@
 #include "qemu-option.h"
 #include "qemu-config.h"
 #include "qobject.h"
+#include "qstring.h"
 
 /* character device */
 
@@ -63,11 +64,15 @@
     void (*chr_send_event)(struct CharDriverState *chr, int event);
     void (*chr_close)(struct CharDriverState *chr);
     void (*chr_accept_input)(struct CharDriverState *chr);
+    void (*chr_set_echo)(struct CharDriverState *chr, bool echo);
+    void (*chr_guest_open)(struct CharDriverState *chr);
+    void (*chr_guest_close)(struct CharDriverState *chr);
     void *opaque;
     QEMUBH *bh;
     char *label;
     char *filename;
     int opened;
+    int avail_connections;
     QTAILQ_ENTRY(CharDriverState) next;
 };
 
@@ -75,8 +80,12 @@
 CharDriverState *qemu_chr_open_opts(QemuOpts *opts,
                                     void (*init)(struct CharDriverState *s));
 CharDriverState *qemu_chr_open(const char *label, const char *filename, void (*init)(struct CharDriverState *s));
+void qemu_chr_set_echo(struct CharDriverState *chr, bool echo);
+void qemu_chr_guest_open(struct CharDriverState *chr);
+void qemu_chr_guest_close(struct CharDriverState *chr);
 void qemu_chr_close(CharDriverState *chr);
-void qemu_chr_printf(CharDriverState *s, const char *fmt, ...);
+void qemu_chr_printf(CharDriverState *s, const char *fmt, ...)
+    GCC_FMT_ATTR(2, 3);
 int qemu_chr_write(CharDriverState *s, const uint8_t *buf, int len);
 void qemu_chr_send_event(CharDriverState *s, int event);
 void qemu_chr_add_handlers(CharDriverState *s,
@@ -92,7 +101,7 @@
 int qemu_chr_get_msgfd(CharDriverState *s);
 void qemu_chr_accept_input(CharDriverState *s);
 void qemu_chr_info_print(Monitor *mon, const QObject *ret_data);
-void qemu_chr_info(Monitor *mon);
+void qemu_chr_info(Monitor *mon, QObject **ret_data);
 CharDriverState *qemu_chr_find(const char *name);
 
 /* add an eventfd to the qemu devices that are polled */
@@ -100,6 +109,12 @@
 
 extern int term_escape_char;
 
+/* memory chardev */
+void qemu_chr_init_mem(CharDriverState *chr);
+void qemu_chr_close_mem(CharDriverState *chr);
+QString *qemu_chr_mem_to_qs(CharDriverState *chr);
+size_t qemu_chr_mem_osize(const CharDriverState *chr);
+
 /* async I/O support */
 
 int qemu_set_fd_handler2(int fd,
diff --git a/qemu-common.h b/qemu-common.h
index f8db5d4..897d510 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -18,10 +18,8 @@
 typedef struct QEMUBH QEMUBH;
 typedef struct DeviceState DeviceState;
 
-/* Hack around the mess dyngen-exec.h causes: We need QEMU_NORETURN in files that
-   cannot include the following headers without conflicts. This condition has
-   to be removed once dyngen is gone. */
-#ifndef __DYNGEN_EXEC_H__
+struct Monitor;
+typedef struct Monitor Monitor;
 
 /* we put basic includes here to avoid repeating them in device drivers */
 #include <stdlib.h>
@@ -38,8 +36,17 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <sys/time.h>
 #include <assert.h>
 
+#ifdef _WIN32
+#include "qemu-os-win32.h"
+#endif
+
+#ifdef CONFIG_POSIX
+#include "qemu-os-posix.h"
+#endif
+
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 0
 #endif
@@ -55,6 +62,9 @@
 #if !defined(ENOTSUP)
 #define ENOTSUP 4096
 #endif
+#ifndef TIME_MAX
+#define TIME_MAX LONG_MAX
+#endif
 
 #ifndef CONFIG_IOVEC
 #define CONFIG_IOVEC
@@ -70,10 +80,29 @@
 #include <sys/uio.h>
 #endif
 
+#if defined __GNUC__
+# if (__GNUC__ < 4) || \
+     defined(__GNUC_MINOR__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 4)
+   /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
+#  define GCC_ATTR __attribute__((__unused__, format(printf, 1, 2)))
+#  define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
+# else
+   /* Use gnu_printf when supported (qemu uses standard format strings). */
+#  define GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
+#  define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+# endif
+#else
+#define GCC_ATTR /**/
+#define GCC_FMT_ATTR(n, m)
+#endif
+
+typedef int (*fprintf_function)(FILE *f, const char *fmt, ...)
+    GCC_FMT_ATTR(2, 3);
+
 #ifdef _WIN32
 #define fsync _commit
 #define lseek _lseeki64
-extern int qemu_ftruncate64(int, int64_t);
+int qemu_ftruncate64(int, int64_t);
 #define ftruncate qemu_ftruncate64
 
 static inline char *realpath(const char *path, char *resolved_path)
@@ -122,8 +151,6 @@
 int qemu_bh_poll(void);
 void qemu_bh_update_timeout(int *timeout);
 
-uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c);
-
 void qemu_get_timedate(struct tm *tm, int offset);
 int qemu_timedate_diff(struct tm *tm);
 
@@ -138,6 +165,21 @@
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 
+/*
+ * strtosz() suffixes used to specify the default treatment of an
+ * argument passed to strtosz() without an explicit suffix.
+ * These should be defined using upper case characters in the range
+ * A-Z, as strtosz() will use qemu_toupper() on the given argument
+ * prior to comparison.
+ */
+#define STRTOSZ_DEFSUFFIX_TB	'T'
+#define STRTOSZ_DEFSUFFIX_GB	'G'
+#define STRTOSZ_DEFSUFFIX_MB	'M'
+#define STRTOSZ_DEFSUFFIX_KB	'K'
+#define STRTOSZ_DEFSUFFIX_B	'B'
+int64_t strtosz(const char *nptr, char **end);
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix);
+
 /* path.c */
 void init_paths(const char *prefix);
 const char *path(const char *pathname);
@@ -158,6 +200,12 @@
 #define qemu_isascii(c)		isascii((unsigned char)(c))
 #define qemu_toascii(c)		toascii((unsigned char)(c))
 
+#ifdef _WIN32
+/* ffs() in oslib-win32.c for WIN32, strings.h for the rest of the world */
+int ffs(int i);
+#endif
+
+void *qemu_oom_check(void *ptr);
 void *qemu_malloc(size_t size);
 void *qemu_realloc(void *ptr, size_t size);
 void *qemu_mallocz(size_t size);
@@ -174,6 +222,7 @@
 void qemu_set_cloexec(int fd);
 
 #ifndef _WIN32
+int qemu_add_child_watch(pid_t pid);
 int qemu_eventfd(int pipefd[2]);
 int qemu_pipe(int pipefd[2]);
 #endif
@@ -183,14 +232,16 @@
 
 /* Error handling.  */
 
-void QEMU_NORETURN hw_error(const char *fmt, ...)
-    __attribute__ ((__format__ (__printf__, 1, 2)));
+void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 
 /* IO callbacks.  */
 typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
 typedef int IOCanReadHandler(void *opaque);
 typedef void IOHandler(void *opaque);
 
+void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds);
+void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc);
+
 struct ParallelIOArg {
     void *buffer;
     int count;
@@ -281,14 +332,22 @@
 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
+void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
+    size_t size);
 void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
 void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
+void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count);
+void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
+                            size_t skip);
 
-struct Monitor;
-typedef struct Monitor Monitor;
+/* OS specific functions */
+void os_setup_early_signal_handling(void);
+char *os_find_datadir(const char *argv0);
+void os_parse_cmd_args(int index, const char *optarg);
+void os_pidfile_error(void);
 
 /* Convert a byte between binary and BCD.  */
 static inline uint8_t to_bcd(uint8_t val)
@@ -301,9 +360,31 @@
     return ((val >> 4) * 10) + (val & 0x0f);
 }
 
-#include "module.h"
+/* compute with 96 bit intermediate result: (a*b)/c */
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef HOST_WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif
+        } l;
+    } u, res;
+    uint64_t rl, rh;
 
-#endif /* dyngen-exec.h hack */
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+#include "module.h"
 
 typedef enum DisplayType
 {
diff --git a/qemu-config.c b/qemu-config.c
index 1efdbec..14d3419 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -2,7 +2,6 @@
 #include "qemu-error.h"
 #include "qemu-option.h"
 #include "qemu-config.h"
-#include "sysemu.h"
 #include "hw/qdev.h"
 
 static QemuOptsList qemu_drive_opts = {
@@ -146,6 +145,12 @@
         },{
             .name = "signal",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "name",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "debug",
+            .type = QEMU_OPT_NUMBER,
         },
         { /* end of list */ }
     },
@@ -283,11 +288,29 @@
         },{
             .name = "default",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "pretty",
+            .type = QEMU_OPT_BOOL,
         },
         { /* end of list */ }
     },
 };
 
+#ifdef CONFIG_SIMPLE_TRACE
+static QemuOptsList qemu_trace_opts = {
+    .name = "trace",
+    .implied_opt_name = "trace",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_trace_opts.head),
+    .desc = {
+        {
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end if list */ }
+    },
+};
+#endif
+
 static QemuOptsList qemu_cpudef_opts = {
     .name = "cpudef",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_cpudef_opts.head),
@@ -336,6 +359,97 @@
     },
 };
 
+QemuOptsList qemu_spice_opts = {
+    .name = "spice",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_spice_opts.head),
+    .desc = {
+        {
+            .name = "port",
+            .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "tls-port",
+            .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "addr",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "ipv4",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "ipv6",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "password",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "disable-ticketing",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "x509-dir",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "x509-key-file",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "x509-key-password",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "x509-cert-file",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "x509-cacert-file",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "x509-dh-key-file",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "tls-ciphers",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "tls-channel",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "plaintext-channel",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "image-compression",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "jpeg-wan-compression",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "zlib-glz-wan-compression",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "streaming-video",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "agent-mouse",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "playback-compression",
+            .type = QEMU_OPT_BOOL,
+        },
+        { /* end if list */ }
+    },
+};
+
+QemuOptsList qemu_option_rom_opts = {
+    .name = "option-rom",
+    .implied_opt_name = "romfile",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_option_rom_opts.head),
+    .desc = {
+        {
+            .name = "bootindex",
+            .type = QEMU_OPT_NUMBER,
+        }, {
+            .name = "romfile",
+            .type = QEMU_OPT_STRING,
+        },
+        { /* end if list */ }
+    },
+};
+
 static QemuOptsList *vm_config_groups[32] = {
     &qemu_drive_opts,
     &qemu_chardev_opts,
@@ -346,6 +460,10 @@
     &qemu_global_opts,
     &qemu_mon_opts,
     &qemu_cpudef_opts,
+#ifdef CONFIG_SIMPLE_TRACE
+    &qemu_trace_opts,
+#endif
+    &qemu_option_rom_opts,
     NULL,
 };
 
diff --git a/qemu-config.h b/qemu-config.h
index 533a049..20d707f 100644
--- a/qemu-config.h
+++ b/qemu-config.h
@@ -3,6 +3,7 @@
 
 extern QemuOptsList qemu_fsdev_opts;
 extern QemuOptsList qemu_virtfs_opts;
+extern QemuOptsList qemu_spice_opts;
 
 QemuOptsList *qemu_find_opts(const char *group);
 void qemu_add_opts(QemuOptsList *list);
diff --git a/qemu-error.c b/qemu-error.c
index 5a35e7c..41c191d 100644
--- a/qemu-error.c
+++ b/qemu-error.c
@@ -12,7 +12,6 @@
 
 #include <stdio.h>
 #include "monitor.h"
-#include "sysemu.h"
 
 /*
  * Print to current monitor if we have one, else to stderr.
diff --git a/qemu-error.h b/qemu-error.h
index a45609f..4d5c537 100644
--- a/qemu-error.h
+++ b/qemu-error.h
@@ -30,12 +30,11 @@
 void loc_set_cmdline(char **argv, int idx, int cnt);
 void loc_set_file(const char *fname, int lno);
 
-void error_vprintf(const char *fmt, va_list ap);
-void error_printf(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
-void error_printf_unless_qmp(const char *fmt, ...)
-    __attribute__ ((format(printf, 1, 2)));
+void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
+void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 void error_print_loc(void);
 void error_set_progname(const char *argv0);
-void error_report(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
+void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
 
 #endif
diff --git a/qemu-io.c b/qemu-io.c
index 2dbe20f..376b0ab 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -61,7 +61,7 @@
 
 	if (misalign)
 		len += MISALIGN_OFFSET;
-	buf = qemu_memalign(512, len);
+	buf = qemu_blockalign(bs, len);
 	memset(buf, pattern, len);
 	if (misalign)
 		buf += MISALIGN_OFFSET;
@@ -326,7 +326,7 @@
 " -l, -- length for pattern verification (only with -P)\n"
 " -p, -- use bdrv_pread to read the file\n"
 " -P, -- use a pattern to verify read data\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 " -s, -- start offset for pattern verification (only with -P)\n"
 " -v, -- dump buffer to standard output\n"
 "\n");
@@ -509,7 +509,7 @@
 " -C, -- report statistics in a machine parsable format\n"
 " -P, -- use a pattern to verify read data\n"
 " -v, -- dump buffer to standard output\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 "\n");
 }
 
@@ -633,7 +633,7 @@
 " -p, -- use bdrv_pwrite to write the file\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 "\n");
 }
 
@@ -765,7 +765,7 @@
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 "\n");
 }
 
@@ -1100,7 +1100,7 @@
 " -C, -- report statistics in a machine parsable format\n"
 " -P, -- use a pattern to verify read data\n"
 " -v, -- dump buffer to standard output\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 "\n");
 }
 
@@ -1131,8 +1131,10 @@
 		case 'P':
 			ctx->Pflag = 1;
 			ctx->pattern = parse_pattern(optarg);
-			if (ctx->pattern < 0)
+			if (ctx->pattern < 0) {
+                                free(ctx);
 				return 0;
+                        }
 			break;
 		case 'q':
 			ctx->qflag = 1;
@@ -1198,7 +1200,7 @@
 " used to ensure all outstanding aio requests have been completed\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
-" -q, -- quite mode, do not show I/O statistics\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
 "\n");
 }
 
@@ -1394,6 +1396,93 @@
 	.oneline	= "prints information about the current file",
 };
 
+static void
+discard_help(void)
+{
+	printf(
+"\n"
+" discards a range of bytes from the given offset\n"
+"\n"
+" Example:\n"
+" 'discard 512 1k' - discards 1 kilobyte from 512 bytes into the file\n"
+"\n"
+" Discards a segment of the currently open file.\n"
+" -C, -- report statistics in a machine parsable format\n"
+" -q, -- quiet mode, do not show I/O statistics\n"
+"\n");
+}
+
+static int discard_f(int argc, char **argv);
+
+static const cmdinfo_t discard_cmd = {
+	.name		= "discard",
+	.altname	= "d",
+	.cfunc		= discard_f,
+	.argmin		= 2,
+	.argmax		= -1,
+	.args		= "[-Cq] off len",
+	.oneline	= "discards a number of bytes at a specified offset",
+	.help		= discard_help,
+};
+
+static int
+discard_f(int argc, char **argv)
+{
+	struct timeval t1, t2;
+	int Cflag = 0, qflag = 0;
+	int c, ret;
+	int64_t offset;
+	int count;
+
+	while ((c = getopt(argc, argv, "Cq")) != EOF) {
+		switch (c) {
+		case 'C':
+			Cflag = 1;
+			break;
+		case 'q':
+			qflag = 1;
+			break;
+		default:
+			return command_usage(&discard_cmd);
+		}
+	}
+
+	if (optind != argc - 2) {
+		return command_usage(&discard_cmd);
+	}
+
+	offset = cvtnum(argv[optind]);
+	if (offset < 0) {
+		printf("non-numeric length argument -- %s\n", argv[optind]);
+		return 0;
+	}
+
+	optind++;
+	count = cvtnum(argv[optind]);
+	if (count < 0) {
+		printf("non-numeric length argument -- %s\n", argv[optind]);
+		return 0;
+	}
+
+	gettimeofday(&t1, NULL);
+	ret = bdrv_discard(bs, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS);
+	gettimeofday(&t2, NULL);
+
+	if (ret < 0) {
+		printf("discard failed: %s\n", strerror(-ret));
+		goto out;
+	}
+
+	/* Finally, report back -- -C gives a parsable format */
+	if (!qflag) {
+		t2 = tsub(t2, t1);
+		print_report("discard", &t2, offset, count, count, 1, Cflag);
+	}
+
+out:
+	return 0;
+}
+
 static int
 alloc_f(int argc, char **argv)
 {
@@ -1446,6 +1535,44 @@
 };
 
 static int
+map_f(int argc, char **argv)
+{
+	int64_t offset;
+	int64_t nb_sectors;
+	char s1[64];
+	int num, num_checked;
+	int ret;
+	const char *retstr;
+
+	offset = 0;
+	nb_sectors = bs->total_sectors;
+
+	do {
+		num_checked = MIN(nb_sectors, INT_MAX);
+		ret = bdrv_is_allocated(bs, offset, num_checked, &num);
+		retstr = ret ? "    allocated" : "not allocated";
+		cvtstr(offset << 9ULL, s1, sizeof(s1));
+		printf("[% 24" PRId64 "] % 8d/% 8d sectors %s at offset %s (%d)\n",
+				offset << 9ULL, num, num_checked, retstr, s1, ret);
+
+		offset += num;
+		nb_sectors -= num;
+	} while(offset < bs->total_sectors);
+
+	return 0;
+}
+
+static const cmdinfo_t map_cmd = {
+       .name           = "map",
+       .argmin         = 0,
+       .argmax         = 0,
+       .cfunc          = map_f,
+       .args           = "",
+       .oneline        = "prints the allocated areas of a file",
+};
+
+
+static int
 close_f(int argc, char **argv)
 {
 	bdrv_close(bs);
@@ -1682,7 +1809,9 @@
 	add_command(&truncate_cmd);
 	add_command(&length_cmd);
 	add_command(&info_cmd);
+	add_command(&discard_cmd);
 	add_command(&alloc_cmd);
+	add_command(&map_cmd);
 
 	add_args_command(init_args_command);
 	add_check_command(init_check_command);
diff --git a/qemu-lock.h b/qemu-lock.h
index 9a3e6ac..a72edda 100644
--- a/qemu-lock.h
+++ b/qemu-lock.h
@@ -15,15 +15,11 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>
  */
 
-/* Locking primitives.  Most of this code should be redundant -
-   system emulation doesn't need/use locking, NPTL userspace uses
-   pthread mutexes, and non-NPTL userspace isn't threadsafe anyway.
-   In either case a spinlock is probably the wrong kind of lock.
-   Spinlocks are only good if you know annother CPU has the lock and is
-   likely to release it soon.  In environments where you have more threads
-   than physical CPUs (the extreme case being a single CPU host) a spinlock
-   simply wastes CPU until the OS decides to preempt it.  */
-#if defined(CONFIG_USE_NPTL)
+/* configure guarantees us that we have pthreads on any host except
+ * mingw32, which doesn't support any of the user-only targets.
+ * So we can simply assume we have pthread mutexes here.
+ */
+#if defined(CONFIG_USER_ONLY)
 
 #include <pthread.h>
 #define spin_lock pthread_mutex_lock
@@ -33,203 +29,15 @@
 
 #else
 
-#if defined(__hppa__)
-
-typedef int spinlock_t[4];
-
-#define SPIN_LOCK_UNLOCKED { 1, 1, 1, 1 }
-
-static inline void resetlock (spinlock_t *p)
-{
-    (*p)[0] = (*p)[1] = (*p)[2] = (*p)[3] = 1;
-}
-
-#else
-
+/* Empty implementations, on the theory that system mode emulation
+ * is single-threaded. This means that these functions should only
+ * be used from code run in the TCG cpu thread, and cannot protect
+ * data structures which might also be accessed from the IO thread
+ * or from signal handlers.
+ */
 typedef int spinlock_t;
-
 #define SPIN_LOCK_UNLOCKED 0
 
-static inline void resetlock (spinlock_t *p)
-{
-    *p = SPIN_LOCK_UNLOCKED;
-}
-
-#endif
-
-#if defined(_ARCH_PPC)
-static inline int testandset (int *p)
-{
-    int ret;
-    __asm__ __volatile__ (
-                          "      lwarx %0,0,%1\n"
-                          "      xor. %0,%3,%0\n"
-                          "      bne $+12\n"
-                          "      stwcx. %2,0,%1\n"
-                          "      bne- $-16\n"
-                          : "=&r" (ret)
-                          : "r" (p), "r" (1), "r" (0)
-                          : "cr0", "memory");
-    return ret;
-}
-#elif defined(__i386__)
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#elif defined(__x86_64__)
-static inline int testandset (int *p)
-{
-    long int readval = 0;
-
-    __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
-                          : "+m" (*p), "+a" (readval)
-                          : "r" (1)
-                          : "cc");
-    return readval;
-}
-#elif defined(__s390__)
-static inline int testandset (int *p)
-{
-    int ret;
-
-    __asm__ __volatile__ ("0: cs    %0,%1,0(%2)\n"
-			  "   jl    0b"
-			  : "=&d" (ret)
-			  : "r" (1), "a" (p), "0" (*p)
-			  : "cc", "memory" );
-    return ret;
-}
-#elif defined(__alpha__)
-static inline int testandset (int *p)
-{
-    int ret;
-    unsigned long one;
-
-    __asm__ __volatile__ ("0:	mov 1,%2\n"
-			  "	ldl_l %0,%1\n"
-			  "	stl_c %2,%1\n"
-			  "	beq %2,1f\n"
-			  ".subsection 2\n"
-			  "1:	br 0b\n"
-			  ".previous"
-			  : "=r" (ret), "=m" (*p), "=r" (one)
-			  : "m" (*p));
-    return ret;
-}
-#elif defined(__sparc__)
-static inline int testandset (int *p)
-{
-	int ret;
-
-	__asm__ __volatile__("ldstub	[%1], %0"
-			     : "=r" (ret)
-			     : "r" (p)
-			     : "memory");
-
-	return (ret ? 1 : 0);
-}
-#elif defined(__arm__)
-static inline int testandset (int *spinlock)
-{
-    register unsigned int ret;
-    __asm__ __volatile__("swp %0, %1, [%2]"
-                         : "=r"(ret)
-                         : "0"(1), "r"(spinlock));
-
-    return ret;
-}
-#elif defined(__mc68000)
-static inline int testandset (int *p)
-{
-    char ret;
-    __asm__ __volatile__("tas %1; sne %0"
-                         : "=r" (ret)
-                         : "m" (p)
-                         : "cc","memory");
-    return ret;
-}
-#elif defined(__hppa__)
-
-/* Because malloc only guarantees 8-byte alignment for malloc'd data,
-   and GCC only guarantees 8-byte alignment for stack locals, we can't
-   be assured of 16-byte alignment for atomic lock data even if we
-   specify "__attribute ((aligned(16)))" in the type declaration.  So,
-   we use a struct containing an array of four ints for the atomic lock
-   type and dynamically select the 16-byte aligned int from the array
-   for the semaphore.  */
-#define __PA_LDCW_ALIGNMENT 16
-static inline void *ldcw_align (void *p) {
-    unsigned long a = (unsigned long)p;
-    a = (a + __PA_LDCW_ALIGNMENT - 1) & ~(__PA_LDCW_ALIGNMENT - 1);
-    return (void *)a;
-}
-
-static inline int testandset (spinlock_t *p)
-{
-    unsigned int ret;
-    p = ldcw_align(p);
-    __asm__ __volatile__("ldcw 0(%1),%0"
-                         : "=r" (ret)
-                         : "r" (p)
-                         : "memory" );
-    return !ret;
-}
-
-#elif defined(__ia64)
-
-#include <ia64intrin.h>
-
-static inline int testandset (int *p)
-{
-    return __sync_lock_test_and_set (p, 1);
-}
-#elif defined(__mips__)
-static inline int testandset (int *p)
-{
-    int ret;
-
-    __asm__ __volatile__ (
-	"	.set push		\n"
-	"	.set noat		\n"
-	"	.set mips2		\n"
-	"1:	li	$1, 1		\n"
-	"	ll	%0, %1		\n"
-	"	sc	$1, %1		\n"
-	"	beqz	$1, 1b		\n"
-	"	.set pop		"
-	: "=r" (ret), "+R" (*p)
-	:
-	: "memory");
-
-    return ret;
-}
-#else
-#error unimplemented CPU support
-#endif
-
-#if defined(CONFIG_USER_ONLY)
-static inline void spin_lock(spinlock_t *lock)
-{
-    while (testandset(lock));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-    resetlock(lock);
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return !testandset(lock);
-}
-#else
 static inline void spin_lock(spinlock_t *lock)
 {
 }
@@ -238,10 +46,4 @@
 {
 }
 
-static inline int spin_trylock(spinlock_t *lock)
-{
-    return 1;
-}
-#endif
-
 #endif
diff --git a/qemu-malloc.c b/qemu-malloc.c
index 36b0b36..8749fc8 100644
--- a/qemu-malloc.c
+++ b/qemu-malloc.c
@@ -24,16 +24,9 @@
 #include "qemu-common.h"
 #include <stdlib.h>
 
-static void *oom_check(void *ptr)
-{
-    if (ptr == NULL) {
-        abort();
-    }
-    return ptr;
-}
-
 void qemu_free(void *ptr)
 {
+    //trace_qemu_free(ptr);
     free(ptr);
 }
 
@@ -48,26 +41,35 @@
 
 void *qemu_malloc(size_t size)
 {
+    void *ptr;
     if (!size && !allow_zero_malloc()) {
         abort();
     }
-    return oom_check(malloc(size ? size : 1));
+    ptr = qemu_oom_check(malloc(size ? size : 1));
+    //trace_qemu_malloc(size, ptr);
+    return ptr;
 }
 
 void *qemu_realloc(void *ptr, size_t size)
 {
+    void *newptr;
     if (!size && !allow_zero_malloc()) {
         abort();
     }
-    return oom_check(realloc(ptr, size ? size : 1));
+    newptr = qemu_oom_check(realloc(ptr, size ? size : 1));
+    //trace_qemu_realloc(ptr, size, newptr);
+    return newptr;
 }
 
 void *qemu_mallocz(size_t size)
 {
+    void *ptr;
     if (!size && !allow_zero_malloc()) {
         abort();
     }
-    return oom_check(calloc(1, size ? size : 1));
+    ptr = qemu_oom_check(calloc(1, size ? size : 1));
+    //trace_qemu_malloc(size, ptr);
+    return ptr;
 }
 
 char *qemu_strdup(const char *str)
diff --git a/qemu-option.c b/qemu-option.c
index 1f8f41a..65db542 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -394,8 +394,8 @@
 /*
  * Parses a parameter string (param) into an option list (dest).
  *
- * list is the templace is. If dest is NULL, a new copy of list is created for
- * it. If list is NULL, this function fails.
+ * list is the template option list. If dest is NULL, a new copy of list is
+ * created. If list is NULL, this function fails.
  *
  * A parameter string consists of one or more parameters, separated by commas.
  * Each parameter consists of its name and possibly of a value. In the latter
@@ -416,20 +416,13 @@
     char value[256];
     char *param_delim, *value_delim;
     char next_delim;
-    size_t num_options;
 
     if (list == NULL) {
         return NULL;
     }
 
     if (dest == NULL) {
-        // Count valid options
-        num_options = count_option_parameters(list);
-
-        // Create a copy of the option list to fill in values
-        dest = qemu_mallocz((num_options + 1) * sizeof(QEMUOptionParameter));
-        allocated = dest;
-        memcpy(dest, list, (num_options + 1) * sizeof(QEMUOptionParameter));
+        dest = allocated = append_option_parameters(NULL, list);
     }
 
     while (*param) {
diff --git a/qemu-options.h b/qemu-options.h
new file mode 100644
index 0000000..d538f91
--- /dev/null
+++ b/qemu-options.h
@@ -0,0 +1,41 @@
+/*
+ * qemu-options.h
+ *
+ * Defines needed for command line argument processing.
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef _QEMU_OPTIONS_H_
+#define _QEMU_OPTIONS_H_
+
+enum {
+#define DEF(option, opt_arg, opt_enum, opt_help)     \
+    opt_enum,
+#define DEFHEADING(text)
+#include "qemu-options.def"
+#undef DEF
+#undef DEFHEADING
+#undef GEN_DOCS
+};
+
+#endif
diff --git a/qemu-os-posix.h b/qemu-os-posix.h
new file mode 100644
index 0000000..81fd9ab
--- /dev/null
+++ b/qemu-os-posix.h
@@ -0,0 +1,54 @@
+/*
+ * posix specific declarations
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OS_POSIX_H
+#define QEMU_OS_POSIX_H
+
+static inline void os_host_main_loop_wait(int *timeout)
+{
+}
+
+void os_set_line_buffering(void);
+void os_set_proc_name(const char *s);
+void os_setup_signal_handling(void);
+void os_daemonize(void);
+void os_setup_post(void);
+
+typedef struct timeval qemu_timeval;
+#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+
+#ifndef CONFIG_UTIMENSAT
+#ifndef UTIME_NOW
+# define UTIME_NOW     ((1l << 30) - 1l)
+#endif
+#ifndef UTIME_OMIT
+# define UTIME_OMIT    ((1l << 30) - 2l)
+#endif
+#endif
+typedef struct timespec qemu_timespec;
+int qemu_utimensat(int dirfd, const char *path, const qemu_timespec *times,
+    int flags);
+
+#endif
diff --git a/qemu-os-win32.h b/qemu-os-win32.h
new file mode 100644
index 0000000..ed2753d
--- /dev/null
+++ b/qemu-os-win32.h
@@ -0,0 +1,66 @@
+/*
+ * win32 specific declarations
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OS_WIN32_H
+#define QEMU_OS_WIN32_H
+
+#include <windows.h>
+#include <winsock2.h>
+
+/* Polling handling */
+
+/* return TRUE if no sleep should be done afterwards */
+typedef int PollingFunc(void *opaque);
+
+int qemu_add_polling_cb(PollingFunc *func, void *opaque);
+void qemu_del_polling_cb(PollingFunc *func, void *opaque);
+
+/* Wait objects handling */
+typedef void WaitObjectFunc(void *opaque);
+
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+
+void os_host_main_loop_wait(int *timeout);
+
+static inline void os_setup_signal_handling(void) {}
+static inline void os_daemonize(void) {}
+static inline void os_setup_post(void) {}
+void os_set_line_buffering(void);
+static inline void os_set_proc_name(const char *dummy) {}
+
+#if !defined(EPROTONOSUPPORT)
+# define EPROTONOSUPPORT EINVAL
+#endif
+
+int setenv(const char *name, const char *value, int overwrite);
+
+typedef struct {
+    long tv_sec;
+    long tv_usec;
+} qemu_timeval;
+int qemu_gettimeofday(qemu_timeval *tp);
+
+#endif
diff --git a/qemu-sockets.c b/qemu-sockets.c
index c4c0f65..13c0b93 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -665,24 +665,28 @@
 int unix_listen_opts(QemuOpts *opts)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
     return -1;
 }
 
 int unix_connect_opts(QemuOpts *opts)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
     return -1;
 }
 
 int unix_listen(const char *path, char *ostr, int olen)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
     return -1;
 }
 
 int unix_connect(const char *path)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
     return -1;
 }
 
diff --git a/qemu-timer-common.c b/qemu-timer-common.c
new file mode 100644
index 0000000..755e300
--- /dev/null
+++ b/qemu-timer-common.c
@@ -0,0 +1,63 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-timer.h"
+
+/***********************************************************/
+/* real time host monotonic timer */
+
+#ifdef _WIN32
+
+int64_t clock_freq;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    LARGE_INTEGER freq;
+    int ret;
+    ret = QueryPerformanceFrequency(&freq);
+    if (ret == 0) {
+        fprintf(stderr, "Could not calibrate ticks\n");
+        exit(1);
+    }
+    clock_freq = freq.QuadPart;
+}
+
+#else
+
+int use_rt_clock;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    use_rt_clock = 0;
+#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
+    || defined(__DragonFly__) || defined(__FreeBSD_kernel__) \
+    || defined(__OpenBSD__)
+    {
+        struct timespec ts;
+        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+            use_rt_clock = 1;
+        }
+    }
+#endif
+}
+#endif
diff --git a/qemu-timer.c b/qemu-timer.c
index 82a5de3..4e51dd3 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -64,78 +64,6 @@
 static QEMUTimer *icount_rt_timer;
 static QEMUTimer *icount_vm_timer;
 
-
-/***********************************************************/
-/* real time host monotonic timer */
-
-
-static int64_t get_clock_realtime(void)
-{
-    struct timeval tv;
-
-    gettimeofday(&tv, NULL);
-    return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000);
-}
-
-#ifdef WIN32
-
-static int64_t clock_freq;
-
-static void init_get_clock(void)
-{
-    LARGE_INTEGER freq;
-    int ret;
-    ret = QueryPerformanceFrequency(&freq);
-    if (ret == 0) {
-        fprintf(stderr, "Could not calibrate ticks\n");
-        exit(1);
-    }
-    clock_freq = freq.QuadPart;
-}
-
-static int64_t get_clock(void)
-{
-    LARGE_INTEGER ti;
-    QueryPerformanceCounter(&ti);
-    return muldiv64(ti.QuadPart, get_ticks_per_sec(), clock_freq);
-}
-
-#else
-
-static int use_rt_clock;
-
-static void init_get_clock(void)
-{
-    use_rt_clock = 0;
-#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
-    || defined(__DragonFly__) || defined(__FreeBSD_kernel__)
-    {
-        struct timespec ts;
-        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
-            use_rt_clock = 1;
-        }
-    }
-#endif
-}
-
-static int64_t get_clock(void)
-{
-#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
-	|| defined(__DragonFly__) || defined(__FreeBSD_kernel__)
-    if (use_rt_clock) {
-        struct timespec ts;
-        clock_gettime(CLOCK_MONOTONIC, &ts);
-        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
-    } else
-#endif
-    {
-        /* XXX: using gettimeofday leads to problems if the date
-           changes, so it should be avoided. */
-        return get_clock_realtime();
-    }
-}
-#endif
-
 /***********************************************************/
 /* guest cycle counter */
 
@@ -259,12 +187,14 @@
 struct QEMUClock {
     int type;
     int enabled;
-    /* XXX: add frequency */
+
+    QEMUTimer *warp_timer;
 };
 
 struct QEMUTimer {
     QEMUClock *clock;
-    int64_t expire_time;
+    int64_t expire_time;	/* in nanoseconds */
+    int scale;
     QEMUTimerCB *cb;
     void *opaque;
     struct QEMUTimer *next;
@@ -275,14 +205,23 @@
     int (*start)(struct qemu_alarm_timer *t);
     void (*stop)(struct qemu_alarm_timer *t);
     void (*rearm)(struct qemu_alarm_timer *t);
-    void *priv;
-
+#if defined(__linux__)
+    int fd;
+    timer_t timer;
+#elif defined(_WIN32)
+    HANDLE timer;
+#endif
     char expired;
     char pending;
 };
 
 static struct qemu_alarm_timer *alarm_timer;
 
+static bool qemu_timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
+{
+    return timer_head && (timer_head->expire_time <= current_time);
+}
+
 int qemu_alarm_pending(void)
 {
     return alarm_timer->pending;
@@ -301,15 +240,14 @@
     t->rearm(t);
 }
 
-/* TODO: MIN_TIMER_REARM_US should be optimized */
-#define MIN_TIMER_REARM_US 250
+/* TODO: MIN_TIMER_REARM_NS should be optimized */
+#define MIN_TIMER_REARM_NS 250000
 
 #ifdef _WIN32
 
-struct qemu_alarm_win32 {
-    MMRESULT timerId;
-    unsigned int period;
-} alarm_win32_data = {0, 0};
+static int mm_start_timer(struct qemu_alarm_timer *t);
+static void mm_stop_timer(struct qemu_alarm_timer *t);
+static void mm_rearm_timer(struct qemu_alarm_timer *t);
 
 static int win32_start_timer(struct qemu_alarm_timer *t);
 static void win32_stop_timer(struct qemu_alarm_timer *t);
@@ -353,7 +291,7 @@
         return;
 
     cur_time = cpu_get_clock();
-    cur_icount = qemu_get_clock(vm_clock);
+    cur_icount = qemu_get_clock_ns(vm_clock);
     delta = cur_icount - cur_time;
     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
     if (delta > 0
@@ -375,14 +313,14 @@
 static void icount_adjust_rt(void * opaque)
 {
     qemu_mod_timer(icount_rt_timer,
-                   qemu_get_clock(rt_clock) + 1000);
+                   qemu_get_clock_ms(rt_clock) + 1000);
     icount_adjust();
 }
 
 static void icount_adjust_vm(void * opaque)
 {
     qemu_mod_timer(icount_vm_timer,
-                   qemu_get_clock(vm_clock) + get_ticks_per_sec() / 10);
+                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
     icount_adjust();
 }
 
@@ -395,24 +333,24 @@
 #ifndef _WIN32
 #ifdef __linux__
     /* HPET - if available - is preferred */
-    {"hpet", hpet_start_timer, hpet_stop_timer, NULL, NULL},
+    {"hpet", hpet_start_timer, hpet_stop_timer, NULL},
     /* ...otherwise try RTC */
-    {"rtc", rtc_start_timer, rtc_stop_timer, NULL, NULL},
+    {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
 #endif
-    {"unix", unix_start_timer, unix_stop_timer, NULL, NULL},
+    {"unix", unix_start_timer, unix_stop_timer, NULL},
 #ifdef __linux__
     /* on Linux, the 'dynticks' clock sometimes doesn't work
      * properly. this results in the UI freezing while emulation
      * continues, for several seconds... So move it to the end
      * of the list. */
     {"dynticks", dynticks_start_timer,
-     dynticks_stop_timer, dynticks_rearm_timer, NULL},
+     dynticks_stop_timer, dynticks_rearm_timer},
 #endif
 #else
-    {"dynticks", win32_start_timer,
-     win32_stop_timer, win32_rearm_timer, &alarm_win32_data},
-    {"win32", win32_start_timer,
-     win32_stop_timer, NULL, &alarm_win32_data},
+    {"mmtimer", mm_start_timer, mm_stop_timer, NULL},
+    {"mmtimer2", mm_start_timer, mm_stop_timer, mm_rearm_timer},
+    {"dynticks", win32_start_timer, win32_stop_timer, win32_rearm_timer},
+    {"win32", win32_start_timer, win32_stop_timer, NULL},
 #endif
     {NULL, }
 };
@@ -503,7 +441,92 @@
     clock->enabled = enabled;
 }
 
-QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque)
+static int64_t vm_clock_warp_start;
+
+static void icount_warp_rt(void *opaque)
+{
+    if (vm_clock_warp_start == -1) {
+        return;
+    }
+
+    if (vm_running) {
+        int64_t clock = qemu_get_clock_ns(rt_clock);
+        int64_t warp_delta = clock - vm_clock_warp_start;
+        if (use_icount == 1) {
+            qemu_icount_bias += warp_delta;
+        } else {
+            /*
+             * In adaptive mode, do not let the vm_clock run too
+             * far ahead of real time.
+             */
+            int64_t cur_time = cpu_get_clock();
+            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
+            int64_t delta = cur_time - cur_icount;
+            qemu_icount_bias += MIN(warp_delta, delta);
+        }
+        if (qemu_timer_expired(active_timers[QEMU_CLOCK_VIRTUAL],
+                               qemu_get_clock_ns(vm_clock))) {
+            qemu_notify_event();
+        }
+    }
+    vm_clock_warp_start = -1;
+}
+
+void qemu_clock_warp(QEMUClock *clock)
+{
+    int64_t deadline;
+
+    if (!clock->warp_timer) {
+        return;
+    }
+
+    /*
+     * There are too many global variables to make the "warp" behavior
+     * applicable to other clocks.  But a clock argument removes the
+     * need for if statements all over the place.
+     */
+    assert(clock == vm_clock);
+
+    /*
+     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
+     * ensures that the deadline for the timer is computed correctly below.
+     * This also makes sure that the insn counter is synchronized before the
+     * CPU starts running, in case the CPU is woken by an event other than
+     * the earliest vm_clock timer.
+     */
+    icount_warp_rt(NULL);
+    if (qemu_cpu_has_work(cpu_single_env) || !active_timers[clock->type]) {
+        qemu_del_timer(clock->warp_timer);
+        return;
+    }
+
+    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
+    deadline = qemu_next_icount_deadline();
+    if (deadline > 0) {
+        /*
+         * Ensure the vm_clock proceeds even when the virtual CPU goes to
+         * sleep.  Otherwise, the CPU might be waiting for a future timer
+         * interrupt to wake it up, but the interrupt never comes because
+         * the vCPU isn't running any insns and thus doesn't advance the
+         * vm_clock.
+         *
+         * An extreme solution for this problem would be to never let VCPUs
+         * sleep in icount mode if there is a pending vm_clock timer; rather
+         * time could just advance to the next vm_clock event.  Instead, we
+         * do stop VCPUs and only advance vm_clock after some "real" time,
+         * (related to the time left until the next event) has passed.  This
+         * rt_clock timer will do this.  This avoids that the warps are too
+         * visible externally---for example, you will not be sending network
+         * packets continously instead of every 100ms.
+         */
+        qemu_mod_timer(clock->warp_timer, vm_clock_warp_start + deadline);
+    } else {
+        qemu_notify_event();
+    }
+}
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
+                          QEMUTimerCB *cb, void *opaque)
 {
     QEMUTimer *ts;
 
@@ -511,6 +534,7 @@
     ts->clock = clock;
     ts->cb = cb;
     ts->opaque = opaque;
+    ts->scale = scale;
     return ts;
 }
 
@@ -541,7 +565,7 @@
 
 /* modify the current timer so that it will be fired when current_time
    >= expire_time. The corresponding callback will be called. */
-void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time)
+static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time)
 {
     QEMUTimer **pt, *t;
 
@@ -553,10 +577,9 @@
     pt = &active_timers[ts->clock->type];
     for(;;) {
         t = *pt;
-        if (!t)
+        if (!qemu_timer_expired_ns(t, expire_time)) {
             break;
-        if (t->expire_time > expire_time)
-            break;
+        }
         pt = &t->next;
     }
     ts->expire_time = expire_time;
@@ -569,9 +592,18 @@
             qemu_rearm_alarm_timer(alarm_timer);
         }
         /* Interrupt execution to force deadline recalculation.  */
-        if (use_icount)
+        qemu_clock_warp(ts->clock);
+        if (use_icount) {
             qemu_notify_event();
     }
+    }
+}
+
+/* modify the current timer so that it will be fired when current_time
+   >= expire_time. The corresponding callback will be called. */
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time)
+{
+    qemu_mod_timer_ns(ts, expire_time * ts->scale);
 }
 
 int qemu_timer_pending(QEMUTimer *ts)
@@ -586,9 +618,7 @@
 
 int qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time)
 {
-    if (!timer_head)
-        return 0;
-    return (timer_head->expire_time <= current_time);
+    return qemu_timer_expired_ns(timer_head, current_time * timer_head->scale);
 }
 
 static void qemu_run_timers(QEMUClock *clock)
@@ -599,12 +629,13 @@
     if (!clock->enabled)
         return;
 
-    current_time = qemu_get_clock (clock);
+    current_time = qemu_get_clock_ns(clock);
     ptimer_head = &active_timers[clock->type];
     for(;;) {
         ts = *ptimer_head;
-        if (!ts || ts->expire_time > current_time)
+        if (!qemu_timer_expired_ns(ts, current_time)) {
             break;
+        }
         /* remove timer from the list before calling the callback */
         *ptimer_head = ts->next;
         ts->next = NULL;
@@ -650,7 +681,6 @@
 
 void init_clocks(void)
 {
-    init_get_clock();
     rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
     vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
     host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
@@ -677,7 +707,7 @@
 
     expire_time = qemu_get_be64(f);
     if (expire_time != -1) {
-        qemu_mod_timer(ts, expire_time);
+        qemu_mod_timer_ns(ts, expire_time);
     } else {
         qemu_del_timer(ts);
     }
@@ -705,6 +735,10 @@
     if (!option)
         return;
 
+#ifdef CONFIG_IOTHREAD
+    vm_clock->warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
+#endif
+
     if (strcmp(option, "auto") != 0) {
         icount_time_shift = strtol(option, NULL, 0);
         use_icount = 1;
@@ -722,12 +756,12 @@
        the virtual time trigger catches emulated time passing too fast.
        Realtime triggers occur even when idle, so use them less frequently
        than VM triggers.  */
-    icount_rt_timer = qemu_new_timer(rt_clock, icount_adjust_rt, NULL);
+    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
     qemu_mod_timer(icount_rt_timer,
-                   qemu_get_clock(rt_clock) + 1000);
-    icount_vm_timer = qemu_new_timer(vm_clock, icount_adjust_vm, NULL);
+                   qemu_get_clock_ms(rt_clock) + 1000);
+    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
     qemu_mod_timer(icount_vm_timer,
-                   qemu_get_clock(vm_clock) + get_ticks_per_sec() / 10);
+                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
 }
 
 void qemu_run_all_timers(void)
@@ -758,10 +792,11 @@
     return ret;
 }
 
+
+static int64_t qemu_next_alarm_deadline(void);
+
 #ifdef _WIN32
-static void CALLBACK host_alarm_handler(UINT uTimerID, UINT uMsg,
-                                        DWORD_PTR dwUser, DWORD_PTR dw1,
-                                        DWORD_PTR dw2)
+static void CALLBACK host_alarm_handler(PVOID lpParam, BOOLEAN unused)
 #else
 static void host_alarm_handler(int host_signum)
 #endif
@@ -776,7 +811,7 @@
         static int64_t delta_min = INT64_MAX;
         static int64_t delta_max, delta_cum, last_clock, delta, ti;
         static int count;
-        ti = qemu_get_clock(vm_clock);
+        ti = qemu_get_clock_ns(vm_clock);
         if (last_clock != 0) {
             delta = ti - last_clock;
             if (delta < delta_min)
@@ -800,14 +835,7 @@
     }
 #endif
     if (alarm_has_dynticks(t) ||
-        (!use_icount &&
-            qemu_timer_expired(active_timers[QEMU_CLOCK_VIRTUAL],
-                               qemu_get_clock(vm_clock))) ||
-        qemu_timer_expired(active_timers[QEMU_CLOCK_REALTIME],
-                           qemu_get_clock(rt_clock)) ||
-        qemu_timer_expired(active_timers[QEMU_CLOCK_HOST],
-                           qemu_get_clock(host_clock))) {
-
+        qemu_next_alarm_deadline () <= 0) {
         t->expired = alarm_has_dynticks(t);
         t->pending = 1;
         timer_alarm_pending = 1;
@@ -815,20 +843,15 @@
     }
 }
 
-int64_t qemu_next_deadline(void)
+int64_t qemu_next_icount_deadline(void)
 {
     /* To avoid problems with overflow limit this to 2^32.  */
     int64_t delta = INT32_MAX;
 
+    assert(use_icount);
     if (active_timers[QEMU_CLOCK_VIRTUAL]) {
         delta = active_timers[QEMU_CLOCK_VIRTUAL]->expire_time -
-                     qemu_get_clock(vm_clock);
-    }
-    if (active_timers[QEMU_CLOCK_HOST]) {
-        int64_t hdelta = active_timers[QEMU_CLOCK_HOST]->expire_time -
-                 qemu_get_clock(host_clock);
-        if (hdelta < delta)
-            delta = hdelta;
+                     qemu_get_clock_ns(vm_clock);
     }
 
     if (delta < 0)
@@ -837,35 +860,37 @@
     return delta;
 }
 
-#ifndef _WIN32
-
-#if defined(__linux__)
-
-#define RTC_FREQ 1024
-
-static uint64_t qemu_next_deadline_dyntick(void)
+static int64_t qemu_next_alarm_deadline(void)
 {
     int64_t delta;
     int64_t rtdelta;
 
-    if (use_icount)
+    if (!use_icount && active_timers[QEMU_CLOCK_VIRTUAL]) {
+        delta = active_timers[QEMU_CLOCK_VIRTUAL]->expire_time -
+                     qemu_get_clock_ns(vm_clock);
+    } else {
         delta = INT32_MAX;
-    else
-        delta = (qemu_next_deadline() + 999) / 1000;
-
+    }
+    if (active_timers[QEMU_CLOCK_HOST]) {
+        int64_t hdelta = active_timers[QEMU_CLOCK_HOST]->expire_time -
+                 qemu_get_clock_ns(host_clock);
+        if (hdelta < delta)
+            delta = hdelta;
+    }
     if (active_timers[QEMU_CLOCK_REALTIME]) {
         rtdelta = (active_timers[QEMU_CLOCK_REALTIME]->expire_time -
-                 qemu_get_clock(rt_clock))*1000;
+                 qemu_get_clock_ns(rt_clock));
         if (rtdelta < delta)
             delta = rtdelta;
     }
 
-    if (delta < MIN_TIMER_REARM_US)
-        delta = MIN_TIMER_REARM_US;
-
     return delta;
 }
 
+#if defined(__linux__)
+
+#define RTC_FREQ 1024
+
 static void enable_sigio_timer(int fd)
 {
     struct sigaction act;
@@ -914,7 +939,7 @@
         goto fail;
 
     enable_sigio_timer(fd);
-    t->priv = (void *)(long)fd;
+    t->fd = fd;
 
     return 0;
 fail:
@@ -924,7 +949,7 @@
 
 static void hpet_stop_timer(struct qemu_alarm_timer *t)
 {
-    int fd = (long)t->priv;
+    int fd = t->fd;
 
     close(fd);
 }
@@ -953,14 +978,14 @@
 
     enable_sigio_timer(rtc_fd);
 
-    t->priv = (void *)(long)rtc_fd;
+    t->fd = rtc_fd;
 
     return 0;
 }
 
 static void rtc_stop_timer(struct qemu_alarm_timer *t)
 {
-    int rtc_fd = (long)t->priv;
+    int rtc_fd = t->fd;
 
     close(rtc_fd);
 }
@@ -995,24 +1020,24 @@
         return -1;
     }
 
-    t->priv = (void *)(long)host_timer;
+    t->timer = host_timer;
 
     return 0;
 }
 
 static void dynticks_stop_timer(struct qemu_alarm_timer *t)
 {
-    timer_t host_timer = (timer_t)(long)t->priv;
+    timer_t host_timer = t->timer;
 
     timer_delete(host_timer);
 }
 
 static void dynticks_rearm_timer(struct qemu_alarm_timer *t)
 {
-    timer_t host_timer = (timer_t)(long)t->priv;
+    timer_t host_timer = t->timer;
     struct itimerspec timeout;
-    int64_t nearest_delta_us = INT64_MAX;
-    int64_t current_us;
+    int64_t nearest_delta_ns = INT64_MAX;
+    int64_t current_ns;
 
     assert(alarm_has_dynticks(t));
     if (!active_timers[QEMU_CLOCK_REALTIME] &&
@@ -1020,7 +1045,9 @@
         !active_timers[QEMU_CLOCK_HOST])
         return;
 
-    nearest_delta_us = qemu_next_deadline_dyntick();
+    nearest_delta_ns = qemu_next_alarm_deadline();
+    if (nearest_delta_ns < MIN_TIMER_REARM_NS)
+        nearest_delta_ns = MIN_TIMER_REARM_NS;
 
     /* check whether a timer is already running */
     if (timer_gettime(host_timer, &timeout)) {
@@ -1028,14 +1055,14 @@
         fprintf(stderr, "Internal timer error: aborting\n");
         exit(1);
     }
-    current_us = timeout.it_value.tv_sec * 1000000 + timeout.it_value.tv_nsec/1000;
-    if (current_us && current_us <= nearest_delta_us)
+    current_ns = timeout.it_value.tv_sec * 1000000000LL + timeout.it_value.tv_nsec;
+    if (current_ns && current_ns <= nearest_delta_ns)
         return;
 
     timeout.it_interval.tv_sec = 0;
     timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
-    timeout.it_value.tv_sec =  nearest_delta_us / 1000000;
-    timeout.it_value.tv_nsec = (nearest_delta_us % 1000000) * 1000;
+    timeout.it_value.tv_sec =  nearest_delta_ns / 1000000000;
+    timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
     if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) {
         perror("settime");
         fprintf(stderr, "Internal timer error: aborting\n");
@@ -1045,6 +1072,8 @@
 
 #endif /* defined(__linux__) */
 
+#if !defined(_WIN32)
+
 static int unix_start_timer(struct qemu_alarm_timer *t)
 {
     struct sigaction act;
@@ -1084,51 +1113,137 @@
 
 #ifdef _WIN32
 
-static int win32_start_timer(struct qemu_alarm_timer *t)
+static MMRESULT mm_timer;
+static unsigned mm_period;
+
+static void CALLBACK mm_alarm_handler(UINT uTimerID, UINT uMsg,
+                                      DWORD_PTR dwUser, DWORD_PTR dw1,
+                                      DWORD_PTR dw2)
+{
+    struct qemu_alarm_timer *t = alarm_timer;
+    if (!t) {
+        return;
+    }
+    if (alarm_has_dynticks(t) || qemu_next_alarm_deadline() <= 0) {
+        t->expired = alarm_has_dynticks(t);
+        t->pending = 1;
+        qemu_notify_event();
+    }
+}
+
+static int mm_start_timer(struct qemu_alarm_timer *t)
 {
     TIMECAPS tc;
-    struct qemu_alarm_win32 *data = t->priv;
     UINT flags;
 
     memset(&tc, 0, sizeof(tc));
     timeGetDevCaps(&tc, sizeof(tc));
 
-    data->period = tc.wPeriodMin;
-    timeBeginPeriod(data->period);
+    mm_period = tc.wPeriodMin;
+    timeBeginPeriod(mm_period);
 
     flags = TIME_CALLBACK_FUNCTION;
-    if (alarm_has_dynticks(t))
+    if (alarm_has_dynticks(t)) {
         flags |= TIME_ONESHOT;
-    else
+    } else {
         flags |= TIME_PERIODIC;
+    }
 
-    data->timerId = timeSetEvent(1,         // interval (ms)
-                        data->period,       // resolution
-                        host_alarm_handler, // function
-                        (DWORD)t,           // parameter
+    mm_timer = timeSetEvent(1,                  /* interval (ms) */
+                            mm_period,          /* resolution */
+                            mm_alarm_handler,   /* function */
+                            (DWORD_PTR)t,       /* parameter */
                         flags);
 
-    if (!data->timerId) {
+    if (!mm_timer) {
         fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
                 GetLastError());
-        timeEndPeriod(data->period);
+        timeEndPeriod(mm_period);
         return -1;
     }
 
     return 0;
 }
 
+static void mm_stop_timer(struct qemu_alarm_timer *t)
+{
+    timeKillEvent(mm_timer);
+    timeEndPeriod(mm_period);
+}
+
+static void mm_rearm_timer(struct qemu_alarm_timer *t)
+{
+    int nearest_delta_ms;
+
+    assert(alarm_has_dynticks(t));
+    if (!active_timers[QEMU_CLOCK_REALTIME] &&
+        !active_timers[QEMU_CLOCK_VIRTUAL] &&
+        !active_timers[QEMU_CLOCK_HOST]) {
+        return;
+    }
+
+    timeKillEvent(mm_timer);
+
+    nearest_delta_ms = (qemu_next_alarm_deadline() + 999999) / 1000000;
+    if (nearest_delta_ms < 1) {
+        nearest_delta_ms = 1;
+    }
+    mm_timer = timeSetEvent(nearest_delta_ms,
+                            mm_period,
+                            mm_alarm_handler,
+                            (DWORD_PTR)t,
+                            TIME_ONESHOT | TIME_CALLBACK_FUNCTION);
+
+    if (!mm_timer) {
+        fprintf(stderr, "Failed to re-arm win32 alarm timer %ld\n",
+                GetLastError());
+
+        timeEndPeriod(mm_period);
+        exit(1);
+    }
+}
+
+static int win32_start_timer(struct qemu_alarm_timer *t)
+{
+    HANDLE hTimer;
+    BOOLEAN success;
+
+    /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
+       is zero) that has already expired, the timer is not updated.  Since
+       creating a new timer is relatively expensive, set a bogus one-hour
+       interval in the dynticks case.  */
+    success = CreateTimerQueueTimer(&hTimer,
+                          NULL,
+                          host_alarm_handler,
+                          t,
+                          1,
+                          alarm_has_dynticks(t) ? 3600000 : 1,
+                          WT_EXECUTEINTIMERTHREAD);
+
+    if (!success) {
+        fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
+                GetLastError());
+        return -1;
+    }
+
+    t->timer = hTimer;
+    return 0;
+}
+
 static void win32_stop_timer(struct qemu_alarm_timer *t)
 {
-    struct qemu_alarm_win32 *data = t->priv;
+    HANDLE hTimer = t->timer;
 
-    timeKillEvent(data->timerId);
-    timeEndPeriod(data->period);
+    if (hTimer) {
+        DeleteTimerQueueTimer(NULL, hTimer, NULL);
+    }
 }
 
 static void win32_rearm_timer(struct qemu_alarm_timer *t)
 {
-    struct qemu_alarm_win32 *data = t->priv;
+    HANDLE hTimer = t->timer;
+    int nearest_delta_ms;
+    BOOLEAN success;
 
     assert(alarm_has_dynticks(t));
     if (!active_timers[QEMU_CLOCK_REALTIME] &&
@@ -1136,21 +1251,21 @@
         !active_timers[QEMU_CLOCK_HOST])
         return;
 
-    timeKillEvent(data->timerId);
-
-    data->timerId = timeSetEvent(1,
-                        data->period,
-                        host_alarm_handler,
-                        (DWORD)t,
-                        TIME_ONESHOT | TIME_CALLBACK_FUNCTION);
-
-    if (!data->timerId) {
-        fprintf(stderr, "Failed to re-arm win32 alarm timer %ld\n",
-                GetLastError());
-
-        timeEndPeriod(data->period);
-        exit(1);
+    nearest_delta_ms = (qemu_next_alarm_deadline() + 999999) / 1000000;
+    if (nearest_delta_ms < 1) {
+        nearest_delta_ms = 1;
     }
+    success = ChangeTimerQueueTimer(NULL,
+                                    hTimer,
+                                    nearest_delta_ms,
+                                    3600000);
+
+    if (!success) {
+        fprintf(stderr, "Failed to rearm win32 alarm timer: %ld\n",
+                GetLastError());
+        exit(-1);
+    }
+
 }
 
 #endif /* _WIN32 */
@@ -1236,7 +1351,7 @@
         } else {
             /* Wait for either IO to occur or the next
                timer event.  */
-            add = qemu_next_deadline();
+            add = qemu_next_icount_deadline();
             /* We advance the timer before checking for IO.
                Limit the amount we advance so that early IO
                activity won't get the guest too far ahead.  */
diff --git a/qemu-timer.h b/qemu-timer.h
index e175809..c84673a 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -2,9 +2,19 @@
 #define QEMU_TIMER_H
 
 #include "qemu-common.h"
+#include <time.h>
+#include <sys/time.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
 
 /* timers */
 
+#define SCALE_MS 1000000
+#define SCALE_US 1000
+#define SCALE_NS 1
+
 typedef struct QEMUClock QEMUClock;
 typedef void QEMUTimerCB(void *opaque);
 
@@ -29,8 +39,11 @@
 int64_t qemu_get_clock(QEMUClock *clock);
 int64_t qemu_get_clock_ns(QEMUClock *clock);
 void qemu_clock_enable(QEMUClock *clock, int enabled);
+void qemu_clock_warp(QEMUClock *clock);
 
-QEMUTimer *qemu_new_timer(QEMUClock *clock, QEMUTimerCB *cb, void *opaque);
+QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
+                          QEMUTimerCB *cb, void *opaque);
+
 void qemu_free_timer(QEMUTimer *ts);
 void qemu_del_timer(QEMUTimer *ts);
 void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
@@ -40,6 +53,7 @@
 
 void qemu_run_all_timers(void);
 int qemu_alarm_pending(void);
+int64_t qemu_next_icount_deadline(void);
 int64_t qemu_next_deadline(void);
 void configure_alarms(char const *opt);
 void configure_icount(const char *option);
@@ -48,11 +62,75 @@
 int init_timer_alarm(void);
 void quit_timers(void);
 
+int64_t cpu_get_ticks(void);
+void cpu_enable_ticks(void);
+void cpu_disable_ticks(void);
+
+static inline QEMUTimer *qemu_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb,
+                                           void *opaque)
+{
+    return qemu_new_timer(clock, SCALE_NS, cb, opaque);
+}
+
+static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
+                                           void *opaque)
+{
+    return qemu_new_timer(clock, SCALE_MS, cb, opaque);
+}
+
+static inline int64_t qemu_get_clock_ms(QEMUClock *clock)
+{
+    return qemu_get_clock_ns(clock) / SCALE_MS;
+}
+
 static inline int64_t get_ticks_per_sec(void)
 {
     return 1000000000LL;
 }
 
+/* real time host monotonic timer */
+static inline int64_t get_clock_realtime(void)
+{
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000);
+}
+
+/* Warning: don't insert tracepoints into these functions, they are
+   also used by simpletrace backend and tracepoints would cause
+   an infinite recursion! */
+#ifdef _WIN32
+extern int64_t clock_freq;
+
+static inline int64_t get_clock(void)
+{
+    LARGE_INTEGER ti;
+    QueryPerformanceCounter(&ti);
+    return muldiv64(ti.QuadPart, get_ticks_per_sec(), clock_freq);
+}
+
+#else
+
+extern int use_rt_clock;
+
+static inline int64_t get_clock(void)
+{
+#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
+    || defined(__DragonFly__) || defined(__FreeBSD_kernel__)
+    if (use_rt_clock) {
+        struct timespec ts;
+        clock_gettime(CLOCK_MONOTONIC, &ts);
+        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+    } else
+#endif
+    {
+        /* XXX: using gettimeofday leads to problems if the date
+           changes, so it should be avoided. */
+        return get_clock_realtime();
+    }
+}
+#endif
 
 void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
 void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
diff --git a/qerror.c b/qerror.c
index 0af3ab3..4855604 100644
--- a/qerror.c
+++ b/qerror.c
@@ -101,6 +101,10 @@
         .desc      = "Device '%(device)' has no child bus",
     },
     {
+        .error_fmt = QERR_DEVICE_NO_HOTPLUG,
+        .desc      = "Device '%(device)' does not support hotplugging",
+    },
+    {
         .error_fmt = QERR_DUPLICATE_ID,
         .desc      = "Duplicate ID '%(id)' for %(object)",
     },
@@ -197,6 +201,11 @@
         .desc      = "An undefined error has ocurred",
     },
     {
+        .error_fmt = QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+        .desc      = "'%(device)' uses a %(format) feature which is not "
+                     "supported by this qemu version: %(feature)",
+    },
+    {
         .error_fmt = QERR_VNC_SERVER_FAILED,
         .desc      = "Could not start VNC server on %(target)",
     },
@@ -218,7 +227,8 @@
     return qerr;
 }
 
-static void qerror_abort(const QError *qerr, const char *fmt, ...)
+static void GCC_FMT_ATTR(2, 3) qerror_abort(const QError *qerr,
+                                            const char *fmt, ...)
 {
     va_list ap;
 
@@ -233,7 +243,8 @@
     abort();
 }
 
-static void qerror_set_data(QError *qerr, const char *fmt, va_list *va)
+static void GCC_FMT_ATTR(2, 0) qerror_set_data(QError *qerr,
+                                               const char *fmt, va_list *va)
 {
     QObject *obj;
 
diff --git a/qerror.h b/qerror.h
index 62802ea..df61d2c 100644
--- a/qerror.h
+++ b/qerror.h
@@ -34,12 +34,11 @@
 
 QError *qerror_new(void);
 QError *qerror_from_info(const char *file, int linenr, const char *func,
-                         const char *fmt, va_list *va);
+                         const char *fmt, va_list *va) GCC_FMT_ATTR(4, 0);
 QString *qerror_human(const QError *qerror);
 void qerror_print(QError *qerror);
 void qerror_report_internal(const char *file, int linenr, const char *func,
-                            const char *fmt, ...)
-    __attribute__ ((format(printf, 4, 5)));
+                            const char *fmt, ...) GCC_FMT_ATTR(4, 5);
 #define qerror_report(fmt, ...) \
     qerror_report_internal(__FILE__, __LINE__, __func__, fmt, ## __VA_ARGS__)
 QError *qobject_to_qerror(const QObject *obj);
@@ -91,6 +90,9 @@
 #define QERR_DEVICE_NO_BUS \
     "{ 'class': 'DeviceNoBus', 'data': { 'device': %s } }"
 
+#define QERR_DEVICE_NO_HOTPLUG \
+    "{ 'class': 'DeviceNoHotplug', 'data': { 'device': %s } }"
+
 #define QERR_DUPLICATE_ID \
     "{ 'class': 'DuplicateId', 'data': { 'id': %s, 'object': %s } }"
 
@@ -163,7 +165,13 @@
 #define QERR_UNDEFINED_ERROR \
     "{ 'class': 'UndefinedError', 'data': {} }"
 
+#define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \
+    "{ 'class': 'UnknownBlockFormatFeature', 'data': { 'device': %s, 'format': %s, 'feature': %s } }"
+
 #define QERR_VNC_SERVER_FAILED \
     "{ 'class': 'VNCServerFailed', 'data': { 'target': %s } }"
 
+#define QERR_FEATURE_DISABLED \
+    "{ 'class': 'FeatureDisabled', 'data': { 'name': %s } }"
+
 #endif /* QERROR_H */
diff --git a/qjson.c b/qjson.c
index e4ee433..f9c8e77 100644
--- a/qjson.c
+++ b/qjson.c
@@ -72,43 +72,57 @@
 
 typedef struct ToJsonIterState
 {
+    int indent;
+    int pretty;
     int count;
     QString *str;
 } ToJsonIterState;
 
-static void to_json(const QObject *obj, QString *str);
+static void to_json(const QObject *obj, QString *str, int pretty, int indent);
 
 static void to_json_dict_iter(const char *key, QObject *obj, void *opaque)
 {
     ToJsonIterState *s = opaque;
     QString *qkey;
+    int j;
 
-    if (s->count) {
+    if (s->count)
         qstring_append(s->str, ", ");
+
+    if (s->pretty) {
+        qstring_append(s->str, "\n");
+        for (j = 0 ; j < s->indent ; j++)
+            qstring_append(s->str, "    ");
     }
 
     qkey = qstring_from_str(key);
-    to_json(QOBJECT(qkey), s->str);
+    to_json(QOBJECT(qkey), s->str, s->pretty, s->indent);
     QDECREF(qkey);
 
     qstring_append(s->str, ": ");
-    to_json(obj, s->str);
+    to_json(obj, s->str, s->pretty, s->indent);
     s->count++;
 }
 
 static void to_json_list_iter(QObject *obj, void *opaque)
 {
     ToJsonIterState *s = opaque;
+    int j;
 
-    if (s->count) {
+    if (s->count)
         qstring_append(s->str, ", ");
+
+    if (s->pretty) {
+        qstring_append(s->str, "\n");
+        for (j = 0 ; j < s->indent ; j++)
+            qstring_append(s->str, "    ");
     }
 
-    to_json(obj, s->str);
+    to_json(obj, s->str, s->pretty, s->indent);
     s->count++;
 }
 
-static void to_json(const QObject *obj, QString *str)
+static void to_json(const QObject *obj, QString *str, int pretty, int indent)
 {
     switch (qobject_type(obj)) {
     case QTYPE_QINT: {
@@ -193,8 +207,16 @@
 
         s.count = 0;
         s.str = str;
+        s.indent = indent + 1;
+        s.pretty = pretty;
         qstring_append(str, "{");
         qdict_iter(val, to_json_dict_iter, &s);
+        if (pretty) {
+            int j;
+            qstring_append(str, "\n");
+            for (j = 0 ; j < indent ; j++)
+                qstring_append(str, "    ");
+        }
         qstring_append(str, "}");
         break;
     }
@@ -204,8 +226,16 @@
 
         s.count = 0;
         s.str = str;
+        s.indent = indent + 1;
+        s.pretty = pretty;
         qstring_append(str, "[");
         qlist_iter(val, (void *)to_json_list_iter, &s);
+        if (pretty) {
+            int j;
+            qstring_append(str, "\n");
+            for (j = 0 ; j < indent ; j++)
+                qstring_append(str, "    ");
+        }
         qstring_append(str, "]");
         break;
     }
@@ -249,7 +279,16 @@
 {
     QString *str = qstring_new();
 
-    to_json(obj, str);
+    to_json(obj, str, 0, 0);
+
+    return str;
+}
+
+QString *qobject_to_json_pretty(const QObject *obj)
+{
+    QString *str = qstring_new();
+
+    to_json(obj, str, 1, 0);
 
     return str;
 }
diff --git a/qjson.h b/qjson.h
index 7afec2e..65b10ea 100644
--- a/qjson.h
+++ b/qjson.h
@@ -18,11 +18,11 @@
 #include "qobject.h"
 #include "qstring.h"
 
-QObject *qobject_from_json(const char *string);
-QObject *qobject_from_jsonf(const char *string, ...)
-    __attribute__((__format__ (__printf__, 1, 2)));
-QObject *qobject_from_jsonv(const char *string, va_list *ap);
+QObject *qobject_from_json(const char *string) GCC_FMT_ATTR(1, 0);
+QObject *qobject_from_jsonf(const char *string, ...) GCC_FMT_ATTR(1, 2);
+QObject *qobject_from_jsonv(const char *string, va_list *ap) GCC_FMT_ATTR(1, 0);
 
 QString *qobject_to_json(const QObject *obj);
+QString *qobject_to_json_pretty(const QObject *obj);
 
 #endif /* QJSON_H */
diff --git a/savevm.c b/savevm.c
index bbf07c5..5da7a8c 100644
--- a/savevm.c
+++ b/savevm.c
@@ -78,7 +78,6 @@
 #include "qemu-timer.h"
 #include "qemu-char.h"
 #include "blockdev.h"
-#include "outputchannel.h"
 #include "block.h"
 #include "audio/audio.h"
 #include "migration.h"
@@ -89,25 +88,37 @@
 
 
 #define SELF_ANNOUNCE_ROUNDS 5
-#define ETH_P_EXPERIMENTAL 0x01F1 /* just a number */
-//#define ETH_P_EXPERIMENTAL 0x0012 /* make it the size of the packet */
-#define EXPERIMENTAL_MAGIC 0xf1f23f4f
 
-static int announce_self_create(uint8_t *buf, 
+#ifndef ETH_P_RARP
+#define ETH_P_RARP 0x8035
+#endif
+#define ARP_HTYPE_ETH 0x0001
+#define ARP_PTYPE_IP 0x0800
+#define ARP_OP_REQUEST_REV 0x3
+
+static int announce_self_create(uint8_t *buf,
 				uint8_t *mac_addr)
 {
-    uint32_t magic = EXPERIMENTAL_MAGIC;
-    uint16_t proto = htons(ETH_P_EXPERIMENTAL);
+    /* Ethernet header. */
+    memset(buf, 0xff, 6);         /* destination MAC addr */
+    memcpy(buf + 6, mac_addr, 6); /* source MAC addr */
+    *(uint16_t *)(buf + 12) = htons(ETH_P_RARP); /* ethertype */
 
-    /* FIXME: should we send a different packet (arp/rarp/ping)? */
+    /* RARP header. */
+    *(uint16_t *)(buf + 14) = htons(ARP_HTYPE_ETH); /* hardware addr space */
+    *(uint16_t *)(buf + 16) = htons(ARP_PTYPE_IP); /* protocol addr space */
+    *(buf + 18) = 6; /* hardware addr length (ethernet) */
+    *(buf + 19) = 4; /* protocol addr length (IPv4) */
+    *(uint16_t *)(buf + 20) = htons(ARP_OP_REQUEST_REV); /* opcode */
+    memcpy(buf + 22, mac_addr, 6); /* source hw addr */
+    memset(buf + 28, 0x00, 4);     /* source protocol addr */
+    memcpy(buf + 32, mac_addr, 6); /* target hw addr */
+    memset(buf + 38, 0x00, 4);     /* target protocol addr */
 
-    memset(buf, 0, 64);
-    memset(buf, 0xff, 6);         /* h_dst */
-    memcpy(buf + 6, mac_addr, 6); /* h_src */
-    memcpy(buf + 12, &proto, 2);  /* h_proto */
-    memcpy(buf + 14, &magic, 4);  /* magic */
+    /* Padding to get up to 60 bytes (ethernet min packet size, minus FCS). */
+    memset(buf + 42, 0x00, 18);
 
-    return 64; /* len */
+    return 60; /* len (FCS will be added by hardware) */
 }
 
 static void qemu_announce_self_once(void *opaque)
@@ -128,8 +139,10 @@
             vc->receive(vc, buf, len);
         }
     }
-    if (count--) {
-	    qemu_mod_timer(timer, qemu_get_clock(rt_clock) + 100);
+    if (--count) {
+        /* delay 50ms, 150ms, 250ms, ... */
+        qemu_mod_timer(timer, qemu_get_clock_ms(rt_clock) +
+                       50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100);
     } else {
 	    qemu_del_timer(timer);
 	    qemu_free_timer(timer);
@@ -139,7 +152,7 @@
 void qemu_announce_self(void)
 {
 	static QEMUTimer *timer;
-	timer = qemu_new_timer(rt_clock, qemu_announce_self_once, &timer);
+	timer = qemu_new_timer_ms(rt_clock, qemu_announce_self_once, &timer);
 	qemu_announce_self_once(&timer);
 }
 
@@ -167,11 +180,11 @@
     int has_error;
 };
 
-typedef struct QEMUFilePopen
+typedef struct QEMUFileStdio
 {
-    FILE *popen_file;
+    FILE *stdio_file;
     QEMUFile *file;
-} QEMUFilePopen;
+} QEMUFileStdio;
 
 typedef struct QEMUFileSocket
 {
@@ -179,7 +192,7 @@
     QEMUFile *file;
 } QEMUFileSocket;
 
-static int file_socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
 {
     QEMUFileSocket *s = opaque;
     ssize_t len;
@@ -201,16 +214,16 @@
     return 0;
 }
 
-static int popen_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
+static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
 {
-    QEMUFilePopen *s = opaque;
-    return fwrite(buf, 1, size, s->popen_file);
+    QEMUFileStdio *s = opaque;
+    return fwrite(buf, 1, size, s->stdio_file);
 }
 
-static int popen_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
+static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
 {
-    QEMUFilePopen *s = opaque;
-    FILE *fp = s->popen_file;
+    QEMUFileStdio *s = opaque;
+    FILE *fp = s->stdio_file;
     int bytes;
 
     do {
@@ -220,31 +233,42 @@
     return bytes;
 }
 
-static int popen_close(void *opaque)
+static int stdio_pclose(void *opaque)
 {
-    QEMUFilePopen *s = opaque;
-    pclose(s->popen_file);
+    QEMUFileStdio *s = opaque;
+    int ret;
+    ret = pclose(s->stdio_file);
+    qemu_free(s);
+    return ret;
+}
+
+static int stdio_fclose(void *opaque)
+{
+    QEMUFileStdio *s = opaque;
+    fclose(s->stdio_file);
     qemu_free(s);
     return 0;
 }
 
-QEMUFile *qemu_popen(FILE *popen_file, const char *mode)
+QEMUFile *qemu_popen(FILE *stdio_file, const char *mode)
 {
-    QEMUFilePopen *s;
+    QEMUFileStdio *s;
 
-    if (popen_file == NULL || mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
+    if (stdio_file == NULL || mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
         fprintf(stderr, "qemu_popen: Argument validity check failed\n");
         return NULL;
     }
 
-    s = qemu_mallocz(sizeof(QEMUFilePopen));
+    s = qemu_mallocz(sizeof(QEMUFileStdio));
 
-    s->popen_file = popen_file;
+    s->stdio_file = stdio_file;
 
     if(mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, NULL, popen_get_buffer, popen_close, NULL, NULL, NULL);
+        s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_pclose,
+				 NULL, NULL, NULL);
     } else {
-        s->file = qemu_fopen_ops(s, popen_put_buffer, NULL, popen_close, NULL, NULL, NULL);
+        s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_pclose,
+				 NULL, NULL, NULL);
     }
     return s->file;
 }
@@ -261,93 +285,112 @@
     return qemu_popen(popen_file, mode);
 }
 
-int qemu_popen_fd(QEMUFile *f)
+int qemu_stdio_fd(QEMUFile *f)
 {
-    QEMUFilePopen *p;
+    QEMUFileStdio *p;
     int fd;
 
-    p = (QEMUFilePopen *)f->opaque;
-    fd = fileno(p->popen_file);
+    p = (QEMUFileStdio *)f->opaque;
+    fd = fileno(p->stdio_file);
 
     return fd;
 }
 
+QEMUFile *qemu_fdopen(int fd, const char *mode)
+{
+    QEMUFileStdio *s;
+
+    if (mode == NULL ||
+	(mode[0] != 'r' && mode[0] != 'w') ||
+	mode[1] != 'b' || mode[2] != 0) {
+        fprintf(stderr, "qemu_fdopen: Argument validity check failed\n");
+        return NULL;
+    }
+
+    s = qemu_mallocz(sizeof(QEMUFileStdio));
+    s->stdio_file = fdopen(fd, mode);
+    if (!s->stdio_file)
+        goto fail;
+
+    if(mode[0] == 'r') {
+        s->file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_fclose,
+				 NULL, NULL, NULL);
+    } else {
+        s->file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_fclose,
+				 NULL, NULL, NULL);
+    }
+    return s->file;
+
+fail:
+    qemu_free(s);
+    return NULL;
+}
+
 QEMUFile *qemu_fopen_socket(int fd)
 {
     QEMUFileSocket *s = qemu_mallocz(sizeof(QEMUFileSocket));
 
     s->fd = fd;
-    s->file = qemu_fopen_ops(s, NULL, file_socket_get_buffer, file_socket_close, NULL, NULL, NULL);
+    s->file = qemu_fopen_ops(s, NULL, socket_get_buffer, file_socket_close,
+			     NULL, NULL, NULL);
     return s->file;
 }
 
-typedef struct QEMUFileStdio
-{
-    FILE *outfile;
-} QEMUFileStdio;
-
 static int file_put_buffer(void *opaque, const uint8_t *buf,
                             int64_t pos, int size)
 {
     QEMUFileStdio *s = opaque;
-    fseek(s->outfile, pos, SEEK_SET);
-    fwrite(buf, 1, size, s->outfile);
-    return size;
+    fseek(s->stdio_file, pos, SEEK_SET);
+    return fwrite(buf, 1, size, s->stdio_file);
 }
 
 static int file_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
 {
     QEMUFileStdio *s = opaque;
-    fseek(s->outfile, pos, SEEK_SET);
-    return fread(buf, 1, size, s->outfile);
-}
-
-static int file_close(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-    fclose(s->outfile);
-    qemu_free(s);
-    return 0;
+    fseek(s->stdio_file, pos, SEEK_SET);
+    return fread(buf, 1, size, s->stdio_file);
 }
 
 QEMUFile *qemu_fopen(const char *filename, const char *mode)
 {
     QEMUFileStdio *s;
 
+    if (mode == NULL ||
+	(mode[0] != 'r' && mode[0] != 'w') ||
+	mode[1] != 'b' || mode[2] != 0) {
+        fprintf(stderr, "qemu_fopen: Argument validity check failed\n");
+        return NULL;
+    }
+
     s = qemu_mallocz(sizeof(QEMUFileStdio));
 
-    s->outfile = fopen(filename, mode);
-    if (!s->outfile)
+    s->stdio_file = fopen(filename, mode);
+    if (!s->stdio_file)
         goto fail;
 
-    if (!strcmp(mode, "wb"))
-        return qemu_fopen_ops(s, file_put_buffer, NULL, file_close, NULL, NULL, NULL);
-    else if (!strcmp(mode, "rb"))
-        return qemu_fopen_ops(s, NULL, file_get_buffer, file_close, NULL, NULL, NULL);
-
+    if(mode[0] == 'w') {
+        s->file = qemu_fopen_ops(s, file_put_buffer, NULL, stdio_fclose,
+				 NULL, NULL, NULL);
+    } else {
+        s->file = qemu_fopen_ops(s, NULL, file_get_buffer, stdio_fclose,
+			       NULL, NULL, NULL);
+    }
+    return s->file;
 fail:
-    if (s->outfile)
-        fclose(s->outfile);
     qemu_free(s);
     return NULL;
 }
 
-typedef struct QEMUFileBdrv
-{
-    BlockDriverState *bs;
-    int64_t base_offset;
-} QEMUFileBdrv;
-
 static int block_put_buffer(void *opaque, const uint8_t *buf,
                            int64_t pos, int size)
 {
-    bdrv_save_vmstate(((QEMUFileBdrv*)opaque)->bs, buf, pos, size);
+    bdrv_save_vmstate(opaque, buf, pos, size);
     return size;
 }
 
 static int block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
 {
-    return bdrv_load_vmstate(((QEMUFileBdrv*)opaque)->bs, buf, pos, size);
+    return bdrv_load_vmstate(opaque, buf, pos, size);
 }
 
 static int bdrv_fclose(void *opaque)
@@ -355,19 +398,12 @@
     return 0;
 }
 
-static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int64_t offset, int is_writable)
+static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 {
-    QEMUFileBdrv *s;
-
-    s = qemu_mallocz(sizeof(QEMUFileBdrv));
-
-    s->bs = bs;
-    s->base_offset = offset;
-
     if (is_writable)
-        return qemu_fopen_ops(s, block_put_buffer, NULL, bdrv_fclose, NULL, NULL, NULL);
-
-    return qemu_fopen_ops(s, NULL, block_get_buffer, bdrv_fclose, NULL, NULL, NULL);
+        return qemu_fopen_ops(bs, block_put_buffer, NULL, bdrv_fclose,
+			      NULL, NULL, NULL);
+    return qemu_fopen_ops(bs, NULL, block_get_buffer, bdrv_fclose, NULL, NULL, NULL);
 }
 
 QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer,
@@ -387,6 +423,7 @@
     f->close = close;
     f->rate_limit = rate_limit;
     f->set_rate_limit = set_rate_limit;
+    f->get_rate_limit = get_rate_limit;
     f->is_write = 0;
 
     return f;
@@ -564,9 +601,19 @@
     return 0;
 }
 
-size_t qemu_file_set_rate_limit(QEMUFile *f, size_t new_rate)
+int64_t qemu_file_get_rate_limit(QEMUFile *f)
 {
-    if (f->set_rate_limit)
+    if (f->get_rate_limit)
+        return f->get_rate_limit(f->opaque);
+
+    return 0;
+}
+
+int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate)
+{
+    /* any failed or completed migration keeps its state to allow probing of
+     * migration data, but has no associated file anymore */
+    if (f && f->set_rate_limit)
         return f->set_rate_limit(f->opaque, new_rate);
 
     return 0;
@@ -1151,21 +1198,7 @@
     return ret;
 }
 
-static int
-monitor_output_channel_cb(void* opaque, const char* fmt, va_list ap)
-{
-    return monitor_vprintf((Monitor*) opaque, fmt, ap);
-}
-
-
-void do_savevm(Monitor *mon, const char *name)
-{
-    OutputChannel *oc = output_channel_alloc(mon, monitor_output_channel_cb);
-    do_savevm_oc(oc, name);
-    output_channel_free(oc);
-}
-
-void do_savevm_oc(OutputChannel *err, const char *name)
+void do_savevm(Monitor *err, const char *name)
 {
     BlockDriverState *bs, *bs1;
     QEMUSnapshotInfo sn1, *sn = &sn1, old_sn1, *old_sn = &old_sn1;
@@ -1182,7 +1215,7 @@
 
     bs = bdrv_snapshots();
     if (!bs) {
-        output_channel_printf(err, "No block device can accept snapshots\n");
+        monitor_printf(err, "No block device can accept snapshots\n");
         return;
     }
 
@@ -1218,25 +1251,25 @@
     sn->date_sec = tv.tv_sec;
     sn->date_nsec = tv.tv_usec * 1000;
 #endif
-    sn->vm_clock_nsec = qemu_get_clock(vm_clock);
+    sn->vm_clock_nsec = qemu_get_clock_ns(vm_clock);
 
     if (bdrv_get_info(bs, bdi) < 0 || bdi->vm_state_offset <= 0) {
-        output_channel_printf(err, "Device %s does not support VM state snapshots\n",
+        monitor_printf(err, "Device %s does not support VM state snapshots\n",
                               bdrv_get_device_name(bs));
         goto the_end;
     }
 
     /* save the VM state */
-    f = qemu_fopen_bdrv(bs, bdi->vm_state_offset, 1);
+    f = qemu_fopen_bdrv(bs, 1);
     if (!f) {
-        output_channel_printf(err, "Could not open VM state file\n");
+        monitor_printf(err, "Could not open VM state file\n");
         goto the_end;
     }
     ret = qemu_savevm_state(f);
     vm_state_size = qemu_ftell(f);
     qemu_fclose(f);
     if (ret < 0) {
-        output_channel_printf(err, "Error %d while writing VM\n", ret);
+        monitor_printf(err, "Error %d while writing VM\n", ret);
         goto the_end;
     }
 
@@ -1248,7 +1281,7 @@
             if (must_delete) {
                 ret = bdrv_snapshot_delete(bs1, old_sn->id_str);
                 if (ret < 0) {
-                    output_channel_printf(err,
+                    monitor_printf(err,
                                           "Error while deleting snapshot on '%s'\n",
                                           bdrv_get_device_name(bs1));
                 }
@@ -1257,7 +1290,7 @@
             sn->vm_state_size = (bs == bs1 ? vm_state_size : 0);
             ret = bdrv_snapshot_create(bs1, sn);
             if (ret < 0) {
-                output_channel_printf(err, "Error while creating snapshot on '%s'\n",
+                monitor_printf(err, "Error while creating snapshot on '%s'\n",
                                       bdrv_get_device_name(bs1));
             }
         }
@@ -1268,15 +1301,7 @@
         vm_start();
 }
 
-void do_loadvm(Monitor *mon, const char *name)
-{
-    OutputChannel *oc = output_channel_alloc(mon, monitor_output_channel_cb);
-    android_snapshot_update_time_request = 1;
-    do_loadvm_oc(oc, name);
-    output_channel_free(oc);
-}
-
-void do_loadvm_oc(OutputChannel *err, const char *name)
+void do_loadvm(Monitor *err, const char *name)
 {
     BlockDriverState *bs, *bs1;
     BlockDriverInfo bdi1, *bdi = &bdi1;
@@ -1287,7 +1312,7 @@
 
     bs = bdrv_snapshots();
     if (!bs) {
-        output_channel_printf(err, "No block device supports snapshots\n");
+        monitor_printf(err, "No block device supports snapshots\n");
         return;
     }
 
@@ -1303,20 +1328,20 @@
             ret = bdrv_snapshot_goto(bs1, name);
             if (ret < 0) {
                 if (bs != bs1)
-                    output_channel_printf(err, "Warning: ");
+                    monitor_printf(err, "Warning: ");
                 switch(ret) {
                 case -ENOTSUP:
-                    output_channel_printf(err,
+                    monitor_printf(err,
                                    "Snapshots not supported on device '%s'\n",
                                    bdrv_get_device_name(bs1));
                     break;
                 case -ENOENT:
-                    output_channel_printf(err, "Could not find snapshot '%s' on "
+                    monitor_printf(err, "Could not find snapshot '%s' on "
                                    "device '%s'\n",
                                    name, bdrv_get_device_name(bs1));
                     break;
                 default:
-                    output_channel_printf(err, "Error %d while activating snapshot on"
+                    monitor_printf(err, "Error %d while activating snapshot on"
                                    " '%s'\n", ret, bdrv_get_device_name(bs1));
                     break;
                 }
@@ -1328,7 +1353,7 @@
     }
 
     if (bdrv_get_info(bs, bdi) < 0 || bdi->vm_state_offset <= 0) {
-        output_channel_printf(err, "Device %s does not support VM state snapshots\n",
+        monitor_printf(err, "Device %s does not support VM state snapshots\n",
                        bdrv_get_device_name(bs));
         return;
     }
@@ -1339,35 +1364,29 @@
         goto the_end;
 
     /* restore the VM state */
-    f = qemu_fopen_bdrv(bs, bdi->vm_state_offset, 0);
+    f = qemu_fopen_bdrv(bs, 0);
     if (!f) {
-        output_channel_printf(err, "Could not open VM state file\n");
+        monitor_printf(err, "Could not open VM state file\n");
         goto the_end;
     }
     ret = qemu_loadvm_state(f);
     qemu_fclose(f);
     if (ret < 0) {
-        output_channel_printf(err, "Error %d while loading VM state\n", ret);
+        monitor_printf(err, "Error %d while loading VM state\n", ret);
     }
  the_end:
     if (saved_vm_running)
         vm_start();
 }
 
-void do_delvm(Monitor *mon, const char *name)
-{
-    OutputChannel *oc = output_channel_alloc(mon, monitor_output_channel_cb);
-    do_delvm_oc(oc, name);
-    output_channel_free(oc);
-}
-void do_delvm_oc(OutputChannel *err, const char *name)
+void do_delvm(Monitor *err, const char *name)
 {
     BlockDriverState *bs, *bs1;
     int ret;
 
     bs = bdrv_snapshots();
     if (!bs) {
-        output_channel_printf(err, "No block device supports snapshots\n");
+        monitor_printf(err, "No block device supports snapshots\n");
         return;
     }
 
@@ -1377,25 +1396,18 @@
             ret = bdrv_snapshot_delete(bs1, name);
             if (ret < 0) {
                 if (ret == -ENOTSUP)
-                    output_channel_printf(err,
+                    monitor_printf(err,
                                           "Snapshots not supported on device '%s'\n",
                                           bdrv_get_device_name(bs1));
                 else
-                    output_channel_printf(err, "Error %d while deleting snapshot on "
+                    monitor_printf(err, "Error %d while deleting snapshot on "
                                           "'%s'\n", ret, bdrv_get_device_name(bs1));
             }
         }
     }
 }
 
-void do_info_snapshots(Monitor *mon)
-{
-    OutputChannel *oc = output_channel_alloc(mon, monitor_output_channel_cb);
-    do_info_snapshots_oc(oc, oc);
-    output_channel_free(oc);
-}
-
-void do_info_snapshots_oc(OutputChannel *out, OutputChannel *err)
+void do_info_snapshots(Monitor* out, Monitor* err)
 {
     BlockDriverState *bs, *bs1;
     QEMUSnapshotInfo *sn_tab, *sn;
@@ -1404,30 +1416,30 @@
 
     bs = bdrv_snapshots();
     if (!bs) {
-        output_channel_printf(err, "No available block device supports snapshots\n");
+        monitor_printf(err, "No available block device supports snapshots\n");
         return;
     }
-    output_channel_printf(out, "Snapshot devices:");
+    monitor_printf(out, "Snapshot devices:");
     bs1 = NULL;
     while ((bs1 = bdrv_next(bs1))) {
         if (bdrv_can_snapshot(bs1)) {
             if (bs == bs1)
-                output_channel_printf(out, " %s", bdrv_get_device_name(bs1));
+                monitor_printf(out, " %s", bdrv_get_device_name(bs1));
         }
     }
-    output_channel_printf(out, "\n");
+    monitor_printf(out, "\n");
 
     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
     if (nb_sns < 0) {
-        output_channel_printf(err, "bdrv_snapshot_list: error %d\n", nb_sns);
+        monitor_printf(err, "bdrv_snapshot_list: error %d\n", nb_sns);
         return;
     }
-    output_channel_printf(out, "Snapshot list (from %s):\n",
+    monitor_printf(out, "Snapshot list (from %s):\n",
                    bdrv_get_device_name(bs));
-    output_channel_printf(out, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL));
+    monitor_printf(out, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL));
     for(i = 0; i < nb_sns; i++) {
         sn = &sn_tab[i];
-        output_channel_printf(out, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), sn));
+        monitor_printf(out, "%s\n", bdrv_snapshot_dump(buf, sizeof(buf), sn));
     }
     qemu_free(sn_tab);
 }
diff --git a/shaper.c b/shaper.c
index a522919..072bd85 100644
--- a/shaper.c
+++ b/shaper.c
@@ -137,7 +137,7 @@
     QueuedPacket  packet;
 
     while ((packet = shaper->packets) != NULL) {
-        int64_t   now = qemu_get_clock( SHAPER_CLOCK );
+        int64_t   now = qemu_get_clock_ms( SHAPER_CLOCK );
 
        if (packet->expiration > now)
            break;
@@ -167,9 +167,9 @@
     shaper->active = 0;
     shaper->packets = NULL;
     shaper->num_packets = 0;
-    shaper->timer   = qemu_new_timer( SHAPER_CLOCK,
-                                      (QEMUTimerCB*) netshaper_expires,
-                                      shaper );
+    shaper->timer   = qemu_new_timer_ms( SHAPER_CLOCK,
+                                         (QEMUTimerCB*) netshaper_expires,
+                                         shaper );
     shaper->send_func = send_func;
     shaper->max_rate  = 1e6;
     shaper->inv_rate  = 0.;
@@ -216,7 +216,7 @@
         return;
     }
 
-    now = qemu_get_clock( SHAPER_CLOCK );
+    now = qemu_get_clock_ms( SHAPER_CLOCK );
     if (now >= shaper->block_until) {
         shaper->send_func( data, size, opaque );
         shaper->block_until = now + size*shaper->inv_rate;
@@ -274,7 +274,7 @@
     if (shaper->packets)
         return 0;
 
-    now = qemu_get_clock( SHAPER_CLOCK );
+    now = qemu_get_clock_ms( SHAPER_CLOCK );
     return (now >= shaper->block_until);
 }
 
@@ -424,7 +424,7 @@
 netdelay_expires( NetDelay  delay )
 {
     Session  session;
-    int64_t  now = qemu_get_clock( SHAPER_CLOCK );
+    int64_t  now = qemu_get_clock_ms( SHAPER_CLOCK );
     int      rearm = 0;
     int64_t  rearm_time = 0;
 
@@ -463,9 +463,9 @@
 
     delay->sessions     = NULL;
     delay->num_sessions = 0;
-    delay->timer        = qemu_new_timer( SHAPER_CLOCK,
-                                          (QEMUTimerCB*) netdelay_expires,
-                                          delay );
+    delay->timer        = qemu_new_timer_ms( SHAPER_CLOCK,
+                                             (QEMUTimerCB*) netdelay_expires,
+                                             delay );
     delay->active = 0;
     delay->min_ms = 0;
     delay->max_ms = 0;
@@ -553,7 +553,7 @@
                 delay->sessions      = session;
                 delay->num_sessions += 1;
 
-                session->expiration = qemu_get_clock( SHAPER_CLOCK ) + latency;
+                session->expiration = qemu_get_clock_ms( SHAPER_CLOCK ) + latency;
 
                 session->src_ip   = info->src_ip;
                 session->dst_ip   = info->dst_ip;
diff --git a/sysemu.h b/sysemu.h
index 0180dc0..27a3e0e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -8,7 +8,6 @@
 #include "qemu-timer.h"
 #include "qdict.h"
 #include "qerror.h"
-#include "outputchannel.h"
 
 #ifdef _WIN32
 #include <windows.h>
@@ -51,6 +50,7 @@
 int qemu_shutdown_requested(void);
 int qemu_reset_requested(void);
 int qemu_powerdown_requested(void);
+void qemu_system_killed(int signal, pid_t pid);
 #ifdef NEED_CPU_H
 #if !defined(TARGET_SPARC)
 // Please implement a power failure function to signal the OS
@@ -62,13 +62,9 @@
 void qemu_system_reset(void);
 
 void do_savevm(Monitor *mon, const char *name);
-void do_savevm_oc(OutputChannel *err, const char *name);
 void do_loadvm(Monitor *mon, const char *name);
-void do_loadvm_oc(OutputChannel *err, const char *name);
 void do_delvm(Monitor *mon, const char *name);
-void do_delvm_oc(OutputChannel *err, const char *name);
-void do_info_snapshots(Monitor *mon);
-void do_info_snapshots_oc(OutputChannel *out, OutputChannel *err);
+void do_info_snapshots(Monitor *mon, Monitor* err);
 
 void qemu_announce_self(void);
 
@@ -91,22 +87,6 @@
 #define qemu_error_new(fmt, ...) \
     qemu_error_internal(__FILE__, __LINE__, __func__, fmt, ## __VA_ARGS__)
 
-#ifdef _WIN32
-/* Polling handling */
-
-/* return TRUE if no sleep should be done afterwards */
-typedef int PollingFunc(void *opaque);
-
-int qemu_add_polling_cb(PollingFunc *func, void *opaque);
-void qemu_del_polling_cb(PollingFunc *func, void *opaque);
-
-/* Wait objects handling */
-typedef void WaitObjectFunc(void *opaque);
-
-int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
-void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
-#endif
-
 /* TAP win32 */
 int tap_win32_init(VLANState *vlan, const char *model,
                    const char *name, const char *ifname);
@@ -138,10 +118,6 @@
 extern int old_param;
 extern QEMUClock *rtc_clock;
 
-#ifdef CONFIG_KQEMU
-extern int kqemu_allowed;
-#endif
-
 #define MAX_NODES 64
 extern int nb_numa_nodes;
 extern uint64_t node_mem[MAX_NODES];
@@ -263,21 +239,6 @@
                       const char *source);
 #endif
 
-#ifdef HAS_AUDIO
-struct soundhw {
-    const char *name;
-    const char *descr;
-    int enabled;
-    int isa;
-    union {
-        int (*init_isa) (qemu_irq *pic);
-        int (*init_pci) (PCIBus *bus);
-    } init;
-};
-
-extern struct soundhw soundhw[];
-#endif
-
 void do_usb_add(Monitor *mon, const char *devname);
 void do_usb_del(Monitor *mon, const char *devname);
 void usb_info(Monitor *mon);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 240be27..f16e391 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -25,6 +25,8 @@
 
 #define CPUState struct CPUARMState
 
+#include "config.h"
+#include "qemu-common.h"
 #include "cpu-defs.h"
 
 #include "softfloat.h"
diff --git a/target-arm/helper-android.c b/target-arm/helper-android.c
new file mode 100644
index 0000000..8c203d4
--- /dev/null
+++ b/target-arm/helper-android.c
@@ -0,0 +1,51 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpu.h"
+#include "exec-all.h"
+#include "gdbstub.h"
+#include "def-helper.h"
+#include "helper-android.h"
+#include "qemu-common.h"
+
+#ifdef CONFIG_TRACE
+#include "android-trace.h"
+
+void  HELPER(traceTicks)(uint32_t  ticks)
+{
+    sim_time += ticks;
+}
+
+void  HELPER(traceInsn)(void)
+{
+    trace_insn_helper();
+}
+
+#if HOST_LONG_BITS == 32
+void HELPER(traceBB32)(uint64_t  bb_num, uint32_t  tb)
+{
+    trace_bb_helper(bb_num, (void*)tb);
+}
+#endif
+
+#if HOST_LONG_BITS == 64
+void HELPER(traceBB64)(uint64_t  bb_num, uint64_t  tb)
+{
+    trace_bb_helper(bb_num, (void*)tb);
+}
+#endif
+
+#endif /* CONFIG_TRACE */
+
+#ifdef CONFIG_MEMCHECK
+#include "memcheck/memcheck_api.h"
+
+void HELPER(on_call)(target_ulong pc, target_ulong ret) {
+    memcheck_on_call(pc, ret);
+}
+
+void HELPER(on_ret)(target_ulong ret) {
+    memcheck_on_ret(ret);
+}
+#endif  // CONFIG_MEMCHECK
diff --git a/target-arm/helper-android.h b/target-arm/helper-android.h
new file mode 100644
index 0000000..5342d1d
--- /dev/null
+++ b/target-arm/helper-android.h
@@ -0,0 +1,28 @@
+/* This file must be included from helper.h */
+#ifdef CONFIG_TRACE
+DEF_HELPER_1(traceTicks, void, i32)
+DEF_HELPER_0(traceInsn, void)
+#if HOST_LONG_BITS == 32
+DEF_HELPER_2(traceBB32, void, i64, i32)
+#endif
+#if HOST_LONG_BITS == 64
+DEF_HELPER_2(traceBB64, void, i64, i64)
+#endif
+#endif
+
+#ifdef CONFIG_MEMCHECK
+/* Hooks to translated BL/BLX. This callback is used to build thread's
+ * calling stack.
+ * Param:
+ *  First pointer contains guest PC where BL/BLX has been found.
+ *  Second pointer contains guest PC where BL/BLX will return.
+ */
+DEF_HELPER_2(on_call, void, i32, i32)
+/* Hooks to return from translated BL/BLX. This callback is used to build
+ * thread's calling stack.
+ * Param:
+ *  Pointer contains guest PC where BL/BLX will return.
+ */
+DEF_HELPER_1(on_ret, void, i32)
+#endif  // CONFIG_MEMCHECK
+#include "def-helper.h"
diff --git a/target-arm/helper.c b/target-arm/helper.c
index f595b2c..154aa46 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -8,11 +8,8 @@
 #include "helpers.h"
 #include "qemu-common.h"
 #ifdef CONFIG_TRACE
-#include "trace.h"
+#include "android-trace.h"
 #endif
-#ifdef CONFIG_MEMCHECK
-#include "memcheck/memcheck_api.h"
-#endif  // CONFIG_MEMCHECK
 
 static uint32_t cortexa8_cp15_c0_c1[8] =
 { 0x1031, 0x11, 0x400, 0, 0x31100003, 0x20000000, 0x01202000, 0x11 };
@@ -2634,34 +2631,6 @@
     return float32_to_int32(tmp, s);
 }
 
-#ifdef CONFIG_TRACE
-#include "trace.h"
-void  HELPER(traceTicks)(uint32_t  ticks)
-{
-    sim_time += ticks;
-}
-
-void  HELPER(traceInsn)(void)
-{
-    trace_insn_helper();
-}
-
-#if HOST_LONG_BITS == 32
-void HELPER(traceBB32)(uint64_t  bb_num, uint32_t  tb)
-{
-    trace_bb_helper(bb_num, (void*)tb);
-}
-#endif
-
-#if HOST_LONG_BITS == 64
-void HELPER(traceBB64)(uint64_t  bb_num, uint64_t  tb)
-{
-    trace_bb_helper(bb_num, (void*)tb);
-}
-#endif
-
-#endif /* CONFIG_TRACE */
-
 void HELPER(set_teecr)(CPUState *env, uint32_t val)
 {
     val &= 1;
@@ -2670,13 +2639,3 @@
         tb_flush(env);
     }
 }
-
-#ifdef CONFIG_MEMCHECK
-void HELPER(on_call)(target_ulong pc, target_ulong ret) {
-    memcheck_on_call(pc, ret);
-}
-
-void HELPER(on_ret)(target_ulong ret) {
-    memcheck_on_ret(ret);
-}
-#endif  // CONFIG_MEMCHECK
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 5dd4925..bf210fe 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -15,17 +15,6 @@
 DEF_HELPER_1(rbit, i32, i32)
 DEF_HELPER_1(abs, i32, i32)
 
-#ifdef CONFIG_TRACE
-DEF_HELPER_1(traceTicks, void, i32)
-DEF_HELPER_0(traceInsn, void)
-#if HOST_LONG_BITS == 32
-DEF_HELPER_2(traceBB32, void, i64, i32)
-#endif
-#if HOST_LONG_BITS == 64
-DEF_HELPER_2(traceBB64, void, i64, i64)
-#endif
-#endif
-
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
     DEF_HELPER_3(pfx ## sub8, i32, i32, i32, ptr) \
@@ -466,19 +455,4 @@
 
 DEF_HELPER_2(set_teecr, void, env, i32)
 
-#ifdef CONFIG_MEMCHECK
-/* Hooks to translated BL/BLX. This callback is used to build thread's
- * calling stack.
- * Param:
- *  First pointer contains guest PC where BL/BLX has been found.
- *  Second pointer contains guest PC where BL/BLX will return.
- */
-DEF_HELPER_2(on_call, void, i32, i32)
-/* Hooks to return from translated BL/BLX. This callback is used to build
- * thread's calling stack.
- * Param:
- *  Pointer contains guest PC where BL/BLX will return.
- */
-DEF_HELPER_1(on_ret, void, i32)
-#endif  // CONFIG_MEMCHECK
-#include "def-helper.h"
+#include "helper-android.h"
diff --git a/target-arm/memcheck_arm_helpers.h b/target-arm/memcheck_arm_helpers.h
deleted file mode 100644
index d13b89d..0000000
--- a/target-arm/memcheck_arm_helpers.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/* Copyright (C) 2007-2010 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-/*
- * Contains implementation of memcheck helper routines used by ARM's translator.
- */
-
-#ifndef QEMU_TARGET_ARM_MEMCHECK_ARM_HELPERS_H
-#define QEMU_TARGET_ARM_MEMCHECK_ARM_HELPERS_H
-
-/* This file should compile iff qemu is built with memory checking
- * configuration turned on. */
-#ifndef CONFIG_MEMCHECK
-#error CONFIG_MEMCHECK is not defined.
-#endif  // CONFIG_MEMCHECK
-
-#include "helpers.h"
-#include "memcheck/memcheck_api.h"
-
-/* Array of return addresses detected in gen_intermediate_code_internal. */
-AddrArray   ret_addresses = { 0 };
-
-/* Checks if call stack collection is enabled for the given context.
- * We collect call stack only for the user mode (both, code and CPU), and on
- * condition that memory checking, and call collection are enabled. It also
- * seems that collecting stack for the linker code is excessive, as it doesn't
- * provide much useful info for the memory checker.
- * Return:
- *  boolean: 1 if stack collection is enabled for the given context, or 0 if
- *  it's not enabled.
- */
-static inline int
-watch_call_stack(DisasContext *s)
-{
-    if (!memcheck_enabled || !memcheck_watch_call_stack) {
-        return 0;
-    }
-
-#ifndef CONFIG_USER_ONLY
-    if (!s->user) {
-        /* We're not interested in kernel mode CPU stack. */
-        return 0;
-    }
-#endif  // CONFIG_USER_ONLY
-
-    /* We're not interested in kernel code stack (pc >= 0xC0000000).
-     * Android specific: We're also not interested in android linker stack
-     * (0xB0000000 - 0xB00FFFFF) */
-    if (s->pc >= 0xC0000000 || (0xB0000000 <= s->pc && s->pc <= 0xB00FFFFF)) {
-        return 0;
-    }
-    return 1;
-}
-
-/* Checks if given ARM instruction is BL, or BLX.
- * Return:
- *  boolean: 1 if ARM instruction is BL/BLX, or 0 if it's not.
- */
-static inline int
-is_arm_bl_or_blx(uint32_t insn)
-{
-    /* ARM BL  (immediate): xxxx 1011 xxxx xxxx xxxx xxxx xxxx xxxx
-     * ARM BLX (immediate): 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx
-     * ARM BLX (register):  xxxx 0001 0010 xxxx xxxx xxxx 0011 xxxx
-     */
-    if ((insn & 0x0F000000) == 0x0B000000 ||    // ARM BL (imm)
-        (insn & 0xFE000000) == 0xFA000000 ||    // ARM BLX (imm)
-        (insn & 0x0FF000F0) == 0x12000030) {    // ARM BLX (reg)
-        return 1;
-    }
-    return 0;
-}
-
-/* Checks if given THUMB instruction is BL, or BLX.
- * Param:
- *  insn - THUMB instruction to check.
- *  pc - Emulated PC address for the instruction.
- *  ret_off - If insn is BL, or BLX, upon return ret_off contains
- *      instruction's byte size. If instruction is not BL, or BLX, content of
- *      this parameter is undefined on return.
- * Return:
- *  boolean: 1 if THUMB instruction is BL/BLX, or 0 if it's not.
- */
-static inline int
-is_thumb_bl_or_blx(uint16_t insn, target_ulong pc, target_ulong* ret_off)
-{
-    /* THUMB BLX(register):      0100 0111 1xxx xxxx
-     * THUMB BL(1-stimmediate):  1111 0xxx xxxx xxxx
-     * THUMB BLX(1-stimmediate): 1111 0xxx xxxx xxxx
-     */
-    if ((insn & 0xFF80) == 0x4780) {            // THUMB BLX(reg)
-        *ret_off = 2;
-        return 1;
-    } else if ((insn & 0xF800) == 0xF000) {     // THUMB BL(X)(imm)
-        // This is a 32-bit THUMB. Get the second half of the instuction.
-        insn = lduw_code(pc + 2);
-        if ((insn & 0xC000) == 0xC000) {
-            *ret_off = 4;
-            return 1;
-        }
-    }
-    return 0;
-}
-
-/* Registers a return address detected in gen_intermediate_code_internal.
- * NOTE: If return address has been registered as new in this routine, this will
- * cause invalidation of all existing TBs that contain translated code for that
- * address.
- * NOTE: Before storing PC address in the array, we convert it from emulated
- * address to a physical address. This way we deal with emulated addresses
- * overlapping for different processes.
- * Param:
- *  env - CPU state environment.
- *  addr - Return address to register.
- * Return:
- *  1  - Address has been registered in this routine.
- *  -1 - Address has been already registered before.
- *  0  - Insufficient memory.
- */
-static int
-register_ret_address(CPUState* env, target_ulong addr)
-{
-    int ret;
-    if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
-        /* Address belongs to a module that always loads at this fixed address.
-         * So, we can keep this address in the global array. */
-        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
-    } else {
-        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
-    }
-    assert(ret != 0);
-
-    if (ret == 1) {
-        /* If this ret address has been added to the array, we need to make sure
-         * that all TBs that contain translated code for that address are
-         * invalidated. This will force retranslation of that code, which will
-         * make sure that our ret callback is set. This is also important part
-         * in keeping consistency between translated code, and intermediate code
-         * generated for guest PC calculation. If we don't invalidate TBs, and
-         * PC calculation code is generated, there will be inconsistency due to
-         * the fact that TB code doesn't contain ret callback, while PC calc
-         * code contains it. This inconsistency will lead to an immanent
-         * segmentation fault.*/
-        TranslationBlock* tb;
-        const target_ulong phys_pc = get_phys_addr_code(env, addr);
-        const target_ulong phys_page1 = phys_pc & TARGET_PAGE_MASK;
-
-        for(tb = tb_phys_hash[tb_phys_hash_func(phys_pc)]; tb != NULL;
-            tb = tb->phys_hash_next) {
-            if (tb->pc == addr && tb->page_addr[0] == phys_page1) {
-                tb_phys_invalidate(tb, -1);
-            }
-        }
-    }
-    return ret;
-}
-
-/* Checks if given address is recognized as a return address.
- * Return:
- *  boolean: 1 if if given address is recognized as a return address,
- *  or 0 if it's not.
- */
-static inline int
-is_ret_address(CPUState* env, target_ulong addr)
-{
-    if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
-        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
-    } else {
-        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
-    }
-}
-
-/* Adds "on_call" callback into generated intermediate code. */
-static inline void
-set_on_call(target_ulong pc, target_ulong ret)
-{
-    TCGv_ptr tmp_pc = tcg_const_ptr(pc & ~1);
-    TCGv_ptr tmp_ret = tcg_const_ptr(ret & ~1);
-
-    gen_helper_on_call(tmp_pc, tmp_ret);
-
-    tcg_temp_free_ptr(tmp_ret);
-    tcg_temp_free_ptr(tmp_pc);
-}
-
-/* Adds "on_ret" callback into generated intermediate code. */
-static inline void
-set_on_ret(target_ulong ret)
-{
-    TCGv_ptr tmp_ret = tcg_const_ptr(ret & ~1);
-
-    gen_helper_on_ret(tmp_ret);
-
-    tcg_temp_free_ptr(tmp_ret);
-}
-
-#endif  // QEMU_TARGET_ARM_MEMCHECK_ARM_HELPERS_H
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6d8db29..bfb6be5 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -126,7 +126,7 @@
             if (tb) {
                 /* the PC is inside the translated code. It means that we have
                    a virtual CPU fault */
-                cpu_restore_state(tb, env, pc, NULL);
+                cpu_restore_state(tb, env, pc);
             }
         }
         raise_exception(env->exception_index);
diff --git a/target-arm/translate-android.h b/target-arm/translate-android.h
new file mode 100644
index 0000000..274c876
--- /dev/null
+++ b/target-arm/translate-android.h
@@ -0,0 +1,387 @@
+/* This file must be included from target-arm/translate.c */
+
+/*****
+ *****
+ *****
+ *****  C O N F I G _ M E M C H E C K
+ *****
+ *****
+ *****/
+
+#ifdef CONFIG_MEMCHECK
+
+/*
+ * Memchecker addition in this module is intended to inject qemu callback into
+ * translated code for each BL/BLX, as well as BL/BLX returns. These callbacks
+ * are used to build calling stack of the thread in order to provide better
+ * reporting on memory access violations. Although this may seem as something
+ * that may gratly impact the performance, in reality it doesn't. Overhead that
+ * is added by setting up callbacks and by callbacks themselves is neglectable.
+ * On the other hand, maintaining calling stack can indeed add some perf.
+ * overhead (TODO: provide solid numbers here).
+ * One of the things to watch out with regards to injecting callbacks, is
+ * consistency between intermediate code generated for execution, and for guest
+ * PC address calculation. If code doesn't match, a segmentation fault is
+ * guaranteed.
+ */
+
+#include "memcheck/memcheck_proc_management.h"
+#include "memcheck/memcheck_api.h"
+
+/* Array of return addresses detected in gen_intermediate_code_internal. */
+AddrArray   ret_addresses = { 0 };
+
+/* Checks if call stack collection is enabled for the given context.
+ * We collect call stack only for the user mode (both, code and CPU), and on
+ * condition that memory checking, and call collection are enabled. It also
+ * seems that collecting stack for the linker code is excessive, as it doesn't
+ * provide much useful info for the memory checker.
+ * Return:
+ *  boolean: 1 if stack collection is enabled for the given context, or 0 if
+ *  it's not enabled.
+ */
+static inline int
+watch_call_stack(DisasContext *s)
+{
+    if (!memcheck_enabled || !memcheck_watch_call_stack) {
+        return 0;
+    }
+
+#ifndef CONFIG_USER_ONLY
+    if (!s->user) {
+        /* We're not interested in kernel mode CPU stack. */
+        return 0;
+    }
+#endif  // CONFIG_USER_ONLY
+
+    /* We're not interested in kernel code stack (pc >= 0xC0000000).
+     * Android specific: We're also not interested in android linker stack
+     * (0xB0000000 - 0xB00FFFFF) */
+    if (s->pc >= 0xC0000000 || (0xB0000000 <= s->pc && s->pc <= 0xB00FFFFF)) {
+        return 0;
+    }
+    return 1;
+}
+
+/* Checks if given ARM instruction is BL, or BLX.
+ * Return:
+ *  boolean: 1 if ARM instruction is BL/BLX, or 0 if it's not.
+ */
+static inline int
+is_arm_bl_or_blx(uint32_t insn)
+{
+    /* ARM BL  (immediate): xxxx 1011 xxxx xxxx xxxx xxxx xxxx xxxx
+     * ARM BLX (immediate): 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx
+     * ARM BLX (register):  xxxx 0001 0010 xxxx xxxx xxxx 0011 xxxx
+     */
+    if ((insn & 0x0F000000) == 0x0B000000 ||    // ARM BL (imm)
+        (insn & 0xFE000000) == 0xFA000000 ||    // ARM BLX (imm)
+        (insn & 0x0FF000F0) == 0x12000030) {    // ARM BLX (reg)
+        return 1;
+    }
+    return 0;
+}
+
+/* Checks if given THUMB instruction is BL, or BLX.
+ * Param:
+ *  insn - THUMB instruction to check.
+ *  pc - Emulated PC address for the instruction.
+ *  ret_off - If insn is BL, or BLX, upon return ret_off contains
+ *      instruction's byte size. If instruction is not BL, or BLX, content of
+ *      this parameter is undefined on return.
+ * Return:
+ *  boolean: 1 if THUMB instruction is BL/BLX, or 0 if it's not.
+ */
+static inline int
+is_thumb_bl_or_blx(uint16_t insn, target_ulong pc, target_ulong* ret_off)
+{
+    /* THUMB BLX(register):      0100 0111 1xxx xxxx
+     * THUMB BL(1-stimmediate):  1111 0xxx xxxx xxxx
+     * THUMB BLX(1-stimmediate): 1111 0xxx xxxx xxxx
+     */
+    if ((insn & 0xFF80) == 0x4780) {            // THUMB BLX(reg)
+        *ret_off = 2;
+        return 1;
+    } else if ((insn & 0xF800) == 0xF000) {     // THUMB BL(X)(imm)
+        // This is a 32-bit THUMB. Get the second half of the instuction.
+        insn = lduw_code(pc + 2);
+        if ((insn & 0xC000) == 0xC000) {
+            *ret_off = 4;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Registers a return address detected in gen_intermediate_code_internal.
+ * NOTE: If return address has been registered as new in this routine, this will
+ * cause invalidation of all existing TBs that contain translated code for that
+ * address.
+ * NOTE: Before storing PC address in the array, we convert it from emulated
+ * address to a physical address. This way we deal with emulated addresses
+ * overlapping for different processes.
+ * Param:
+ *  env - CPU state environment.
+ *  addr - Return address to register.
+ * Return:
+ *  1  - Address has been registered in this routine.
+ *  -1 - Address has been already registered before.
+ *  0  - Insufficient memory.
+ */
+static int
+register_ret_address(CPUState* env, target_ulong addr)
+{
+    int ret;
+    if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
+        /* Address belongs to a module that always loads at this fixed address.
+         * So, we can keep this address in the global array. */
+        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
+    } else {
+        ret = addrarray_add(&ret_addresses, get_phys_addr_code(env, addr));
+    }
+    assert(ret != 0);
+
+    if (ret == 1) {
+        /* If this ret address has been added to the array, we need to make sure
+         * that all TBs that contain translated code for that address are
+         * invalidated. This will force retranslation of that code, which will
+         * make sure that our ret callback is set. This is also important part
+         * in keeping consistency between translated code, and intermediate code
+         * generated for guest PC calculation. If we don't invalidate TBs, and
+         * PC calculation code is generated, there will be inconsistency due to
+         * the fact that TB code doesn't contain ret callback, while PC calc
+         * code contains it. This inconsistency will lead to an immanent
+         * segmentation fault.*/
+        TranslationBlock* tb;
+        const target_ulong phys_pc = get_phys_addr_code(env, addr);
+        const target_ulong phys_page1 = phys_pc & TARGET_PAGE_MASK;
+
+        for(tb = tb_phys_hash[tb_phys_hash_func(phys_pc)]; tb != NULL;
+            tb = tb->phys_hash_next) {
+            if (tb->pc == addr && tb->page_addr[0] == phys_page1) {
+                tb_phys_invalidate(tb, -1);
+            }
+        }
+    }
+    return ret;
+}
+
+/* Checks if given address is recognized as a return address.
+ * Return:
+ *  boolean: 1 if if given address is recognized as a return address,
+ *  or 0 if it's not.
+ */
+static inline int
+is_ret_address(CPUState* env, target_ulong addr)
+{
+    if ((0x90000000 <= addr && addr <= 0xBFFFFFFF)) {
+        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
+    } else {
+        return addrarray_check(&ret_addresses, get_phys_addr_code(env, addr));
+    }
+}
+
+/* Adds "on_call" callback into generated intermediate code. */
+static inline void
+set_on_call(target_ulong pc, target_ulong ret)
+{
+    TCGv_ptr tmp_pc = tcg_const_ptr(pc & ~1);
+    TCGv_ptr tmp_ret = tcg_const_ptr(ret & ~1);
+
+    gen_helper_on_call(tmp_pc, tmp_ret);
+
+    tcg_temp_free_ptr(tmp_ret);
+    tcg_temp_free_ptr(tmp_pc);
+}
+
+/* Adds "on_ret" callback into generated intermediate code. */
+static inline void
+set_on_ret(target_ulong ret)
+{
+    TCGv_ptr tmp_ret = tcg_const_ptr(ret & ~1);
+
+    gen_helper_on_ret(tmp_ret);
+
+    tcg_temp_free_ptr(tmp_ret);
+}
+
+
+#  define ANDROID_WATCH_CALLSTACK_ARM(s) \
+    if (watch_call_stack(s)) { \
+        if (is_ret_address(env, s->pc)) { \
+            set_on_ret(s->pc); \
+        } \
+        if (is_arm_bl_or_blx(insn)) { \
+            set_on_call(s->pc, s->pc + 4); \
+            if (!s->search_pc) { \
+                register_ret_address(env, s->pc + 4); \
+            } \
+        } \
+    }
+
+#  define ANDROID_WATCH_CALLSTACK_THUMB(s) \
+    if (watch_call_stack(s)) { \
+        target_ulong ret_off; \
+        if (is_ret_address(env, s->pc)) { \
+            set_on_ret(s->pc); \
+        } \
+        if (is_thumb_bl_or_blx(insn, s->pc, &ret_off)) { \
+            set_on_call(s->pc, s->pc + ret_off); \
+            if (!s->search_pc) { \
+                register_ret_address(env, s->pc + ret_off); \
+            } \
+        } \
+    }
+
+#  define ANDROID_DISAS_CONTEXT_FIELDS \
+    int search_pc;
+
+#  define ANDROID_START_CODEGEN(search_pc) \
+    dc->search_pc = search_pc
+
+        /* When memchecker is enabled, we need to keep a match between
+         * translated PC and guest PCs, so memchecker can quickly covert
+         * one to another. Note that we do that only for user mode. */
+#  define ANDROID_CHECK_CODEGEN_PC(search_pc) \
+        ((search_pc) || (memcheck_enabled && dc->user))
+
+#  define ANDROID_END_CODEGEN() \
+    do { \
+        if (memcheck_enabled && dc->user) { \
+            j = gen_opc_ptr - gen_opc_buf; \
+            lj++; \
+            while (lj <= j) \
+                gen_opc_instr_start[lj++] = 0; \
+        } \
+    } while (0)
+
+#else /* !CONFIG_MEMCHECK */
+
+#  define ANDROID_WATCH_CALLSTACK_ARM     ((void)0)
+#  define ANDROID_WATCH_CALLSTACK_THUMB   ((void)0)
+#  define ANDROID_DISAS_CONTEXT_FIELDS     /* nothing */
+#  define ANDROID_START_CODEGEN(s)         ((void)(s))
+#  define ANDROID_CHECK_CODEGEN_PC(s)      (s)
+#  define ANDROID_END_CODEGEN()            ((void)0)
+
+#endif  /* !CONFIG_MEMCHECK */
+
+
+/*****
+ *****
+ *****
+ *****  C O N F I G _ T R A C E
+ *****
+ *****
+ *****/
+
+#ifdef CONFIG_TRACE
+
+#include "android-trace.h"
+#define  gen_traceInsn()   gen_helper_traceInsn()
+
+static void
+gen_traceTicks( int  count )
+{
+    TCGv  tmp = tcg_temp_new_i32();
+    tcg_gen_movi_i32(tmp, count);
+    gen_helper_traceTicks(tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static void
+gen_traceBB( uint64_t  bbNum, void* tb )
+{
+#if HOST_LONG_BITS == 32
+    TCGv_i64  tmpNum = tcg_temp_new_i64();
+    TCGv_i32  tmpTb  = tcg_temp_new_i32();
+
+    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
+    tcg_gen_movi_i32(tmpTb,  (int32_t)tb);
+    gen_helper_traceBB32(tmpNum, tmpTb);
+    tcg_temp_free_i32(tmpTb);
+    tcg_temp_free_i64(tmpNum);
+#elif HOST_LONG_BITS == 64
+    TCGv_i64  tmpNum = tcg_temp_new_i64();
+    TCGv_i64  tmpTb  = tcg_temp_new_i64();
+
+    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
+    tcg_gen_movi_i64(tmpTb,  (int64_t)tb);
+    gen_helper_traceBB64(tmpNum, tmpTb);
+    tcg_temp_free_i64(tmpTb);
+    tcg_temp_free_i64(tmpNum);
+#endif
+}
+
+#  define ANDROID_TRACE_DECLS   int ticks = 0;
+
+#  define ANDROID_TRACE_START_ARM() \
+    do { \
+        if (tracing) { \
+            trace_add_insn(insn, 0); \
+            ticks = get_insn_ticks_arm(insn); \
+            gen_traceInsn(); \
+        } \
+    } while (0)
+
+#  define ANDROID_TRACE_START_THUMB() \
+    do { \
+        if (tracing) { \
+            int  ticks = get_insn_ticks_thumb(insn); \
+            trace_add_insn( insn_wrap_thumb(insn), 1 ); \
+            gen_traceInsn(); \
+            gen_traceTicks(ticks); \
+        } \
+    } while (0)
+
+#  define ANDROID_TRACE_GEN_TICKS() \
+    do { \
+        if (tracing) { \
+        } \
+    } while (0)
+
+#  define ANDROID_TRACE_GEN_SINGLE_TICK() \
+    do { \
+        if (tracing) { \
+            gen_traceTicks(1); \
+            ticks -= 1; \
+        } \
+    } while (0)
+
+# define ANDROID_TRACE_GEN_OTHER_TICKS() \
+    do { \
+        if (tracing && ticks > 0) { \
+            gen_traceTicks(ticks); \
+        } \
+    } while (0)
+
+#  define ANDROID_TRACE_START_BB() \
+    do { \
+        if (tracing) { \
+            gen_traceBB(trace_static_bb_num(), tb); \
+            trace_bb_start(dc->pc); \
+        } \
+    } while (0)
+
+#  define ANDROID_TRACE_END_BB() \
+    do { \
+        if (tracing) { \
+            trace_bb_end(); \
+        } \
+    } while (0)
+
+#else /* !CONFIG_TRACE */
+
+#  define ANDROID_TRACE_DECLS         /* nothing */
+#  define ANDROID_TRACE_START_ARM()   ((void)0)
+#  define ANDROID_TRACE_START_THUMB() ((void)0)
+
+#  define ANDROID_TRACE_GEN_TICKS()        ((void)0)
+#  define ANDROID_TRACE_GEN_SINGLE_TICK()  ((void)0)
+#  define ANDROID_TRACE_GEN_OTHER_TICKS()  ((void)0)
+
+#  define ANDROID_TRACE_START_BB()         ((void)0)
+#  define ANDROID_TRACE_END_BB()           ((void)0)
+
+#endif /* !CONFIG_TRACE */
+
diff --git a/target-arm/translate.c b/target-arm/translate.c
index f93a0cd..1e189f8 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -30,10 +30,6 @@
 #include "tcg-op.h"
 #include "qemu-log.h"
 
-#ifdef CONFIG_TRACE
-#include "trace.h"
-#endif
-
 #include "helpers.h"
 #define GEN_HELPER 1
 #include "helpers.h"
@@ -66,39 +62,17 @@
 #endif
 #ifdef CONFIG_MEMCHECK
     int search_pc;
-#endif  // CONFIG_MEMCHECK
+#endif
 } DisasContext;
 
+#include "translate-android.h"
+
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(s) 1
 #else
 #define IS_USER(s) (s->user)
 #endif
 
-#ifdef CONFIG_TRACE
-#include "helpers.h"
-#endif /* CONFIG_TRACE */
-
-#ifdef CONFIG_MEMCHECK
-/*
- * Memchecker addition in this module is intended to inject qemu callback into
- * translated code for each BL/BLX, as well as BL/BLX returns. These callbacks
- * are used to build calling stack of the thread in order to provide better
- * reporting on memory access violations. Although this may seem as something
- * that may gratly impact the performance, in reality it doesn't. Overhead that
- * is added by setting up callbacks and by callbacks themselves is neglectable.
- * On the other hand, maintaining calling stack can indeed add some perf.
- * overhead (TODO: provide solid numbers here).
- * One of the things to watch out with regards to injecting callbacks, is
- * consistency between intermediate code generated for execution, and for guest
- * PC address calculation. If code doesn't match, a segmentation fault is
- * guaranteed.
- */
-
-#include "memcheck/memcheck_proc_management.h"
-#include "memcheck_arm_helpers.h"
-#endif  // CONFIG_MEMCHECK
-
 /* These instructions trap after executing, so defer them until after the
    conditional executions state has been updated.  */
 #define DISAS_WFI 4
@@ -5756,50 +5730,10 @@
 }
 
 
-#ifdef CONFIG_TRACE
-
-#define  gen_traceInsn()   gen_helper_traceInsn()
-
-static void
-gen_traceTicks( int  count )
-{
-    TCGv  tmp = tcg_temp_new_i32();
-    tcg_gen_movi_i32(tmp, count);
-    gen_helper_traceTicks(tmp);
-    tcg_temp_free_i32(tmp);
-}
-
-static void
-gen_traceBB( uint64_t  bbNum, void* tb )
-{
-#if HOST_LONG_BITS == 32
-    TCGv_i64  tmpNum = tcg_temp_new_i64();
-    TCGv_i32  tmpTb  = tcg_temp_new_i32();
-
-    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
-    tcg_gen_movi_i32(tmpTb,  (int32_t)tb);
-    gen_helper_traceBB32(tmpNum, tmpTb);
-    tcg_temp_free_i32(tmpTb);
-    tcg_temp_free_i64(tmpNum);
-#elif HOST_LONG_BITS == 64
-    TCGv_i64  tmpNum = tcg_temp_new_i64();
-    TCGv_i64  tmpTb  = tcg_temp_new_i64();
-
-    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
-    tcg_gen_movi_i64(tmpTb,  (int64_t)tb);
-    gen_helper_traceBB64(tmpNum, tmpTb);
-    tcg_temp_free_i64(tmpTb);
-    tcg_temp_free_i64(tmpNum);
-#endif
-}
-#endif /* CONFIG_TRACE */
-
 static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
-#ifdef CONFIG_TRACE
-    int  ticks = 0;
-#endif
+    ANDROID_TRACE_DECLS
     TCGv tmp;
     TCGv tmp2;
     TCGv tmp3;
@@ -5807,27 +5741,9 @@
     TCGv_i64 tmp64;
     insn = ldl_code(s->pc);
 
-#ifdef CONFIG_MEMCHECK
-    if (watch_call_stack(s)) {
-        if (is_ret_address(env, s->pc)) {
-            set_on_ret(s->pc);
-        }
-        if (is_arm_bl_or_blx(insn)) {
-            set_on_call(s->pc, s->pc + 4);
-            if (!s->search_pc) {
-                register_ret_address(env, s->pc + 4);
-            }
-        }
-    }
-#endif  // CONFIG_MEMCHECK
+    ANDROID_WATCH_CALLSTACK_ARM(s);
 
-#ifdef CONFIG_TRACE
-    if (tracing) {
-        trace_add_insn(insn, 0);
-        ticks = get_insn_ticks_arm(insn);
-        gen_traceInsn();
-    }
-#endif
+    ANDROID_TRACE_START_ARM();
 
     s->pc += 4;
 
@@ -5836,11 +5752,7 @@
         goto illegal_op;
     cond = insn >> 28;
     if (cond == 0xf){
-#ifdef CONFIG_TRACE
-        if (tracing) {
-            gen_traceTicks(ticks);
-        }
-#endif
+        ANDROID_TRACE_GEN_TICKS();
         /* Unconditional instructions.  */
         if (((insn >> 25) & 7) == 1) {
             /* NEON Data processing.  */
@@ -6028,25 +5940,14 @@
         goto illegal_op;
     }
     if (cond != 0xe) {
-#ifdef CONFIG_TRACE
-        if (tracing) {
-            /* a non-executed conditional instruction takes */
-            /* only 1 cycle */
-            gen_traceTicks(1);
-            ticks -= 1;
-        }
-#endif
+        ANDROID_TRACE_GEN_SINGLE_TICK();
         /* if not always execute, we generate a conditional jump to
            next instruction */
         s->condlabel = gen_new_label();
         gen_test_cc(cond ^ 1, s->condlabel);
         s->condjmp = 1;
     }
-#ifdef CONFIG_TRACE
-    if (tracing && ticks > 0) {
-        gen_traceTicks(ticks);
-    }
-#endif
+    ANDROID_TRACE_GEN_OTHER_TICKS();
     if ((insn & 0x0f900000) == 0x03000000) {
         if ((insn & (1 << 21)) == 0) {
             ARCH(6T2);
@@ -7198,14 +7099,7 @@
     }
 
     insn = lduw_code(s->pc);
-#ifdef CONFIG_TRACE
-    if (tracing) {
-        int  ticks = get_insn_ticks_thumb(insn);
-        trace_add_insn( insn_wrap_thumb(insn), 1 );
-        gen_traceInsn();
-        gen_traceTicks(ticks);
-    }
-#endif
+    ANDROID_TRACE_START_THUMB();
 
     insn |= (uint32_t)insn_hw1 << 16;
 
@@ -8188,29 +8082,10 @@
 
     insn = lduw_code(s->pc);
 
-#ifdef CONFIG_MEMCHECK
-    if (watch_call_stack(s)) {
-        target_ulong ret_off;
-        if (is_ret_address(env, s->pc)) {
-            set_on_ret(s->pc);
-        }
-        if (is_thumb_bl_or_blx(insn, s->pc, &ret_off)) {
-            set_on_call(s->pc, s->pc + ret_off);
-            if (!s->search_pc) {
-                register_ret_address(env, s->pc + ret_off);
-            }
-        }
-    }
-#endif  // CONFIG_MEMCHECK
+    ANDROID_WATCH_CALLSTACK_THUMB(s);
 
-#ifdef CONFIG_TRACE
-    if (tracing) {
-        int  ticks = get_insn_ticks_thumb(insn);
-        trace_add_insn( insn_wrap_thumb(insn), 1 );
-        gen_traceInsn();
-        gen_traceTicks(ticks);
-    }
-#endif
+    ANDROID_TRACE_START_THUMB();
+
     s->pc += 2;
 
     switch (insn >> 12) {
@@ -8888,9 +8763,7 @@
         dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
     }
 #endif
-#ifdef CONFIG_MEMCHECK
-    dc->search_pc = search_pc;
-#endif  // CONFIG_MEMCHECK
+    ANDROID_START_CODEGEN(search_pc);
     cpu_F0s = tcg_temp_new_i32();
     cpu_F1s = tcg_temp_new_i32();
     cpu_F0d = tcg_temp_new_i64();
@@ -8907,12 +8780,7 @@
         max_insns = CF_COUNT_MASK;
 
     gen_icount_start();
-#ifdef CONFIG_TRACE
-    if (tracing) {
-        gen_traceBB(trace_static.bb_num, tb);
-        trace_bb_start(dc->pc);
-    }
-#endif
+    ANDROID_TRACE_START_BB();
 
     do {
 #ifdef CONFIG_USER_ONLY
@@ -8950,14 +8818,7 @@
             }
         }
 
-#ifdef CONFIG_MEMCHECK
-        /* When memchecker is enabled, we need to keep a match between
-         * translated PC and guest PCs, so memchecker can quickly covert
-         * one to another. Note that we do that only for user mode. */
-        if (search_pc || (memcheck_enabled && dc->user)) {
-#else   // CONFIG_MEMCHECK
-        if (search_pc) {
-#endif  // CONFIG_MEMCHECK
+        if (ANDROID_CHECK_CODEGEN_PC(search_pc)) {
             j = gen_opc_ptr - gen_opc_buf;
             if (lj < j) {
                 lj++;
@@ -9007,11 +8868,7 @@
              dc->pc < next_page_start &&
              num_insns < max_insns);
 
-#ifdef CONFIG_TRACE
-    if (tracing) {
-        trace_bb_end();
-    }
-#endif
+    ANDROID_TRACE_END_BB();
 
     if (tb->cflags & CF_LAST_IO) {
         if (dc->condjmp) {
@@ -9104,14 +8961,7 @@
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
     } else {
-#ifdef CONFIG_MEMCHECK
-        if (memcheck_enabled && dc->user) {
-            j = gen_opc_ptr - gen_opc_buf;
-            lj++;
-            while (lj <= j)
-                gen_opc_instr_start[lj++] = 0;
-        }
-#endif  // CONFIG_MEMCHECK
+        ANDROID_END_CODEGEN();
         tb->size = dc->pc - pc_start;
         tb->icount = num_insns;
     }
@@ -9185,8 +9035,7 @@
 #endif
 }
 
-void gen_pc_load(CPUState *env, TranslationBlock *tb,
-                unsigned long searched_pc, int pc_pos, void *puc)
+void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)
 {
     env->regs[15] = gen_opc_pc[pc_pos];
 }
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 7a7829a..0815482 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -1891,7 +1891,7 @@
         eflags |= CC_Z;
     } else {
         /* always do the store */
-        stq(a0, d); 
+        stq(a0, d);
         EDX = (uint32_t)(d >> 32);
         EAX = (uint32_t)d;
         eflags &= ~CC_Z;
@@ -1916,8 +1916,8 @@
         eflags |= CC_Z;
     } else {
         /* always do the store */
-        stq(a0, d0); 
-        stq(a0 + 8, d1); 
+        stq(a0, d0);
+        stq(a0 + 8, d1);
         EDX = d1;
         EAX = d0;
         eflags &= ~CC_Z;
@@ -2309,7 +2309,7 @@
 }
 
 /* protected mode call */
-void helper_lcall_protected(int new_cs, target_ulong new_eip, 
+void helper_lcall_protected(int new_cs, target_ulong new_eip,
                             int shift, int next_eip_addend)
 {
     int new_stack, i;
@@ -3009,7 +3009,7 @@
         raise_exception(EXCP0D_GPF);
     }
     helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0);
-    
+
     /* currently unimplemented */
     raise_exception_err(EXCP06_ILLOP, 0);
 }
@@ -4393,7 +4393,7 @@
     if (data64) {
         stq(ptr + 0x08, 0); /* rip */
         stq(ptr + 0x10, 0); /* rdp */
-    } else 
+    } else
 #endif
     {
         stl(ptr + 0x08, 0); /* eip */
@@ -4688,7 +4688,7 @@
 {
     helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
     EIP += next_eip_addend;
-    
+
     do_hlt();
 }
 
@@ -4848,7 +4848,7 @@
             if (tb) {
                 /* the PC is inside the translated code. It means that we have
                    a virtual CPU fault */
-                cpu_restore_state(tb, env, pc, NULL);
+                cpu_restore_state(tb, env, pc);
             }
         }
         raise_exception_err(env->exception_index, env->error_code);
@@ -4862,16 +4862,16 @@
 #if defined(CONFIG_USER_ONLY)
 
 void helper_vmrun(int aflag, int next_eip_addend)
-{ 
+{
 }
-void helper_vmmcall(void) 
-{ 
+void helper_vmmcall(void)
+{
 }
 void helper_vmload(int aflag)
-{ 
+{
 }
 void helper_vmsave(int aflag)
-{ 
+{
 }
 void helper_stgi(void)
 {
@@ -4879,20 +4879,20 @@
 void helper_clgi(void)
 {
 }
-void helper_skinit(void) 
-{ 
+void helper_skinit(void)
+{
 }
 void helper_invlpga(int aflag)
-{ 
+{
 }
-void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) 
-{ 
+void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
+{
 }
 void helper_svm_check_intercept_param(uint32_t type, uint64_t param)
 {
 }
 
-void helper_svm_check_io(uint32_t port, uint32_t param, 
+void helper_svm_check_io(uint32_t port, uint32_t param,
                          uint32_t next_eip_addend)
 {
 }
@@ -4901,16 +4901,16 @@
 static inline void svm_save_seg(target_phys_addr_t addr,
                                 const SegmentCache *sc)
 {
-    stw_phys(addr + offsetof(struct vmcb_seg, selector), 
+    stw_phys(addr + offsetof(struct vmcb_seg, selector),
              sc->selector);
-    stq_phys(addr + offsetof(struct vmcb_seg, base), 
+    stq_phys(addr + offsetof(struct vmcb_seg, base),
              sc->base);
-    stl_phys(addr + offsetof(struct vmcb_seg, limit), 
+    stl_phys(addr + offsetof(struct vmcb_seg, limit),
              sc->limit);
-    stw_phys(addr + offsetof(struct vmcb_seg, attrib), 
+    stw_phys(addr + offsetof(struct vmcb_seg, attrib),
              ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00));
 }
-                                
+
 static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc)
 {
     unsigned int flags;
@@ -4922,7 +4922,7 @@
     sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12);
 }
 
-static inline void svm_load_seg_cache(target_phys_addr_t addr, 
+static inline void svm_load_seg_cache(target_phys_addr_t addr,
                                       CPUState *env, int seg_reg)
 {
     SegmentCache sc1, *sc = &sc1;
@@ -4965,13 +4965,13 @@
     stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer);
     stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags());
 
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), 
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es),
                   &env->segs[R_ES]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), 
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs),
                  &env->segs[R_CS]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), 
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss),
                  &env->segs[R_SS]);
-    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), 
+    svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds),
                  &env->segs[R_DS]);
 
     stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip),
@@ -5015,7 +5015,7 @@
             env->hflags2 |= HF2_HIF_MASK;
     }
 
-    cpu_load_efer(env, 
+    cpu_load_efer(env,
                   ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer)));
     env->eflags = 0;
     load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)),
@@ -5159,13 +5159,13 @@
                 addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)),
                 env->segs[R_FS].base);
 
-    svm_save_seg(addr + offsetof(struct vmcb, save.fs), 
+    svm_save_seg(addr + offsetof(struct vmcb, save.fs),
                  &env->segs[R_FS]);
-    svm_save_seg(addr + offsetof(struct vmcb, save.gs), 
+    svm_save_seg(addr + offsetof(struct vmcb, save.gs),
                  &env->segs[R_GS]);
-    svm_save_seg(addr + offsetof(struct vmcb, save.tr), 
+    svm_save_seg(addr + offsetof(struct vmcb, save.tr),
                  &env->tr);
-    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), 
+    svm_save_seg(addr + offsetof(struct vmcb, save.ldtr),
                  &env->ldt);
 
 #ifdef TARGET_X86_64
@@ -5203,7 +5203,7 @@
 {
     target_ulong addr;
     helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0);
-    
+
     if (aflag == 2)
         addr = EAX;
     else
@@ -5282,7 +5282,7 @@
     }
 }
 
-void helper_svm_check_io(uint32_t port, uint32_t param, 
+void helper_svm_check_io(uint32_t port, uint32_t param,
                          uint32_t next_eip_addend)
 {
     if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) {
@@ -5291,7 +5291,7 @@
         uint16_t mask = (1 << ((param >> 4) & 7)) - 1;
         if(lduw_phys(addr + port / 8) & (mask << (port & 7))) {
             /* next EIP */
-            stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 
+            stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
                      env->eip + next_eip_addend);
             helper_vmexit(SVM_EXIT_IOIO, param | (port << 16));
         }
@@ -5316,13 +5316,13 @@
     }
 
     /* Save the VM state in the vmcb */
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), 
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es),
                  &env->segs[R_ES]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), 
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs),
                  &env->segs[R_CS]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), 
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss),
                  &env->segs[R_SS]);
-    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), 
+    svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds),
                  &env->segs[R_DS]);
 
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base);
@@ -5371,7 +5371,7 @@
     cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3)));
     /* we need to set the efer after the crs so the hidden flags get
        set properly */
-    cpu_load_efer(env, 
+    cpu_load_efer(env,
                   ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer)));
     env->eflags = 0;
     load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)),
@@ -5504,7 +5504,7 @@
 {
     int count;
     target_ulong res, mask;
-    
+
     res = t0;
     count = TARGET_LONG_BITS - 1;
     mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 03e9b17..20d47bb 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -31,6 +31,9 @@
 #define dh_ctype_Reg Reg *
 #define dh_ctype_XMMReg XMMReg *
 #define dh_ctype_MMXReg MMXReg *
+#define dh_is_signed_Reg dh_is_signed_ptr
+#define dh_is_signed_XMMReg dh_is_signed_ptr
+#define dh_is_signed_MMXReg dh_is_signed_ptr
 
 DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg)
 DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index b50f0a9..f4e295f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -380,7 +380,7 @@
     tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
 }
 #endif
-    
+
 static void gen_add_A0_im(DisasContext *s, int val)
 {
 #ifdef TARGET_X86_64
@@ -461,7 +461,7 @@
 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
-    if (shift != 0) 
+    if (shift != 0)
         tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 #ifdef TARGET_X86_64
@@ -503,7 +503,7 @@
 static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
 {
     tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
-    if (shift != 0) 
+    if (shift != 0)
         tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 }
@@ -660,7 +660,7 @@
     }
 }
 
-static inline void gen_op_movl_T0_Dshift(int ot) 
+static inline void gen_op_movl_T0_Dshift(int ot)
 {
     tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
@@ -952,7 +952,7 @@
     case CC_OP_SUBW:
     case CC_OP_SUBL:
     case CC_OP_SUBQ:
-        
+
         size = cc_op - CC_OP_SUBB;
         switch(jcc_op) {
         case JCC_Z:
@@ -983,28 +983,28 @@
             switch(size) {
             case 0:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                    0, l1);
                 break;
             case 1:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                    0, l1);
                 break;
 #ifdef TARGET_X86_64
             case 2:
                 tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
-                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, 
+                tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
                                    0, l1);
                 break;
 #endif
             default:
-                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, 
+                tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
                                    0, l1);
                 break;
             }
             break;
-            
+
         case JCC_B:
             cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
             goto fast_jcc_b;
@@ -1036,7 +1036,7 @@
             }
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
-            
+
         case JCC_L:
             cond = inv ? TCG_COND_GE : TCG_COND_LT;
             goto fast_jcc_l;
@@ -1068,48 +1068,48 @@
             }
             tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
             break;
-            
+
         default:
             goto slow_jcc;
         }
         break;
-        
+
         /* some jumps are easy to compute */
     case CC_OP_ADDB:
     case CC_OP_ADDW:
     case CC_OP_ADDL:
     case CC_OP_ADDQ:
-        
+
     case CC_OP_ADCB:
     case CC_OP_ADCW:
     case CC_OP_ADCL:
     case CC_OP_ADCQ:
-        
+
     case CC_OP_SBBB:
     case CC_OP_SBBW:
     case CC_OP_SBBL:
     case CC_OP_SBBQ:
-        
+
     case CC_OP_LOGICB:
     case CC_OP_LOGICW:
     case CC_OP_LOGICL:
     case CC_OP_LOGICQ:
-        
+
     case CC_OP_INCB:
     case CC_OP_INCW:
     case CC_OP_INCL:
     case CC_OP_INCQ:
-        
+
     case CC_OP_DECB:
     case CC_OP_DECW:
     case CC_OP_DECL:
     case CC_OP_DECQ:
-        
+
     case CC_OP_SHLB:
     case CC_OP_SHLW:
     case CC_OP_SHLL:
     case CC_OP_SHLQ:
-        
+
     case CC_OP_SARB:
     case CC_OP_SARW:
     case CC_OP_SARL:
@@ -1128,7 +1128,7 @@
     default:
     slow_jcc:
         gen_setcc_slow_T0(s, jcc_op);
-        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, 
+        tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
                            cpu_T[0], 0, l1);
         break;
     }
@@ -1420,7 +1420,7 @@
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
-static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, 
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
                             int is_right, int is_arith)
 {
     target_ulong mask;
@@ -1462,7 +1462,7 @@
         gen_op_st_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_reg_T0(ot, op1);
-        
+
     /* update eflags if non zero shift */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
@@ -1483,7 +1483,7 @@
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
     else
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
-        
+
     gen_set_label(shift_label);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
@@ -1495,7 +1495,7 @@
                             int is_right, int is_arith)
 {
     int mask;
-    
+
     if (ot == OT_QUAD)
         mask = 0x3f;
     else
@@ -1530,7 +1530,7 @@
         gen_op_st_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_reg_T0(ot, op1);
-        
+
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
@@ -1550,7 +1550,7 @@
         tcg_gen_shri_tl(ret, arg1, -arg2);
 }
 
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, 
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
                           int is_right)
 {
     target_ulong mask;
@@ -1584,12 +1584,12 @@
        shifts. */
     label1 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
-    
+
     if (ot <= OT_WORD)
         tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
     else
         tcg_gen_mov_tl(cpu_tmp0, t1);
-    
+
     gen_extu(ot, t0);
     tcg_gen_mov_tl(t2, t0);
 
@@ -1614,7 +1614,7 @@
     } else {
         gen_op_mov_reg_v(ot, op1, t0);
     }
-    
+
     /* update eflags */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
@@ -1633,10 +1633,10 @@
     }
     tcg_gen_andi_tl(t0, t0, CC_C);
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-    
+
     tcg_gen_discard_tl(cpu_cc_dst);
     tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
+
     gen_set_label(label2);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
@@ -1724,7 +1724,7 @@
 }
 
 /* XXX: add faster immediate = 1 case */
-static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, 
+static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
                            int is_right)
 {
     int label1;
@@ -1737,7 +1737,7 @@
         gen_op_ld_T0_A0(ot + s->mem_index);
     else
         gen_op_mov_TN_reg(ot, 0, op1);
-    
+
     if (is_right) {
         switch (ot) {
         case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
@@ -1770,13 +1770,13 @@
     tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
     tcg_gen_discard_tl(cpu_cc_dst);
     tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
+
     gen_set_label(label1);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 }
 
 /* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, 
+static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
                                 int is_right)
 {
     int label1, label2, data_bits;
@@ -1810,7 +1810,7 @@
        shifts. */
     label1 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-    
+
     tcg_gen_addi_tl(cpu_tmp5, t2, -1);
     if (ot == OT_WORD) {
         /* Note: we implement the Intel behaviour for shift count > 16 */
@@ -1821,7 +1821,7 @@
             tcg_gen_ext32u_tl(t0, t0);
 
             tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-            
+
             /* only needed if count > 16, but a test would complicate */
             tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(32), t2);
             tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
@@ -1835,7 +1835,7 @@
             tcg_gen_shli_tl(t1, t1, 16);
             tcg_gen_or_tl(t1, t1, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            
+
             tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
             tcg_gen_sub_tl(cpu_tmp0, tcg_const_tl(32), cpu_tmp5);
             tcg_gen_shr_tl(cpu_tmp6, t1, cpu_tmp0);
@@ -1858,13 +1858,13 @@
             tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
             tcg_gen_shl_tl(t1, t1, cpu_tmp5);
             tcg_gen_or_tl(t0, t0, t1);
-            
+
         } else {
             if (ot == OT_LONG)
                 tcg_gen_ext32u_tl(t1, t1);
 
             tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-            
+
             tcg_gen_shl_tl(t0, t0, t2);
             tcg_gen_sub_tl(cpu_tmp5, tcg_const_tl(data_bits), t2);
             tcg_gen_shr_tl(t1, t1, cpu_tmp5);
@@ -1880,7 +1880,7 @@
     } else {
         gen_op_mov_reg_v(ot, op1, t0);
     }
-    
+
     /* update eflags */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
@@ -2315,7 +2315,7 @@
     if (s->jmp_opt) {
         l1 = gen_new_label();
         gen_jcc1(s, cc_op, b, l1);
-        
+
         gen_goto_tb(s, 0, next_eip);
 
         gen_set_label(l1);
@@ -2368,17 +2368,17 @@
 
 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 {
     tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
+    tcg_gen_st32_tl(cpu_T[0], cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
-    tcg_gen_st_tl(cpu_T[0], cpu_env, 
+    tcg_gen_st_tl(cpu_T[0], cpu_env,
                   offsetof(CPUX86State,segs[seg_reg].base));
 }
 
@@ -2681,7 +2681,7 @@
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     gen_jmp_im(cur_eip);
-    gen_helper_raise_interrupt(tcg_const_i32(intno), 
+    gen_helper_raise_interrupt(tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->is_jmp = 3;
 }
@@ -3175,7 +3175,7 @@
 #endif
             {
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3185,14 +3185,14 @@
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3324,13 +3324,13 @@
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
             }
@@ -3338,13 +3338,13 @@
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                 gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
             }
@@ -3460,7 +3460,7 @@
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
@@ -3468,7 +3468,7 @@
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
@@ -4605,12 +4605,12 @@
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
-                                           tcg_const_i32(dflag), 
+                                           tcg_const_i32(dflag),
                                            tcg_const_i32(s->pc - pc_start));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
-                                      tcg_const_i32(dflag), 
+                                      tcg_const_i32(dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
             gen_eob(s);
@@ -4877,7 +4877,7 @@
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_helper_cmpxchg16b(cpu_A0);
         } else
-#endif        
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
@@ -5334,7 +5334,7 @@
     case 0xc1:
         /* shift Ev,Ib */
         shift = 2;
-    grp2:
+    GRP2:
         {
             if ((b & 1) == 0)
                 ot = OT_BYTE;
@@ -5370,12 +5370,12 @@
     case 0xd1:
         /* shift Ev,1 */
         shift = 1;
-        goto grp2;
+        goto GRP2;
     case 0xd2:
     case 0xd3:
         /* shift Ev,cl */
         shift = 0;
-        goto grp2;
+        goto GRP2;
 
     case 0x1a4: /* shld imm */
         op = 0;
@@ -5452,7 +5452,7 @@
                         gen_helper_fildl_FT0(cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         gen_helper_fldl_FT0(cpu_tmp1_i64);
                         break;
@@ -5491,7 +5491,7 @@
                         gen_helper_fildl_ST0(cpu_tmp2_i32);
                         break;
                     case 2:
-                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                        tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         gen_helper_fldl_ST0(cpu_tmp1_i64);
                         break;
@@ -5513,7 +5513,7 @@
                         break;
                     case 2:
                         gen_helper_fisttll_ST0(cpu_tmp1_i64);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
@@ -5539,7 +5539,7 @@
                         break;
                     case 2:
                         gen_helper_fstl_ST0(cpu_tmp1_i64);
-                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                        tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
@@ -5621,13 +5621,13 @@
                 gen_helper_fpop();
                 break;
             case 0x3d: /* fildll */
-                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
+                tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
                                   (s->mem_index >> 2) - 1);
                 gen_helper_fildll_ST0(cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
                 gen_helper_fistll_ST0(cpu_tmp1_i64);
-                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
+                tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
                                   (s->mem_index >> 2) - 1);
                 gen_helper_fpop();
                 break;
@@ -6015,7 +6015,7 @@
             ot = dflag ? OT_LONG : OT_WORD;
         gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
         gen_op_andl_T0_ffff();
-        gen_check_io(s, ot, pc_start - s->cs_base, 
+        gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
@@ -6206,7 +6206,7 @@
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_iret_protected(tcg_const_i32(s->dflag), 
+            gen_helper_iret_protected(tcg_const_i32(s->dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
             s->cc_op = CC_OP_EFLAGS;
         }
@@ -7090,7 +7090,7 @@
                     break;
                 case 4: /* STGI */
                     if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
                     if (s->cpl != 0) {
@@ -7111,8 +7111,8 @@
                     }
                     break;
                 case 6: /* SKINIT */
-                    if ((!(s->flags & HF_SVME_MASK) && 
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
+                    if ((!(s->flags & HF_SVME_MASK) &&
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
                     gen_helper_skinit();
@@ -7787,8 +7787,7 @@
     gen_intermediate_code_internal(env, tb, 1);
 }
 
-void gen_pc_load(CPUState *env, TranslationBlock *tb,
-                unsigned long searched_pc, int pc_pos, void *puc)
+void restore_state_to_opc(CPUState *env, TranslationBlock *tb, int pc_pos)
 {
     int cc_op;
 #ifdef DEBUG_DISAS
@@ -7800,8 +7799,8 @@
                 qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
             }
         }
-        qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+                pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
                 (uint32_t)tb->cs_base);
     }
 #endif
diff --git a/tcg/README b/tcg/README
index e672258..6600122 100644
--- a/tcg/README
+++ b/tcg/README
@@ -75,10 +75,13 @@
 * Helpers:
 
 Using the tcg_gen_helper_x_y it is possible to call any function
-taking i32, i64 or pointer types. Before calling an helper, all
-globals are stored at their canonical location and it is assumed that
-the function can modify them. In the future, function modifiers will
-be allowed to tell that the helper does not read or write some globals.
+taking i32, i64 or pointer types. By default, before calling a helper,
+all globals are stored at their canonical location and it is assumed
+that the function can modify them. This can be overridden by the
+TCG_CALL_CONST function modifier. By default, the helper is allowed to
+modify the CPU state or raise an exception. This can be overridden by
+the TCG_CALL_PURE function modifier, in which case the call to the
+function is removed if the return value is not used.
 
 On some TCG targets (e.g. x86), several calling conventions are
 supported.
@@ -210,7 +213,7 @@
 
 * eqv_i32/i64 t0, t1, t2
 
-t0=~(t1^t2)
+t0=~(t1^t2), or equivalently, t0=t1^~t2
 
 * nand_i32/i64 t0, t1, t2
 
@@ -265,13 +268,13 @@
 
 * bswap16_i32/i64 t0, t1
 
-16 bit byte swap on a 32/64 bit value. The two/six high order bytes must be
-set to zero.
+16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order
+bytes are set to zero.
 
 * bswap32_i32/i64 t0, t1
 
-32 bit byte swap on a 32/64 bit value. With a 64 bit value, the four high
-order bytes must be set to zero.
+32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that
+the four high order bytes are set to zero.
 
 * bswap64_i64 t0, t1
 
@@ -282,6 +285,28 @@
 Indicate that the value of t0 won't be used later. It is useful to
 force dead code elimination.
 
+* deposit_i32/i64 dest, t1, t2, pos, len
+
+Deposit T2 as a bitfield into T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values:
+
+  LEN - the length of the bitfield
+  POS - the position of the first bit, counting from the LSB
+
+For example, pos=8, len=4 indicates a 4-bit field at bit 8.
+This operation would be equivalent to
+
+  dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
+
+
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -323,9 +348,37 @@
 write(t0, t1 + offset)
 Write 8, 16, 32 or 64 bits to host memory.
 
+********* 64-bit target on 32-bit host support
+
+The following opcodes are internal to TCG.  Thus they are to be implemented by
+32-bit host code generators, but are not to be emitted by guest translators.
+They are emitted as needed by inline functions within "tcg-op.h".
+
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
+* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the 64-bit inputs T1 and T2 are
+formed from two 32-bit arguments, and the 64-bit output T0
+is returned in two 32-bit outputs.
+
+* mulu2_i32 t0_low, t0_high, t1, t2
+
+Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
+the full 64-bit product T0.  The later is returned in two 32-bit outputs.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments.  The result is a 32-bit value.
+
 ********* QEMU specific operations
 
-* tb_exit t0
+* exit_tb t0
 
 Exit the current TB and return the value t0 (word type).
 
@@ -339,13 +392,17 @@
 qemu_ld8s t0, t1, flags
 qemu_ld16u t0, t1, flags
 qemu_ld16s t0, t1, flags
+qemu_ld32 t0, t1, flags
 qemu_ld32u t0, t1, flags
 qemu_ld32s t0, t1, flags
 qemu_ld64 t0, t1, flags
 
-Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU
-address type. 'flags' contains the QEMU memory index (selects user or
-kernel access) for example.
+Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address
+type. 'flags' contains the QEMU memory index (selects user or kernel access)
+for example.
+
+Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and
+"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits.
 
 * qemu_st8 t0, t1, flags
 qemu_st16 t0, t1, flags
@@ -445,7 +502,7 @@
   the speed of the translation.
 
 - Don't hesitate to use helpers for complicated or seldom used target
-  intructions. There is little performance advantage in using TCG to
+  instructions. There is little performance advantage in using TCG to
   implement target instructions taking more than about twenty TCG
   instructions.
 
diff --git a/tcg/TODO b/tcg/TODO
index f30cb75..0747847 100644
--- a/tcg/TODO
+++ b/tcg/TODO
@@ -1,4 +1,4 @@
-- Add new instructions such as: setcond, clz, ctz, popcnt.
+- Add new instructions such as: clz, ctz, popcnt.
 
 - See if it is worth exporting mul2, mulu2, div2, divu2. 
 
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index f8d626d..fb858d8 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -22,6 +22,51 @@
  * THE SOFTWARE.
  */
 
+#if defined(__ARM_ARCH_7__) ||  \
+    defined(__ARM_ARCH_7A__) || \
+    defined(__ARM_ARCH_7EM__) || \
+    defined(__ARM_ARCH_7M__) || \
+    defined(__ARM_ARCH_7R__)
+#define USE_ARMV7_INSTRUCTIONS
+#endif
+
+#if defined(USE_ARMV7_INSTRUCTIONS) || \
+    defined(__ARM_ARCH_6J__) || \
+    defined(__ARM_ARCH_6K__) || \
+    defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6Z__) || \
+    defined(__ARM_ARCH_6ZK__)
+#define USE_ARMV6_INSTRUCTIONS
+#endif
+
+#if defined(USE_ARMV6_INSTRUCTIONS) || \
+    defined(__ARM_ARCH_5T__) || \
+    defined(__ARM_ARCH_5TE__) || \
+    defined(__ARM_ARCH_5TEJ__)
+#define USE_ARMV5_INSTRUCTIONS
+#endif
+
+#ifdef USE_ARMV5_INSTRUCTIONS
+static const int use_armv5_instructions = 1;
+#else
+static const int use_armv5_instructions = 0;
+#endif
+#undef USE_ARMV5_INSTRUCTIONS
+
+#ifdef USE_ARMV6_INSTRUCTIONS
+static const int use_armv6_instructions = 1;
+#else
+static const int use_armv6_instructions = 0;
+#endif
+#undef USE_ARMV6_INSTRUCTIONS
+
+#ifdef USE_ARMV7_INSTRUCTIONS
+static const int use_armv7_instructions = 1;
+#else
+static const int use_armv7_instructions = 0;
+#endif
+#undef USE_ARMV7_INSTRUCTIONS
+
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%r0",
@@ -39,14 +84,11 @@
     "%r12",
     "%r13",
     "%r14",
+    "%pc",
 };
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R0,
-    TCG_REG_R1,
-    TCG_REG_R2,
-    TCG_REG_R3,
     TCG_REG_R4,
     TCG_REG_R5,
     TCG_REG_R6,
@@ -55,8 +97,12 @@
     TCG_REG_R9,
     TCG_REG_R10,
     TCG_REG_R11,
-    TCG_REG_R12,
     TCG_REG_R13,
+    TCG_REG_R0,
+    TCG_REG_R1,
+    TCG_REG_R2,
+    TCG_REG_R3,
+    TCG_REG_R12,
     TCG_REG_R14,
 };
 
@@ -67,12 +113,25 @@
     TCG_REG_R0, TCG_REG_R1
 };
 
+static inline void reloc_abs32(void *code_ptr, tcg_target_long target)
+{
+    *(uint32_t *) code_ptr = target;
+}
+
+static inline void reloc_pc24(void *code_ptr, tcg_target_long target)
+{
+    uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2);
+
+    *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
+                             | (offset & 0xffffff);
+}
+
 static void patch_reloc(uint8_t *code_ptr, int type,
                 tcg_target_long value, tcg_target_long addend)
 {
     switch (type) {
     case R_ARM_ABS32:
-        *(uint32_t *) code_ptr = value;
+        reloc_abs32(code_ptr, value);
         break;
 
     case R_ARM_CALL:
@@ -81,8 +140,7 @@
         tcg_abort();
 
     case R_ARM_PC24:
-        *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & 0xff000000) |
-                (((value - ((tcg_target_long) code_ptr + 8)) >> 2) & 0xffffff);
+        reloc_pc24(code_ptr, value);
         break;
     }
 }
@@ -105,70 +163,55 @@
          break;
 
     case 'r':
-#ifndef CONFIG_SOFTMMU
-    case 'd':
-    case 'D':
-    case 'x':
-    case 'X':
-#endif
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
         break;
 
+    /* qemu_ld address */
+    case 'l':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
 #ifdef CONFIG_SOFTMMU
-    /* qemu_ld/st inputs (unless 'X', 'd' or 'D') */
-    case 'x':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+        /* r0 and r1 will be overwritten when reading the tlb entry,
+           so don't use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-        break;
-
-    /* qemu_ld64 data_reg */
-    case 'd':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-        /* r1 is still needed to load data_reg2, so don't use it.  */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-        break;
-
-    /* qemu_ld/st64 data_reg2 */
-    case 'D':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-        /* r0, r1 and optionally r2 will be overwritten by the address
-         * and the low word of data, so don't use these.  */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-# if TARGET_LONG_BITS == 64
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-# endif
-        break;
-
-# if TARGET_LONG_BITS == 64
-    /* qemu_ld/st addr_reg2 */
-    case 'X':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-        /* r0 will be overwritten by the low word of base, so don't use it.  */
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-        break;
-# endif
 #endif
-
-    case '1':
+        break;
+    case 'L':
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
+#ifdef CONFIG_SOFTMMU
+        /* r1 is still needed to load data_reg or data_reg2,
+           so don't use it. */
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+#endif
         break;
 
-    case '2':
+    /* qemu_st address & data_reg */
+    case 's':
         ct->ct |= TCG_CT_REG;
         tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+        /* r0 and r1 will be overwritten when reading the tlb entry
+           (softmmu only) and doing the byte swapping, so don't
+           use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
         break;
+    /* qemu_st64 data_reg2 */
+    case 'S':
+        ct->ct |= TCG_CT_REG;
+        tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
+        /* r0 and r1 will be overwritten when reading the tlb entry
+            (softmmu only) and doing the byte swapping, so don't
+            use these. */
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
+#ifdef CONFIG_SOFTMMU
+        /* r2 is still needed to load data_reg, so don't use it. */
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
+#endif
+        break;
 
     default:
         return -1;
@@ -309,6 +352,9 @@
 
 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
 {
+    /* We pay attention here to not modify the branch target by skipping
+       the corresponding bytes. This ensure that caches and memory are
+       kept coherent during retranslation. */
 #ifdef HOST_WORDS_BIGENDIAN
     tcg_out8(s, (cond << 4) | 0x0a);
     s->code_ptr += 3;
@@ -324,6 +370,17 @@
                     (((offset - 8) >> 2) & 0x00ffffff));
 }
 
+static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
+{
+    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
+}
+
+static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
+{
+    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
+                (((offset - 8) >> 2) & 0x00ffffff));
+}
+
 static inline void tcg_out_dat_reg(TCGContext *s,
                 int cond, int opc, int rd, int rn, int rm, int shift)
 {
@@ -358,42 +415,38 @@
 }
 
 static inline void tcg_out_movi32(TCGContext *s,
-                int cond, int rd, int32_t arg)
+                int cond, int rd, uint32_t arg)
 {
-    int offset = (uint32_t) arg - ((uint32_t) s->code_ptr + 8);
-
     /* TODO: This is very suboptimal, we can easily have a constant
      * pool somewhere after all the instructions.  */
+    if ((int)arg < 0 && (int)arg >= -0x100) {
+        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff);
+    } else if (use_armv7_instructions) {
+        /* use movw/movt */
+        /* movw */
+        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
+                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
+        if (arg & 0xffff0000) {
+            /* movt */
+            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
+                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
+        }
+    } else {
+        int opc = ARITH_MOV;
+        int rn = 0;
 
-    if (arg < 0 && arg > -0x100)
-        return tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff);
+        do {
+            int i, rot;
 
-    if (offset < 0x100 && offset > -0x100)
-        return offset >= 0 ?
-                tcg_out_dat_imm(s, cond, ARITH_ADD, rd, 15, offset) :
-                tcg_out_dat_imm(s, cond, ARITH_SUB, rd, 15, -offset);
+            i = ctz32(arg) & ~1;
+            rot = ((32 - i) << 7) & 0xf00;
+            tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
+            arg &= ~(0xff << i);
 
-#ifdef __ARM_ARCH_7A__
-    /* use movw/movt */
-    /* movw */
-    tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
-              | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
-    if (arg & 0xffff0000)
-        /* movt */
-        tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
-                  | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
-#else
-    tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, arg & 0xff);
-    if (arg & 0x0000ff00)
-        tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd,
-                        ((arg >>  8) & 0xff) | 0xc00);
-    if (arg & 0x00ff0000)
-        tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd,
-                        ((arg >> 16) & 0xff) | 0x800);
-    if (arg & 0xff000000)
-        tcg_out_dat_imm(s, cond, ARITH_ORR, rd, rd,
-                        ((arg >> 24) & 0xff) | 0x400);
-#endif
+            opc = ARITH_ORR;
+            rn = rd;
+        } while (arg);
+    }
 }
 
 static inline void tcg_out_mul32(TCGContext *s,
@@ -409,7 +462,7 @@
         tcg_out32(s, (cond << 28) | ( 8 << 16) | (0 << 12) |
                         (rs << 8) | 0x90 | rm);
         tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, 8, SHIFT_IMM_LSL(0));
+                        rd, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
     }
 }
 
@@ -447,6 +500,101 @@
     }
 }
 
+static inline void tcg_out_ext8s(TCGContext *s, int cond,
+                                 int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* sxtb */
+        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_ASR(24));
+    }
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int cond,
+                                 int rd, int rn)
+{
+    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int cond,
+                                  int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* sxth */
+        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(16));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_ASR(16));
+    }
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int cond,
+                                  int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* uxth */
+        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_LSL(16));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rd, SHIFT_IMM_LSR(16));
+    }
+}
+
+static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* revsh */
+        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_ASR(16));
+        tcg_out_dat_reg(s, cond, ARITH_ORR,
+                        rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8));
+    }
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* rev16 */
+        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24));
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16));
+        tcg_out_dat_reg(s, cond, ARITH_ORR,
+                        rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8));
+    }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
+{
+    if (use_armv6_instructions) {
+        /* rev */
+        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_EOR,
+                        TCG_REG_R8, rn, rn, SHIFT_IMM_ROR(16));
+        tcg_out_dat_imm(s, cond, ARITH_BIC,
+                        TCG_REG_R8, TCG_REG_R8, 0xff | 0x800);
+        tcg_out_dat_reg(s, cond, ARITH_MOV,
+                        rd, 0, rn, SHIFT_IMM_ROR(8));
+        tcg_out_dat_reg(s, cond, ARITH_EOR,
+                        rd, rd, TCG_REG_R8, SHIFT_IMM_LSR(8));
+    }
+}
+
 static inline void tcg_out_ld32_12(TCGContext *s, int cond,
                 int rd, int rn, tcg_target_long im)
 {
@@ -511,7 +659,7 @@
                         (((-im) & 0xf0) << 4) | ((-im) & 0xf));
 }
 
-static inline void tcg_out_st16u_8(TCGContext *s, int cond,
+static inline void tcg_out_st16_8(TCGContext *s, int cond,
                 int rd, int rn, tcg_target_long im)
 {
     if (im >= 0)
@@ -531,7 +679,7 @@
                     (rn << 16) | (rd << 12) | rm);
 }
 
-static inline void tcg_out_st16u_r(TCGContext *s, int cond,
+static inline void tcg_out_st16_r(TCGContext *s, int cond,
                 int rd, int rn, int rm)
 {
     tcg_out32(s, (cond << 28) | 0x018000b0 |
@@ -551,19 +699,6 @@
                         (((-im) & 0xf0) << 4) | ((-im) & 0xf));
 }
 
-static inline void tcg_out_st16s_8(TCGContext *s, int cond,
-                int rd, int rn, tcg_target_long im)
-{
-    if (im >= 0)
-        tcg_out32(s, (cond << 28) | 0x01c000f0 |
-                        (rn << 16) | (rd << 12) |
-                        ((im & 0xf0) << 4) | (im & 0xf));
-    else
-        tcg_out32(s, (cond << 28) | 0x014000f0 |
-                        (rn << 16) | (rd << 12) |
-                        (((-im) & 0xf0) << 4) | ((-im) & 0xf));
-}
-
 static inline void tcg_out_ld16s_r(TCGContext *s, int cond,
                 int rd, int rn, int rm)
 {
@@ -571,13 +706,6 @@
                     (rn << 16) | (rd << 12) | rm);
 }
 
-static inline void tcg_out_st16s_r(TCGContext *s, int cond,
-                int rd, int rn, int rm)
-{
-    tcg_out32(s, (cond << 28) | 0x018000f0 |
-                    (rn << 16) | (rd << 12) | rm);
-}
-
 static inline void tcg_out_ld8_12(TCGContext *s, int cond,
                 int rd, int rn, tcg_target_long im)
 {
@@ -627,19 +755,6 @@
                         (((-im) & 0xf0) << 4) | ((-im) & 0xf));
 }
 
-static inline void tcg_out_st8s_8(TCGContext *s, int cond,
-                int rd, int rn, tcg_target_long im)
-{
-    if (im >= 0)
-        tcg_out32(s, (cond << 28) | 0x01c000d0 |
-                        (rn << 16) | (rd << 12) |
-                        ((im & 0xf0) << 4) | (im & 0xf));
-    else
-        tcg_out32(s, (cond << 28) | 0x014000d0 |
-                        (rn << 16) | (rd << 12) |
-                        (((-im) & 0xf0) << 4) | ((-im) & 0xf));
-}
-
 static inline void tcg_out_ld8s_r(TCGContext *s, int cond,
                 int rd, int rn, int rm)
 {
@@ -647,13 +762,6 @@
                     (rn << 16) | (rd << 12) | rm);
 }
 
-static inline void tcg_out_st8s_r(TCGContext *s, int cond,
-                int rd, int rn, int rm)
-{
-    tcg_out32(s, (cond << 28) | 0x018000d0 |
-                    (rn << 16) | (rd << 12) | rm);
-}
-
 static inline void tcg_out_ld32u(TCGContext *s, int cond,
                 int rd, int rn, int32_t offset)
 {
@@ -694,14 +802,14 @@
         tcg_out_ld16s_8(s, cond, rd, rn, offset);
 }
 
-static inline void tcg_out_st16u(TCGContext *s, int cond,
+static inline void tcg_out_st16(TCGContext *s, int cond,
                 int rd, int rn, int32_t offset)
 {
     if (offset > 0xff || offset < -0xff) {
         tcg_out_movi32(s, cond, TCG_REG_R8, offset);
-        tcg_out_st16u_r(s, cond, rd, rn, TCG_REG_R8);
+        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_R8);
     } else
-        tcg_out_st16u_8(s, cond, rd, rn, offset);
+        tcg_out_st16_8(s, cond, rd, rn, offset);
 }
 
 static inline void tcg_out_ld8u(TCGContext *s, int cond,
@@ -724,7 +832,7 @@
         tcg_out_ld8s_8(s, cond, rd, rn, offset);
 }
 
-static inline void tcg_out_st8u(TCGContext *s, int cond,
+static inline void tcg_out_st8(TCGContext *s, int cond,
                 int rd, int rn, int32_t offset)
 {
     if (offset > 0xfff || offset < -0xfff) {
@@ -738,6 +846,11 @@
 {
     int32_t val;
 
+    if (addr & 1) {
+        /* goto to a Thumb destination isn't supported */
+        tcg_abort();
+    }
+
     val = addr - (tcg_target_long) s->code_ptr;
     if (val - 8 < 0x01fffffd && val - 8 > -0x01fffffd)
         tcg_out_b(s, cond, val);
@@ -746,60 +859,60 @@
         tcg_abort();
 #else
         if (cond == COND_AL) {
-            tcg_out_ld32_12(s, COND_AL, 15, 15, -4);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
             tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
         } else {
             tcg_out_movi32(s, cond, TCG_REG_R8, val - 8);
             tcg_out_dat_reg(s, cond, ARITH_ADD,
-                            15, 15, TCG_REG_R8, SHIFT_IMM_LSL(0));
+                            TCG_REG_PC, TCG_REG_PC,
+                            TCG_REG_R8, SHIFT_IMM_LSL(0));
         }
 #endif
     }
 }
 
-static inline void tcg_out_call(TCGContext *s, int cond, uint32_t addr)
+static inline void tcg_out_call(TCGContext *s, uint32_t addr)
 {
     int32_t val;
 
-#ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R8, 0, 14, SHIFT_IMM_LSL(0));
-#endif
-
     val = addr - (tcg_target_long) s->code_ptr;
-    if (val < 0x01fffffd && val > -0x01fffffd)
-        tcg_out_bl(s, cond, val);
-    else {
+    if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) {
+        if (addr & 1) {
+            /* Use BLX if the target is in Thumb mode */
+            if (!use_armv5_instructions) {
+                tcg_abort();
+            }
+            tcg_out_blx_imm(s, val);
+        } else {
+            tcg_out_bl(s, COND_AL, val);
+        }
+    } else {
 #if 1
         tcg_abort();
 #else
         if (cond == COND_AL) {
-            tcg_out_dat_imm(s, cond, ARITH_ADD, 14, 15, 4);
-            tcg_out_ld32_12(s, COND_AL, 15, 15, -4);
+            tcg_out_dat_imm(s, cond, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
             tcg_out32(s, addr); /* XXX: This is l->u.value, can we use it? */
         } else {
             tcg_out_movi32(s, cond, TCG_REG_R9, addr);
-            tcg_out_dat_imm(s, cond, ARITH_MOV, 14, 0, 15);
+            tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
+                            TCG_REG_PC, SHIFT_IMM_LSL(0));
             tcg_out_bx(s, cond, TCG_REG_R9);
         }
 #endif
     }
-
-#ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
-#endif
 }
 
 static inline void tcg_out_callr(TCGContext *s, int cond, int arg)
 {
-#ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R8, 0, 14, SHIFT_IMM_LSL(0));
-#endif
-    /* TODO: on ARMv5 and ARMv6 replace with tcg_out_blx(s, cond, arg);  */
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 15, SHIFT_IMM_LSL(0));
-    tcg_out_bx(s, cond, arg);
-#ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, TCG_REG_R8, SHIFT_IMM_LSL(0));
-#endif
+    if (use_armv5_instructions) {
+        tcg_out_blx(s, cond, arg);
+    } else {
+        tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
+                        TCG_REG_PC, SHIFT_IMM_LSL(0));
+        tcg_out_bx(s, cond, arg);
+    }
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
@@ -809,7 +922,7 @@
     if (l->has_value)
         tcg_out_goto(s, cond, l->u.value);
     else if (cond == COND_AL) {
-        tcg_out_ld32_12(s, COND_AL, 15, 15, -4);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
         tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337);
         s->code_ptr += 4;
     } else {
@@ -819,57 +932,6 @@
     }
 }
 
-static void tcg_out_div_helper(TCGContext *s, int cond, const TCGArg *args,
-                void *helper_div, void *helper_rem, int shift)
-{
-    int div_reg = args[0];
-    int rem_reg = args[1];
-
-    /* stmdb sp!, { r0 - r3, ip, lr } */
-    /* (Note that we need an even number of registers as per EABI) */
-    tcg_out32(s, (cond << 28) | 0x092d500f);
-
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 0, 0, args[2], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 1, 0, args[3], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 2, 0, args[4], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 3, 0, 2, shift);
-
-    tcg_out_call(s, cond, (uint32_t) helper_div);
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 0, SHIFT_IMM_LSL(0));
-
-    /* ldmia sp, { r0 - r3, fp, lr } */
-    tcg_out32(s, (cond << 28) | 0x089d500f);
-
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 0, 0, args[2], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 1, 0, args[3], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 2, 0, args[4], SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 3, 0, 2, shift);
-
-    tcg_out_call(s, cond, (uint32_t) helper_rem);
-
-    tcg_out_dat_reg(s, cond, ARITH_MOV, rem_reg, 0, 0, SHIFT_IMM_LSL(0));
-    tcg_out_dat_reg(s, cond, ARITH_MOV, div_reg, 0, 8, SHIFT_IMM_LSL(0));
-
-    /* ldr r0, [sp], #4 */
-    if (rem_reg != 0 && div_reg != 0)
-        tcg_out32(s, (cond << 28) | 0x04bd0004);
-    /* ldr r1, [sp], #4 */
-    if (rem_reg != 1 && div_reg != 1)
-        tcg_out32(s, (cond << 28) | 0x04bd1004);
-    /* ldr r2, [sp], #4 */
-    if (rem_reg != 2 && div_reg != 2)
-        tcg_out32(s, (cond << 28) | 0x04bd2004);
-    /* ldr r3, [sp], #4 */
-    if (rem_reg != 3 && div_reg != 3)
-        tcg_out32(s, (cond << 28) | 0x04bd3004);
-    /* ldr ip, [sp], #4 */
-    if (rem_reg != 12 && div_reg != 12)
-        tcg_out32(s, (cond << 28) | 0x04bdc004);
-    /* ldr lr, [sp], #4 */
-    if (rem_reg != 14 && div_reg != 14)
-        tcg_out32(s, (cond << 28) | 0x04bde004);
-}
-
 #ifdef CONFIG_SOFTMMU
 
 #include "../../softmmu_defs.h"
@@ -891,10 +953,9 @@
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
-static inline void tcg_out_qemu_ld(TCGContext *s, int cond,
-                const TCGArg *args, int opc)
+static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
-    int addr_reg, data_reg, data_reg2;
+    int addr_reg, data_reg, data_reg2, bswap;
 #ifdef CONFIG_SOFTMMU
     int mem_index, s_bits;
 # if TARGET_LONG_BITS == 64
@@ -903,6 +964,11 @@
     uint32_t *label_ptr;
 #endif
 
+#ifdef TARGET_WORDS_BIGENDIAN
+    bswap = 1;
+#else
+    bswap = 0;
+#endif
     data_reg = *args++;
     if (opc == 3)
         data_reg2 = *args++;
@@ -924,12 +990,12 @@
 #  if CPU_TLB_BITS > 8
 #   error
 #  endif
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                    8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8,
+                    0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    0, 8, CPU_TLB_SIZE - 1);
-    tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                    0, TCG_AREG0, 0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
+                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
+    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
+                    TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
     /* In the
      *  ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_read))]
      * below, the offset is likely to exceed 12 bits if mem_index != 0 and
@@ -938,13 +1004,13 @@
      * before.
      */
     if (mem_index)
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 0, 0,
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
                         (mem_index << (TLB_SHIFT & 1)) |
                         ((16 - (TLB_SHIFT >> 1)) << 8));
-    tcg_out_ld32_12(s, COND_AL, 1, 0,
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
                     offsetof(CPUState, tlb_table[0][0].addr_read));
-    tcg_out_dat_reg(s, COND_AL, ARITH_CMP,
-                    0, 1, 8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
+                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     /* Check alignment.  */
     if (s_bits)
         tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
@@ -952,95 +1018,101 @@
 #  if TARGET_LONG_BITS == 64
     /* XXX: possibly we could use a block data load or writeback in
      * the first access.  */
-    tcg_out_ld32_12(s, COND_EQ, 1, 0,
+    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                     offsetof(CPUState, tlb_table[0][0].addr_read) + 4);
-    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP,
-                    0, 1, addr_reg2, SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
+                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 #  endif
-    tcg_out_ld32_12(s, COND_EQ, 1, 0,
+    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                     offsetof(CPUState, tlb_table[0][0].addend));
 
     switch (opc) {
     case 0:
-        tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, 1);
+        tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 0 | 4:
-        tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, 1);
+        tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 1:
-        tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, 1);
+        tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        if (bswap) {
+            tcg_out_bswap16(s, COND_EQ, data_reg, data_reg);
+        }
         break;
     case 1 | 4:
-        tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, 1);
+        if (bswap) {
+            tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+            tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg);
+        } else {
+            tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        }
         break;
     case 2:
     default:
-        tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, 1);
+        tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
+        }
         break;
     case 3:
-        tcg_out_ld32_rwb(s, COND_EQ, data_reg, 1, addr_reg);
-        tcg_out_ld32_12(s, COND_EQ, data_reg2, 1, 4);
+        if (bswap) {
+            tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg);
+            tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4);
+            tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2);
+            tcg_out_bswap32(s, COND_EQ, data_reg, data_reg);
+        } else {
+            tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
+            tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+        }
         break;
     }
 
     label_ptr = (void *) s->code_ptr;
-    tcg_out_b(s, COND_EQ, 8);
-
-# ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 14, SHIFT_IMM_LSL(0));
-# endif
+    tcg_out_b_noaddr(s, COND_EQ);
 
     /* TODO: move this code to where the constants pool will be */
-    if (addr_reg)
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        0, 0, addr_reg, SHIFT_IMM_LSL(0));
+    if (addr_reg != TCG_REG_R0) {
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                        TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0));
+    }
 # if TARGET_LONG_BITS == 32
-    tcg_out_dat_imm(s, cond, ARITH_MOV, 1, 0, mem_index);
+    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index);
 # else
-    if (addr_reg2 != 1)
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        1, 0, addr_reg2, SHIFT_IMM_LSL(0));
-    tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index);
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
+    tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
-    tcg_out_bl(s, cond, (tcg_target_long) qemu_ld_helpers[s_bits] -
-                    (tcg_target_long) s->code_ptr);
+    tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
 
     switch (opc) {
     case 0 | 4:
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        0, 0, 0, SHIFT_IMM_LSL(24));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        data_reg, 0, 0, SHIFT_IMM_ASR(24));
+        tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
         break;
     case 1 | 4:
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        0, 0, 0, SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        data_reg, 0, 0, SHIFT_IMM_ASR(16));
+        tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
         break;
     case 0:
     case 1:
     case 2:
     default:
-        if (data_reg)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            data_reg, 0, 0, SHIFT_IMM_LSL(0));
+        if (data_reg != TCG_REG_R0) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
+        }
         break;
     case 3:
-        if (data_reg != 0)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            data_reg, 0, 0, SHIFT_IMM_LSL(0));
-        if (data_reg2 != 1)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            data_reg2, 0, 1, SHIFT_IMM_LSL(0));
+        if (data_reg != TCG_REG_R0) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
+        }
+        if (data_reg2 != TCG_REG_R1) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
+        }
         break;
     }
 
-# ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 8, SHIFT_IMM_LSL(0));
-# endif
-
-    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
+    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
@@ -1051,9 +1123,9 @@
             i = ctz32(offset) & ~1;
             rot = ((32 - i) << 7) & 0xf00;
 
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 8, addr_reg,
+            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg,
                             ((offset >> i) & 0xff) | rot);
-            addr_reg = 8;
+            addr_reg = TCG_REG_R8;
             offset &= ~(0xff << i);
         }
     }
@@ -1066,33 +1138,47 @@
         break;
     case 1:
         tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
+        if (bswap) {
+            tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
+        }
         break;
     case 1 | 4:
-        tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
+        if (bswap) {
+            tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
+        } else {
+            tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
+        }
         break;
     case 2:
     default:
         tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+        }
         break;
     case 3:
         /* TODO: use block load -
          * check that data_reg2 > data_reg or the other way */
         if (data_reg == addr_reg) {
-            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, 4);
-            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
+            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
         } else {
-            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
-            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, 4);
+            tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
+            tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
+        }
+        if (bswap) {
+            tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
+            tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
         }
         break;
     }
 #endif
 }
 
-static inline void tcg_out_qemu_st(TCGContext *s, int cond,
-                const TCGArg *args, int opc)
+static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
-    int addr_reg, data_reg, data_reg2;
+    int addr_reg, data_reg, data_reg2, bswap;
 #ifdef CONFIG_SOFTMMU
     int mem_index, s_bits;
 # if TARGET_LONG_BITS == 64
@@ -1101,6 +1187,11 @@
     uint32_t *label_ptr;
 #endif
 
+#ifdef TARGET_WORDS_BIGENDIAN
+    bswap = 1;
+#else
+    bswap = 0;
+#endif
     data_reg = *args++;
     if (opc == 3)
         data_reg2 = *args++;
@@ -1120,11 +1211,11 @@
      *  add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS
      */
     tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                    8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+                    TCG_REG_R8, 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    0, 8, CPU_TLB_SIZE - 1);
-    tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                    0, TCG_AREG0, 0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
+                    TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
+    tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
+                    TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
     /* In the
      *  ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_write))]
      * below, the offset is likely to exceed 12 bits if mem_index != 0 and
@@ -1133,13 +1224,13 @@
      * before.
      */
     if (mem_index)
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 0, 0,
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
                         (mem_index << (TLB_SHIFT & 1)) |
                         ((16 - (TLB_SHIFT >> 1)) << 8));
-    tcg_out_ld32_12(s, COND_AL, 1, 0,
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
                     offsetof(CPUState, tlb_table[0][0].addr_write));
-    tcg_out_dat_reg(s, COND_AL, ARITH_CMP,
-                    0, 1, 8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
+                    TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     /* Check alignment.  */
     if (s_bits)
         tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
@@ -1147,125 +1238,121 @@
 #  if TARGET_LONG_BITS == 64
     /* XXX: possibly we could use a block data load or writeback in
      * the first access.  */
-    tcg_out_ld32_12(s, COND_EQ, 1, 0,
-                    offsetof(CPUState, tlb_table[0][0].addr_write)
-                    + 4);
-    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP,
-                    0, 1, addr_reg2, SHIFT_IMM_LSL(0));
+    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
+                    offsetof(CPUState, tlb_table[0][0].addr_write) + 4);
+    tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
+                    TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 #  endif
-    tcg_out_ld32_12(s, COND_EQ, 1, 0,
+    tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
                     offsetof(CPUState, tlb_table[0][0].addend));
 
     switch (opc) {
     case 0:
-        tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, 1);
-        break;
-    case 0 | 4:
-        tcg_out_st8s_r(s, COND_EQ, data_reg, addr_reg, 1);
+        tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
         break;
     case 1:
-        tcg_out_st16u_r(s, COND_EQ, data_reg, addr_reg, 1);
-        break;
-    case 1 | 4:
-        tcg_out_st16s_r(s, COND_EQ, data_reg, addr_reg, 1);
+        if (bswap) {
+            tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg);
+            tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+        } else {
+            tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        }
         break;
     case 2:
     default:
-        tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, 1);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
+            tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
+        } else {
+            tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
+        }
         break;
     case 3:
-        tcg_out_st32_rwb(s, COND_EQ, data_reg, 1, addr_reg);
-        tcg_out_st32_12(s, COND_EQ, data_reg2, 1, 4);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
+            tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg);
+            tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
+            tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4);
+        } else {
+            tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg);
+            tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4);
+        }
         break;
     }
 
     label_ptr = (void *) s->code_ptr;
-    tcg_out_b(s, COND_EQ, 8);
+    tcg_out_b_noaddr(s, COND_EQ);
 
     /* TODO: move this code to where the constants pool will be */
-    if (addr_reg)
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        0, 0, addr_reg, SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0));
 # if TARGET_LONG_BITS == 32
     switch (opc) {
     case 0:
-        tcg_out_dat_imm(s, cond, ARITH_AND, 1, data_reg, 0xff);
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index);
+        tcg_out_ext8u(s, COND_AL, TCG_REG_R1, data_reg);
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
         break;
     case 1:
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        1, 0, data_reg, SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        1, 0, 1, SHIFT_IMM_LSR(16));
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index);
+        tcg_out_ext16u(s, COND_AL, TCG_REG_R1, data_reg);
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
         break;
     case 2:
-        if (data_reg != 1)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            1, 0, data_reg, SHIFT_IMM_LSL(0));
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 2, 0, mem_index);
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                        TCG_REG_R1, 0, data_reg, SHIFT_IMM_LSL(0));
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
         break;
     case 3:
-        if (data_reg != 1)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            1, 0, data_reg, SHIFT_IMM_LSL(0));
-        if (data_reg2 != 2)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            2, 0, data_reg2, SHIFT_IMM_LSL(0));
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index);
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R8, 0, mem_index);
+        tcg_out32(s, (COND_AL << 28) | 0x052d8010); /* str r8, [sp, #-0x10]! */
+        if (data_reg != TCG_REG_R2) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0));
+        }
+        if (data_reg2 != TCG_REG_R3) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            TCG_REG_R3, 0, data_reg2, SHIFT_IMM_LSL(0));
+        }
         break;
     }
 # else
-    if (addr_reg2 != 1)
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        1, 0, addr_reg2, SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     switch (opc) {
     case 0:
-        tcg_out_dat_imm(s, cond, ARITH_AND, 2, data_reg, 0xff);
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index);
+        tcg_out_ext8u(s, COND_AL, TCG_REG_R2, data_reg);
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index);
         break;
     case 1:
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        2, 0, data_reg, SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        2, 0, 2, SHIFT_IMM_LSR(16));
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index);
+        tcg_out_ext16u(s, COND_AL, TCG_REG_R2, data_reg);
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index);
         break;
     case 2:
-        if (data_reg != 2)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            2, 0, data_reg, SHIFT_IMM_LSL(0));
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 3, 0, mem_index);
+        if (data_reg != TCG_REG_R2) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0));
+        }
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R3, 0, mem_index);
         break;
     case 3:
-        tcg_out_dat_imm(s, cond, ARITH_MOV, 8, 0, mem_index);
-        tcg_out32(s, (cond << 28) | 0x052d8010); /* str r8, [sp, #-0x10]! */
-        if (data_reg != 2)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            2, 0, data_reg, SHIFT_IMM_LSL(0));
-        if (data_reg2 != 3)
-            tcg_out_dat_reg(s, cond, ARITH_MOV,
-                            3, 0, data_reg2, SHIFT_IMM_LSL(0));
+        tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R8, 0, mem_index);
+        tcg_out32(s, (COND_AL << 28) | 0x052d8010); /* str r8, [sp, #-0x10]! */
+        if (data_reg != TCG_REG_R2) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            TCG_REG_R2, 0, data_reg, SHIFT_IMM_LSL(0));
+        }
+        if (data_reg2 != TCG_REG_R3) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                            TCG_REG_R3, 0, data_reg2, SHIFT_IMM_LSL(0));
+        }
         break;
     }
 # endif
 
-# ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 8, 0, 14, SHIFT_IMM_LSL(0));
-# endif
-
-    tcg_out_bl(s, cond, (tcg_target_long) qemu_st_helpers[s_bits] -
-                    (tcg_target_long) s->code_ptr);
-# if TARGET_LONG_BITS == 64
+    tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
-        tcg_out_dat_imm(s, cond, ARITH_ADD, 13, 13, 0x10);
-# endif
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
 
-# ifdef SAVE_LR
-    tcg_out_dat_reg(s, cond, ARITH_MOV, 14, 0, 8, SHIFT_IMM_LSL(0));
-# endif
-
-    *label_ptr += ((void *) s->code_ptr - (void *) label_ptr - 8) >> 2;
+    reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         uint32_t offset = GUEST_BASE;
@@ -1276,9 +1363,9 @@
             i = ctz32(offset) & ~1;
             rot = ((32 - i) << 7) & 0xf00;
 
-            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, 8, addr_reg,
+            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg,
                             ((offset >> i) & 0xff) | rot);
-            addr_reg = 8;
+            addr_reg = TCG_REG_R1;
             offset &= ~(0xff << i);
         }
     }
@@ -1286,24 +1373,35 @@
     case 0:
         tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0);
         break;
-    case 0 | 4:
-        tcg_out_st8s_8(s, COND_AL, data_reg, addr_reg, 0);
-        break;
     case 1:
-        tcg_out_st16u_8(s, COND_AL, data_reg, addr_reg, 0);
-        break;
-    case 1 | 4:
-        tcg_out_st16s_8(s, COND_AL, data_reg, addr_reg, 0);
+        if (bswap) {
+            tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
+        } else {
+            tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
+        }
         break;
     case 2:
     default:
-        tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
+        } else {
+            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
+        }
         break;
     case 3:
         /* TODO: use block store -
          * check that data_reg2 > data_reg or the other way */
-        tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
-        tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
+        if (bswap) {
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
+            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
+            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
+        } else {
+            tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
+            tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
+        }
         break;
     }
 #endif
@@ -1311,44 +1409,34 @@
 
 static uint8_t *tb_ret_addr;
 
-static inline void tcg_out_op(TCGContext *s, int opc,
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
 {
     int c;
 
     switch (opc) {
     case INDEX_op_exit_tb:
-#ifdef SAVE_LR
-        if (args[0] >> 8)
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, 15, 0);
-        else
-            tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]);
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 15, 0, 14, SHIFT_IMM_LSL(0));
-        if (args[0] >> 8)
-            tcg_out32(s, args[0]);
-#else
         {
             uint8_t *ld_ptr = s->code_ptr;
             if (args[0] >> 8)
-                tcg_out_ld32_12(s, COND_AL, 0, 15, 0);
+                tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
             else
-                tcg_out_dat_imm(s, COND_AL, ARITH_MOV, 0, 0, args[0]);
+                tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]);
             tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
             if (args[0] >> 8) {
                 *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
                 tcg_out32(s, args[0]);
             }
         }
-#endif
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* Direct jump method */
 #if defined(USE_DIRECT_JUMP)
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            tcg_out_b(s, COND_AL, 8);
+            tcg_out_b_noaddr(s, COND_AL);
 #else
-            tcg_out_ld32_12(s, COND_AL, 15, 15, -4);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
             tcg_out32(s, 0);
 #endif
@@ -1359,12 +1447,12 @@
             if (c > 0xfff || c < -0xfff) {
                 tcg_out_movi32(s, COND_AL, TCG_REG_R0,
                                 (tcg_target_long) (s->tb_next + args[0]));
-                tcg_out_ld32_12(s, COND_AL, 15, TCG_REG_R0, 0);
+                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
             } else
-                tcg_out_ld32_12(s, COND_AL, 15, 15, c);
+                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
 #else
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, 15, 0);
-            tcg_out_ld32_12(s, COND_AL, 15, TCG_REG_R0, 0);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
             tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
 #endif
         }
@@ -1372,7 +1460,7 @@
         break;
     case INDEX_op_call:
         if (const_args[0])
-            tcg_out_call(s, COND_AL, args[0]);
+            tcg_out_call(s, args[0]);
         else
             tcg_out_callr(s, COND_AL, args[0]);
         break;
@@ -1402,10 +1490,10 @@
         tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
         break;
     case INDEX_op_st8_i32:
-        tcg_out_st8u(s, COND_AL, args[0], args[1], args[2]);
+        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
         break;
     case INDEX_op_st16_i32:
-        tcg_out_st16u(s, COND_AL, args[0], args[1], args[2]);
+        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i32:
         tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
@@ -1427,6 +1515,9 @@
     case INDEX_op_and_i32:
         c = ARITH_AND;
         goto gen_arith;
+    case INDEX_op_andc_i32:
+        c = ARITH_BIC;
+        goto gen_arith;
     case INDEX_op_or_i32:
         c = ARITH_ORR;
         goto gen_arith;
@@ -1466,16 +1557,6 @@
     case INDEX_op_mulu2_i32:
         tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
         break;
-    case INDEX_op_div2_i32:
-        tcg_out_div_helper(s, COND_AL, args,
-                        tcg_helper_div_i64, tcg_helper_rem_i64,
-                        SHIFT_IMM_ASR(31));
-        break;
-    case INDEX_op_divu2_i32:
-        tcg_out_div_helper(s, COND_AL, args,
-                        tcg_helper_divu_i64, tcg_helper_remu_i64,
-                        SHIFT_IMM_LSR(31));
-        break;
     /* XXX: Perhaps args[2] & 0x1f is wrong */
     case INDEX_op_shl_i32:
         c = const_args[2] ?
@@ -1488,14 +1569,38 @@
     case INDEX_op_sar_i32:
         c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
                 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
+        goto gen_shift32;
+    case INDEX_op_rotr_i32:
+        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
+                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
         /* Fall through.  */
     gen_shift32:
         tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
         break;
 
+    case INDEX_op_rotl_i32:
+        if (const_args[2]) {
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+                            ((0x20 - args[2]) & 0x1f) ?
+                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
+                            SHIFT_IMM_LSL(0));
+        } else {
+            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_R8, args[1], 0x20);
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+                            SHIFT_REG_ROR(TCG_REG_R8));
+        }
+        break;
+
     case INDEX_op_brcond_i32:
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
-                        args[0], args[1], SHIFT_IMM_LSL(0));
+        if (const_args[1]) {
+            int rot;
+            rot = encode_imm(args[1]);
+            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
+                            args[0], rotl(args[1], rot) | (rot << 7));
+        } else {
+            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
+                            args[0], args[1], SHIFT_IMM_LSL(0));
+        }
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
         break;
     case INDEX_op_brcond2_i32:
@@ -1513,60 +1618,80 @@
                         args[0], args[2], SHIFT_IMM_LSL(0));
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
         break;
+    case INDEX_op_setcond_i32:
+        if (const_args[2]) {
+            int rot;
+            rot = encode_imm(args[2]);
+            tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
+                            args[1], rotl(args[2], rot) | (rot << 7));
+        } else {
+            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
+                            args[1], args[2], SHIFT_IMM_LSL(0));
+        }
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+                        ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+                        ARITH_MOV, args[0], 0, 0);
+        break;
+    case INDEX_op_setcond2_i32:
+        /* See brcond2_i32 comment */
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
+                        args[2], args[4], SHIFT_IMM_LSL(0));
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
+                        args[1], args[3], SHIFT_IMM_LSL(0));
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
+                        ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+                        ARITH_MOV, args[0], 0, 0);
+        break;
 
     case INDEX_op_qemu_ld8u:
-        tcg_out_qemu_ld(s, COND_AL, args, 0);
+        tcg_out_qemu_ld(s, args, 0);
         break;
     case INDEX_op_qemu_ld8s:
-        tcg_out_qemu_ld(s, COND_AL, args, 0 | 4);
+        tcg_out_qemu_ld(s, args, 0 | 4);
         break;
     case INDEX_op_qemu_ld16u:
-        tcg_out_qemu_ld(s, COND_AL, args, 1);
+        tcg_out_qemu_ld(s, args, 1);
         break;
     case INDEX_op_qemu_ld16s:
-        tcg_out_qemu_ld(s, COND_AL, args, 1 | 4);
+        tcg_out_qemu_ld(s, args, 1 | 4);
         break;
-    case INDEX_op_qemu_ld32u:
-        tcg_out_qemu_ld(s, COND_AL, args, 2);
+    case INDEX_op_qemu_ld32:
+        tcg_out_qemu_ld(s, args, 2);
         break;
     case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, COND_AL, args, 3);
+        tcg_out_qemu_ld(s, args, 3);
         break;
 
     case INDEX_op_qemu_st8:
-        tcg_out_qemu_st(s, COND_AL, args, 0);
+        tcg_out_qemu_st(s, args, 0);
         break;
     case INDEX_op_qemu_st16:
-        tcg_out_qemu_st(s, COND_AL, args, 1);
+        tcg_out_qemu_st(s, args, 1);
         break;
     case INDEX_op_qemu_st32:
-        tcg_out_qemu_st(s, COND_AL, args, 2);
+        tcg_out_qemu_st(s, args, 2);
         break;
     case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, COND_AL, args, 3);
+        tcg_out_qemu_st(s, args, 3);
+        break;
+
+    case INDEX_op_bswap16_i32:
+        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
+        break;
+    case INDEX_op_bswap32_i32:
+        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
         break;
 
     case INDEX_op_ext8s_i32:
-#ifdef __ARM_ARCH_7A__
-        /* sxtb */
-        tcg_out32(s, 0xe6af0070 | (args[0] << 12) | args[1]);
-#else
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                        args[0], 0, args[1], SHIFT_IMM_LSL(24));
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                        args[0], 0, args[0], SHIFT_IMM_ASR(24));
-#endif
+        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
         break;
     case INDEX_op_ext16s_i32:
-#ifdef __ARM_ARCH_7A__
-        /* sxth */
-        tcg_out32(s, 0xe6bf0070 | (args[0] << 12) | args[1]);
-#else
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                        args[0], 0, args[1], SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                        args[0], 0, args[0], SHIFT_IMM_ASR(16));
-#endif
+        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
+        break;
+    case INDEX_op_ext16u_i32:
+        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
         break;
 
     default:
@@ -1598,9 +1723,8 @@
     { INDEX_op_sub_i32, { "r", "r", "rI" } },
     { INDEX_op_mul_i32, { "r", "r", "r" } },
     { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
-    { INDEX_op_div2_i32, { "r", "r", "r", "1", "2" } },
-    { INDEX_op_divu2_i32, { "r", "r", "r", "1", "2" } },
     { INDEX_op_and_i32, { "r", "r", "rI" } },
+    { INDEX_op_andc_i32, { "r", "r", "rI" } },
     { INDEX_op_or_i32, { "r", "r", "rI" } },
     { INDEX_op_xor_i32, { "r", "r", "rI" } },
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1609,50 +1733,75 @@
     { INDEX_op_shl_i32, { "r", "r", "ri" } },
     { INDEX_op_shr_i32, { "r", "r", "ri" } },
     { INDEX_op_sar_i32, { "r", "r", "ri" } },
+    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
 
-    { INDEX_op_brcond_i32, { "r", "r" } },
+    { INDEX_op_brcond_i32, { "r", "rI" } },
+    { INDEX_op_setcond_i32, { "r", "r", "rI" } },
 
     /* TODO: "r", "r", "r", "r", "ri", "ri" */
     { INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
     { INDEX_op_sub2_i32, { "r", "r", "r", "r", "r", "r" } },
     { INDEX_op_brcond2_i32, { "r", "r", "r", "r" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "r", "r" } },
 
-    { INDEX_op_qemu_ld8u, { "r", "x", "X" } },
-    { INDEX_op_qemu_ld8s, { "r", "x", "X" } },
-    { INDEX_op_qemu_ld16u, { "r", "x", "X" } },
-    { INDEX_op_qemu_ld16s, { "r", "x", "X" } },
-    { INDEX_op_qemu_ld32u, { "r", "x", "X" } },
-    { INDEX_op_qemu_ld64, { "d", "r", "x", "X" } },
+#if TARGET_LONG_BITS == 32
+    { INDEX_op_qemu_ld8u, { "r", "l" } },
+    { INDEX_op_qemu_ld8s, { "r", "l" } },
+    { INDEX_op_qemu_ld16u, { "r", "l" } },
+    { INDEX_op_qemu_ld16s, { "r", "l" } },
+    { INDEX_op_qemu_ld32, { "r", "l" } },
+    { INDEX_op_qemu_ld64, { "L", "L", "l" } },
 
-    { INDEX_op_qemu_st8, { "x", "x", "X" } },
-    { INDEX_op_qemu_st16, { "x", "x", "X" } },
-    { INDEX_op_qemu_st32, { "x", "x", "X" } },
-    { INDEX_op_qemu_st64, { "x", "D", "x", "X" } },
+    { INDEX_op_qemu_st8, { "s", "s" } },
+    { INDEX_op_qemu_st16, { "s", "s" } },
+    { INDEX_op_qemu_st32, { "s", "s" } },
+    { INDEX_op_qemu_st64, { "S", "S", "s" } },
+#else
+    { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld16u, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld16s, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld32, { "r", "l", "l" } },
+    { INDEX_op_qemu_ld64, { "L", "L", "l", "l" } },
+
+    { INDEX_op_qemu_st8, { "s", "s", "s" } },
+    { INDEX_op_qemu_st16, { "s", "s", "s" } },
+    { INDEX_op_qemu_st32, { "s", "s", "s" } },
+    { INDEX_op_qemu_st64, { "S", "S", "s", "s" } },
+#endif
+
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
 
     { INDEX_op_ext8s_i32, { "r", "r" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
+    { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { -1 },
 };
 
-void tcg_target_init(TCGContext *s)
+static void tcg_target_init(TCGContext *s)
 {
+#if !defined(CONFIG_USER_ONLY)
     /* fail safe */
     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
         tcg_abort();
+#endif
 
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0,
-                    ((2 << TCG_REG_R14) - 1) & ~(1 << TCG_REG_R8));
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
     tcg_regset_set32(tcg_target_call_clobber_regs, 0,
-                    ((2 << TCG_REG_R3) - 1) |
-                    (1 << TCG_REG_R12) | (1 << TCG_REG_R14));
+                     (1 << TCG_REG_R0) |
+                     (1 << TCG_REG_R1) |
+                     (1 << TCG_REG_R2) |
+                     (1 << TCG_REG_R3) |
+                     (1 << TCG_REG_R12) |
+                     (1 << TCG_REG_R14));
 
     tcg_regset_clear(s->reserved_regs);
-#ifdef SAVE_LR
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R14);
-#endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R8);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
 
     tcg_add_target_add_op_defs(arm_op_defs);
 }
@@ -1684,7 +1833,7 @@
     }
 }
 
-static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
+static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
 {
     tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
 }
@@ -1695,14 +1844,17 @@
     tcg_out_movi32(s, COND_AL, ret, arg);
 }
 
-void tcg_target_qemu_prologue(TCGContext *s)
+static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    /* stmdb sp!, { r9 - r11, lr } */
-    tcg_out32(s, (COND_AL << 28) | 0x092d4e00);
+    /* There is no need to save r7, it is used to store the address
+       of the env structure and is not modified by GCC. */
+
+    /* stmdb sp!, { r4 - r6, r8 - r11, lr } */
+    tcg_out32(s, (COND_AL << 28) | 0x092d4f70);
 
     tcg_out_bx(s, COND_AL, TCG_REG_R0);
     tb_ret_addr = s->code_ptr;
 
-    /* ldmia sp!, { r9 - r11, pc } */
-    tcg_out32(s, (COND_AL << 28) | 0x08bd8e00);
+    /* ldmia sp!, { r4 - r6, r8 - r11, pc } */
+    tcg_out32(s, (COND_AL << 28) | 0x08bd8f70);
 }
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5eac7bf..d8d7d94 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -26,14 +26,6 @@
 
 #define TCG_TARGET_REG_BITS 32
 #undef TCG_TARGET_WORDS_BIGENDIAN
-#undef TCG_TARGET_HAS_div_i32
-#undef TCG_TARGET_HAS_div_i64
-#undef TCG_TARGET_HAS_bswap32_i32
-#define TCG_TARGET_HAS_ext8s_i32
-#define TCG_TARGET_HAS_ext16s_i32
-#define TCG_TARGET_HAS_neg_i32
-#undef TCG_TARGET_HAS_neg_i64
-#define TCG_TARGET_HAS_not_i32
 #undef TCG_TARGET_STACK_GROWSUP
 
 enum {
@@ -52,38 +44,40 @@
     TCG_REG_R12,
     TCG_REG_R13,
     TCG_REG_R14,
+    TCG_REG_PC,
 };
 
-#define TCG_TARGET_NB_REGS 15
+#define TCG_TARGET_NB_REGS 16
 
 #define TCG_CT_CONST_ARM 0x100
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK		TCG_REG_R13
 #define TCG_TARGET_STACK_ALIGN		8
+#define TCG_TARGET_CALL_ALIGN_ARGS	1
 #define TCG_TARGET_CALL_STACK_OFFSET	0
 
 /* optional instructions */
-// #define TCG_TARGET_HAS_div_i32
-// #define TCG_TARGET_HAS_rot_i32
-// #define TCG_TARGET_HAS_ext8s_i32
-// #define TCG_TARGET_HAS_ext16s_i32
-// #define TCG_TARGET_HAS_ext8u_i32
-// #define TCG_TARGET_HAS_ext16u_i32
-// #define TCG_TARGET_HAS_bswap16_i32
-// #define TCG_TARGET_HAS_bswap32_i32
-// #define TCG_TARGET_HAS_not_i32
-// #define TCG_TARGET_HAS_neg_i32
-// #define TCG_TARGET_HAS_andc_i32
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#undef TCG_TARGET_HAS_ext8u_i32       /* and r0, r1, #0xff */
+#define TCG_TARGET_HAS_ext16u_i32
+#define TCG_TARGET_HAS_bswap16_i32
+#define TCG_TARGET_HAS_bswap32_i32
+#define TCG_TARGET_HAS_not_i32
+#define TCG_TARGET_HAS_neg_i32
+#define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_andc_i32
 // #define TCG_TARGET_HAS_orc_i32
+// #define TCG_TARGET_HAS_eqv_i32
+// #define TCG_TARGET_HAS_nand_i32
+// #define TCG_TARGET_HAS_nor_i32
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
 enum {
     /* Note: must be synced with dyngen-exec.h */
     TCG_AREG0 = TCG_REG_R7,
-    TCG_AREG1 = TCG_REG_R4,
-    TCG_AREG2 = TCG_REG_R5,
 };
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 4677971..7f4653e 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -24,41 +24,22 @@
 
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "%r0",
-    "%r1",
-    "%rp",
-    "%r3",
-    "%r4",
-    "%r5",
-    "%r6",
-    "%r7",
-    "%r8",
-    "%r9",
-    "%r10",
-    "%r11",
-    "%r12",
-    "%r13",
-    "%r14",
-    "%r15",
-    "%r16",
-    "%r17",
-    "%r18",
-    "%r19",
-    "%r20",
-    "%r21",
-    "%r22",
-    "%r23",
-    "%r24",
-    "%r25",
-    "%r26",
-    "%dp",
-    "%ret0",
-    "%ret1",
-    "%sp",
-    "%r31",
+    "%r0", "%r1", "%rp", "%r3", "%r4", "%r5", "%r6", "%r7",
+    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+    "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
+    "%r24", "%r25", "%r26", "%dp", "%ret0", "%ret1", "%sp", "%r31",
 };
 #endif
 
+/* This is an 8 byte temp slot in the stack frame.  */
+#define STACK_TEMP_OFS -16
+
+#ifdef CONFIG_USE_GUEST_BASE
+#define TCG_GUEST_BASE_REG TCG_REG_R16
+#else
+#define TCG_GUEST_BASE_REG TCG_REG_R0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R4,
     TCG_REG_R5,
@@ -75,6 +56,14 @@
     TCG_REG_R14,
     TCG_REG_R15,
     TCG_REG_R16,
+
+    TCG_REG_R26,
+    TCG_REG_R25,
+    TCG_REG_R24,
+    TCG_REG_R23,
+
+    TCG_REG_RET0,
+    TCG_REG_RET1,
 };
 
 static const int tcg_target_call_iarg_regs[4] = {
@@ -89,16 +78,101 @@
     TCG_REG_RET1,
 };
 
+/* True iff val fits a signed field of width BITS.  */
+static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
+{
+    return (val << ((sizeof(tcg_target_long) * 8 - bits))
+            >> (sizeof(tcg_target_long) * 8 - bits)) == val;
+}
+
+/* True iff depi can be used to compute (reg | MASK).
+   Accept a bit pattern like:
+      0....01....1
+      1....10....0
+      0..01..10..0
+   Copied from gcc sources.  */
+static inline int or_mask_p(tcg_target_ulong mask)
+{
+    if (mask == 0 || mask == -1) {
+        return 0;
+    }
+    mask += mask & -mask;
+    return (mask & (mask - 1)) == 0;
+}
+
+/* True iff depi or extru can be used to compute (reg & mask).
+   Accept a bit pattern like these:
+      0....01....1
+      1....10....0
+      1..10..01..1
+   Copied from gcc sources.  */
+static inline int and_mask_p(tcg_target_ulong mask)
+{
+    return or_mask_p(~mask);
+}
+
+static int low_sign_ext(int val, int len)
+{
+    return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1));
+}
+
+static int reassemble_12(int as12)
+{
+    return (((as12 & 0x800) >> 11) |
+            ((as12 & 0x400) >> 8) |
+            ((as12 & 0x3ff) << 3));
+}
+
+static int reassemble_17(int as17)
+{
+    return (((as17 & 0x10000) >> 16) |
+            ((as17 & 0x0f800) << 5) |
+            ((as17 & 0x00400) >> 8) |
+            ((as17 & 0x003ff) << 3));
+}
+
+static int reassemble_21(int as21)
+{
+    return (((as21 & 0x100000) >> 20) |
+            ((as21 & 0x0ffe00) >> 8) |
+            ((as21 & 0x000180) << 7) |
+            ((as21 & 0x00007c) << 14) |
+            ((as21 & 0x000003) << 12));
+}
+
+/* ??? Bizzarely, there is no PCREL12F relocation type.  I guess all
+   such relocations are simply fully handled by the assembler.  */
+#define R_PARISC_PCREL12F  R_PARISC_NONE
+
 static void patch_reloc(uint8_t *code_ptr, int type,
                         tcg_target_long value, tcg_target_long addend)
 {
+    uint32_t *insn_ptr = (uint32_t *)code_ptr;
+    uint32_t insn = *insn_ptr;
+    tcg_target_long pcrel;
+
+    value += addend;
+    pcrel = (value - ((tcg_target_long)code_ptr + 8)) >> 2;
+
     switch (type) {
+    case R_PARISC_PCREL12F:
+        assert(check_fit_tl(pcrel, 12));
+        /* ??? We assume all patches are forward.  See tcg_out_brcond
+           re setting the NUL bit on the branch and eliding the nop.  */
+        assert(pcrel >= 0);
+        insn &= ~0x1ffdu;
+        insn |= reassemble_12(pcrel);
+        break;
     case R_PARISC_PCREL17F:
-        hppa_patch17f((uint32_t *)code_ptr, value, addend);
+        assert(check_fit_tl(pcrel, 17));
+        insn &= ~0x1f1ffdu;
+        insn |= reassemble_17(pcrel);
         break;
     default:
         tcg_abort();
     }
+
+    *insn_ptr = insn;
 }
 
 /* maximum number of register used for input function arguments */
@@ -126,6 +200,24 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R24);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R23);
         break;
+    case 'Z':
+        ct->ct |= TCG_CT_CONST_0;
+        break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_S11;
+        break;
+    case 'J':
+        ct->ct |= TCG_CT_CONST_S5;
+	break;
+    case 'K':
+        ct->ct |= TCG_CT_CONST_MS11;
+        break;
+    case 'M':
+        ct->ct |= TCG_CT_CONST_AND;
+        break;
+    case 'O':
+        ct->ct |= TCG_CT_CONST_OR;
+        break;
     default:
         return -1;
     }
@@ -135,15 +227,25 @@
 }
 
 /* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct)
 {
-    int ct;
-
-    ct = arg_ct->ct;
-
-    /* TODO */
-
+    int ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST) {
+        return 1;
+    } else if (ct & TCG_CT_CONST_0) {
+        return val == 0;
+    } else if (ct & TCG_CT_CONST_S5) {
+        return check_fit_tl(val, 5);
+    } else if (ct & TCG_CT_CONST_S11) {
+        return check_fit_tl(val, 11);
+    } else if (ct & TCG_CT_CONST_MS11) {
+        return check_fit_tl(-val, 11);
+    } else if (ct & TCG_CT_CONST_AND) {
+        return and_mask_p(val);
+    } else if (ct & TCG_CT_CONST_OR) {
+        return or_mask_p(val);
+    }
     return 0;
 }
 
@@ -163,126 +265,163 @@
 #define INSN_SHDEP_CP(x) ((31 - (x)) << 5)
 #define INSN_SHDEP_P(x)  ((x) << 5)
 #define INSN_COND(x)     ((x) << 13)
+#define INSN_IM11(x)     low_sign_ext(x, 11)
+#define INSN_IM14(x)     low_sign_ext(x, 14)
+#define INSN_IM5(x)      (low_sign_ext(x, 5) << 16)
 
-#define COND_NEVER 0
-#define COND_EQUAL 1
-#define COND_LT    2
-#define COND_LTEQ  3
-#define COND_LTU   4
-#define COND_LTUEQ 5
-#define COND_SV    6
-#define COND_OD    7
+#define COND_NEVER   0
+#define COND_EQ      1
+#define COND_LT      2
+#define COND_LE      3
+#define COND_LTU     4
+#define COND_LEU     5
+#define COND_SV      6
+#define COND_OD      7
+#define COND_FALSE   8
 
+#define INSN_ADD	(INSN_OP(0x02) | INSN_EXT6(0x18))
+#define INSN_ADDC	(INSN_OP(0x02) | INSN_EXT6(0x1c))
+#define INSN_ADDI	(INSN_OP(0x2d))
+#define INSN_ADDIL	(INSN_OP(0x0a))
+#define INSN_ADDL	(INSN_OP(0x02) | INSN_EXT6(0x28))
+#define INSN_AND	(INSN_OP(0x02) | INSN_EXT6(0x08))
+#define INSN_ANDCM	(INSN_OP(0x02) | INSN_EXT6(0x00))
+#define INSN_COMCLR	(INSN_OP(0x02) | INSN_EXT6(0x22))
+#define INSN_COMICLR	(INSN_OP(0x24))
+#define INSN_DEP	(INSN_OP(0x35) | INSN_EXT3SH(3))
+#define INSN_DEPI	(INSN_OP(0x35) | INSN_EXT3SH(7))
+#define INSN_EXTRS	(INSN_OP(0x34) | INSN_EXT3SH(7))
+#define INSN_EXTRU	(INSN_OP(0x34) | INSN_EXT3SH(6))
+#define INSN_LDIL	(INSN_OP(0x08))
+#define INSN_LDO	(INSN_OP(0x0d))
+#define INSN_MTCTL	(INSN_OP(0x00) | INSN_EXT8B(0xc2))
+#define INSN_OR		(INSN_OP(0x02) | INSN_EXT6(0x09))
+#define INSN_SHD	(INSN_OP(0x34) | INSN_EXT3SH(2))
+#define INSN_SUB	(INSN_OP(0x02) | INSN_EXT6(0x10))
+#define INSN_SUBB	(INSN_OP(0x02) | INSN_EXT6(0x14))
+#define INSN_SUBI	(INSN_OP(0x25))
+#define INSN_VEXTRS	(INSN_OP(0x34) | INSN_EXT3SH(5))
+#define INSN_VEXTRU	(INSN_OP(0x34) | INSN_EXT3SH(4))
+#define INSN_VSHD	(INSN_OP(0x34) | INSN_EXT3SH(0))
+#define INSN_XOR	(INSN_OP(0x02) | INSN_EXT6(0x0a))
+#define INSN_ZDEP	(INSN_OP(0x35) | INSN_EXT3SH(2))
+#define INSN_ZVDEP	(INSN_OP(0x35) | INSN_EXT3SH(0))
 
-/* Logical ADD */
-#define ARITH_ADD  (INSN_OP(0x02) | INSN_EXT6(0x28))
-#define ARITH_AND  (INSN_OP(0x02) | INSN_EXT6(0x08))
-#define ARITH_OR   (INSN_OP(0x02) | INSN_EXT6(0x09))
-#define ARITH_XOR  (INSN_OP(0x02) | INSN_EXT6(0x0a))
-#define ARITH_SUB  (INSN_OP(0x02) | INSN_EXT6(0x10))
+#define INSN_BL         (INSN_OP(0x3a) | INSN_EXT3BR(0))
+#define INSN_BL_N       (INSN_OP(0x3a) | INSN_EXT3BR(0) | 2)
+#define INSN_BLR        (INSN_OP(0x3a) | INSN_EXT3BR(2))
+#define INSN_BV         (INSN_OP(0x3a) | INSN_EXT3BR(6))
+#define INSN_BV_N       (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
+#define INSN_BLE_SR4    (INSN_OP(0x39) | (1 << 13))
 
-#define SHD        (INSN_OP(0x34) | INSN_EXT3SH(2))
-#define VSHD       (INSN_OP(0x34) | INSN_EXT3SH(0))
-#define DEP        (INSN_OP(0x35) | INSN_EXT3SH(3))
-#define ZDEP       (INSN_OP(0x35) | INSN_EXT3SH(2))
-#define ZVDEP      (INSN_OP(0x35) | INSN_EXT3SH(0))
-#define EXTRU      (INSN_OP(0x34) | INSN_EXT3SH(6))
-#define EXTRS      (INSN_OP(0x34) | INSN_EXT3SH(7))
-#define VEXTRS     (INSN_OP(0x34) | INSN_EXT3SH(5))
+#define INSN_LDB        (INSN_OP(0x10))
+#define INSN_LDH        (INSN_OP(0x11))
+#define INSN_LDW        (INSN_OP(0x12))
+#define INSN_LDWM       (INSN_OP(0x13))
+#define INSN_FLDDS      (INSN_OP(0x0b) | INSN_EXT4(0) | (1 << 12))
 
-#define SUBI       (INSN_OP(0x25))
-#define MTCTL      (INSN_OP(0x00) | INSN_EXT8B(0xc2))
+#define INSN_LDBX	(INSN_OP(0x03) | INSN_EXT4(0))
+#define INSN_LDHX	(INSN_OP(0x03) | INSN_EXT4(1))
+#define INSN_LDWX       (INSN_OP(0x03) | INSN_EXT4(2))
 
-#define BL         (INSN_OP(0x3a) | INSN_EXT3BR(0))
-#define BLE_SR4    (INSN_OP(0x39) | (1 << 13))
-#define BV         (INSN_OP(0x3a) | INSN_EXT3BR(6))
-#define BV_N       (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
-#define LDIL       (INSN_OP(0x08))
-#define LDO        (INSN_OP(0x0d))
+#define INSN_STB        (INSN_OP(0x18))
+#define INSN_STH        (INSN_OP(0x19))
+#define INSN_STW        (INSN_OP(0x1a))
+#define INSN_STWM       (INSN_OP(0x1b))
+#define INSN_FSTDS      (INSN_OP(0x0b) | INSN_EXT4(8) | (1 << 12))
 
-#define LDB        (INSN_OP(0x10))
-#define LDH        (INSN_OP(0x11))
-#define LDW        (INSN_OP(0x12))
-#define LDWM       (INSN_OP(0x13))
+#define INSN_COMBT      (INSN_OP(0x20))
+#define INSN_COMBF      (INSN_OP(0x22))
+#define INSN_COMIBT     (INSN_OP(0x21))
+#define INSN_COMIBF     (INSN_OP(0x23))
 
-#define STB        (INSN_OP(0x18))
-#define STH        (INSN_OP(0x19))
-#define STW        (INSN_OP(0x1a))
-#define STWM       (INSN_OP(0x1b))
+/* supplied by libgcc */
+extern void *__canonicalize_funcptr_for_compare(void *);
 
-#define COMBT      (INSN_OP(0x20))
-#define COMBF      (INSN_OP(0x22))
-
-static int lowsignext(uint32_t val, int start, int length)
+static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
 {
-    return (((val << 1) & ~(~0 << length)) |
-            ((val >> (length - 1)) & 1)) << start;
+    /* PA1.1 defines COPY as OR r,0,t; PA2.0 defines COPY as LDO 0(r),t
+       but hppa-dis.c is unaware of this definition */
+    if (ret != arg) {
+        tcg_out32(s, INSN_OR | INSN_T(ret) | INSN_R1(arg)
+                  | INSN_R2(TCG_REG_R0));
+    }
 }
 
-static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         int ret, tcg_target_long arg)
 {
-    /* PA1.1 defines COPY as OR r,0,t */
-    tcg_out32(s, ARITH_OR | INSN_T(ret) | INSN_R1(arg) | INSN_R2(TCG_REG_R0));
-
-    /* PA2.0 defines COPY as LDO 0(r),t
-     * but hppa-dis.c is unaware of this definition */
-    /* tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(arg) | reassemble_14(0)); */
-}
-
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
-                                int ret, tcg_target_long arg)
-{
-    if (arg == (arg & 0x1fff)) {
-        tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(TCG_REG_R0) |
-                     reassemble_14(arg));
+    if (check_fit_tl(arg, 14)) {
+        tcg_out32(s, INSN_LDO | INSN_R1(ret)
+                  | INSN_R2(TCG_REG_R0) | INSN_IM14(arg));
     } else {
-        tcg_out32(s, LDIL | INSN_R2(ret) |
-                     reassemble_21(lrsel((uint32_t)arg, 0)));
-        if (arg & 0x7ff)
-            tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(ret) |
-                         reassemble_14(rrsel((uint32_t)arg, 0)));
+        uint32_t hi, lo;
+        hi = arg >> 11;
+        lo = arg & 0x7ff;
+
+        tcg_out32(s, INSN_LDIL | INSN_R2(ret) | reassemble_21(hi));
+        if (lo) {
+            tcg_out32(s, INSN_LDO | INSN_R1(ret)
+                      | INSN_R2(ret) | INSN_IM14(lo));
+        }
     }
 }
 
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
-                                  tcg_target_long arg)
+static void tcg_out_ldst(TCGContext *s, int ret, int addr,
+                         tcg_target_long offset, int op)
 {
-    tcg_out32(s, LDIL | INSN_R2(ret) |
-                 reassemble_21(lrsel((uint32_t)arg, 0)));
-    tcg_out32(s, LDW | INSN_R1(ret) | INSN_R2(ret) |
-                 reassemble_14(rrsel((uint32_t)arg, 0)));
-}
+    if (!check_fit_tl(offset, 14)) {
+        uint32_t hi, lo, op;
 
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
-                                  tcg_target_long arg)
-{
-    tcg_out_ld_raw(s, ret, arg);
-}
+        hi = offset >> 11;
+        lo = offset & 0x7ff;
 
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset,
-                                int op)
-{
-    if (offset == (offset & 0xfff))
-        tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) |
-                 reassemble_14(offset));
-    else {
-        fprintf(stderr, "unimplemented %s with offset %d\n", __func__, offset);
-        tcg_abort();
+        if (addr == TCG_REG_R0) {
+            op = INSN_LDIL | INSN_R2(TCG_REG_R1);
+        } else {
+            op = INSN_ADDIL | INSN_R2(addr);
+        }
+        tcg_out32(s, op | reassemble_21(hi));
+
+        addr = TCG_REG_R1;
+	offset = lo;
+    }
+
+    if (ret != addr || offset != 0 || op != INSN_LDO) {
+        tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | INSN_IM14(offset));
     }
 }
 
+/* This function is required by tcg.c.  */
 static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
-    fprintf(stderr, "unimplemented %s\n", __func__);
-    tcg_abort();
+    tcg_out_ldst(s, ret, arg1, arg2, INSN_LDW);
 }
 
+/* This function is required by tcg.c.  */
 static inline void tcg_out_st(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
-    fprintf(stderr, "unimplemented %s\n", __func__);
-    tcg_abort();
+    tcg_out_ldst(s, ret, arg1, arg2, INSN_STW);
+}
+
+static void tcg_out_ldst_index(TCGContext *s, int data,
+                               int base, int index, int op)
+{
+    tcg_out32(s, op | INSN_T(data) | INSN_R1(index) | INSN_R2(base));
+}
+
+static inline void tcg_out_addi2(TCGContext *s, int ret, int arg1,
+                                 tcg_target_long val)
+{
+    tcg_out_ldst(s, ret, arg1, val, INSN_LDO);
+}
+
+/* This function is required by tcg.c.  */
+static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+    tcg_out_addi2(s, reg, reg, val);
 }
 
 static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int op)
@@ -293,61 +432,447 @@
 static inline void tcg_out_arithi(TCGContext *s, int t, int r1,
                                   tcg_target_long val, int op)
 {
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, val);
-    tcg_out_arith(s, t, r1, TCG_REG_R20, op);
-}
-
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
-{
-    tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
+    assert(check_fit_tl(val, 11));
+    tcg_out32(s, op | INSN_R1(t) | INSN_R2(r1) | INSN_IM11(val));
 }
 
 static inline void tcg_out_nop(TCGContext *s)
 {
-    tcg_out32(s, ARITH_OR | INSN_T(TCG_REG_R0) | INSN_R1(TCG_REG_R0) |
-                 INSN_R2(TCG_REG_R0));
+    tcg_out_arith(s, TCG_REG_R0, TCG_REG_R0, TCG_REG_R0, INSN_OR);
 }
 
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) {
-    tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) |
-                 INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
-}
-
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) {
-    tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) |
-                 INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
-}
-
-static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg) {
-    if(ret != arg)
-        tcg_out_mov(s, ret, arg);
-    tcg_out32(s, DEP | INSN_R2(ret) | INSN_R1(ret) |
-                 INSN_SHDEP_CP(15) | INSN_DEP_LEN(8));
-    tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(TCG_REG_R0) |
-                 INSN_R2(ret) | INSN_SHDEP_CP(8));
-}
-
-static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp) {
-    tcg_out32(s, SHD | INSN_T(temp) | INSN_R1(arg) |
-                 INSN_R2(arg) | INSN_SHDEP_CP(16));
-    tcg_out32(s, DEP | INSN_R2(temp) | INSN_R1(temp) |
-                 INSN_SHDEP_CP(15) | INSN_DEP_LEN(8));
-    tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(arg) |
-                 INSN_R2(temp) | INSN_SHDEP_CP(8));
-}
-
-static inline void tcg_out_call(TCGContext *s, void *func)
+static inline void tcg_out_mtctl_sar(TCGContext *s, int arg)
 {
-    uint32_t val = (uint32_t)__canonicalize_funcptr_for_compare(func);
-    tcg_out32(s, LDIL | INSN_R2(TCG_REG_R20) |
-                 reassemble_21(lrsel(val, 0)));
-    tcg_out32(s, BLE_SR4 | INSN_R2(TCG_REG_R20) |
-                 reassemble_17(rrsel(val, 0) >> 2));
-    tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
+    tcg_out32(s, INSN_MTCTL | INSN_R2(11) | INSN_R1(arg));
+}
+
+/* Extract LEN bits at position OFS from ARG and place in RET.
+   Note that here the bit ordering is reversed from the PA-RISC
+   standard, such that the right-most bit is 0.  */
+static inline void tcg_out_extr(TCGContext *s, int ret, int arg,
+                                unsigned ofs, unsigned len, int sign)
+{
+    assert(ofs < 32 && len <= 32 - ofs);
+    tcg_out32(s, (sign ? INSN_EXTRS : INSN_EXTRU)
+              | INSN_R1(ret) | INSN_R2(arg)
+              | INSN_SHDEP_P(31 - ofs) | INSN_DEP_LEN(len));
+}
+
+/* Likewise with OFS interpreted little-endian.  */
+static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
+                               unsigned ofs, unsigned len)
+{
+    assert(ofs < 32 && len <= 32 - ofs);
+    tcg_out32(s, INSN_DEP | INSN_R2(ret) | INSN_R1(arg)
+              | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
+}
+
+static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo,
+                               unsigned count)
+{
+    assert(count < 32);
+    tcg_out32(s, INSN_SHD | INSN_R1(hi) | INSN_R2(lo) | INSN_T(ret)
+              | INSN_SHDEP_CP(count));
+}
+
+static void tcg_out_vshd(TCGContext *s, int ret, int hi, int lo, int creg)
+{
+    tcg_out_mtctl_sar(s, creg);
+    tcg_out32(s, INSN_VSHD | INSN_T(ret) | INSN_R1(hi) | INSN_R2(lo));
+}
+
+static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m)
+{
+    int bs0, bs1;
+
+    /* Note that the argument is constrained to match or_mask_p.  */
+    for (bs0 = 0; bs0 < 32; bs0++) {
+        if ((m & (1u << bs0)) != 0) {
+            break;
+        }
+    }
+    for (bs1 = bs0; bs1 < 32; bs1++) {
+        if ((m & (1u << bs1)) == 0) {
+            break;
+        }
+    }
+    assert(bs1 == 32 || (1ul << bs1) > m);
+
+    tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
+    tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(-1)
+              | INSN_SHDEP_CP(31 - bs0) | INSN_DEP_LEN(bs1 - bs0));
+}
+
+static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
+{
+    int ls0, ls1, ms0;
+
+    /* Note that the argument is constrained to match and_mask_p.  */
+    for (ls0 = 0; ls0 < 32; ls0++) {
+        if ((m & (1u << ls0)) == 0) {
+            break;
+        }
+    }
+    for (ls1 = ls0; ls1 < 32; ls1++) {
+        if ((m & (1u << ls1)) != 0) {
+            break;
+        }
+    }
+    for (ms0 = ls1; ms0 < 32; ms0++) {
+        if ((m & (1u << ms0)) == 0) {
+            break;
+        }
+    }
+    assert (ms0 == 32);
+
+    if (ls1 == 32) {
+        tcg_out_extr(s, ret, arg, 0, ls0, 0);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_I32, ret, arg);
+        tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(0)
+                  | INSN_SHDEP_CP(31 - ls0) | INSN_DEP_LEN(ls1 - ls0));
+    }
+}
+
+static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
+{
+    tcg_out_extr(s, ret, arg, 0, 8, 1);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
+{
+    tcg_out_extr(s, ret, arg, 0, 16, 1);
+}
+
+static void tcg_out_shli(TCGContext *s, int ret, int arg, int count)
+{
+    count &= 31;
+    tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
+              | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count));
+}
+
+static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
+{
+    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
+    tcg_out_mtctl_sar(s, TCG_REG_R20);
+    tcg_out32(s, INSN_ZVDEP | INSN_R2(ret) | INSN_R1(arg) | INSN_DEP_LEN(32));
+}
+
+static void tcg_out_shri(TCGContext *s, int ret, int arg, int count)
+{
+    count &= 31;
+    tcg_out_extr(s, ret, arg, count, 32 - count, 0);
+}
+
+static void tcg_out_shr(TCGContext *s, int ret, int arg, int creg)
+{
+    tcg_out_vshd(s, ret, TCG_REG_R0, arg, creg);
+}
+
+static void tcg_out_sari(TCGContext *s, int ret, int arg, int count)
+{
+    count &= 31;
+    tcg_out_extr(s, ret, arg, count, 32 - count, 1);
+}
+
+static void tcg_out_sar(TCGContext *s, int ret, int arg, int creg)
+{
+    tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
+    tcg_out_mtctl_sar(s, TCG_REG_R20);
+    tcg_out32(s, INSN_VEXTRS | INSN_R1(ret) | INSN_R2(arg) | INSN_DEP_LEN(32));
+}
+
+static void tcg_out_rotli(TCGContext *s, int ret, int arg, int count)
+{
+    count &= 31;
+    tcg_out_shd(s, ret, arg, arg, 32 - count);
+}
+
+static void tcg_out_rotl(TCGContext *s, int ret, int arg, int creg)
+{
+    tcg_out_arithi(s, TCG_REG_R20, creg, 32, INSN_SUBI);
+    tcg_out_vshd(s, ret, arg, arg, TCG_REG_R20);
+}
+
+static void tcg_out_rotri(TCGContext *s, int ret, int arg, int count)
+{
+    count &= 31;
+    tcg_out_shd(s, ret, arg, arg, count);
+}
+
+static void tcg_out_rotr(TCGContext *s, int ret, int arg, int creg)
+{
+    tcg_out_vshd(s, ret, arg, arg, creg);
+}
+
+static void tcg_out_bswap16(TCGContext *s, int ret, int arg, int sign)
+{
+    if (ret != arg) {
+        tcg_out_mov(s, TCG_TYPE_I32, ret, arg); /* arg =  xxAB */
+    }
+    tcg_out_dep(s, ret, ret, 16, 8);          /* ret =  xBAB */
+    tcg_out_extr(s, ret, ret, 8, 16, sign);   /* ret =  ..BA */
+}
+
+static void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp)
+{
+                                          /* arg =  ABCD */
+    tcg_out_rotri(s, temp, arg, 16);      /* temp = CDAB */
+    tcg_out_dep(s, temp, temp, 16, 8);    /* temp = CBAB */
+    tcg_out_shd(s, ret, arg, temp, 8);    /* ret =  DCBA */
+}
+
+static void tcg_out_call(TCGContext *s, void *func)
+{
+    tcg_target_long val, hi, lo, disp;
+
+    val = (uint32_t)__canonicalize_funcptr_for_compare(func);
+    disp = (val - ((tcg_target_long)s->code_ptr + 8)) >> 2;
+
+    if (check_fit_tl(disp, 17)) {
+        tcg_out32(s, INSN_BL_N | INSN_R2(TCG_REG_RP) | reassemble_17(disp));
+    } else {
+        hi = val >> 11;
+        lo = val & 0x7ff;
+
+        tcg_out32(s, INSN_LDIL | INSN_R2(TCG_REG_R20) | reassemble_21(hi));
+        tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R20)
+                  | reassemble_17(lo >> 2));
+        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_RP, TCG_REG_R31);
+    }
+}
+
+static void tcg_out_xmpyu(TCGContext *s, int retl, int reth,
+                          int arg1, int arg2)
+{
+    /* Store both words into the stack for copy to the FPU.  */
+    tcg_out_ldst(s, arg1, TCG_REG_SP, STACK_TEMP_OFS, INSN_STW);
+    tcg_out_ldst(s, arg2, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_STW);
+
+    /* Load both words into the FPU at the same time.  We get away
+       with this because we can address the left and right half of the
+       FPU registers individually once loaded.  */
+    /* fldds stack_temp(sp),fr22 */
+    tcg_out32(s, INSN_FLDDS | INSN_R2(TCG_REG_SP)
+              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
+
+    /* xmpyu fr22r,fr22,fr22 */
+    tcg_out32(s, 0x3ad64796);
+
+    /* Store the 64-bit result back into the stack.  */
+    /* fstds stack_temp(sp),fr22 */
+    tcg_out32(s, INSN_FSTDS | INSN_R2(TCG_REG_SP)
+              | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
+
+    /* Load the pieces of the result that the caller requested.  */
+    if (reth) {
+        tcg_out_ldst(s, reth, TCG_REG_SP, STACK_TEMP_OFS, INSN_LDW);
+    }
+    if (retl) {
+        tcg_out_ldst(s, retl, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_LDW);
+    }
+}
+
+static void tcg_out_add2(TCGContext *s, int destl, int desth,
+                         int al, int ah, int bl, int bh, int blconst)
+{
+    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
+
+    if (blconst) {
+        tcg_out_arithi(s, tmp, al, bl, INSN_ADDI);
+    } else {
+        tcg_out_arith(s, tmp, al, bl, INSN_ADD);
+    }
+    tcg_out_arith(s, desth, ah, bh, INSN_ADDC);
+
+    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
+}
+
+static void tcg_out_sub2(TCGContext *s, int destl, int desth, int al, int ah,
+                         int bl, int bh, int alconst, int blconst)
+{
+    int tmp = (destl == ah || destl == bh ? TCG_REG_R20 : destl);
+
+    if (alconst) {
+        if (blconst) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, bl);
+            bl = TCG_REG_R20;
+        }
+        tcg_out_arithi(s, tmp, bl, al, INSN_SUBI);
+    } else if (blconst) {
+        tcg_out_arithi(s, tmp, al, -bl, INSN_ADDI);
+    } else {
+        tcg_out_arith(s, tmp, al, bl, INSN_SUB);
+    }
+    tcg_out_arith(s, desth, ah, bh, INSN_SUBB);
+
+    tcg_out_mov(s, TCG_TYPE_I32, destl, tmp);
+}
+
+static void tcg_out_branch(TCGContext *s, int label_index, int nul)
+{
+    TCGLabel *l = &s->labels[label_index];
+    uint32_t op = nul ? INSN_BL_N : INSN_BL;
+
+    if (l->has_value) {
+        tcg_target_long val = l->u.value;
+
+        val -= (tcg_target_long)s->code_ptr + 8;
+        val >>= 2;
+        assert(check_fit_tl(val, 17));
+
+        tcg_out32(s, op | reassemble_17(val));
+    } else {
+        /* We need to keep the offset unchanged for retranslation.  */
+        uint32_t old_insn = *(uint32_t *)s->code_ptr;
+
+        tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL17F, label_index, 0);
+        tcg_out32(s, op | (old_insn & 0x1f1ffdu));
+    }
+}
+
+static const uint8_t tcg_cond_to_cmp_cond[10] =
+{
+    [TCG_COND_EQ] = COND_EQ,
+    [TCG_COND_NE] = COND_EQ | COND_FALSE,
+    [TCG_COND_LT] = COND_LT,
+    [TCG_COND_GE] = COND_LT | COND_FALSE,
+    [TCG_COND_LE] = COND_LE,
+    [TCG_COND_GT] = COND_LE | COND_FALSE,
+    [TCG_COND_LTU] = COND_LTU,
+    [TCG_COND_GEU] = COND_LTU | COND_FALSE,
+    [TCG_COND_LEU] = COND_LEU,
+    [TCG_COND_GTU] = COND_LEU | COND_FALSE,
+};
+
+static void tcg_out_brcond(TCGContext *s, int cond, TCGArg c1,
+                           TCGArg c2, int c2const, int label_index)
+{
+    TCGLabel *l = &s->labels[label_index];
+    int op, pacond;
+
+    /* Note that COMIB operates as if the immediate is the first
+       operand.  We model brcond with the immediate in the second
+       to better match what targets are likely to give us.  For
+       consistency, model COMB with reversed operands as well.  */
+    pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
+
+    if (c2const) {
+        op = (pacond & COND_FALSE ? INSN_COMIBF : INSN_COMIBT);
+        op |= INSN_IM5(c2);
+    } else {
+        op = (pacond & COND_FALSE ? INSN_COMBF : INSN_COMBT);
+        op |= INSN_R1(c2);
+    }
+    op |= INSN_R2(c1);
+    op |= INSN_COND(pacond & 7);
+
+    if (l->has_value) {
+        tcg_target_long val = l->u.value;
+
+        val -= (tcg_target_long)s->code_ptr + 8;
+        val >>= 2;
+        assert(check_fit_tl(val, 12));
+
+        /* ??? Assume that all branches to defined labels are backward.
+           Which means that if the nul bit is set, the delay slot is
+           executed if the branch is taken, and not executed in fallthru.  */
+        tcg_out32(s, op | reassemble_12(val));
+        tcg_out_nop(s);
+    } else {
+        /* We need to keep the offset unchanged for retranslation.  */
+        uint32_t old_insn = *(uint32_t *)s->code_ptr;
+
+        tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL12F, label_index, 0);
+        /* ??? Assume that all branches to undefined labels are forward.
+           Which means that if the nul bit is set, the delay slot is
+           not executed if the branch is taken, which is what we want.  */
+        tcg_out32(s, op | 2 | (old_insn & 0x1ffdu));
+    }
+}
+
+static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
+                           TCGArg c1, TCGArg c2, int c2const)
+{
+    int op, pacond;
+
+    /* Note that COMICLR operates as if the immediate is the first
+       operand.  We model setcond with the immediate in the second
+       to better match what targets are likely to give us.  For
+       consistency, model COMCLR with reversed operands as well.  */
+    pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
+
+    if (c2const) {
+        op = INSN_COMICLR | INSN_R2(c1) | INSN_R1(ret) | INSN_IM11(c2);
+    } else {
+        op = INSN_COMCLR | INSN_R2(c1) | INSN_R1(c2) | INSN_T(ret);
+    }
+    op |= INSN_COND(pacond & 7);
+    op |= pacond & COND_FALSE ? 1 << 12 : 0;
+
+    tcg_out32(s, op);
+}
+
+static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
+                            TCGArg bl, int blconst, TCGArg bh, int bhconst,
+                            int label_index)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
+        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        break;
+
+    default:
+        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_brcond(s, tcg_unsigned_cond(cond),
+                       al, bl, blconst, label_index);
+        break;
+    }
+}
+
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg ret,
+                            TCGArg c1, TCGArg c2, int c2const)
+{
+    tcg_out_comclr(s, tcg_invert_cond(cond), ret, c1, c2, c2const);
+    tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
+}
+
+static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
+                             TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+                             TCGArg bh, int bhconst)
+{
+    int scratch = TCG_REG_R20;
+
+    if (ret != al && ret != ah
+        && (blconst || ret != bl)
+        && (bhconst || ret != bh)) {
+        scratch = ret;
+    }
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        tcg_out_setcond(s, cond, scratch, al, bl, blconst);
+        tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
+        break;
+
+    default:
+        tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
+        tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
+        tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
+        break;
+    }
+
+    tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
 }
 
 #if defined(CONFIG_SOFTMMU)
-
 #include "../../softmmu_defs.h"
 
 static void *qemu_ld_helpers[4] = {
@@ -363,492 +888,574 @@
     __stl_mmu,
     __stq_mmu,
 };
-#endif
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+/* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
+   the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
+   TLB for the memory index.  The return value is the offset from ENV
+   contained in R1 afterward (to be used when loading ADDEND); if the
+   return value is 0, R1 is not used.  */
+
+static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
+                            int addrhi, int s_bits, int lab_miss, int offset)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
-#if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint32_t *label3_ptr;
-#endif
-    int addr_reg2;
-#endif
+    int ret;
 
-    data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0; /* suppress warning */
-    addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
-    mem_index = *args;
-    s_bits = opc & 3;
+    /* Extracting the index into the TLB.  The "normal C operation" is
+          r1 = addr_reg >> TARGET_PAGE_BITS;
+          r1 &= CPU_TLB_SIZE - 1;
+          r1 <<= CPU_TLB_ENTRY_BITS;
+       What this does is extract CPU_TLB_BITS beginning at TARGET_PAGE_BITS
+       and place them at CPU_TLB_ENTRY_BITS.  We can combine the first two
+       operations with an EXTRU.  Unfortunately, the current value of
+       CPU_TLB_ENTRY_BITS is > 3, so we can't merge that shift with the
+       add that follows.  */
+    tcg_out_extr(s, r1, addrlo, TARGET_PAGE_BITS, CPU_TLB_BITS, 0);
+    tcg_out_shli(s, r1, r1, CPU_TLB_ENTRY_BITS);
+    tcg_out_arith(s, r1, r1, TCG_AREG0, INSN_ADDL);
 
-    r0 = TCG_REG_R26;
-    r1 = TCG_REG_R25;
-
-#if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg);
-
-    tcg_out_mov(s, r0, addr_reg);
-
-    tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) |
-                 INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
-    tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
-
-    tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS,
-                   ARITH_AND);
-
-    tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD);
-    tcg_out_arithi(s, r1, r1,
-                   offsetof(CPUState, tlb_table[mem_index][0].addr_read),
-                   ARITH_ADD);
-
-    tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW);
-
-#if TARGET_LONG_BITS == 32
-    /* if equal, jump to label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_mov(s, r0, addr_reg); /* delay slot */
-#else
-    /* if not equal, jump to label3 */
-    label3_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_mov(s, r0, addr_reg); /* delay slot */
-
-    tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW);
-
-    /* if equal, jump to label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_nop(s); /* delay slot */
-
-    /* label3: */
-    *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2);
-#endif
-
-#if TARGET_LONG_BITS == 32
-    tcg_out_mov(s, TCG_REG_R26, addr_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R25, mem_index);
-#else
-    tcg_out_mov(s, TCG_REG_R26, addr_reg);
-    tcg_out_mov(s, TCG_REG_R25, addr_reg2);
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index);
-#endif
-
-    tcg_out_call(s, qemu_ld_helpers[s_bits]);
-
-    switch(opc) {
-        case 0 | 4:
-            tcg_out_ext8s(s, data_reg, TCG_REG_RET0);
-            break;
-        case 1 | 4:
-            tcg_out_ext16s(s, data_reg, TCG_REG_RET0);
-            break;
-        case 0:
-        case 1:
-        case 2:
-        default:
-            tcg_out_mov(s, data_reg, TCG_REG_RET0);
-            break;
-        case 3:
-            tcg_abort();
-            tcg_out_mov(s, data_reg, TCG_REG_RET0);
-            tcg_out_mov(s, data_reg2, TCG_REG_RET1);
-            break;
+    /* Make sure that both the addr_{read,write} and addend can be
+       read with a 14-bit offset from the same base register.  */
+    if (check_fit_tl(offset + CPU_TLB_SIZE, 14)) {
+        ret = 0;
+    } else {
+        ret = (offset + 0x400) & ~0x7ff;
+        offset = ret - offset;
+        tcg_out_addi2(s, TCG_REG_R1, r1, ret);
+        r1 = TCG_REG_R1;
     }
 
-    /* jump to label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2);
-
-    /* label1: */
-    *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2);
-
-    tcg_out_arithi(s, TCG_REG_R20, r1,
-                   offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read),
-                   ARITH_ADD);
-    tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW);
-    tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD);
-#else
-    r0 = addr_reg;
-#endif
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 0;
-#else
-    bswap = 1;
-#endif
-    switch (opc) {
-        case 0:
-            tcg_out_ldst(s, data_reg, r0, 0, LDB);
-            break;
-        case 0 | 4:
-            tcg_out_ldst(s, data_reg, r0, 0, LDB);
-            tcg_out_ext8s(s, data_reg, data_reg);
-            break;
-        case 1:
-            tcg_out_ldst(s, data_reg, r0, 0, LDH);
-            if (bswap)
-                tcg_out_bswap16(s, data_reg, data_reg);
-            break;
-        case 1 | 4:
-            tcg_out_ldst(s, data_reg, r0, 0, LDH);
-            if (bswap)
-                tcg_out_bswap16(s, data_reg, data_reg);
-            tcg_out_ext16s(s, data_reg, data_reg);
-            break;
-        case 2:
-            tcg_out_ldst(s, data_reg, r0, 0, LDW);
-            if (bswap)
-                tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
-            break;
-        case 3:
-            tcg_abort();
-            if (!bswap) {
-                tcg_out_ldst(s, data_reg, r0, 0, LDW);
-                tcg_out_ldst(s, data_reg2, r0, 4, LDW);
-            } else {
-                tcg_out_ldst(s, data_reg, r0, 4, LDW);
-                tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
-                tcg_out_ldst(s, data_reg2, r0, 0, LDW);
-                tcg_out_bswap32(s, data_reg2, data_reg2, TCG_REG_R20);
-            }
-            break;
-        default:
-            tcg_abort();
+    /* Load the entry from the computed slot.  */
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R23, r1, offset);
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset + 4);
+    } else {
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
     }
 
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2);
-#endif
+    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
+       of the address.  We include the low N bits of the address to catch unaligned
+       accesses and force them onto the slow path.  Do this computation after having
+       issued the load from the TLB slot to give the load time to complete.  */
+    tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+    /* If not equal, jump to lab_miss. */
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_brcond2(s, TCG_COND_NE, TCG_REG_R20, TCG_REG_R23,
+                        r0, 0, addrhi, 0, lab_miss);
+    } else {
+        tcg_out_brcond(s, TCG_COND_NE, TCG_REG_R20, r0, 0, lab_miss);
+    }
+
+    return ret;
 }
+#endif
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
+                                   int addr_reg, int addend_reg, int opc)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
-#if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint32_t *label3_ptr;
-#endif
-    int addr_reg2;
-#endif
-
-    data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0; /* suppress warning */
-    addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
-    mem_index = *args;
-
-    s_bits = opc;
-
-    r0 = TCG_REG_R26;
-    r1 = TCG_REG_R25;
-
-#if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg);
-
-    tcg_out_mov(s, r0, addr_reg);
-
-    tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) |
-                 INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
-    tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
-
-    tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS,
-                   ARITH_AND);
-
-    tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD);
-    tcg_out_arithi(s, r1, r1,
-                   offsetof(CPUState, tlb_table[mem_index][0].addr_write),
-                   ARITH_ADD);
-
-    tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW);
-
-#if TARGET_LONG_BITS == 32
-    /* if equal, jump to label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_mov(s, r0, addr_reg); /* delay slot */
-#else
-    /* if not equal, jump to label3 */
-    label3_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_mov(s, r0, addr_reg); /* delay slot */
-
-    tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW);
-
-    /* if equal, jump to label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) |
-                 INSN_COND(COND_EQUAL));
-    tcg_out_nop(s); /* delay slot */
-
-    /* label3: */
-    *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2);
-#endif
-
-    tcg_out_mov(s, TCG_REG_R26, addr_reg);
-#if TARGET_LONG_BITS == 64
-    tcg_out_mov(s, TCG_REG_R25, addr_reg2);
-    if (opc == 3) {
-        tcg_abort();
-        tcg_out_mov(s, TCG_REG_R24, data_reg);
-        tcg_out_mov(s, TCG_REG_R23, data_reg2);
-        /* TODO: push mem_index */
-        tcg_abort();
-    } else {
-        switch(opc) {
-        case 0:
-            tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) |
-                         INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
-            break;
-        case 1:
-            tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) |
-                         INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
-            break;
-        case 2:
-            tcg_out_mov(s, TCG_REG_R24, data_reg);
-            break;
-        }
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index);
-    }
-#else
-    if (opc == 3) {
-        tcg_abort();
-        tcg_out_mov(s, TCG_REG_R25, data_reg);
-        tcg_out_mov(s, TCG_REG_R24, data_reg2);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index);
-    } else {
-        switch(opc) {
-        case 0:
-            tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) |
-                         INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
-            break;
-        case 1:
-            tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) |
-                         INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
-            break;
-        case 2:
-            tcg_out_mov(s, TCG_REG_R25, data_reg);
-            break;
-        }
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index);
-    }
-#endif
-    tcg_out_call(s, qemu_st_helpers[s_bits]);
-
-    /* jump to label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2);
-
-    /* label1: */
-    *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2);
-
-    tcg_out_arithi(s, TCG_REG_R20, r1,
-                   offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write),
-                   ARITH_ADD);
-    tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW);
-    tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD);
-#else
-    r0 = addr_reg;
-#endif
-
 #ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 0;
+    const int bswap = 0;
 #else
-    bswap = 1;
+    const int bswap = 1;
 #endif
+
     switch (opc) {
     case 0:
-        tcg_out_ldst(s, data_reg, r0, 0, STB);
+        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX);
+        break;
+    case 0 | 4:
+        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDBX);
+        tcg_out_ext8s(s, datalo_reg, datalo_reg);
         break;
     case 1:
+        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX);
         if (bswap) {
-            tcg_out_bswap16(s, TCG_REG_R20, data_reg);
-            data_reg = TCG_REG_R20;
+            tcg_out_bswap16(s, datalo_reg, datalo_reg, 0);
         }
-        tcg_out_ldst(s, data_reg, r0, 0, STH);
+        break;
+    case 1 | 4:
+        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDHX);
+        if (bswap) {
+            tcg_out_bswap16(s, datalo_reg, datalo_reg, 1);
+        } else {
+            tcg_out_ext16s(s, datalo_reg, datalo_reg);
+        }
         break;
     case 2:
+        tcg_out_ldst_index(s, datalo_reg, addr_reg, addend_reg, INSN_LDWX);
         if (bswap) {
-            tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20);
-            data_reg = TCG_REG_R20;
+            tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20);
         }
-        tcg_out_ldst(s, data_reg, r0, 0, STW);
         break;
     case 3:
-        tcg_abort();
-        if (!bswap) {
-            tcg_out_ldst(s, data_reg, r0, 0, STW);
-            tcg_out_ldst(s, data_reg2, r0, 4, STW);
+        if (bswap) {
+            int t = datahi_reg;
+            datahi_reg = datalo_reg;
+            datalo_reg = t;
+        }
+        /* We can't access the low-part with a reg+reg addressing mode,
+           so perform the addition now and use reg_ofs addressing mode.  */
+        if (addend_reg != TCG_REG_R0) {
+            tcg_out_arith(s, TCG_REG_R20, addr_reg, addend_reg, INSN_ADD);
+            addr_reg = TCG_REG_R20;
+	}
+        /* Make sure not to clobber the base register.  */
+        if (datahi_reg == addr_reg) {
+            tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW);
+            tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW);
         } else {
-            tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20);
-            tcg_out_ldst(s, TCG_REG_R20, r0, 4, STW);
-            tcg_out_bswap32(s, TCG_REG_R20, data_reg2, TCG_REG_R20);
-            tcg_out_ldst(s, TCG_REG_R20, r0, 0, STW);
+            tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_LDW);
+            tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_LDW);
+        }
+        if (bswap) {
+            tcg_out_bswap32(s, datalo_reg, datalo_reg, TCG_REG_R20);
+            tcg_out_bswap32(s, datahi_reg, datahi_reg, TCG_REG_R20);
         }
         break;
     default:
         tcg_abort();
     }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+{
+    int datalo_reg = *args++;
+    /* Note that datahi_reg is only used for 64-bit loads.  */
+    int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0);
+    int addrlo_reg = *args++;
 
 #if defined(CONFIG_SOFTMMU)
+    /* Note that addrhi_reg is only used for 64-bit guests.  */
+    int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
+    int mem_index = *args;
+    int lab1, lab2, argreg, offset;
+
+    lab1 = gen_new_label();
+    lab2 = gen_new_label();
+
+    offset = offsetof(CPUState, tlb_table[mem_index][0].addr_read);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
+                              opc & 3, lab1, offset);
+
+    /* TLB Hit.  */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+               offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+    tcg_out_branch(s, lab2, 1);
+
+    /* TLB Miss.  */
+    /* label1: */
+    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
+
+    argreg = TCG_REG_R26;
+    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+    }
+    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+
+    tcg_out_call(s, qemu_ld_helpers[opc & 3]);
+
+    switch (opc) {
+    case 0:
+        tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xff);
+        break;
+    case 0 | 4:
+        tcg_out_ext8s(s, datalo_reg, TCG_REG_RET0);
+        break;
+    case 1:
+        tcg_out_andi(s, datalo_reg, TCG_REG_RET0, 0xffff);
+        break;
+    case 1 | 4:
+        tcg_out_ext16s(s, datalo_reg, TCG_REG_RET0);
+        break;
+    case 2:
+    case 2 | 4:
+        tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET0);
+        break;
+    case 3:
+        tcg_out_mov(s, TCG_TYPE_I32, datahi_reg, TCG_REG_RET0);
+        tcg_out_mov(s, TCG_TYPE_I32, datalo_reg, TCG_REG_RET1);
+        break;
+    default:
+        tcg_abort();
+    }
+
     /* label2: */
-    *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2);
+    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
+#else
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+                           (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_R0), opc);
 #endif
 }
 
-static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
-                              const int *const_args)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
+                                   int addr_reg, int opc)
 {
-    int c;
+#ifdef TARGET_WORDS_BIGENDIAN
+    const int bswap = 0;
+#else
+    const int bswap = 1;
+#endif
 
     switch (opc) {
+    case 0:
+        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STB);
+        break;
+    case 1:
+        if (bswap) {
+            tcg_out_bswap16(s, TCG_REG_R20, datalo_reg, 0);
+            datalo_reg = TCG_REG_R20;
+        }
+        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STH);
+        break;
+    case 2:
+        if (bswap) {
+            tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20);
+            datalo_reg = TCG_REG_R20;
+        }
+        tcg_out_ldst(s, datalo_reg, addr_reg, 0, INSN_STW);
+        break;
+    case 3:
+        if (bswap) {
+            tcg_out_bswap32(s, TCG_REG_R20, datalo_reg, TCG_REG_R20);
+            tcg_out_bswap32(s, TCG_REG_R23, datahi_reg, TCG_REG_R23);
+            datahi_reg = TCG_REG_R20;
+            datalo_reg = TCG_REG_R23;
+        }
+        tcg_out_ldst(s, datahi_reg, addr_reg, 0, INSN_STW);
+        tcg_out_ldst(s, datalo_reg, addr_reg, 4, INSN_STW);
+        break;
+    default:
+        tcg_abort();
+    }
+
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
+{
+    int datalo_reg = *args++;
+    /* Note that datahi_reg is only used for 64-bit loads.  */
+    int datahi_reg = (opc == 3 ? *args++ : TCG_REG_R0);
+    int addrlo_reg = *args++;
+
+#if defined(CONFIG_SOFTMMU)
+    /* Note that addrhi_reg is only used for 64-bit guests.  */
+    int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
+    int mem_index = *args;
+    int lab1, lab2, argreg, offset;
+
+    lab1 = gen_new_label();
+    lab2 = gen_new_label();
+
+    offset = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
+                              opc, lab1, offset);
+
+    /* TLB Hit.  */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+               offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
+
+    /* There are no indexed stores, so we must do this addition explitly.
+       Careful to avoid R20, which is used for the bswaps to follow.  */
+    tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_REG_R20, INSN_ADDL);
+    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, TCG_REG_R31, opc);
+    tcg_out_branch(s, lab2, 1);
+
+    /* TLB Miss.  */
+    /* label1: */
+    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
+
+    argreg = TCG_REG_R26;
+    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+    }
+
+    switch(opc) {
+    case 0:
+        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
+        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        break;
+    case 1:
+        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
+        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        break;
+    case 2:
+        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
+        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        break;
+    case 3:
+        /* Because of the alignment required by the 64-bit data argument,
+           we will always use R23/R24.  Also, we will always run out of
+           argument registers for storing mem_index, so that will have
+           to go on the stack.  */
+        if (mem_index == 0) {
+            argreg = TCG_REG_R0;
+        } else {
+            argreg = TCG_REG_R20;
+            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        }
+        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
+        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
+        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_SP,
+                   TCG_TARGET_CALL_STACK_OFFSET - 4);
+        break;
+    default:
+        tcg_abort();
+    }
+
+    tcg_out_call(s, qemu_st_helpers[opc]);
+
+    /* label2: */
+    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
+#else
+    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
+       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
+    if (GUEST_BASE != 0) {
+        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+        addrlo_reg = TCG_REG_R31;
+    }
+    tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
+#endif
+}
+
+static void tcg_out_exit_tb(TCGContext *s, TCGArg arg)
+{
+    if (!check_fit_tl(arg, 14)) {
+        uint32_t hi, lo;
+        hi = arg & ~0x7ff;
+        lo = arg & 0x7ff;
+        if (lo) {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, hi);
+            tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
+            tcg_out_addi(s, TCG_REG_RET0, lo);
+            return;
+        }
+        arg = hi;
+    }
+    tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, arg);
+}
+
+static void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
+{
+    if (s->tb_jmp_offset) {
+        /* direct jump method */
+        fprintf(stderr, "goto_tb direct\n");
+        tcg_abort();
+    } else {
+        /* indirect jump method */
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, TCG_REG_R0,
+                   (tcg_target_long)(s->tb_next + arg));
+        tcg_out32(s, INSN_BV_N | INSN_R2(TCG_REG_R20));
+    }
+    s->tb_next_offset[arg] = s->code_ptr - s->code_buf;
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+                              const int *const_args)
+{
+    switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, args[0]);
-        tcg_out32(s, BV_N | INSN_R2(TCG_REG_R18));
+        tcg_out_exit_tb(s, args[0]);
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* direct jump method */
-            fprintf(stderr, "goto_tb direct\n");
-            tcg_abort();
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, args[0]);
-            tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20));
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-        } else {
-            /* indirect jump method */
-            tcg_out_ld_ptr(s, TCG_REG_R20,
-                           (tcg_target_long)(s->tb_next + args[0]));
-            tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20));
-        }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        tcg_out_goto_tb(s, args[0]);
         break;
+
     case INDEX_op_call:
-        tcg_out32(s, BLE_SR4 | INSN_R2(args[0]));
-        tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
+        if (const_args[0]) {
+            tcg_out_call(s, (void *)args[0]);
+        } else {
+            /* ??? FIXME: the value in the register in args[0] is almost
+               certainly a procedure descriptor, not a code address.  We
+               probably need to use the millicode $$dyncall routine.  */
+            tcg_abort();
+        }
         break;
+
     case INDEX_op_jmp:
         fprintf(stderr, "unimplemented jmp\n");
         tcg_abort();
         break;
+
     case INDEX_op_br:
-        fprintf(stderr, "unimplemented br\n");
-        tcg_abort();
+        tcg_out_branch(s, args[0], 1);
         break;
+
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
         break;
 
     case INDEX_op_ld8u_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDB);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
         break;
     case INDEX_op_ld8s_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDB);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
         tcg_out_ext8s(s, args[0], args[0]);
         break;
     case INDEX_op_ld16u_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDH);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
         break;
     case INDEX_op_ld16s_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDH);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
         tcg_out_ext16s(s, args[0], args[0]);
         break;
     case INDEX_op_ld_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], LDW);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDW);
         break;
 
     case INDEX_op_st8_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], STB);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STB);
         break;
     case INDEX_op_st16_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], STH);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STH);
         break;
     case INDEX_op_st_i32:
-        tcg_out_ldst(s, args[0], args[1], args[2], STW);
+        tcg_out_ldst(s, args[0], args[1], args[2], INSN_STW);
+        break;
+
+    case INDEX_op_add_i32:
+        if (const_args[2]) {
+            tcg_out_addi2(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_arith(s, args[0], args[1], args[2], INSN_ADDL);
+        }
         break;
 
     case INDEX_op_sub_i32:
-        c = ARITH_SUB;
-        goto gen_arith;
+        if (const_args[1]) {
+            if (const_args[2]) {
+                tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1] - args[2]);
+            } else {
+                /* Recall that SUBI is a reversed subtract.  */
+                tcg_out_arithi(s, args[0], args[2], args[1], INSN_SUBI);
+            }
+        } else if (const_args[2]) {
+            tcg_out_addi2(s, args[0], args[1], -args[2]);
+        } else {
+            tcg_out_arith(s, args[0], args[1], args[2], INSN_SUB);
+        }
+        break;
+
     case INDEX_op_and_i32:
-        c = ARITH_AND;
-        goto gen_arith;
+        if (const_args[2]) {
+            tcg_out_andi(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_arith(s, args[0], args[1], args[2], INSN_AND);
+        }
+        break;
+
     case INDEX_op_or_i32:
-        c = ARITH_OR;
-        goto gen_arith;
+        if (const_args[2]) {
+            tcg_out_ori(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_arith(s, args[0], args[1], args[2], INSN_OR);
+        }
+        break;
+
     case INDEX_op_xor_i32:
-        c = ARITH_XOR;
-        goto gen_arith;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
-        goto gen_arith;
+        tcg_out_arith(s, args[0], args[1], args[2], INSN_XOR);
+        break;
+
+    case INDEX_op_andc_i32:
+        if (const_args[2]) {
+            tcg_out_andi(s, args[0], args[1], ~args[2]);
+        } else {
+            tcg_out_arith(s, args[0], args[1], args[2], INSN_ANDCM);
+        }
+        break;
 
     case INDEX_op_shl_i32:
-        tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) |
-                     lowsignext(0x1f, 0, 11));
-        tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20));
-        tcg_out32(s, ZVDEP | INSN_R2(args[0]) | INSN_R1(args[1]) |
-                     INSN_DEP_LEN(32));
+        if (const_args[2]) {
+            tcg_out_shli(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_shl(s, args[0], args[1], args[2]);
+        }
         break;
+
     case INDEX_op_shr_i32:
-        tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(args[2]));
-        tcg_out32(s, VSHD | INSN_T(args[0]) | INSN_R1(TCG_REG_R0) |
-                     INSN_R2(args[1]));
+        if (const_args[2]) {
+            tcg_out_shri(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_shr(s, args[0], args[1], args[2]);
+        }
         break;
+
     case INDEX_op_sar_i32:
-        tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) |
-                     lowsignext(0x1f, 0, 11));
-        tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20));
-        tcg_out32(s, VEXTRS | INSN_R1(args[0]) | INSN_R2(args[1]) |
-                     INSN_DEP_LEN(32));
+        if (const_args[2]) {
+            tcg_out_sari(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_sar(s, args[0], args[1], args[2]);
+        }
+        break;
+
+    case INDEX_op_rotl_i32:
+        if (const_args[2]) {
+            tcg_out_rotli(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_rotl(s, args[0], args[1], args[2]);
+        }
+        break;
+
+    case INDEX_op_rotr_i32:
+        if (const_args[2]) {
+            tcg_out_rotri(s, args[0], args[1], args[2]);
+        } else {
+            tcg_out_rotr(s, args[0], args[1], args[2]);
+        }
         break;
 
     case INDEX_op_mul_i32:
-        fprintf(stderr, "unimplemented mul\n");
-        tcg_abort();
+        tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]);
         break;
     case INDEX_op_mulu2_i32:
-        fprintf(stderr, "unimplemented mulu2\n");
-        tcg_abort();
+        tcg_out_xmpyu(s, args[0], args[1], args[2], args[3]);
         break;
-    case INDEX_op_div2_i32:
-        fprintf(stderr, "unimplemented div2\n");
-        tcg_abort();
+
+    case INDEX_op_bswap16_i32:
+        tcg_out_bswap16(s, args[0], args[1], 0);
         break;
-    case INDEX_op_divu2_i32:
-        fprintf(stderr, "unimplemented divu2\n");
-        tcg_abort();
+    case INDEX_op_bswap32_i32:
+        tcg_out_bswap32(s, args[0], args[1], TCG_REG_R20);
+        break;
+
+    case INDEX_op_not_i32:
+        tcg_out_arithi(s, args[0], args[1], -1, INSN_SUBI);
+        break;
+    case INDEX_op_ext8s_i32:
+        tcg_out_ext8s(s, args[0], args[1]);
+        break;
+    case INDEX_op_ext16s_i32:
+        tcg_out_ext16s(s, args[0], args[1]);
         break;
 
     case INDEX_op_brcond_i32:
-        fprintf(stderr, "unimplemented brcond\n");
-        tcg_abort();
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        break;
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2(s, args[4], args[0], args[1],
+                        args[2], const_args[2],
+                        args[3], const_args[3], args[5]);
+        break;
+
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
+                         args[3], const_args[3], args[4], const_args[4]);
+        break;
+
+    case INDEX_op_add2_i32:
+        tcg_out_add2(s, args[0], args[1], args[2], args[3],
+                     args[4], args[5], const_args[4]);
+        break;
+
+    case INDEX_op_sub2_i32:
+        tcg_out_sub2(s, args[0], args[1], args[2], args[3],
+                     args[4], args[5], const_args[2], const_args[4]);
         break;
 
     case INDEX_op_qemu_ld8u:
@@ -863,9 +1470,12 @@
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
-    case INDEX_op_qemu_ld32u:
+    case INDEX_op_qemu_ld32:
         tcg_out_qemu_ld(s, args, 2);
         break;
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
 
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
@@ -876,87 +1486,186 @@
     case INDEX_op_qemu_st32:
         tcg_out_qemu_st(s, args, 2);
         break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
 
     default:
         fprintf(stderr, "unknown opcode 0x%x\n", opc);
         tcg_abort();
     }
-    return;
-
-gen_arith:
-    tcg_out_arith(s, args[0], args[1], args[2], c);
 }
 
 static const TCGTargetOpDef hppa_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
 
-    { INDEX_op_call, { "r" } },
+    { INDEX_op_call, { "ri" } },
     { INDEX_op_jmp, { "r" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
+
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
     { INDEX_op_ld16s_i32, { "r", "r" } },
     { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "r", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "rZ", "r" } },
+    { INDEX_op_st16_i32, { "rZ", "r" } },
+    { INDEX_op_st_i32, { "rZ", "r" } },
 
-    { INDEX_op_add_i32, { "r", "r", "r" } },
-    { INDEX_op_sub_i32, { "r", "r", "r" } },
-    { INDEX_op_and_i32, { "r", "r", "r" } },
-    { INDEX_op_or_i32, { "r", "r", "r" } },
-    { INDEX_op_xor_i32, { "r", "r", "r" } },
+    { INDEX_op_add_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_sub_i32, { "r", "rI", "ri" } },
+    { INDEX_op_and_i32, { "r", "rZ", "rM" } },
+    { INDEX_op_or_i32, { "r", "rZ", "rO" } },
+    { INDEX_op_xor_i32, { "r", "rZ", "rZ" } },
+    /* Note that the second argument will be inverted, which means
+       we want a constant whose inversion matches M, and that O = ~M.
+       See the implementation of and_mask_p.  */
+    { INDEX_op_andc_i32, { "r", "rZ", "rO" } },
 
-    { INDEX_op_shl_i32, { "r", "r", "r" } },
-    { INDEX_op_shr_i32, { "r", "r", "r" } },
-    { INDEX_op_sar_i32, { "r", "r", "r" } },
+    { INDEX_op_mul_i32, { "r", "r", "r" } },
+    { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
 
-    { INDEX_op_brcond_i32, { "r", "r" } },
+    { INDEX_op_shl_i32, { "r", "r", "ri" } },
+    { INDEX_op_shr_i32, { "r", "r", "ri" } },
+    { INDEX_op_sar_i32, { "r", "r", "ri" } },
+    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
+
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
+    { INDEX_op_not_i32, { "r", "r" } },
+
+    { INDEX_op_ext8s_i32, { "r", "r" } },
+    { INDEX_op_ext16s_i32, { "r", "r" } },
+
+    { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+    { INDEX_op_brcond2_i32,  { "rZ", "rZ", "rJ", "rJ" } },
+
+    { INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
+    { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
+
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
 
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
     { INDEX_op_qemu_ld64, { "r", "r", "L" } },
 
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L" } },
+    { INDEX_op_qemu_st8, { "LZ", "L" } },
+    { INDEX_op_qemu_st16, { "LZ", "L" } },
+    { INDEX_op_qemu_st32, { "LZ", "L" } },
+    { INDEX_op_qemu_st64, { "LZ", "LZ", "L" } },
 #else
     { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
     { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
 
-    { INDEX_op_qemu_st8, { "L", "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
+    { INDEX_op_qemu_st8, { "LZ", "L", "L" } },
+    { INDEX_op_qemu_st16, { "LZ", "L", "L" } },
+    { INDEX_op_qemu_st32, { "LZ", "L", "L" } },
+    { INDEX_op_qemu_st64, { "LZ", "LZ", "L", "L" } },
 #endif
     { -1 },
 };
 
-void tcg_target_init(TCGContext *s)
+static int tcg_target_callee_save_regs[] = {
+    /* R2, the return address register, is saved specially
+       in the caller's frame.  */
+    /* R3, the frame pointer, is not currently modified.  */
+    TCG_REG_R4,
+    TCG_REG_R5,
+    TCG_REG_R6,
+    TCG_REG_R7,
+    TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    TCG_REG_R14,
+    TCG_REG_R15,
+    TCG_REG_R16,
+    /* R17 is the global env, so no need to save.  */
+    TCG_REG_R18
+};
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+    int frame_size, i;
+
+    /* Allocate space for the fixed frame marker.  */
+    frame_size = -TCG_TARGET_CALL_STACK_OFFSET;
+    frame_size += TCG_TARGET_STATIC_CALL_ARGS_SIZE;
+
+    /* Allocate space for the saved registers.  */
+    frame_size += ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
+
+    /* Align the allocated space.  */
+    frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1)
+                  & -TCG_TARGET_STACK_ALIGN);
+
+    /* The return address is stored in the caller's frame.  */
+    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -20);
+
+    /* Allocate stack frame, saving the first register at the same time.  */
+    tcg_out_ldst(s, tcg_target_callee_save_regs[0],
+                 TCG_REG_SP, frame_size, INSN_STWM);
+
+    /* Save all callee saved registers.  */
+    for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_st(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, -frame_size + i * 4);
+    }
+
+#ifdef CONFIG_USE_GUEST_BASE
+    if (GUEST_BASE != 0) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
+    /* Jump to TB, and adjust R18 to be the return address.  */
+    tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R26));
+    tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R18, TCG_REG_R31);
+
+    /* Restore callee saved registers.  */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -frame_size - 20);
+    for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+        tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
+                   TCG_REG_SP, -frame_size + i * 4);
+    }
+
+    /* Deallocate stack frame and return.  */
+    tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_RP));
+    tcg_out_ldst(s, tcg_target_callee_save_regs[0],
+                 TCG_REG_SP, -frame_size, INSN_LDWM);
+}
+
+static void tcg_target_init(TCGContext *s)
 {
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
-    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
-                     (1 << TCG_REG_R20) |
-                     (1 << TCG_REG_R21) |
-                     (1 << TCG_REG_R22) |
-                     (1 << TCG_REG_R23) |
-                     (1 << TCG_REG_R24) |
-                     (1 << TCG_REG_R25) |
-                     (1 << TCG_REG_R26));
+
+    tcg_regset_clear(tcg_target_call_clobber_regs);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET0);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET1);
 
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);  /* hardwired to zero */
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 7ab6f0c..a5cc440 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -69,135 +69,51 @@
     TCG_REG_R31,
 };
 
+#define TCG_CT_CONST_0    0x0100
+#define TCG_CT_CONST_S5   0x0200
+#define TCG_CT_CONST_S11  0x0400
+#define TCG_CT_CONST_MS11 0x0800
+#define TCG_CT_CONST_AND  0x1000
+#define TCG_CT_CONST_OR   0x2000
+
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_SP
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_STACK_ALIGN 64
+#define TCG_TARGET_CALL_STACK_OFFSET -48
+#define TCG_TARGET_STATIC_CALL_ARGS_SIZE 8*4
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
 #define TCG_TARGET_STACK_GROWSUP
 
 /* optional instructions */
-//#define TCG_TARGET_HAS_ext8s_i32
-//#define TCG_TARGET_HAS_ext16s_i32
-//#define TCG_TARGET_HAS_bswap16_i32
-//#define TCG_TARGET_HAS_bswap32_i32
+// #define TCG_TARGET_HAS_div_i32
+#define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_ext8s_i32
+#define TCG_TARGET_HAS_ext16s_i32
+#define TCG_TARGET_HAS_bswap16_i32
+#define TCG_TARGET_HAS_bswap32_i32
+#define TCG_TARGET_HAS_not_i32
+#define TCG_TARGET_HAS_andc_i32
+// #define TCG_TARGET_HAS_orc_i32
+
+/* optional instructions automatically implemented */
+#undef TCG_TARGET_HAS_neg_i32           /* sub rd, 0, rs */
+#undef TCG_TARGET_HAS_ext8u_i32         /* and rd, rs, 0xff */
+#undef TCG_TARGET_HAS_ext16u_i32        /* and rd, rs, 0xffff */
+
+#define TCG_TARGET_HAS_GUEST_BASE
 
 /* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R17
-#define TCG_AREG1 TCG_REG_R14
-#define TCG_AREG2 TCG_REG_R15
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 {
     start &= ~31;
-    while (start <= stop)
-    {
-        asm volatile ("fdc 0(%0)\n"
-                      "sync\n"
-                      "fic 0(%%sr4, %0)\n"
-                      "sync\n"
+    while (start <= stop) {
+        asm volatile ("fdc 0(%0)\n\t"
+                      "sync\n\t"
+                      "fic 0(%%sr4, %0)\n\t"
+                      "sync"
                       : : "r"(start) : "memory");
         start += 32;
     }
 }
-
-/* supplied by libgcc */
-extern void *__canonicalize_funcptr_for_compare(void *);
-
-/* Field selection types defined by hppa */
-#define rnd(x)                  (((x)+0x1000)&~0x1fff)
-/* lsel: select left 21 bits */
-#define lsel(v,a)               (((v)+(a))>>11)
-/* rsel: select right 11 bits */
-#define rsel(v,a)               (((v)+(a))&0x7ff)
-/* lrsel with rounding of addend to nearest 8k */
-#define lrsel(v,a)              (((v)+rnd(a))>>11)
-/* rrsel with rounding of addend to nearest 8k */
-#define rrsel(v,a)              ((((v)+rnd(a))&0x7ff)+((a)-rnd(a)))
-
-#define mask(x,sz)              ((x) & ~((1<<(sz))-1))
-
-static inline int reassemble_12(int as12)
-{
-    return (((as12 & 0x800) >> 11) |
-            ((as12 & 0x400) >> 8) |
-            ((as12 & 0x3ff) << 3));
-}
-
-static inline int reassemble_14(int as14)
-{
-    return (((as14 & 0x1fff) << 1) |
-            ((as14 & 0x2000) >> 13));
-}
-
-static inline int reassemble_17(int as17)
-{
-    return (((as17 & 0x10000) >> 16) |
-            ((as17 & 0x0f800) << 5) |
-            ((as17 & 0x00400) >> 8) |
-            ((as17 & 0x003ff) << 3));
-}
-
-static inline int reassemble_21(int as21)
-{
-    return (((as21 & 0x100000) >> 20) |
-            ((as21 & 0x0ffe00) >> 8) |
-            ((as21 & 0x000180) << 7) |
-            ((as21 & 0x00007c) << 14) |
-            ((as21 & 0x000003) << 12));
-}
-
-static inline void hppa_patch21l(uint32_t *insn, int val, int addend)
-{
-    val = lrsel(val, addend);
-    *insn = mask(*insn, 21) | reassemble_21(val);
-}
-
-static inline void hppa_patch14r(uint32_t *insn, int val, int addend)
-{
-    val = rrsel(val, addend);
-    *insn = mask(*insn, 14) | reassemble_14(val);
-}
-
-static inline void hppa_patch17r(uint32_t *insn, int val, int addend)
-{
-    val = rrsel(val, addend);
-    *insn = (*insn & ~0x1f1ffd) | reassemble_17(val);
-}
-
-
-static inline void hppa_patch21l_dprel(uint32_t *insn, int val, int addend)
-{
-    register unsigned int dp asm("r27");
-    hppa_patch21l(insn, val - dp, addend);
-}
-
-static inline void hppa_patch14r_dprel(uint32_t *insn, int val, int addend)
-{
-    register unsigned int dp asm("r27");
-    hppa_patch14r(insn, val - dp, addend);
-}
-
-static inline void hppa_patch17f(uint32_t *insn, int val, int addend)
-{
-    int dot = (int)insn & ~0x3;
-    int v = ((val + addend) - dot - 8) / 4;
-    if (v > (1 << 16) || v < -(1 << 16)) {
-        printf("cannot fit branch to offset %d [%08x->%08x]\n", v, dot, val);
-        abort();
-    }
-    *insn = (*insn & ~0x1f1ffd) | reassemble_17(v);
-}
-
-static inline void hppa_load_imm21l(uint32_t *insn, int val, int addend)
-{
-    /* Transform addil L'sym(%dp) to ldil L'val, %r1 */
-    *insn = 0x20200000 | reassemble_21(lrsel(val, 0));
-}
-
-static inline void hppa_load_imm14r(uint32_t *insn, int val, int addend)
-{
-    /* Transform ldw R'sym(%r1), %rN to ldo R'sym(%r1), %rN */
-    hppa_patch14r(insn, val, addend);
-    /* HACK */
-    if (addend == 0)
-        *insn = (*insn & ~0xfc000000) | (0x0d << 26);
-}
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..09a5c55 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -24,18 +24,33 @@
 
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "%eax",
-    "%ecx",
-    "%edx",
-    "%ebx",
-    "%esp",
-    "%ebp",
-    "%esi",
-    "%edi",
+#if TCG_TARGET_REG_BITS == 64
+    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+#else
+    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
 };
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
+#if TCG_TARGET_REG_BITS == 64
+    TCG_REG_RBP,
+    TCG_REG_RBX,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    TCG_REG_R14,
+    TCG_REG_R15,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R9,
+    TCG_REG_R8,
+    TCG_REG_RCX,
+    TCG_REG_RDX,
+    TCG_REG_RSI,
+    TCG_REG_RDI,
+    TCG_REG_RAX,
+#else
     TCG_REG_EAX,
     TCG_REG_EDX,
     TCG_REG_ECX,
@@ -43,23 +58,49 @@
     TCG_REG_ESI,
     TCG_REG_EDI,
     TCG_REG_EBP,
+#endif
 };
 
-static const int tcg_target_call_iarg_regs[3] = { TCG_REG_EAX, TCG_REG_EDX, TCG_REG_ECX };
-static const int tcg_target_call_oarg_regs[2] = { TCG_REG_EAX, TCG_REG_EDX };
+static const int tcg_target_call_iarg_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+    TCG_REG_RDI,
+    TCG_REG_RSI,
+    TCG_REG_RDX,
+    TCG_REG_RCX,
+    TCG_REG_R8,
+    TCG_REG_R9,
+#else
+    TCG_REG_EAX,
+    TCG_REG_EDX,
+    TCG_REG_ECX
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[2] = {
+    TCG_REG_EAX,
+    TCG_REG_EDX
+};
 
 static uint8_t *tb_ret_addr;
 
-static void patch_reloc(uint8_t *code_ptr, int type, 
+static void patch_reloc(uint8_t *code_ptr, int type,
                         tcg_target_long value, tcg_target_long addend)
 {
     value += addend;
     switch(type) {
-    case R_386_32:
+    case R_386_PC32:
+        value -= (uintptr_t)code_ptr;
+        if (value != (int32_t)value) {
+            tcg_abort();
+        }
         *(uint32_t *)code_ptr = value;
         break;
-    case R_386_PC32:
-        *(uint32_t *)code_ptr = value - (long)code_ptr;
+    case R_386_PC8:
+        value -= (uintptr_t)code_ptr;
+        if (value != (int8_t)value) {
+            tcg_abort();
+        }
+        *(uint8_t *)code_ptr = value;
         break;
     default:
         tcg_abort();
@@ -69,6 +110,10 @@
 /* maximum number of register used for input function arguments */
 static inline int tcg_target_get_call_iarg_regs_count(int flags)
 {
+    if (TCG_TARGET_REG_BITS == 64) {
+        return 6;
+    }
+
     flags &= TCG_CALL_TYPE_MASK;
     switch(flags) {
     case TCG_CALL_TYPE_STD:
@@ -115,20 +160,42 @@
         break;
     case 'q':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xf);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_regset_set32(ct->u.regs, 0, 0xffff);
+        } else {
+            tcg_regset_set32(ct->u.regs, 0, 0xf);
+        }
         break;
     case 'r':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xff);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_regset_set32(ct->u.regs, 0, 0xffff);
+        } else {
+            tcg_regset_set32(ct->u.regs, 0, 0xff);
+        }
         break;
 
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, 0xff);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_regset_set32(ct->u.regs, 0, 0xffff);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+        } else {
+            tcg_regset_set32(ct->u.regs, 0, 0xff);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
+        }
         break;
+
+    case 'e':
+        ct->ct |= TCG_CT_CONST_S32;
+        break;
+    case 'Z':
+        ct->ct |= TCG_CT_CONST_U32;
+        break;
+
     default:
         return -1;
     }
@@ -141,14 +208,83 @@
 static inline int tcg_target_const_match(tcg_target_long val,
                                          const TCGArgConstraint *arg_ct)
 {
-    int ct;
-    ct = arg_ct->ct;
-    if (ct & TCG_CT_CONST)
+    int ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST) {
         return 1;
-    else
-        return 0;
+    }
+    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+        return 1;
+    }
+    return 0;
 }
 
+#if TCG_TARGET_REG_BITS == 64
+# define LOWREGMASK(x)	((x) & 7)
+#else
+# define LOWREGMASK(x)	(x)
+#endif
+
+#define P_EXT		0x100		/* 0x0f opcode prefix */
+#define P_DATA16	0x200		/* 0x66 opcode prefix */
+#if TCG_TARGET_REG_BITS == 64
+# define P_ADDR32	0x400		/* 0x67 opcode prefix */
+# define P_REXW		0x800		/* Set REX.W = 1 */
+# define P_REXB_R	0x1000		/* REG field as byte register */
+# define P_REXB_RM	0x2000		/* R/M field as byte register */
+#else
+# define P_ADDR32	0
+# define P_REXW		0
+# define P_REXB_R	0
+# define P_REXB_RM	0
+#endif
+
+#define OPC_ARITH_EvIz	(0x81)
+#define OPC_ARITH_EvIb	(0x83)
+#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
+#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_BSWAP	(0xc8 | P_EXT)
+#define OPC_CALL_Jz	(0xe8)
+#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32	(0x48)
+#define OPC_IMUL_GvEv	(0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb	(0x6b)
+#define OPC_IMUL_GvEvIz	(0x69)
+#define OPC_INC_r32	(0x40)
+#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
+#define OPC_JCC_short	(0x70)		/* ... plus condition code */
+#define OPC_JMP_long	(0xe9)
+#define OPC_JMP_short	(0xeb)
+#define OPC_LEA         (0x8d)
+#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
+#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
+#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVL_EvIz	(0xc7)
+#define OPC_MOVL_Iv     (0xb8)
+#define OPC_MOVSBL	(0xbe | P_EXT)
+#define OPC_MOVSWL	(0xbf | P_EXT)
+#define OPC_MOVSLQ	(0x63 | P_REXW)
+#define OPC_MOVZBL	(0xb6 | P_EXT)
+#define OPC_MOVZWL	(0xb7 | P_EXT)
+#define OPC_POP_r32	(0x58)
+#define OPC_PUSH_r32	(0x50)
+#define OPC_PUSH_Iv	(0x68)
+#define OPC_PUSH_Ib	(0x6a)
+#define OPC_RET		(0xc3)
+#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
+#define OPC_SHIFT_1	(0xd1)
+#define OPC_SHIFT_Ib	(0xc1)
+#define OPC_SHIFT_cl	(0xd3)
+#define OPC_TESTL	(0x85)
+#define OPC_XCHG_ax_r32	(0x90)
+
+#define OPC_GRP3_Ev	(0xf7)
+#define OPC_GRP5	(0xff)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+   These are also used as modifiers for OPC_ARITH.  */
 #define ARITH_ADD 0
 #define ARITH_OR  1
 #define ARITH_ADC 2
@@ -158,12 +294,28 @@
 #define ARITH_XOR 6
 #define ARITH_CMP 7
 
+/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
 #define SHIFT_ROL 0
 #define SHIFT_ROR 1
 #define SHIFT_SHL 4
 #define SHIFT_SHR 5
 #define SHIFT_SAR 7
 
+/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
+#define EXT3_NOT   2
+#define EXT3_NEG   3
+#define EXT3_MUL   4
+#define EXT3_IMUL  5
+#define EXT3_DIV   6
+#define EXT3_IDIV  7
+
+/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
+#define EXT5_INC_Ev	0
+#define EXT5_DEC_Ev	1
+#define EXT5_CALLN_Ev	2
+#define EXT5_JMPN_Ev	4
+
+/* Condition codes to be added to OPC_JCC_{long,short}.  */
 #define JCC_JMP (-1)
 #define JCC_JO  0x0
 #define JCC_JNO 0x1
@@ -182,8 +334,6 @@
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-#define P_EXT   0x100 /* 0x0f opcode prefix */
-
 static const uint8_t tcg_cond_to_jcc[10] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
@@ -197,228 +347,616 @@
     [TCG_COND_GTU] = JCC_JA,
 };
 
-static inline void tcg_out_opc(TCGContext *s, int opc)
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
 {
-    if (opc & P_EXT)
+    int rex;
+
+    if (opc & P_DATA16) {
+        /* We should never be asking for both 16 and 64-bit operation.  */
+        assert((opc & P_REXW) == 0);
+        tcg_out8(s, 0x66);
+    }
+    if (opc & P_ADDR32) {
+        tcg_out8(s, 0x67);
+    }
+
+    rex = 0;
+    rex |= (opc & P_REXW) >> 8;		/* REX.W */
+    rex |= (r & 8) >> 1;		/* REX.R */
+    rex |= (x & 8) >> 2;		/* REX.X */
+    rex |= (rm & 8) >> 3;		/* REX.B */
+
+    /* P_REXB_{R,RM} indicates that the given register is the low byte.
+       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
+       as otherwise the encoding indicates %[abcd]h.  Note that the values
+       that are ORed in merely indicate that the REX byte must be present;
+       those bits get discarded in output.  */
+    rex |= opc & (r >= 4 ? P_REXB_R : 0);
+    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
+
+    if (rex) {
+        tcg_out8(s, (uint8_t)(rex | 0x40));
+    }
+
+    if (opc & P_EXT) {
         tcg_out8(s, 0x0f);
+    }
     tcg_out8(s, opc);
 }
-
-static inline void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
+#else
+static void tcg_out_opc(TCGContext *s, int opc)
 {
-    tcg_out_opc(s, opc);
-    tcg_out8(s, 0xc0 | (r << 3) | rm);
+    if (opc & P_DATA16) {
+        tcg_out8(s, 0x66);
+    }
+    if (opc & P_EXT) {
+        tcg_out8(s, 0x0f);
+    }
+    tcg_out8(s, opc);
+}
+/* Discard the register arguments to tcg_out_opc early, so as not to penalize
+   the 32-bit compilation paths.  This method works with all versions of gcc,
+   whereas relying on optimization may not be able to exclude them.  */
+#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
+#endif
+
+static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
+{
+    tcg_out_opc(s, opc, r, rm, 0);
+    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
 }
 
-/* rm == -1 means no register index */
-static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm, 
-                                        int32_t offset)
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
+   We handle either RM and INDEX missing with a negative value.  In 64-bit
+   mode for absolute addresses, ~RM is the size of the immediate operand
+   that will follow the instruction.  */
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+                                     int index, int shift,
+                                     tcg_target_long offset)
 {
-    tcg_out_opc(s, opc);
-    if (rm == -1) {
-        tcg_out8(s, 0x05 | (r << 3));
-        tcg_out32(s, offset);
-    } else if (offset == 0 && rm != TCG_REG_EBP) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x04 | (r << 3));
-            tcg_out8(s, 0x24);
+    int mod, len;
+
+    if (index < 0 && rm < 0) {
+        if (TCG_TARGET_REG_BITS == 64) {
+            /* Try for a rip-relative addressing mode.  This has replaced
+               the 32-bit-mode absolute addressing encoding.  */
+            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
+            tcg_target_long disp = offset - pc;
+            if (disp == (int32_t)disp) {
+                tcg_out_opc(s, opc, r, 0, 0);
+                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
+                tcg_out32(s, disp);
+                return;
+            }
+
+            /* Try for an absolute address encoding.  This requires the
+               use of the MODRM+SIB encoding and is therefore larger than
+               rip-relative addressing.  */
+            if (offset == (int32_t)offset) {
+                tcg_out_opc(s, opc, r, 0, 0);
+                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
+                tcg_out8(s, (4 << 3) | 5);
+                tcg_out32(s, offset);
+                return;
+            }
+
+            /* ??? The memory isn't directly addressable.  */
+            tcg_abort();
         } else {
-            tcg_out8(s, 0x00 | (r << 3) | rm);
+            /* Absolute address.  */
+            tcg_out_opc(s, opc, r, 0, 0);
+            tcg_out8(s, (r << 3) | 5);
+            tcg_out32(s, offset);
+            return;
         }
-    } else if ((int8_t)offset == offset) {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x44 | (r << 3));
-            tcg_out8(s, 0x24);
-        } else {
-            tcg_out8(s, 0x40 | (r << 3) | rm);
-        }
-        tcg_out8(s, offset);
+    }
+
+    /* Find the length of the immediate addend.  Note that the encoding
+       that would be used for (%ebp) indicates absolute addressing.  */
+    if (rm < 0) {
+        mod = 0, len = 4, rm = 5;
+    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+        mod = 0, len = 0;
+    } else if (offset == (int8_t)offset) {
+        mod = 0x40, len = 1;
     } else {
-        if (rm == TCG_REG_ESP) {
-            tcg_out8(s, 0x84 | (r << 3));
-            tcg_out8(s, 0x24);
+        mod = 0x80, len = 4;
+    }
+
+    /* Use a single byte MODRM format if possible.  Note that the encoding
+       that would be used for %esp is the escape to the two byte form.  */
+    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+        /* Single byte MODRM format.  */
+        tcg_out_opc(s, opc, r, rm, 0);
+        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+    } else {
+        /* Two byte MODRM+SIB format.  */
+
+        /* Note that the encoding that would place %esp into the index
+           field indicates no index register.  In 64-bit mode, the REX.X
+           bit counts, so %r12 can be used as the index.  */
+        if (index < 0) {
+            index = 4;
         } else {
-            tcg_out8(s, 0x80 | (r << 3) | rm);
+            assert(index != TCG_REG_ESP);
         }
+
+        tcg_out_opc(s, opc, r, rm, index);
+        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
+        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
+    }
+
+    if (len == 1) {
+        tcg_out8(s, offset);
+    } else if (len == 4) {
         tcg_out32(s, offset);
     }
 }
 
-static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
+/* A simplification of the above with no index or shift.  */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
+                                        int rm, tcg_target_long offset)
 {
-    if (arg != ret)
-        tcg_out_modrm(s, 0x8b, ret, arg);
+    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
 }
 
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
-                                int ret, int32_t arg)
+/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+    /* Propagate an opcode prefix, such as P_REXW.  */
+    int ext = subop & ~0x7;
+    subop &= 0x7;
+
+    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
+{
+    if (arg != ret) {
+        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+        tcg_out_modrm(s, opc, ret, arg);
+    }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         int ret, tcg_target_long arg)
 {
     if (arg == 0) {
-        /* xor r0,r0 */
-        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
-    } else {
-        tcg_out8(s, 0xb8 + ret);
+        tgen_arithr(s, ARITH_XOR, ret, ret);
+        return;
+    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
         tcg_out32(s, arg);
+    } else if (arg == (int32_t)arg) {
+        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
+        tcg_out32(s, arg);
+    } else {
+        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+        tcg_out32(s, arg);
+        tcg_out32(s, arg >> 31 >> 1);
     }
 }
 
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+    if (val == (int8_t)val) {
+        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
+        tcg_out8(s, val);
+    } else if (val == (int32_t)val) {
+        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
+        tcg_out32(s, val);
+    } else {
+        tcg_abort();
+    }
+}
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
 static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
-    /* movl */
-    tcg_out_modrm_offset(s, 0x8b, ret, arg1, arg2);
+    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                               int arg1, tcg_target_long arg2)
 {
-    /* movl */
-    tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2);
+    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
 }
 
-static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
+static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
 {
-    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
-        /* inc */
-        tcg_out_opc(s, 0x40 + r0);
-    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
-        /* dec */
-        tcg_out_opc(s, 0x48 + r0);
-    } else if (val == (int8_t)val) {
-        tcg_out_modrm(s, 0x83, c, r0);
-        tcg_out8(s, val);
-    } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0);
-    } else if (c == ARITH_AND && val == 0xffffu) {
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
+    /* Propagate an opcode prefix, such as P_DATA16.  */
+    int ext = subopc & ~0x7;
+    subopc &= 0x7;
+
+    if (count == 1) {
+        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
     } else {
-        tcg_out_modrm(s, 0x81, c, r0);
-        tcg_out32(s, val);
+        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
+        tcg_out8(s, count);
     }
 }
 
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_rolw_8(TCGContext *s, int reg)
+{
+    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+    /* movzbl */
+    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
+}
+
+static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
+{
+    /* movsbl */
+    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
+{
+    /* movzwl */
+    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
+{
+    /* movsw[lq] */
+    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
+{
+    /* 32-bit mov zero extends.  */
+    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
+}
+
+static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
+{
+    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
+}
+
+static inline void tcg_out_bswap64(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static void tgen_arithi(TCGContext *s, int c, int r0,
+                        tcg_target_long val, int cf)
+{
+    int rexw = 0;
+
+    if (TCG_TARGET_REG_BITS == 64) {
+        rexw = c & -8;
+        c &= 7;
+    }
+
+    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
+       partial flags update stalls on Pentium4 and are not recommended
+       by current Intel optimization manuals.  */
+    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+        int is_inc = (c == ARITH_ADD) ^ (val < 0);
+        if (TCG_TARGET_REG_BITS == 64) {
+            /* The single-byte increment encodings are re-tasked as the
+               REX prefixes.  Use the MODRM encoding.  */
+            tcg_out_modrm(s, OPC_GRP5 + rexw,
+                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
+        } else {
+            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
+        }
+        return;
+    }
+
+    if (c == ARITH_AND) {
+        if (TCG_TARGET_REG_BITS == 64) {
+            if (val == 0xffffffffu) {
+                tcg_out_ext32u(s, r0, r0);
+                return;
+            }
+            if (val == (uint32_t)val) {
+                /* AND with no high bits set can use a 32-bit operation.  */
+                rexw = 0;
+            }
+        }
+        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+            tcg_out_ext8u(s, r0, r0);
+            return;
+        }
+        if (val == 0xffffu) {
+            tcg_out_ext16u(s, r0, r0);
+            return;
+        }
+    }
+
+    if (val == (int8_t)val) {
+        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
+        tcg_out8(s, val);
+        return;
+    }
+    if (rexw == 0 || val == (int32_t)val) {
+        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
+        tcg_out32(s, val);
+        return;
+    }
+
+    tcg_abort();
+}
+
 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
 {
-    if (val != 0)
-        tgen_arithi(s, ARITH_ADD, reg, val, 0);
+    if (val != 0) {
+        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
+    }
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+#undef small  /* for mingw build */
+
+/* Use SMALL != 0 to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
-    
+
     if (l->has_value) {
         val = l->u.value - (tcg_target_long)s->code_ptr;
         val1 = val - 2;
         if ((int8_t)val1 == val1) {
-            if (opc == -1)
-                tcg_out8(s, 0xeb);
-            else
-                tcg_out8(s, 0x70 + opc);
+            if (opc == -1) {
+                tcg_out8(s, OPC_JMP_short);
+            } else {
+                tcg_out8(s, OPC_JCC_short + opc);
+            }
             tcg_out8(s, val1);
         } else {
+            if (small) {
+                tcg_abort();
+            }
             if (opc == -1) {
-                tcg_out8(s, 0xe9);
+                tcg_out8(s, OPC_JMP_long);
                 tcg_out32(s, val - 5);
             } else {
-                tcg_out8(s, 0x0f);
-                tcg_out8(s, 0x80 + opc);
+                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, OPC_JMP_short);
+        } else {
+            tcg_out8(s, OPC_JCC_short + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
-            tcg_out8(s, 0xe9);
+            tcg_out8(s, OPC_JMP_long);
         } else {
-            tcg_out8(s, 0x0f);
-            tcg_out8(s, 0x80 + opc);
+            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
         }
         tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
         s->code_ptr += 4;
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+                        int const_arg2, int rexw)
 {
     if (const_arg2) {
         if (arg2 == 0) {
             /* test r, r */
-            tcg_out_modrm(s, 0x85, arg1, arg1);
+            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
         } else {
-            tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
+            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
         }
     } else {
-        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
+        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
 }
 
+static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
+                             TCGArg arg1, TCGArg arg2, int const_arg2,
+                             int label_index, int small)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
+                             TCGArg arg1, TCGArg arg2, int const_arg2,
+                             int label_index, int small)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
+}
+#else
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
     int label_next;
     label_next = gen_new_label();
     switch(args[4]) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                         label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+                         args[5], small);
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+                         args[5], small);
+        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
+                         args[5], small);
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+                         args[5], small);
+        tcg_out_jxx(s, JCC_JNE, label_next, 1);
+        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+                         args[5], small);
         break;
     default:
         tcg_abort();
     }
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
+#endif
+
+static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+    tcg_out_ext8u(s, dest, dest);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+    tcg_out_ext8u(s, dest, dest);
+}
+#else
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (args[0] == args[1] || args[0] == args[2]
+        || (!const_args[3] && args[0] == args[3])
+        || (!const_args[4] && args[0] == args[4])) {
+        /* When the destination overlaps with one of the argument
+           registers, don't do anything tricky.  */
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        /* When the destination does not overlap one of the arguments,
+           clear the destination first, jump if cond false, and emit an
+           increment in the true case.  This results in smaller code.  */
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[4] = tcg_invert_cond(new_args[4]);
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+#endif
+
+static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
+{
+    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
+
+    if (disp == (int32_t)disp) {
+        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
+        tcg_out32(s, disp);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
+        tcg_out_modrm(s, OPC_GRP5,
+                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
+    }
+}
+
+static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
+{
+    tcg_out_branch(s, 1, dest);
+}
+
+static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
+{
+    tcg_out_branch(s, 0, dest);
+}
 
 #if defined(CONFIG_SOFTMMU)
 
@@ -437,11 +975,164 @@
     __stl_mmu,
     __stq_mmu,
 };
+
+/* Perform the TLB load and compare.
+
+   Inputs:
+   ADDRLO_IDX contains the index into ARGS of the low part of the
+   address; the high part of the address is at ADDR_LOW_IDX+1.
+
+   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+   This should be offsetof addr_read or addr_write.
+
+   Outputs:
+   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
+   positions of the displacements of forward jumps to the TLB miss case.
+
+   First argument register is loaded with the low part of the address.
+   In the TLB hit case, it has been adjusted as indicated by the TLB
+   and so is a host address.  In the TLB miss case, it continues to
+   hold a guest address.
+
+   Second argument register is clobbered.  */
+
+static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
+                                    int mem_index, int s_bits,
+                                    const TCGArg *args,
+                                    uint8_t **label_ptr, int which)
+{
+    const int addrlo = args[addrlo_idx];
+    const int r0 = tcg_target_call_iarg_regs[0];
+    const int r1 = tcg_target_call_iarg_regs[1];
+    TCGType type = TCG_TYPE_I32;
+    int rexw = 0;
+
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
+        type = TCG_TYPE_I64;
+        rexw = P_REXW;
+    }
+
+    tcg_out_mov(s, type, r1, addrlo);
+    tcg_out_mov(s, type, r0, addrlo);
+
+    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+    tgen_arithi(s, ARITH_AND + rexw, r0,
+                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND + rexw, r1,
+                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
+
+    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+                             offsetof(CPUState, tlb_table[mem_index][0])
+                             + which);
+
+    /* cmp 0(r1), r0 */
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+
+    tcg_out_mov(s, type, r0, addrlo);
+
+    /* jne label1 */
+    tcg_out8(s, OPC_JCC_short + JCC_JNE);
+    label_ptr[0] = s->code_ptr;
+    s->code_ptr++;
+
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        /* cmp 4(r1), addrhi */
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+
+        /* jne label1 */
+        tcg_out8(s, OPC_JCC_short + JCC_JNE);
+        label_ptr[1] = s->code_ptr;
+        s->code_ptr++;
+    }
+
+    /* TLB Hit.  */
+
+    /* add addend(r1), r0 */
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+                         offsetof(CPUTLBEntry, addend) - which);
+}
 #endif
 
-#ifndef CONFIG_USER_ONLY
-#define GUEST_BASE 0
+static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
+                                   int base, tcg_target_long ofs, int sizeop)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+    const int bswap = 1;
+#else
+    const int bswap = 0;
 #endif
+    switch (sizeop) {
+    case 0:
+        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+        break;
+    case 0 | 4:
+        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+        break;
+    case 1:
+        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+        if (bswap) {
+            tcg_out_rolw_8(s, datalo);
+        }
+        break;
+    case 1 | 4:
+        if (bswap) {
+            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+            tcg_out_rolw_8(s, datalo);
+            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+        }
+        break;
+    case 2:
+        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+        if (bswap) {
+            tcg_out_bswap32(s, datalo);
+        }
+        break;
+#if TCG_TARGET_REG_BITS == 64
+    case 2 | 4:
+        if (bswap) {
+            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+            tcg_out_bswap32(s, datalo);
+            tcg_out_ext32s(s, datalo, datalo);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+        }
+        break;
+#endif
+    case 3:
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+            if (bswap) {
+                tcg_out_bswap64(s, datalo);
+            }
+        } else {
+            if (bswap) {
+                int t = datalo;
+                datalo = datahi;
+                datahi = t;
+            }
+            if (base != datalo) {
+                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+            } else {
+                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+            }
+            if (bswap) {
+                tcg_out_bswap32(s, datalo);
+                tcg_out_bswap32(s, datahi);
+            }
+        }
+        break;
+    default:
+        tcg_abort();
+    }
+}
 
 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
    EAX. It will be useful once fixed registers globals are less
@@ -449,635 +1140,505 @@
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
+    int data_reg, data_reg2 = 0;
+    int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label3_ptr;
-#endif
-    int addr_reg2;
+    int mem_index, s_bits, arg_idx;
+    uint8_t *label_ptr[3];
 #endif
 
-    data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0;
-    addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
-    mem_index = *args;
-    s_bits = opc & 3;
-
-    r0 = TCG_REG_EAX;
-    r1 = TCG_REG_EDX;
-
-#if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
-
-    tcg_out_mov(s, r0, addr_reg); 
- 
-    tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 
-    
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
-
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
-    
-    tcg_out_mov(s, r0, addr_reg);
-    
-#if TARGET_LONG_BITS == 32
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-#else
-    /* jne label3 */
-    tcg_out8(s, 0x70 + JCC_JNE);
-    label3_ptr = s->code_ptr;
-    s->code_ptr++;
-    
-    /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
-
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-    
-    /* label3: */
-    *label3_ptr = s->code_ptr - label3_ptr - 1;
-#endif
-
-    /* XXX: move that code at the end of the TB */
-#if TARGET_LONG_BITS == 32
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EDX, mem_index);
-#else
-    tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-#endif
-    tcg_out8(s, 0xe8);
-    tcg_out32(s, (tcg_target_long)qemu_ld_helpers[s_bits] - 
-              (tcg_target_long)s->code_ptr - 4);
-
-    switch(opc) {
-    case 0 | 4:
-        /* movsbl */
-        tcg_out_modrm(s, 0xbe | P_EXT, data_reg, TCG_REG_EAX);
-        break;
-    case 1 | 4:
-        /* movswl */
-        tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
-        break;
-    case 0:
-        /* movzbl */
-        tcg_out_modrm(s, 0xb6 | P_EXT, data_reg, TCG_REG_EAX);
-        break;
-    case 1:
-        /* movzwl */
-        tcg_out_modrm(s, 0xb7 | P_EXT, data_reg, TCG_REG_EAX);
-        break;
-    case 2:
-    default:
-        tcg_out_mov(s, data_reg, TCG_REG_EAX);
-        break;
-    case 3:
-        if (data_reg == TCG_REG_EDX) {
-            tcg_out_opc(s, 0x90 + TCG_REG_EDX); /* xchg %edx, %eax */
-            tcg_out_mov(s, data_reg2, TCG_REG_EAX);
-        } else {
-            tcg_out_mov(s, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, data_reg2, TCG_REG_EDX);
-        }
-        break;
+    data_reg = args[0];
+    addrlo_idx = 1;
+    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+        data_reg2 = args[1];
+        addrlo_idx = 2;
     }
 
+#if defined(CONFIG_SOFTMMU)
+    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+    s_bits = opc & 3;
+
+    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+                     label_ptr, offsetof(CPUTLBEntry, addr_read));
+
+    /* TLB Hit.  */
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
+                           tcg_target_call_iarg_regs[0], 0, opc);
+
     /* jmp label2 */
-    tcg_out8(s, 0xeb);
-    label2_ptr = s->code_ptr;
+    tcg_out8(s, OPC_JMP_short);
+    label_ptr[2] = s->code_ptr;
     s->code_ptr++;
-    
+
+    /* TLB Miss.  */
+
     /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
+    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+    }
 
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
-                         offsetof(CPUTLBEntry, addr_read));
-#else
-    r0 = addr_reg;
-#endif
+    /* XXX: move that code at the end of the TB */
+    /* The first argument is already loaded with addrlo.  */
+    arg_idx = 1;
+    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+                    args[addrlo_idx + 1]);
+    }
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
+                 mem_index);
+    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
     switch(opc) {
-    case 0:
-        /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, GUEST_BASE);
-        break;
     case 0 | 4:
-        /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, data_reg, r0, GUEST_BASE);
-        break;
-    case 1:
-        /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, GUEST_BASE);
-        if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
-        }
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
         break;
     case 1 | 4:
-        /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, data_reg, r0, GUEST_BASE);
-        if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
-
-            /* movswl data_reg, data_reg */
-            tcg_out_modrm(s, 0xbf | P_EXT, data_reg, data_reg);
-        }
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+        break;
+    case 0:
+        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
+        break;
+    case 1:
+        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
         break;
     case 2:
-        /* movl (r0), data_reg */
-        tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
-        if (bswap) {
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
-        }
+        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
         break;
+#if TCG_TARGET_REG_BITS == 64
+    case 2 | 4:
+        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+        break;
+#endif
     case 3:
-        /* XXX: could be nicer */
-        if (r0 == data_reg) {
-            r1 = TCG_REG_EDX;
-            if (r1 == data_reg)
-                r1 = TCG_REG_EAX;
-            tcg_out_mov(s, r1, r0);
-            r0 = r1;
-        }
-        if (!bswap) {
-            tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
-            tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
+        if (TCG_TARGET_REG_BITS == 64) {
+            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+        } else if (data_reg == TCG_REG_EDX) {
+            /* xchg %edx, %eax */
+            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
         } else {
-            tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
-
-            tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
+            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
         }
         break;
     default:
         tcg_abort();
     }
 
-#if defined(CONFIG_SOFTMMU)
     /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
+    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+#else
+    {
+        int32_t offset = GUEST_BASE;
+        int base = args[addrlo_idx];
+
+        if (TCG_TARGET_REG_BITS == 64) {
+            /* ??? We assume all operations have left us with register
+               contents that are zero extended.  So far this appears to
+               be true.  If we want to enforce this, we can either do
+               an explicit zero-extension here, or (if GUEST_BASE == 0)
+               use the ADDR32 prefix.  For now, do nothing.  */
+
+            if (offset != GUEST_BASE) {
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+                base = TCG_REG_RDI, offset = 0;
+            }
+        }
+
+        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+    }
 #endif
 }
 
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
+                                   int base, tcg_target_long ofs, int sizeop)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+    const int bswap = 1;
+#else
+    const int bswap = 0;
+#endif
+    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
+       we could perform the bswap twice to restore the original value
+       instead of moving to the scratch.  But as it is, the L constraint
+       means that the second argument reg is definitely free here.  */
+    int scratch = tcg_target_call_iarg_regs[1];
+
+    switch (sizeop) {
+    case 0:
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        break;
+    case 1:
+        if (bswap) {
+            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+            tcg_out_rolw_8(s, scratch);
+            datalo = scratch;
+        }
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+        break;
+    case 2:
+        if (bswap) {
+            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+            tcg_out_bswap32(s, scratch);
+            datalo = scratch;
+        }
+        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+        break;
+    case 3:
+        if (TCG_TARGET_REG_BITS == 64) {
+            if (bswap) {
+                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
+                tcg_out_bswap64(s, scratch);
+                datalo = scratch;
+            }
+            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+        } else if (bswap) {
+            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
+            tcg_out_bswap32(s, scratch);
+            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+            tcg_out_bswap32(s, scratch);
+            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+        } else {
+            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+        }
+        break;
+    default:
+        tcg_abort();
+    }
+}
 
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
+    int data_reg, data_reg2 = 0;
+    int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label3_ptr;
-#endif
-    int addr_reg2;
+    int mem_index, s_bits;
+    int stack_adjust;
+    uint8_t *label_ptr[3];
 #endif
 
-    data_reg = *args++;
-    if (opc == 3)
-        data_reg2 = *args++;
-    else
-        data_reg2 = 0;
-    addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
-    mem_index = *args;
+    data_reg = args[0];
+    addrlo_idx = 1;
+    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+        data_reg2 = args[1];
+        addrlo_idx = 2;
+    }
 
+#if defined(CONFIG_SOFTMMU)
+    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
     s_bits = opc;
 
-    r0 = TCG_REG_EAX;
-    r1 = TCG_REG_EDX;
+    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
+                     label_ptr, offsetof(CPUTLBEntry, addr_write));
 
-#if defined(CONFIG_SOFTMMU)
-    tcg_out_mov(s, r1, addr_reg); 
+    /* TLB Hit.  */
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
+                           tcg_target_call_iarg_regs[0], 0, opc);
 
-    tcg_out_mov(s, r0, addr_reg); 
- 
-    tcg_out_modrm(s, 0xc1, 5, r1); /* shr $x, r1 */
-    tcg_out8(s, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); 
-    
-    tcg_out_modrm(s, 0x81, 4, r0); /* andl $x, r0 */
-    tcg_out32(s, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    
-    tcg_out_modrm(s, 0x81, 4, r1); /* andl $x, r1 */
-    tcg_out32(s, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    tcg_out_opc(s, 0x8d); /* lea offset(r1, %ebp), r1 */
-    tcg_out8(s, 0x80 | (r1 << 3) | 0x04);
-    tcg_out8(s, (5 << 3) | r1);
-    tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
-
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
-    
-    tcg_out_mov(s, r0, addr_reg);
-    
-#if TARGET_LONG_BITS == 32
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
+    /* jmp label2 */
+    tcg_out8(s, OPC_JMP_short);
+    label_ptr[2] = s->code_ptr;
     s->code_ptr++;
-#else
-    /* jne label3 */
-    tcg_out8(s, 0x70 + JCC_JNE);
-    label3_ptr = s->code_ptr;
-    s->code_ptr++;
-    
-    /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
 
-    /* je label1 */
-    tcg_out8(s, 0x70 + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-    
-    /* label3: */
-    *label3_ptr = s->code_ptr - label3_ptr - 1;
-#endif
+    /* TLB Miss.  */
+
+    /* label1: */
+    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+    }
 
     /* XXX: move that code at the end of the TB */
-#if TARGET_LONG_BITS == 32
-    if (opc == 3) {
-        tcg_out_mov(s, TCG_REG_EDX, data_reg);
-        tcg_out_mov(s, TCG_REG_ECX, data_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
-    } else {
-        switch(opc) {
-        case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_EDX, data_reg);
-            break;
-        case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_EDX, data_reg);
-            break;
-        case 2:
-            tcg_out_mov(s, TCG_REG_EDX, data_reg);
-            break;
-        }
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-    }
-#else
-    if (opc == 3) {
-        tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out_opc(s, 0x50 + data_reg2); /* push */
-        tcg_out_opc(s, 0x50 + data_reg); /* push */
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 12);
-    } else {
-        tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
-        switch(opc) {
-        case 0:
-            /* movzbl */
-            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_ECX, data_reg);
-            break;
-        case 1:
-            /* movzwl */
-            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_ECX, data_reg);
-            break;
-        case 2:
-            tcg_out_mov(s, TCG_REG_ECX, data_reg);
-            break;
-        }
-        tcg_out8(s, 0x6a); /* push Ib */
-        tcg_out8(s, mem_index);
-        tcg_out8(s, 0xe8);
-        tcg_out32(s, (tcg_target_long)qemu_st_helpers[s_bits] - 
-                  (tcg_target_long)s->code_ptr - 4);
-        tcg_out_addi(s, TCG_REG_ESP, 4);
-    }
-#endif
-    
-    /* jmp label2 */
-    tcg_out8(s, 0xeb);
-    label2_ptr = s->code_ptr;
-    s->code_ptr++;
-    
-    /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
-
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
-                         offsetof(CPUTLBEntry, addr_write));
-#else
-    r0 = addr_reg;
-#endif
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 1;
-#else
-    bswap = 0;
-#endif
-    switch(opc) {
-    case 0:
-        /* movb */
-        tcg_out_modrm_offset(s, 0x88, data_reg, r0, GUEST_BASE);
-        break;
-    case 1:
-        if (bswap) {
-            tcg_out_mov(s, r1, data_reg);
-            tcg_out8(s, 0x66); /* rolw $8, %ecx */
-            tcg_out_modrm(s, 0xc1, 0, r1);
-            tcg_out8(s, 8);
-            data_reg = r1;
-        }
-        /* movw */
-        tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
-        break;
-    case 2:
-        if (bswap) {
-            tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
-            data_reg = r1;
-        }
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
-        break;
-    case 3:
-        if (bswap) {
-            tcg_out_mov(s, r1, data_reg2);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
-            tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
-            tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
-            tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                    TCG_REG_RSI, data_reg);
+        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+        stack_adjust = 0;
+    } else if (TARGET_LONG_BITS == 32) {
+        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
+        if (opc == 3) {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 4;
         } else {
-            tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
-            tcg_out_modrm_offset(s, 0x89, data_reg2, r0, GUEST_BASE + 4);
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
+            stack_adjust = 0;
         }
-        break;
-    default:
-        tcg_abort();
+    } else {
+        if (opc == 3) {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
+            tcg_out_pushi(s, mem_index);
+            tcg_out_push(s, data_reg2);
+            tcg_out_push(s, data_reg);
+            stack_adjust = 12;
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
+            switch(opc) {
+            case 0:
+                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
+                break;
+            case 1:
+                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
+                break;
+            case 2:
+                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
+                break;
+            }
+            tcg_out_pushi(s, mem_index);
+            stack_adjust = 4;
+        }
     }
 
-#if defined(CONFIG_SOFTMMU)
+    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+        /* Pop and discard.  This is 2 bytes smaller than the add.  */
+        tcg_out_pop(s, TCG_REG_ECX);
+    } else if (stack_adjust != 0) {
+        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+    }
+
     /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
+    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+#else
+    {
+        int32_t offset = GUEST_BASE;
+        int base = args[addrlo_idx];
+
+        if (TCG_TARGET_REG_BITS == 64) {
+            /* ??? We assume all operations have left us with register
+               contents that are zero extended.  So far this appears to
+               be true.  If we want to enforce this, we can either do
+               an explicit zero-extension here, or (if GUEST_BASE == 0)
+               use the ADDR32 prefix.  For now, do nothing.  */
+
+            if (offset != GUEST_BASE) {
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+                base = TCG_REG_RDI, offset = 0;
+            }
+        }
+
+        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+    }
 #endif
 }
 
-static inline void tcg_out_op(TCGContext *s, int opc, 
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
-    int c;
-    
+    int c, rexw = 0;
+
+#if TCG_TARGET_REG_BITS == 64
+# define OP_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i64): \
+            rexw = P_REXW; /* FALLTHRU */    \
+        case glue(glue(INDEX_op_, x), _i32)
+#else
+# define OP_32_64(x) \
+        case glue(glue(INDEX_op_, x), _i32)
+#endif
+
     switch(opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_EAX, args[0]);
-        tcg_out8(s, 0xe9); /* jmp tb_ret_addr */
-        tcg_out32(s, tb_ret_addr - s->code_ptr - 4);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            tcg_out8(s, 0xe9); /* jmp im */
+            tcg_out8(s, OPC_JMP_long); /* jmp im */
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
             tcg_out32(s, 0);
         } else {
             /* indirect jump method */
-            /* jmp Ev */
-            tcg_out_modrm_offset(s, 0xff, 4, -1, 
+            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                  (tcg_target_long)(s->tb_next + args[0]));
         }
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
         break;
     case INDEX_op_call:
         if (const_args[0]) {
-            tcg_out8(s, 0xe8);
-            tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
+            tcg_out_calli(s, args[0]);
         } else {
-            tcg_out_modrm(s, 0xff, 2, args[0]);
+            /* call *reg */
+            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
     case INDEX_op_jmp:
         if (const_args[0]) {
-            tcg_out8(s, 0xe9);
-            tcg_out32(s, args[0] - (tcg_target_long)s->code_ptr - 4);
+            tcg_out_jmp(s, args[0]);
         } else {
-            tcg_out_modrm(s, 0xff, 4, args[0]);
+            /* jmp *reg */
+            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
         break;
-    case INDEX_op_ld8u_i32:
-        /* movzbl */
-        tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]);
+    OP_32_64(ld8u):
+        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
+        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
         break;
-    case INDEX_op_ld8s_i32:
-        /* movsbl */
-        tcg_out_modrm_offset(s, 0xbe | P_EXT, args[0], args[1], args[2]);
+    OP_32_64(ld8s):
+        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
         break;
-    case INDEX_op_ld16u_i32:
-        /* movzwl */
-        tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]);
+    OP_32_64(ld16u):
+        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
+        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
         break;
-    case INDEX_op_ld16s_i32:
-        /* movswl */
-        tcg_out_modrm_offset(s, 0xbf | P_EXT, args[0], args[1], args[2]);
+    OP_32_64(ld16s):
+        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
         break;
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_ld32u_i64:
+#endif
     case INDEX_op_ld_i32:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x8b, args[0], args[1], args[2]);
+        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
-    case INDEX_op_st8_i32:
-        /* movb */
-        tcg_out_modrm_offset(s, 0x88, args[0], args[1], args[2]);
+
+    OP_32_64(st8):
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+                             args[0], args[1], args[2]);
         break;
-    case INDEX_op_st16_i32:
-        /* movw */
-        tcg_out8(s, 0x66);
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
+    OP_32_64(st16):
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+                             args[0], args[1], args[2]);
         break;
+#if TCG_TARGET_REG_BITS == 64
+    case INDEX_op_st32_i64:
+#endif
     case INDEX_op_st_i32:
-        /* movl */
-        tcg_out_modrm_offset(s, 0x89, args[0], args[1], args[2]);
+        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
-    case INDEX_op_sub_i32:
+
+    OP_32_64(add):
+        /* For 3-operand addition, use LEA.  */
+        if (args[0] != args[1]) {
+            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
+
+            if (const_args[2]) {
+                c3 = a2, a2 = -1;
+            } else if (a0 == a2) {
+                /* Watch out for dest = src + dest, since we've removed
+                   the matching constraint on the add.  */
+                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
+                break;
+            }
+
+            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
+            break;
+        }
+        c = ARITH_ADD;
+        goto gen_arith;
+    OP_32_64(sub):
         c = ARITH_SUB;
         goto gen_arith;
-    case INDEX_op_and_i32:
+    OP_32_64(and):
         c = ARITH_AND;
         goto gen_arith;
-    case INDEX_op_or_i32:
+    OP_32_64(or):
         c = ARITH_OR;
         goto gen_arith;
-    case INDEX_op_xor_i32:
+    OP_32_64(xor):
         c = ARITH_XOR;
         goto gen_arith;
-    case INDEX_op_add_i32:
-        c = ARITH_ADD;
     gen_arith:
         if (const_args[2]) {
-            tgen_arithi(s, c, args[0], args[2], 0);
+            tgen_arithi(s, c + rexw, args[0], args[2], 0);
         } else {
-            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
+            tgen_arithr(s, c + rexw, args[0], args[2]);
         }
         break;
-    case INDEX_op_mul_i32:
+
+    OP_32_64(mul):
         if (const_args[2]) {
             int32_t val;
             val = args[2];
             if (val == (int8_t)val) {
-                tcg_out_modrm(s, 0x6b, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                 tcg_out8(s, val);
             } else {
-                tcg_out_modrm(s, 0x69, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                 tcg_out32(s, val);
             }
         } else {
-            tcg_out_modrm(s, 0xaf | P_EXT, args[0], args[2]);
+            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
         }
         break;
-    case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, 0xf7, 4, args[3]);
+
+    OP_32_64(div2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
         break;
-    case INDEX_op_div2_i32:
-        tcg_out_modrm(s, 0xf7, 7, args[4]);
+    OP_32_64(divu2):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
         break;
-    case INDEX_op_divu2_i32:
-        tcg_out_modrm(s, 0xf7, 6, args[4]);
-        break;
-    case INDEX_op_shl_i32:
+
+    OP_32_64(shl):
         c = SHIFT_SHL;
-    gen_shift32:
+        goto gen_shift;
+    OP_32_64(shr):
+        c = SHIFT_SHR;
+        goto gen_shift;
+    OP_32_64(sar):
+        c = SHIFT_SAR;
+        goto gen_shift;
+    OP_32_64(rotl):
+        c = SHIFT_ROL;
+        goto gen_shift;
+    OP_32_64(rotr):
+        c = SHIFT_ROR;
+        goto gen_shift;
+    gen_shift:
         if (const_args[2]) {
-            if (args[2] == 1) {
-                tcg_out_modrm(s, 0xd1, c, args[0]);
-            } else {
-                tcg_out_modrm(s, 0xc1, c, args[0]);
-                tcg_out8(s, args[2]);
-            }
+            tcg_out_shifti(s, c + rexw, args[0], args[2]);
         } else {
-            tcg_out_modrm(s, 0xd3, c, args[0]);
+            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
         }
         break;
-    case INDEX_op_shr_i32:
-        c = SHIFT_SHR;
-        goto gen_shift32;
-    case INDEX_op_sar_i32:
-        c = SHIFT_SAR;
-        goto gen_shift32;
-    case INDEX_op_rotl_i32:
-        c = SHIFT_ROL;
-        goto gen_shift32;
-    case INDEX_op_rotr_i32:
-        c = SHIFT_ROR;
-        goto gen_shift32;
 
-    case INDEX_op_add2_i32:
-        if (const_args[4]) 
-            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
-        if (const_args[5]) 
-            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
-        break;
-    case INDEX_op_sub2_i32:
-        if (const_args[4]) 
-            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
-        if (const_args[5]) 
-            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
-        else
-            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
-        break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
+                         args[3], 0);
         break;
-    case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond32(s, args[3], args[0], args[1],
+                          args[2], const_args[2]);
         break;
 
-    case INDEX_op_bswap16_i32:
-        tcg_out8(s, 0x66);
-        tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
-        tcg_out8(s, 8);
+    OP_32_64(bswap16):
+        tcg_out_rolw_8(s, args[0]);
         break;
-    case INDEX_op_bswap32_i32:
-        tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
+    OP_32_64(bswap32):
+        tcg_out_bswap32(s, args[0]);
         break;
 
-    case INDEX_op_neg_i32:
-        tcg_out_modrm(s, 0xf7, 3, args[0]);
+    OP_32_64(neg):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+        break;
+    OP_32_64(not):
+        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
         break;
 
-    case INDEX_op_not_i32:
-        tcg_out_modrm(s, 0xf7, 2, args[0]);
+    OP_32_64(ext8s):
+        tcg_out_ext8s(s, args[0], args[1], rexw);
         break;
-
-    case INDEX_op_ext8s_i32:
-        tcg_out_modrm(s, 0xbe | P_EXT, args[0], args[1]);
+    OP_32_64(ext16s):
+        tcg_out_ext16s(s, args[0], args[1], rexw);
         break;
-    case INDEX_op_ext16s_i32:
-        tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
+    OP_32_64(ext8u):
+        tcg_out_ext8u(s, args[0], args[1]);
         break;
-    case INDEX_op_ext8u_i32:
-        tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]);
-        break;
-    case INDEX_op_ext16u_i32:
-        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
+    OP_32_64(ext16u):
+        tcg_out_ext16u(s, args[0], args[1]);
         break;
 
     case INDEX_op_qemu_ld8u:
@@ -1092,13 +1653,16 @@
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
+#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_qemu_ld32u:
+#endif
+    case INDEX_op_qemu_ld32:
         tcg_out_qemu_ld(s, args, 2);
         break;
     case INDEX_op_qemu_ld64:
         tcg_out_qemu_ld(s, args, 3);
         break;
-        
+
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
         break;
@@ -1112,9 +1676,82 @@
         tcg_out_qemu_st(s, args, 3);
         break;
 
+#if TCG_TARGET_REG_BITS == 32
+    case INDEX_op_brcond2_i32:
+        tcg_out_brcond2(s, args, const_args, 0);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+    case INDEX_op_mulu2_i32:
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
+        break;
+    case INDEX_op_add2_i32:
+        if (const_args[4]) {
+            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
+        } else {
+            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
+        }
+        if (const_args[5]) {
+            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
+        } else {
+            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
+        }
+        break;
+    case INDEX_op_sub2_i32:
+        if (const_args[4]) {
+            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
+        } else {
+            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
+        }
+        if (const_args[5]) {
+            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
+        } else {
+            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
+        }
+        break;
+#else /* TCG_TARGET_REG_BITS == 64 */
+    case INDEX_op_movi_i64:
+        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
+        break;
+    case INDEX_op_ld32s_i64:
+        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_ld_i64:
+        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_st_i64:
+        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        break;
+    case INDEX_op_qemu_ld32s:
+        tcg_out_qemu_ld(s, args, 2 | 4);
+        break;
+
+    case INDEX_op_brcond_i64:
+        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
+                         args[3], 0);
+        break;
+    case INDEX_op_setcond_i64:
+        tcg_out_setcond64(s, args[3], args[0], args[1],
+                          args[2], const_args[2]);
+        break;
+
+    case INDEX_op_bswap64_i64:
+        tcg_out_bswap64(s, args[0]);
+        break;
+    case INDEX_op_ext32u_i64:
+        tcg_out_ext32u(s, args[0], args[1]);
+        break;
+    case INDEX_op_ext32s_i64:
+        tcg_out_ext32s(s, args[0], args[1]);
+        break;
+#endif
+
     default:
         tcg_abort();
     }
+
+#undef OP_32_64
 }
 
 static const TCGTargetOpDef x86_op_defs[] = {
@@ -1134,10 +1771,9 @@
     { INDEX_op_st16_i32, { "r", "r" } },
     { INDEX_op_st_i32, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "0", "ri" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
     { INDEX_op_mul_i32, { "r", "0", "ri" } },
-    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
     { INDEX_op_and_i32, { "r", "0", "ri" } },
@@ -1152,10 +1788,6 @@
 
     { INDEX_op_brcond_i32, { "r", "ri" } },
 
-    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
-    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
-    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
-
     { INDEX_op_bswap16_i32, { "r", "0" } },
     { INDEX_op_bswap32_i32, { "r", "0" } },
 
@@ -1165,15 +1797,84 @@
 
     { INDEX_op_ext8s_i32, { "r", "q" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
-    { INDEX_op_ext8u_i32, { "r", "q"} },
-    { INDEX_op_ext16u_i32, { "r", "r"} },
+    { INDEX_op_ext8u_i32, { "r", "q" } },
+    { INDEX_op_ext16u_i32, { "r", "r" } },
 
-#if TARGET_LONG_BITS == 32
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+
+#if TCG_TARGET_REG_BITS == 32
+    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
+    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
+    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+#else
+    { INDEX_op_mov_i64, { "r", "r" } },
+    { INDEX_op_movi_i64, { "r" } },
+    { INDEX_op_ld8u_i64, { "r", "r" } },
+    { INDEX_op_ld8s_i64, { "r", "r" } },
+    { INDEX_op_ld16u_i64, { "r", "r" } },
+    { INDEX_op_ld16s_i64, { "r", "r" } },
+    { INDEX_op_ld32u_i64, { "r", "r" } },
+    { INDEX_op_ld32s_i64, { "r", "r" } },
+    { INDEX_op_ld_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "r", "r" } },
+    { INDEX_op_st16_i64, { "r", "r" } },
+    { INDEX_op_st32_i64, { "r", "r" } },
+    { INDEX_op_st_i64, { "r", "r" } },
+
+    { INDEX_op_add_i64, { "r", "0", "re" } },
+    { INDEX_op_mul_i64, { "r", "0", "re" } },
+    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
+    { INDEX_op_sub_i64, { "r", "0", "re" } },
+    { INDEX_op_and_i64, { "r", "0", "reZ" } },
+    { INDEX_op_or_i64, { "r", "0", "re" } },
+    { INDEX_op_xor_i64, { "r", "0", "re" } },
+
+    { INDEX_op_shl_i64, { "r", "0", "ci" } },
+    { INDEX_op_shr_i64, { "r", "0", "ci" } },
+    { INDEX_op_sar_i64, { "r", "0", "ci" } },
+    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
+    { INDEX_op_rotr_i64, { "r", "0", "ci" } },
+
+    { INDEX_op_brcond_i64, { "r", "re" } },
+    { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+    { INDEX_op_bswap16_i64, { "r", "0" } },
+    { INDEX_op_bswap32_i64, { "r", "0" } },
+    { INDEX_op_bswap64_i64, { "r", "0" } },
+    { INDEX_op_neg_i64, { "r", "0" } },
+    { INDEX_op_not_i64, { "r", "0" } },
+
+    { INDEX_op_ext8s_i64, { "r", "r" } },
+    { INDEX_op_ext16s_i64, { "r", "r" } },
+    { INDEX_op_ext32s_i64, { "r", "r" } },
+    { INDEX_op_ext8u_i64, { "r", "r" } },
+    { INDEX_op_ext16u_i64, { "r", "r" } },
+    { INDEX_op_ext32u_i64, { "r", "r" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
     { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
     { INDEX_op_qemu_ld64, { "r", "r", "L" } },
 
     { INDEX_op_qemu_st8, { "cb", "L" } },
@@ -1185,7 +1886,7 @@
     { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
     { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
 
     { INDEX_op_qemu_st8, { "cb", "L", "L" } },
@@ -1197,64 +1898,85 @@
 };
 
 static int tcg_target_callee_save_regs[] = {
-    /*    TCG_REG_EBP, */ /* currently used for the global env, so no
-                             need to save */
+#if TCG_TARGET_REG_BITS == 64
+    TCG_REG_RBP,
+    TCG_REG_RBX,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    /* TCG_REG_R14, */ /* Currently used for the global env. */
+    TCG_REG_R15,
+#else
+    /* TCG_REG_EBP, */ /* Currently used for the global env. */
     TCG_REG_EBX,
     TCG_REG_ESI,
     TCG_REG_EDI,
+#endif
 };
 
-static inline void tcg_out_push(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x50 + reg);
-}
-
-static inline void tcg_out_pop(TCGContext *s, int reg)
-{
-    tcg_out_opc(s, 0x58 + reg);
-}
-
 /* Generate global QEMU prologue and epilogue code */
-void tcg_target_qemu_prologue(TCGContext *s)
+static void tcg_target_qemu_prologue(TCGContext *s)
 {
     int i, frame_size, push_size, stack_addend;
-    
+
     /* TB prologue */
-    /* save all callee saved registers */
-    for(i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+
+    /* Save all callee saved registers.  */
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
         tcg_out_push(s, tcg_target_callee_save_regs[i]);
     }
-    /* reserve some stack space */
-    push_size = 4 + ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
+
+    /* Reserve some stack space.  */
+    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
+    push_size *= TCG_TARGET_REG_BITS / 8;
+
     frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
-    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & 
+    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
         ~(TCG_TARGET_STACK_ALIGN - 1);
     stack_addend = frame_size - push_size;
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
-    tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */
-    
+    /* jmp *tb.  */
+    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);
+
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
+
     tcg_out_addi(s, TCG_REG_ESP, stack_addend);
-    for(i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+
+    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
-    tcg_out8(s, 0xc3); /* ret */
+    tcg_out_opc(s, OPC_RET, 0, 0, 0);
 }
 
-void tcg_target_init(TCGContext *s)
+static void tcg_target_init(TCGContext *s)
 {
+#if !defined(CONFIG_USER_ONLY)
     /* fail safe */
     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
         tcg_abort();
+#endif
 
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
-    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
-                     (1 << TCG_REG_EAX) | 
-                     (1 << TCG_REG_EDX) | 
-                     (1 << TCG_REG_ECX));
-    
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
+    } else {
+        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
+    }
+
+    tcg_regset_clear(tcg_target_call_clobber_regs);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+    }
+
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index f97034c..a869cf5 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -23,10 +23,18 @@
  */
 #define TCG_TARGET_I386 1
 
+#if defined(__x86_64__)
+# define TCG_TARGET_REG_BITS 64
+#else
 #define TCG_TARGET_REG_BITS 32
+#endif
 //#define TCG_TARGET_WORDS_BIGENDIAN
 
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_TARGET_NB_REGS 16
+#else
 #define TCG_TARGET_NB_REGS 8
+#endif
 
 enum {
     TCG_REG_EAX = 0,
@@ -37,14 +45,37 @@
     TCG_REG_EBP,
     TCG_REG_ESI,
     TCG_REG_EDI,
+
+    /* 64-bit registers; always define the symbols to avoid
+       too much if-deffing.  */
+    TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    TCG_REG_R14,
+    TCG_REG_R15,
+    TCG_REG_RAX = TCG_REG_EAX,
+    TCG_REG_RCX = TCG_REG_ECX,
+    TCG_REG_RDX = TCG_REG_EDX,
+    TCG_REG_RBX = TCG_REG_EBX,
+    TCG_REG_RSP = TCG_REG_ESP,
+    TCG_REG_RBP = TCG_REG_EBP,
+    TCG_REG_RSI = TCG_REG_ESI,
+    TCG_REG_RDI = TCG_REG_EDI,
 };
 
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_ESP 
 #define TCG_TARGET_STACK_ALIGN 16
 #define TCG_TARGET_CALL_STACK_OFFSET 0
 
 /* optional instructions */
+#define TCG_TARGET_HAS_div2_i32
 #define TCG_TARGET_HAS_rot_i32
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
@@ -56,13 +87,43 @@
 #define TCG_TARGET_HAS_not_i32
 // #define TCG_TARGET_HAS_andc_i32
 // #define TCG_TARGET_HAS_orc_i32
+// #define TCG_TARGET_HAS_eqv_i32
+// #define TCG_TARGET_HAS_nand_i32
+// #define TCG_TARGET_HAS_nor_i32
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_div2_i64
+#define TCG_TARGET_HAS_rot_i64
+#define TCG_TARGET_HAS_ext8s_i64
+#define TCG_TARGET_HAS_ext16s_i64
+#define TCG_TARGET_HAS_ext32s_i64
+#define TCG_TARGET_HAS_ext8u_i64
+#define TCG_TARGET_HAS_ext16u_i64
+#define TCG_TARGET_HAS_ext32u_i64
+#define TCG_TARGET_HAS_bswap16_i64
+#define TCG_TARGET_HAS_bswap32_i64
+#define TCG_TARGET_HAS_bswap64_i64
+#define TCG_TARGET_HAS_neg_i64
+#define TCG_TARGET_HAS_not_i64
+// #define TCG_TARGET_HAS_andc_i64
+// #define TCG_TARGET_HAS_orc_i64
+// #define TCG_TARGET_HAS_eqv_i64
+// #define TCG_TARGET_HAS_nand_i64
+// #define TCG_TARGET_HAS_nor_i64
+#endif
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
 /* Note: must be synced with dyngen-exec.h */
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_AREG0 TCG_REG_R14
+#define TCG_AREG1 TCG_REG_R15
+#define TCG_AREG2 TCG_REG_R12
+#else
 #define TCG_AREG0 TCG_REG_EBP
 #define TCG_AREG1 TCG_REG_EBX
 #define TCG_AREG2 TCG_REG_ESI
+#endif
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 {
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 96cc461..7970268 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -36,11 +36,6 @@
 #endif
 
 #define FAST_PATH
-#if TARGET_PHYS_ADDR_BITS <= 32
-#define ADDEND_OFFSET 0
-#else
-#define ADDEND_OFFSET 4
-#endif
 
 #ifndef GUEST_BASE
 #define GUEST_BASE 0
@@ -56,7 +51,7 @@
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "r0",
     "r1",
-    "rp",
+    "r2",
     "r3",
     "r4",
     "r5",
@@ -328,10 +323,12 @@
 #define MULLI  OPCD( 7)
 #define CMPLI  OPCD(10)
 #define CMPI   OPCD(11)
+#define SUBFIC OPCD( 8)
 
 #define LWZU   OPCD(33)
 #define STWU   OPCD(37)
 
+#define RLWIMI OPCD(20)
 #define RLWINM OPCD(21)
 #define RLWNM  OPCD(23)
 
@@ -373,6 +370,8 @@
 #define NOR    XO31(124)
 #define ANDC   XO31( 60)
 #define ORC    XO31(412)
+#define EQV    XO31(284)
+#define NAND   XO31(476)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -438,7 +437,7 @@
     [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
 };
 
-static void tcg_out_mov(TCGContext *s, int ret, int arg)
+static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
 {
     tcg_out32 (s, OR | SAB (arg, ret, arg));
 }
@@ -592,11 +591,11 @@
 
     /* slow path */
 #if TARGET_LONG_BITS == 32
-    tcg_out_mov (s, 3, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I32, 4, mem_index);
 #else
-    tcg_out_mov (s, 3, addr_reg2);
-    tcg_out_mov (s, 4, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg2);
+    tcg_out_mov (s, TCG_TYPE_I32, 4, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif
 
@@ -612,23 +611,23 @@
     case 1:
     case 2:
         if (data_reg != 3)
-            tcg_out_mov (s, data_reg, 3);
+            tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
         break;
     case 3:
         if (data_reg == 3) {
             if (data_reg2 == 4) {
-                tcg_out_mov (s, 0, 4);
-                tcg_out_mov (s, 4, 3);
-                tcg_out_mov (s, 3, 0);
+                tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
+                tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
+                tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
             }
             else {
-                tcg_out_mov (s, data_reg2, 3);
-                tcg_out_mov (s, 3, 4);
+                tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
+                tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
             }
         }
         else {
-            if (data_reg != 4) tcg_out_mov (s, data_reg, 4);
-            if (data_reg2 != 3) tcg_out_mov (s, data_reg2, 3);
+            if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
+            if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
         }
         break;
     }
@@ -644,7 +643,7 @@
     tcg_out32 (s, (LWZ
                    | RT (r0)
                    | RA (r0)
-                   | (ADDEND_OFFSET + offsetof (CPUTLBEntry, addend)
+                   | (offsetof (CPUTLBEntry, addend)
                       - offsetof (CPUTLBEntry, addr_read))
                    ));
     /* r0 = env->tlb_table[mem_index][index].addend */
@@ -706,7 +705,7 @@
             if (r0 == data_reg2) {
                 tcg_out32 (s, LWZ | RT (0) | RA (r0));
                 tcg_out32 (s, LWZ | RT (data_reg) | RA (r0) | 4);
-                tcg_out_mov (s, data_reg2, 0);
+                tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 0);
             }
             else {
                 tcg_out32 (s, LWZ | RT (data_reg2) | RA (r0));
@@ -788,11 +787,11 @@
 
     /* slow path */
 #if TARGET_LONG_BITS == 32
-    tcg_out_mov (s, 3, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg);
     ir = 4;
 #else
-    tcg_out_mov (s, 3, addr_reg2);
-    tcg_out_mov (s, 4, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I32, 3, addr_reg2);
+    tcg_out_mov (s, TCG_TYPE_I32, 4, addr_reg);
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
     ir = 5;
 #else
@@ -818,14 +817,14 @@
                        | ME (31)));
         break;
     case 2:
-        tcg_out_mov (s, ir, data_reg);
+        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
         break;
     case 3:
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
         ir = 5;
 #endif
-        tcg_out_mov (s, ir++, data_reg2);
-        tcg_out_mov (s, ir, data_reg);
+        tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
+        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
         break;
     }
     ir++;
@@ -843,7 +842,7 @@
     tcg_out32 (s, (LWZ
                    | RT (r0)
                    | RA (r0)
-                   | (ADDEND_OFFSET + offsetof (CPUTLBEntry, addend)
+                   | (offsetof (CPUTLBEntry, addend)
                       - offsetof (CPUTLBEntry, addr_write))
                    ));
     /* r0 = env->tlb_table[mem_index][index].addend */
@@ -901,7 +900,7 @@
 #endif
 }
 
-void tcg_target_qemu_prologue (TCGContext *s)
+static void tcg_target_qemu_prologue (TCGContext *s)
 {
     int i, frame_size;
 
@@ -934,7 +933,10 @@
     tcg_out32 (s, STW | RS (0) | RA (1) | (frame_size + LR_OFFSET));
 
 #ifdef CONFIG_USE_GUEST_BASE
-    tcg_out_movi (s, TCG_TYPE_I32, TCG_GUEST_BASE_REG, GUEST_BASE);
+    if (GUEST_BASE) {
+        tcg_out_movi (s, TCG_TYPE_I32, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
 #endif
 
     tcg_out32 (s, MTSPR | RS (3) | CTR);
@@ -1076,7 +1078,8 @@
 static void tcg_out_cr7eq_from_cond (TCGContext *s, const TCGArg *args,
                                      const int *const_args)
 {
-    int cond = args[4], op;
+    TCGCond cond = args[4];
+    int op;
     struct { int bit1; int bit2; int cond2; } bits[] = {
         [TCG_COND_LT ] = { CR_LT, CR_LT, TCG_COND_LT  },
         [TCG_COND_LE ] = { CR_LT, CR_GT, TCG_COND_LT  },
@@ -1106,9 +1109,9 @@
     case TCG_COND_GEU:
         op = (b->bit1 != b->bit2) ? CRANDC : CRAND;
         tcg_out_cmp (s, b->cond2, args[1], args[3], const_args[3], 5);
-        tcg_out_cmp (s, TCG_COND_EQ, args[1], args[3], const_args[3], 6);
-        tcg_out_cmp (s, cond, args[0], args[2], const_args[2], 7);
-        tcg_out32 (s, op | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, b->bit2));
+        tcg_out_cmp (s, tcg_unsigned_cond (cond), args[0], args[2],
+                     const_args[2], 7);
+        tcg_out32 (s, op | BT (7, CR_EQ) | BA (5, CR_EQ) | BB (7, b->bit2));
         tcg_out32 (s, CROR | BT (7, CR_EQ) | BA (5, b->bit1) | BB (7, CR_EQ));
         break;
     default:
@@ -1116,7 +1119,7 @@
     }
 }
 
-static void tcg_out_setcond (TCGContext *s, int cond, TCGArg arg0,
+static void tcg_out_setcond (TCGContext *s, TCGCond cond, TCGArg arg0,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     int crop, sh, arg;
@@ -1240,7 +1243,7 @@
         );
 }
 
-static void tcg_out_brcond (TCGContext *s, int cond,
+static void tcg_out_brcond (TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index)
 {
@@ -1288,7 +1291,7 @@
     flush_icache_range(jmp_addr, jmp_addr + patch_size);
 }
 
-static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args)
 {
     switch (opc) {
@@ -1478,6 +1481,15 @@
     case INDEX_op_orc_i32:
         tcg_out32 (s, ORC | SAB (args[1], args[0], args[2]));
         break;
+    case INDEX_op_eqv_i32:
+        tcg_out32 (s, EQV | SAB (args[1], args[0], args[2]));
+        break;
+    case INDEX_op_nand_i32:
+        tcg_out32 (s, NAND | SAB (args[1], args[0], args[2]));
+        break;
+    case INDEX_op_nor_i32:
+        tcg_out32 (s, NOR | SAB (args[1], args[0], args[2]));
+        break;
 
     case INDEX_op_mul_i32:
         if (const_args[2]) {
@@ -1517,7 +1529,7 @@
         if (args[0] == args[2] || args[0] == args[3]) {
             tcg_out32 (s, MULLW | TAB (0, args[2], args[3]));
             tcg_out32 (s, MULHWU | TAB (args[1], args[2], args[3]));
-            tcg_out_mov (s, args[0], 0);
+            tcg_out_mov (s, TCG_TYPE_I32, args[0], 0);
         }
         else {
             tcg_out32 (s, MULLW | TAB (args[0], args[2], args[3]));
@@ -1575,7 +1587,7 @@
     case INDEX_op_rotr_i32:
         if (const_args[2]) {
             if (!args[2]) {
-                tcg_out_mov (s, args[0], args[1]);
+                tcg_out_mov (s, TCG_TYPE_I32, args[0], args[1]);
             }
             else {
                 tcg_out32 (s, RLWINM
@@ -1588,7 +1600,7 @@
             }
         }
         else {
-            tcg_out32 (s, ADDI | RT (0) | RA (args[2]) | 0xffe0);
+            tcg_out32 (s, SUBFIC | RT (0) | RA (args[2]) | 32);
             tcg_out32 (s, RLWNM
                        | RA (args[0])
                        | RS (args[1])
@@ -1603,7 +1615,7 @@
         if (args[0] == args[3] || args[0] == args[5]) {
             tcg_out32 (s, ADDC | TAB (0, args[2], args[4]));
             tcg_out32 (s, ADDE | TAB (args[1], args[3], args[5]));
-            tcg_out_mov (s, args[0], 0);
+            tcg_out_mov (s, TCG_TYPE_I32, args[0], 0);
         }
         else {
             tcg_out32 (s, ADDC | TAB (args[0], args[2], args[4]));
@@ -1614,7 +1626,7 @@
         if (args[0] == args[3] || args[0] == args[5]) {
             tcg_out32 (s, SUBFC | TAB (0, args[4], args[2]));
             tcg_out32 (s, SUBFE | TAB (args[1], args[5], args[3]));
-            tcg_out_mov (s, args[0], 0);
+            tcg_out_mov (s, TCG_TYPE_I32, args[0], 0);
         }
         else {
             tcg_out32 (s, SUBFC | TAB (args[0], args[4], args[2]));
@@ -1641,7 +1653,7 @@
         break;
 
     case INDEX_op_not_i32:
-        tcg_out32 (s, NOR | SAB (args[1], args[0], args[0]));
+        tcg_out32 (s, NOR | SAB (args[1], args[0], args[1]));
         break;
 
     case INDEX_op_qemu_ld8u:
@@ -1656,7 +1668,7 @@
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
-    case INDEX_op_qemu_ld32u:
+    case INDEX_op_qemu_ld32:
         tcg_out_qemu_ld(s, args, 2);
         break;
     case INDEX_op_qemu_ld64:
@@ -1707,6 +1719,77 @@
         tcg_out_setcond2 (s, args, const_args);
         break;
 
+    case INDEX_op_bswap16_i32:
+        /* Stolen from gcc's builtin_bswap16 */
+
+        /* a1 = abcd */
+
+        /* r0 = (a1 << 8) & 0xff00 # 00d0 */
+        tcg_out32 (s, RLWINM
+                   | RA (0)
+                   | RS (args[1])
+                   | SH (8)
+                   | MB (16)
+                   | ME (23)
+            );
+
+        /* a0 = rotate_left (a1, 24) & 0xff # 000c */
+        tcg_out32 (s, RLWINM
+                   | RA (args[0])
+                   | RS (args[1])
+                   | SH (24)
+                   | MB (24)
+                   | ME (31)
+            );
+
+        /* a0 = a0 | r0 # 00dc */
+        tcg_out32 (s, OR | SAB (0, args[0], args[0]));
+        break;
+
+    case INDEX_op_bswap32_i32:
+        /* Stolen from gcc's builtin_bswap32 */
+        {
+            int a0 = args[0];
+
+            /* a1 = args[1] # abcd */
+
+            if (a0 == args[1]) {
+                a0 = 0;
+            }
+
+            /* a0 = rotate_left (a1, 8) # bcda */
+            tcg_out32 (s, RLWINM
+                       | RA (a0)
+                       | RS (args[1])
+                       | SH (8)
+                       | MB (0)
+                       | ME (31)
+                );
+
+            /* a0 = (a0 & ~0xff000000) | ((a1 << 24) & 0xff000000) # dcda */
+            tcg_out32 (s, RLWIMI
+                       | RA (a0)
+                       | RS (args[1])
+                       | SH (24)
+                       | MB (0)
+                       | ME (7)
+                );
+
+            /* a0 = (a0 & ~0x0000ff00) | ((a1 << 24) & 0x0000ff00) # dcba */
+            tcg_out32 (s, RLWIMI
+                       | RA (a0)
+                       | RS (args[1])
+                       | SH (24)
+                       | MB (16)
+                       | ME (23)
+                );
+
+            if (!a0) {
+                tcg_out_mov (s, TCG_TYPE_I32, args[0], a0);
+            }
+        }
+        break;
+
     default:
         tcg_dump_ops (s, stderr);
         tcg_abort ();
@@ -1761,16 +1844,22 @@
 
     { INDEX_op_andc_i32, { "r", "r", "r" } },
     { INDEX_op_orc_i32, { "r", "r", "r" } },
+    { INDEX_op_eqv_i32, { "r", "r", "r" } },
+    { INDEX_op_nand_i32, { "r", "r", "r" } },
+    { INDEX_op_nor_i32, { "r", "r", "r" } },
 
     { INDEX_op_setcond_i32, { "r", "r", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
 
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
     { INDEX_op_qemu_ld64, { "r", "r", "L" } },
 
     { INDEX_op_qemu_st8, { "K", "K" } },
@@ -1782,7 +1871,7 @@
     { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
     { INDEX_op_qemu_ld64, { "r", "L", "L", "L" } },
 
     { INDEX_op_qemu_st8, { "K", "K", "K" } },
@@ -1799,7 +1888,7 @@
     { -1 },
 };
 
-void tcg_target_init(TCGContext *s)
+static void tcg_target_init(TCGContext *s)
 {
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
     tcg_regset_set32(tcg_target_call_clobber_regs, 0,
@@ -1828,9 +1917,6 @@
 #ifdef _CALL_SYSV
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13);
 #endif
-#ifdef CONFIG_USE_GUEST_BASE
-    tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
-#endif
 
     tcg_add_target_add_op_defs(ppc_op_defs);
 }
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 0c71a11..a1f8599 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -65,7 +65,7 @@
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_R1
 #define TCG_TARGET_STACK_ALIGN 16
-#if defined _CALL_DARWIN
+#if defined _CALL_DARWIN || defined __APPLE__
 #define TCG_TARGET_CALL_STACK_OFFSET 24
 #elif defined _CALL_AIX
 #define TCG_TARGET_CALL_STACK_OFFSET 52
@@ -83,15 +83,16 @@
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_ext8u_i32
 #define TCG_TARGET_HAS_ext16u_i32
-/* #define TCG_TARGET_HAS_bswap16_i32 */
-/* #define TCG_TARGET_HAS_bswap32_i32 */
+#define TCG_TARGET_HAS_bswap16_i32
+#define TCG_TARGET_HAS_bswap32_i32
 #define TCG_TARGET_HAS_not_i32
 #define TCG_TARGET_HAS_neg_i32
 #define TCG_TARGET_HAS_andc_i32
 #define TCG_TARGET_HAS_orc_i32
+#define TCG_TARGET_HAS_eqv_i32
+#define TCG_TARGET_HAS_nand_i32
+#define TCG_TARGET_HAS_nor_i32
 
 #define TCG_AREG0 TCG_REG_R27
-#define TCG_AREG1 TCG_REG_R24
-#define TCG_AREG2 TCG_REG_R25
 
 #define TCG_TARGET_HAS_GUEST_BASE
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index fed179c..ebbee34 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -28,12 +28,6 @@
 
 #define FAST_PATH
 
-#if TARGET_PHYS_ADDR_BITS == 32
-#define LD_ADDEND LWZ
-#else
-#define LD_ADDEND LD
-#endif
-
 #if TARGET_LONG_BITS == 32
 #define LD_ADDR LWZU
 #define CMP_L 0
@@ -56,7 +50,7 @@
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "r0",
     "r1",
-    "rp",
+    "r2",
     "r3",
     "r4",
     "r5",
@@ -441,7 +435,7 @@
     [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
 };
 
-static void tcg_out_mov (TCGContext *s, int ret, int arg)
+static void tcg_out_mov (TCGContext *s, TCGType type, int ret, int arg)
 {
     tcg_out32 (s, OR | SAB (arg, ret, arg));
 }
@@ -650,7 +644,7 @@
 #endif
 
     /* slow path */
-    tcg_out_mov (s, 3, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);
 
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
@@ -670,7 +664,7 @@
     case 2:
     case 3:
         if (data_reg != 3)
-            tcg_out_mov (s, data_reg, 3);
+            tcg_out_mov (s, TCG_TYPE_I64, data_reg, 3);
         break;
     }
     label2_ptr = s->code_ptr;
@@ -682,7 +676,7 @@
 #endif
 
     /* r0 now contains &env->tlb_table[mem_index][index].addr_read */
-    tcg_out32 (s, (LD_ADDEND
+    tcg_out32 (s, (LD
                    | RT (r0)
                    | RA (r0)
                    | (offsetof (CPUTLBEntry, addend)
@@ -796,7 +790,7 @@
 #endif
 
     /* slow path */
-    tcg_out_mov (s, 3, addr_reg);
+    tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);
 
@@ -810,7 +804,7 @@
     reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
 #endif
 
-    tcg_out32 (s, (LD_ADDEND
+    tcg_out32 (s, (LD
                    | RT (r0)
                    | RA (r0)
                    | (offsetof (CPUTLBEntry, addend)
@@ -866,7 +860,7 @@
 #endif
 }
 
-void tcg_target_qemu_prologue (TCGContext *s)
+static void tcg_target_qemu_prologue (TCGContext *s)
 {
     int i, frame_size;
 #ifndef __APPLE__
@@ -905,7 +899,10 @@
     tcg_out32 (s, STD | RS (0) | RA (1) | (frame_size + 16));
 
 #ifdef CONFIG_USE_GUEST_BASE
-    tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE);
+    if (GUEST_BASE) {
+        tcg_out_movi (s, TCG_TYPE_I64, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
 #endif
 
     tcg_out32 (s, MTSPR | RS (3) | CTR);
@@ -1049,8 +1046,9 @@
 
 }
 
-static void tcg_out_setcond (TCGContext *s, TCGType type, int cond, TCGArg arg0,
-                             TCGArg arg1, TCGArg arg2, int const_arg2)
+static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
+                             TCGArg arg0, TCGArg arg1, TCGArg arg2,
+                             int const_arg2)
 {
     int crop, sh, arg;
 
@@ -1180,7 +1178,7 @@
     }
 }
 
-static void tcg_out_brcond (TCGContext *s, int cond,
+static void tcg_out_brcond (TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int arch64)
 {
@@ -1199,7 +1197,7 @@
     flush_icache_range (jmp_addr, jmp_addr + patch_size);
 }
 
-static void tcg_out_op (TCGContext *s, int opc, const TCGArg *args,
+static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
                         const int *const_args)
 {
     int c;
@@ -1517,6 +1515,7 @@
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld (s, args, 1 | 4);
         break;
+    case INDEX_op_qemu_ld32:
     case INDEX_op_qemu_ld32u:
         tcg_out_qemu_ld (s, args, 2);
         break;
@@ -1645,6 +1644,7 @@
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
     { INDEX_op_qemu_ld32u, { "r", "L" } },
     { INDEX_op_qemu_ld32s, { "r", "L" } },
     { INDEX_op_qemu_ld64, { "r", "L" } },
@@ -1666,7 +1666,7 @@
     { -1 },
 };
 
-void tcg_target_init (TCGContext *s)
+static void tcg_target_init (TCGContext *s)
 {
     tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
     tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
@@ -1695,9 +1695,5 @@
 #endif
     tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13);
 
-#ifdef CONFIG_USE_GUEST_BASE
-    tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
-#endif
-
     tcg_add_target_add_op_defs (ppc_op_defs);
 }
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index f5de642..8a6db11 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -80,6 +80,9 @@
 #define TCG_TARGET_HAS_neg_i32
 /* #define TCG_TARGET_HAS_andc_i32 */
 /* #define TCG_TARGET_HAS_orc_i32 */
+/* #define TCG_TARGET_HAS_eqv_i32 */
+/* #define TCG_TARGET_HAS_nand_i32 */
+/* #define TCG_TARGET_HAS_nor_i32 */
 
 #define TCG_TARGET_HAS_div_i64
 /* #define TCG_TARGET_HAS_rot_i64 */
@@ -96,9 +99,11 @@
 #define TCG_TARGET_HAS_neg_i64
 /* #define TCG_TARGET_HAS_andc_i64 */
 /* #define TCG_TARGET_HAS_orc_i64 */
+/* #define TCG_TARGET_HAS_eqv_i64 */
+/* #define TCG_TARGET_HAS_nand_i64 */
+/* #define TCG_TARGET_HAS_nor_i64 */
 
 #define TCG_AREG0 TCG_REG_R27
-#define TCG_AREG1 TCG_REG_R24
-#define TCG_AREG2 TCG_REG_R25
 
 #define TCG_TARGET_HAS_GUEST_BASE
+#define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index d4ddaa7..5f1353a 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -304,7 +304,7 @@
               | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
 }
 
-static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
+static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
 {
     tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
 }
@@ -520,7 +520,7 @@
     tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
 }
 
-static void tcg_out_brcond_i32(TCGContext *s, int cond,
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
                                TCGArg arg1, TCGArg arg2, int const_arg2,
                                int label_index)
 {
@@ -530,7 +530,7 @@
 }
 
 #if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond_i64(TCGContext *s, int cond,
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
                                TCGArg arg1, TCGArg arg2, int const_arg2,
                                int label_index)
 {
@@ -539,7 +539,7 @@
     tcg_out_nop(s);
 }
 #else
-static void tcg_out_brcond2_i32(TCGContext *s, int cond,
+static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
                                 TCGArg al, TCGArg ah,
                                 TCGArg bl, int blconst,
                                 TCGArg bh, int bhconst, int label_dest)
@@ -587,7 +587,7 @@
 }
 #endif
 
-static void tcg_out_setcond_i32(TCGContext *s, int cond, TCGArg ret,
+static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
 {
     TCGArg t;
@@ -643,7 +643,7 @@
 }
 
 #if TCG_TARGET_REG_BITS == 64
-static void tcg_out_setcond_i64(TCGContext *s, int cond, TCGArg ret,
+static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
                                 TCGArg c1, TCGArg c2, int c2const)
 {
     tcg_out_cmp(s, c1, c2, c2const);
@@ -653,7 +653,7 @@
                | MOVCC_XCC | INSN_IMM11(1));
 }
 #else
-static void tcg_out_setcond2_i32(TCGContext *s, int cond, TCGArg ret,
+static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
                                  TCGArg al, TCGArg ah,
                                  TCGArg bl, int blconst,
                                  TCGArg bh, int bhconst)
@@ -691,7 +691,7 @@
 #endif
 
 /* Generate global QEMU prologue and epilogue code */
-void tcg_target_qemu_prologue(TCGContext *s)
+static void tcg_target_qemu_prologue(TCGContext *s)
 {
     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
               INSN_IMM13(-TCG_TARGET_STACK_MINFRAME));
@@ -725,11 +725,13 @@
 #define TARGET_LD_OP LDX
 #endif
 
-#if TARGET_PHYS_ADDR_BITS == 32
+#if defined(CONFIG_SOFTMMU)
+#if HOST_LONG_BITS == 32
 #define TARGET_ADDEND_LD_OP LDUW
 #else
 #define TARGET_ADDEND_LD_OP LDX
 #endif
+#endif
 
 #ifdef __arch64__
 #define HOST_LD_OP LDX
@@ -793,7 +795,7 @@
     tcg_out32(s, 0);
 
     /* mov (delay slot) */
-    tcg_out_mov(s, arg0, addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
@@ -843,7 +845,7 @@
     case 3:
     default:
         /* mov */
-        tcg_out_mov(s, data_reg, arg0);
+        tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
         break;
     }
 
@@ -1005,10 +1007,10 @@
     tcg_out32(s, 0);
 
     /* mov (delay slot) */
-    tcg_out_mov(s, arg0, addr_reg);
+    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
 
     /* mov */
-    tcg_out_mov(s, arg1, data_reg);
+    tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
@@ -1111,7 +1113,7 @@
 #endif
 }
 
-static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                               const int *const_args)
 {
     int c;
@@ -1316,7 +1318,10 @@
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
+    case INDEX_op_qemu_ld32:
+#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_qemu_ld32u:
+#endif
         tcg_out_qemu_ld(s, args, 2);
         break;
 #if TCG_TARGET_REG_BITS == 64
@@ -1472,8 +1477,9 @@
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
     { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
 #if TCG_TARGET_REG_BITS == 64
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
     { INDEX_op_qemu_ld32s, { "r", "L" } },
 #endif
 
@@ -1527,7 +1533,7 @@
     { -1 },
 };
 
-void tcg_target_init(TCGContext *s)
+static void tcg_target_init(TCGContext *s)
 {
     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
 #if TCG_TARGET_REG_BITS == 64
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index dbc574d..df0785e 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -87,6 +87,10 @@
 #define TCG_TARGET_STACK_ALIGN 8
 #endif
 
+#ifdef __arch64__
+#define TCG_TARGET_EXTEND_ARGS 1
+#endif
+
 /* optional instructions */
 #define TCG_TARGET_HAS_div_i32
 // #define TCG_TARGET_HAS_rot_i32
@@ -100,6 +104,9 @@
 #define TCG_TARGET_HAS_not_i32
 #define TCG_TARGET_HAS_andc_i32
 #define TCG_TARGET_HAS_orc_i32
+// #define TCG_TARGET_HAS_eqv_i32
+// #define TCG_TARGET_HAS_nand_i32
+// #define TCG_TARGET_HAS_nor_i32
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64
@@ -117,21 +124,18 @@
 #define TCG_TARGET_HAS_not_i64
 #define TCG_TARGET_HAS_andc_i64
 #define TCG_TARGET_HAS_orc_i64
+// #define TCG_TARGET_HAS_eqv_i64
+// #define TCG_TARGET_HAS_nand_i64
+// #define TCG_TARGET_HAS_nor_i64
 #endif
 
-/* Note: must be synced with dyngen-exec.h and Makefile.target */
+/* Note: must be synced with dyngen-exec.h */
 #ifdef CONFIG_SOLARIS
 #define TCG_AREG0 TCG_REG_G2
-#define TCG_AREG1 TCG_REG_G3
-#define TCG_AREG2 TCG_REG_G4
 #elif defined(__sparc_v9__)
 #define TCG_AREG0 TCG_REG_G5
-#define TCG_AREG1 TCG_REG_G6
-#define TCG_AREG2 TCG_REG_G7
 #else
 #define TCG_AREG0 TCG_REG_G6
-#define TCG_AREG1 TCG_REG_G1
-#define TCG_AREG2 TCG_REG_G2
 #endif
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6ae1760..207a89f 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,60 +25,60 @@
 
 int gen_new_label(void);
 
-static inline void tcg_gen_op1_i32(int opc, TCGv_i32 arg1)
+static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
 }
 
-static inline void tcg_gen_op1_i64(int opc, TCGv_i64 arg1)
+static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
 }
 
-static inline void tcg_gen_op1i(int opc, TCGArg arg1)
+static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = arg1;
 }
 
-static inline void tcg_gen_op2_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2)
+static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
     *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
 }
 
-static inline void tcg_gen_op2_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2)
+static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
     *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
 }
 
-static inline void tcg_gen_op2i_i32(int opc, TCGv_i32 arg1, TCGArg arg2)
+static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
     *gen_opparam_ptr++ = arg2;
 }
 
-static inline void tcg_gen_op2i_i64(int opc, TCGv_i64 arg1, TCGArg arg2)
+static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
     *gen_opparam_ptr++ = arg2;
 }
 
-static inline void tcg_gen_op2ii(int opc, TCGArg arg1, TCGArg arg2)
+static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = arg1;
     *gen_opparam_ptr++ = arg2;
 }
 
-static inline void tcg_gen_op3_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3)
 {
     *gen_opc_ptr++ = opc;
@@ -87,7 +87,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
 }
 
-static inline void tcg_gen_op3_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3)
 {
     *gen_opc_ptr++ = opc;
@@ -96,8 +96,8 @@
     *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
 }
 
-static inline void tcg_gen_op3i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
-                                    TCGArg arg3)
+static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1,
+                                    TCGv_i32 arg2, TCGArg arg3)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
@@ -105,8 +105,8 @@
     *gen_opparam_ptr++ = arg3;
 }
 
-static inline void tcg_gen_op3i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
-                                    TCGArg arg3)
+static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1,
+                                    TCGv_i64 arg2, TCGArg arg3)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
@@ -114,8 +114,8 @@
     *gen_opparam_ptr++ = arg3;
 }
 
-static inline void tcg_gen_ldst_op_i32(int opc, TCGv_i32 val, TCGv_ptr base,
-                                       TCGArg offset)
+static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
+                                       TCGv_ptr base, TCGArg offset)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(val);
@@ -123,8 +123,8 @@
     *gen_opparam_ptr++ = offset;
 }
 
-static inline void tcg_gen_ldst_op_i64(int opc, TCGv_i64 val, TCGv_ptr base,
-                                       TCGArg offset)
+static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
+                                       TCGv_ptr base, TCGArg offset)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(val);
@@ -132,8 +132,8 @@
     *gen_opparam_ptr++ = offset;
 }
 
-static inline void tcg_gen_qemu_ldst_op_i64_i32(int opc, TCGv_i64 val, TCGv_i32 addr,
-                                                TCGArg mem_index)
+static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val,
+                                                TCGv_i32 addr, TCGArg mem_index)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(val);
@@ -141,8 +141,8 @@
     *gen_opparam_ptr++ = mem_index;
 }
 
-static inline void tcg_gen_qemu_ldst_op_i64_i64(int opc, TCGv_i64 val, TCGv_i64 addr,
-                                                TCGArg mem_index)
+static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val,
+                                                TCGv_i64 addr, TCGArg mem_index)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(val);
@@ -150,7 +150,7 @@
     *gen_opparam_ptr++ = mem_index;
 }
 
-static inline void tcg_gen_op4_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -160,7 +160,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
 }
 
-static inline void tcg_gen_op4_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -170,7 +170,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
 }
 
-static inline void tcg_gen_op4i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGArg arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -180,7 +180,7 @@
     *gen_opparam_ptr++ = arg4;
 }
 
-static inline void tcg_gen_op4i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGArg arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -190,7 +190,7 @@
     *gen_opparam_ptr++ = arg4;
 }
 
-static inline void tcg_gen_op4ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                      TCGArg arg3, TCGArg arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -200,7 +200,7 @@
     *gen_opparam_ptr++ = arg4;
 }
 
-static inline void tcg_gen_op4ii_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                      TCGArg arg3, TCGArg arg4)
 {
     *gen_opc_ptr++ = opc;
@@ -210,7 +210,7 @@
     *gen_opparam_ptr++ = arg4;
 }
 
-static inline void tcg_gen_op5_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5)
 {
     *gen_opc_ptr++ = opc;
@@ -221,7 +221,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
 }
 
-static inline void tcg_gen_op5_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5)
 {
     *gen_opc_ptr++ = opc;
@@ -232,7 +232,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
 }
 
-static inline void tcg_gen_op5i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5)
 {
     *gen_opc_ptr++ = opc;
@@ -243,7 +243,7 @@
     *gen_opparam_ptr++ = arg5;
 }
 
-static inline void tcg_gen_op5i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5)
 {
     *gen_opc_ptr++ = opc;
@@ -254,7 +254,31 @@
     *gen_opparam_ptr++ = arg5;
 }
 
-static inline void tcg_gen_op6_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 arg1,
+                                     TCGv_i32 arg2, TCGv_i32 arg3,
+                                     TCGArg arg4, TCGArg arg5)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+    *gen_opparam_ptr++ = arg4;
+    *gen_opparam_ptr++ = arg5;
+}
+
+static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 arg1,
+                                     TCGv_i64 arg2, TCGv_i64 arg3,
+                                     TCGArg arg4, TCGArg arg5)
+{
+    *gen_opc_ptr++ = opc;
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+    *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+    *gen_opparam_ptr++ = arg4;
+    *gen_opparam_ptr++ = arg5;
+}
+
+static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                    TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5,
                                    TCGv_i32 arg6)
 {
@@ -267,7 +291,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I32(arg6);
 }
 
-static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                    TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5,
                                    TCGv_i64 arg6)
 {
@@ -280,7 +304,7 @@
     *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
 }
 
-static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
                                     TCGv_i32 arg3, TCGv_i32 arg4,
                                     TCGv_i32 arg5, TCGArg arg6)
 {
@@ -293,7 +317,7 @@
     *gen_opparam_ptr++ = arg6;
 }
 
-static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
                                     TCGv_i64 arg3, TCGv_i64 arg4,
                                     TCGv_i64 arg5, TCGArg arg6)
 {
@@ -306,9 +330,9 @@
     *gen_opparam_ptr++ = arg6;
 }
 
-static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
-                                     TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
-                                     TCGArg arg6)
+static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1,
+                                     TCGv_i32 arg2, TCGv_i32 arg3,
+                                     TCGv_i32 arg4, TCGArg arg5, TCGArg arg6)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
@@ -319,9 +343,9 @@
     *gen_opparam_ptr++ = arg6;
 }
 
-static inline void tcg_gen_op6ii_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
-                                     TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5,
-                                     TCGArg arg6)
+static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1,
+                                     TCGv_i64 arg2, TCGv_i64 arg3,
+                                     TCGv_i64 arg4, TCGArg arg5, TCGArg arg6)
 {
     *gen_opc_ptr++ = opc;
     *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
@@ -353,6 +377,13 @@
     tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg);
 }
 
+/* A version of dh_sizemask from def-helper.h that doesn't rely on
+   preprocessor magic.  */
+static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed)
+{
+    return (is_64bit << n*2) | (is_signed << (n*2 + 1));
+}
+
 /* helper calls */
 static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
                                    TCGArg ret, int nargs, TCGArg *args)
@@ -364,8 +395,25 @@
     tcg_temp_free_ptr(fn);
 }
 
-/* FIXME: Should this be pure?  */
-static inline void tcg_gen_helper64(void *func, TCGv_i64 ret,
+/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
+   reserved for helpers in tcg-runtime.c. These helpers are all const
+   and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
+   TCG_CALL_PURE. This may need to be adjusted if these functions
+   start to be used with other helpers. */
+static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
+                                    TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_ptr fn;
+    TCGArg args[2];
+    fn = tcg_const_ptr((tcg_target_long)func);
+    args[0] = GET_TCGV_I32(a);
+    args[1] = GET_TCGV_I32(b);
+    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
+                  GET_TCGV_I32(ret), 2, args);
+    tcg_temp_free_ptr(fn);
+}
+
+static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
                                     TCGv_i64 a, TCGv_i64 b)
 {
     TCGv_ptr fn;
@@ -373,7 +421,8 @@
     fn = tcg_const_ptr((tcg_target_long)func);
     args[0] = GET_TCGV_I64(a);
     args[1] = GET_TCGV_I64(b);
-    tcg_gen_callN(&tcg_ctx, fn, 0, 7, GET_TCGV_I64(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
+                  GET_TCGV_I64(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
@@ -575,28 +624,28 @@
     }
 }
 
-static inline void tcg_gen_brcond_i32(int cond, TCGv_i32 arg1, TCGv_i32 arg2,
-                                      int label_index)
+static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
+                                      TCGv_i32 arg2, int label_index)
 {
     tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
 }
 
-static inline void tcg_gen_brcondi_i32(int cond, TCGv_i32 arg1, int32_t arg2,
-                                       int label_index)
+static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
+                                       int32_t arg2, int label_index)
 {
     TCGv_i32 t0 = tcg_const_i32(arg2);
     tcg_gen_brcond_i32(cond, arg1, t0, label_index);
     tcg_temp_free_i32(t0);
 }
 
-static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 arg1, TCGv_i32 arg2)
 {
     tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
 }
 
-static inline void tcg_gen_setcondi_i32(int cond, TCGv_i32 ret, TCGv_i32 arg1,
-                                        int32_t arg2)
+static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+                                        TCGv_i32 arg1, int32_t arg2)
 {
     TCGv_i32 t0 = tcg_const_i32(arg2);
     tcg_gen_setcond_i32(cond, ret, arg1, t0);
@@ -635,7 +684,7 @@
 {
     tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
 }
-#else
+#elif defined(TCG_TARGET_HAS_div2_i32)
 static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
     TCGv_i32 t0;
@@ -671,6 +720,50 @@
     tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
     tcg_temp_free_i32(t0);
 }
+#else
+static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 32-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 0, 1);
+    sizemask |= tcg_gen_sizemask(1, 0, 1);
+    sizemask |= tcg_gen_sizemask(2, 0, 1);
+
+    tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 32-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 0, 1);
+    sizemask |= tcg_gen_sizemask(1, 0, 1);
+    sizemask |= tcg_gen_sizemask(2, 0, 1);
+
+    tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 32-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 0, 0);
+    sizemask |= tcg_gen_sizemask(1, 0, 0);
+    sizemask |= tcg_gen_sizemask(2, 0, 0);
+
+    tcg_gen_helper32(tcg_helper_divu_i32, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 32-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 0, 0);
+    sizemask |= tcg_gen_sizemask(1, 0, 0);
+    sizemask |= tcg_gen_sizemask(2, 0, 0);
+
+    tcg_gen_helper32(tcg_helper_remu_i32, sizemask, ret, arg1, arg2);
+}
 #endif
 
 #if TCG_TARGET_REG_BITS == 32
@@ -829,7 +922,13 @@
    specific code (x86) */
 static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_shl_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
@@ -839,7 +938,13 @@
 
 static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_shr_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
@@ -849,7 +954,13 @@
 
 static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_sar_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
@@ -857,15 +968,15 @@
     tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
 }
 
-static inline void tcg_gen_brcond_i64(int cond, TCGv_i64 arg1, TCGv_i64 arg2,
-                                      int label_index)
+static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
+                                      TCGv_i64 arg2, int label_index)
 {
     tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
                       TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                       TCGV_HIGH(arg2), cond, label_index);
 }
 
-static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
     tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
@@ -897,22 +1008,46 @@
 
 static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_div_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_rem_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_divu_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 0);
+    sizemask |= tcg_gen_sizemask(1, 1, 0);
+    sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+    tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
 }
 
 static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_helper64(tcg_helper_remu_i64, ret, arg1, arg2);
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 0);
+    sizemask |= tcg_gen_sizemask(1, 1, 0);
+    sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+    tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
 }
 
 #else
@@ -1098,13 +1233,13 @@
     }
 }
 
-static inline void tcg_gen_brcond_i64(int cond, TCGv_i64 arg1, TCGv_i64 arg2,
-                                      int label_index)
+static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
+                                      TCGv_i64 arg2, int label_index)
 {
     tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
 }
 
-static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
     tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
@@ -1135,7 +1270,7 @@
 {
     tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
 }
-#else
+#elif defined(TCG_TARGET_HAS_div2_i64)
 static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
     TCGv_i64 t0;
@@ -1171,6 +1306,50 @@
     tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
     tcg_temp_free_i64(t0);
 }
+#else
+static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and signed.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 1);
+    sizemask |= tcg_gen_sizemask(1, 1, 1);
+    sizemask |= tcg_gen_sizemask(2, 1, 1);
+
+    tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 0);
+    sizemask |= tcg_gen_sizemask(1, 1, 0);
+    sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+    tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2);
+}
+
+static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    int sizemask = 0;
+    /* Return value and both arguments are 64-bit and unsigned.  */
+    sizemask |= tcg_gen_sizemask(0, 1, 0);
+    sizemask |= tcg_gen_sizemask(1, 1, 0);
+    sizemask |= tcg_gen_sizemask(2, 1, 0);
+
+    tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2);
+}
 #endif
 
 #endif
@@ -1205,16 +1384,16 @@
         tcg_temp_free_i64(t0);
     }
 }
-static inline void tcg_gen_brcondi_i64(int cond, TCGv_i64 arg1, int64_t arg2,
-                                       int label_index)
+static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
+                                       int64_t arg2, int label_index)
 {
     TCGv_i64 t0 = tcg_const_i64(arg2);
     tcg_gen_brcond_i64(cond, arg1, t0, label_index);
     tcg_temp_free_i64(t0);
 }
 
-static inline void tcg_gen_setcondi_i64(int cond, TCGv_i64 ret, TCGv_i64 arg1,
-                                        int64_t arg2)
+static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+                                        TCGv_i64 arg1, int64_t arg2)
 {
     TCGv_i64 t0 = tcg_const_i64(arg2);
     tcg_gen_setcond_i64(cond, ret, arg1, t0);
@@ -1595,6 +1774,9 @@
 {
 #ifdef TCG_TARGET_HAS_not_i64
     tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32
+    tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+    tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
 #else
     tcg_gen_xori_i64(ret, arg, -1);
 #endif
@@ -1679,38 +1861,71 @@
 
 static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
+#ifdef TCG_TARGET_HAS_eqv_i32
+    tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+#else
     tcg_gen_xor_i32(ret, arg1, arg2);
     tcg_gen_not_i32(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
+#ifdef TCG_TARGET_HAS_eqv_i64
+    tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32
+    tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
     tcg_gen_xor_i64(ret, arg1, arg2);
     tcg_gen_not_i64(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
+#ifdef TCG_TARGET_HAS_nand_i32
+    tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+#else
     tcg_gen_and_i32(ret, arg1, arg2);
     tcg_gen_not_i32(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
+#ifdef TCG_TARGET_HAS_nand_i64
+    tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32
+    tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
     tcg_gen_and_i64(ret, arg1, arg2);
     tcg_gen_not_i64(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
+#ifdef TCG_TARGET_HAS_nor_i32
+    tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+#else
     tcg_gen_or_i32(ret, arg1, arg2);
     tcg_gen_not_i32(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
+#ifdef TCG_TARGET_HAS_nor_i64
+    tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32
+    tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+    tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+#else
     tcg_gen_or_i64(ret, arg1, arg2);
     tcg_gen_not_i64(ret, ret);
+#endif
 }
 
 static inline void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -1880,6 +2095,44 @@
     }
 }
 
+static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
+				       TCGv_i32 arg2, unsigned int ofs,
+				       unsigned int len)
+{
+#ifdef TCG_TARGET_HAS_deposit_i32
+  tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
+#else
+  uint32_t mask = (1u << len) - 1;
+  TCGv_i32 t1 = tcg_temp_new_i32 ();
+
+  tcg_gen_andi_i32(t1, arg2, mask);
+  tcg_gen_shli_i32(t1, t1, ofs);
+  tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+  tcg_gen_or_i32(ret, ret, t1);
+
+  tcg_temp_free_i32(t1);
+#endif
+}
+
+static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
+				       TCGv_i64 arg2, unsigned int ofs,
+				       unsigned int len)
+{
+#ifdef TCG_TARGET_HAS_deposit_i64
+  tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
+#else
+  uint64_t mask = (1ull << len) - 1;
+  TCGv_i64 t1 = tcg_temp_new_i64 ();
+
+  tcg_gen_andi_i64(t1, arg2, mask);
+  tcg_gen_shli_i64(t1, t1, ofs);
+  tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+  tcg_gen_or_i64(ret, ret, t1);
+
+  tcg_temp_free_i64(t1);
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -1981,9 +2234,9 @@
 static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
 {
 #if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32u, ret, addr, mem_index);
+    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
 #else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32u, TCGV_LOW(ret), TCGV_LOW(addr),
+    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
                      TCGV_HIGH(addr), mem_index);
     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
 #endif
@@ -1992,9 +2245,9 @@
 static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
 {
 #if TARGET_LONG_BITS == 32
-    tcg_gen_op3i_i32(INDEX_op_qemu_ld32u, ret, addr, mem_index);
+    tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index);
 #else
-    tcg_gen_op4i_i32(INDEX_op_qemu_ld32u, TCGV_LOW(ret), TCGV_LOW(addr),
+    tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr),
                      TCGV_HIGH(addr), mem_index);
     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
 #endif
@@ -2078,12 +2331,20 @@
 
 static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index)
 {
+#if TARGET_LONG_BITS == 32
+    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
     tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index);
+#endif
 }
 
 static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index)
 {
+#if TARGET_LONG_BITS == 32
+    tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index);
+#else
     tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index);
+#endif
 }
 
 static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index)
@@ -2117,7 +2378,6 @@
 #endif /* TCG_TARGET_REG_BITS != 32 */
 
 #if TARGET_LONG_BITS == 64
-#define TCG_TYPE_TL TCG_TYPE_I64
 #define tcg_gen_movi_tl tcg_gen_movi_i64
 #define tcg_gen_mov_tl tcg_gen_mov_i64
 #define tcg_gen_ld8u_tl tcg_gen_ld8u_i64
@@ -2186,10 +2446,10 @@
 #define tcg_gen_rotli_tl tcg_gen_rotli_i64
 #define tcg_gen_rotr_tl tcg_gen_rotr_i64
 #define tcg_gen_rotri_tl tcg_gen_rotri_i64
+#define tcg_gen_deposit_tl tcg_gen_deposit_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
 #else
-#define TCG_TYPE_TL TCG_TYPE_I32
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
 #define tcg_gen_ld8u_tl tcg_gen_ld8u_i32
@@ -2257,6 +2517,7 @@
 #define tcg_gen_rotli_tl tcg_gen_rotli_i32
 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
 #define tcg_gen_rotri_tl tcg_gen_rotri_i32
+#define tcg_gen_deposit_tl tcg_gen_deposit_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
 #endif
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 34cdba5..1b8a6e4 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -21,267 +21,295 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef DEF2
-#define DEF2(name, oargs, iargs, cargs, flags) DEF(name, oargs + iargs + cargs, 0)
-#endif
+
+/*
+ * DEF(name, oargs, iargs, cargs, flags)
+ */
 
 /* predefined ops */
-DEF2(end, 0, 0, 0, 0) /* must be kept first */
-DEF2(nop, 0, 0, 0, 0)
-DEF2(nop1, 0, 0, 1, 0)
-DEF2(nop2, 0, 0, 2, 0)
-DEF2(nop3, 0, 0, 3, 0)
-DEF2(nopn, 0, 0, 1, 0) /* variable number of parameters */
+DEF(end, 0, 0, 0, 0) /* must be kept first */
+DEF(nop, 0, 0, 0, 0)
+DEF(nop1, 0, 0, 1, 0)
+DEF(nop2, 0, 0, 2, 0)
+DEF(nop3, 0, 0, 3, 0)
+DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
 
-DEF2(discard, 1, 0, 0, 0)
+DEF(discard, 1, 0, 0, 0)
 
-DEF2(set_label, 0, 0, 1, 0)
-DEF2(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF2(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(set_label, 0, 0, 1, 0)
+DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
+DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
-DEF2(mov_i32, 1, 1, 0, 0)
-DEF2(movi_i32, 1, 0, 1, 0)
-DEF2(setcond_i32, 1, 2, 1, 0)
+DEF(mov_i32, 1, 1, 0, 0)
+DEF(movi_i32, 1, 0, 1, 0)
+DEF(setcond_i32, 1, 2, 1, 0)
 /* load/store */
-DEF2(ld8u_i32, 1, 1, 1, 0)
-DEF2(ld8s_i32, 1, 1, 1, 0)
-DEF2(ld16u_i32, 1, 1, 1, 0)
-DEF2(ld16s_i32, 1, 1, 1, 0)
-DEF2(ld_i32, 1, 1, 1, 0)
-DEF2(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF2(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF2(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(ld8u_i32, 1, 1, 1, 0)
+DEF(ld8s_i32, 1, 1, 1, 0)
+DEF(ld16u_i32, 1, 1, 1, 0)
+DEF(ld16s_i32, 1, 1, 1, 0)
+DEF(ld_i32, 1, 1, 1, 0)
+DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
 /* arith */
-DEF2(add_i32, 1, 2, 0, 0)
-DEF2(sub_i32, 1, 2, 0, 0)
-DEF2(mul_i32, 1, 2, 0, 0)
+DEF(add_i32, 1, 2, 0, 0)
+DEF(sub_i32, 1, 2, 0, 0)
+DEF(mul_i32, 1, 2, 0, 0)
 #ifdef TCG_TARGET_HAS_div_i32
-DEF2(div_i32, 1, 2, 0, 0)
-DEF2(divu_i32, 1, 2, 0, 0)
-DEF2(rem_i32, 1, 2, 0, 0)
-DEF2(remu_i32, 1, 2, 0, 0)
-#else
-DEF2(div2_i32, 2, 3, 0, 0)
-DEF2(divu2_i32, 2, 3, 0, 0)
+DEF(div_i32, 1, 2, 0, 0)
+DEF(divu_i32, 1, 2, 0, 0)
+DEF(rem_i32, 1, 2, 0, 0)
+DEF(remu_i32, 1, 2, 0, 0)
 #endif
-DEF2(and_i32, 1, 2, 0, 0)
-DEF2(or_i32, 1, 2, 0, 0)
-DEF2(xor_i32, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_div2_i32
+DEF(div2_i32, 2, 3, 0, 0)
+DEF(divu2_i32, 2, 3, 0, 0)
+#endif
+DEF(and_i32, 1, 2, 0, 0)
+DEF(or_i32, 1, 2, 0, 0)
+DEF(xor_i32, 1, 2, 0, 0)
 /* shifts/rotates */
-DEF2(shl_i32, 1, 2, 0, 0)
-DEF2(shr_i32, 1, 2, 0, 0)
-DEF2(sar_i32, 1, 2, 0, 0)
+DEF(shl_i32, 1, 2, 0, 0)
+DEF(shr_i32, 1, 2, 0, 0)
+DEF(sar_i32, 1, 2, 0, 0)
 #ifdef TCG_TARGET_HAS_rot_i32
-DEF2(rotl_i32, 1, 2, 0, 0)
-DEF2(rotr_i32, 1, 2, 0, 0)
+DEF(rotl_i32, 1, 2, 0, 0)
+DEF(rotr_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_deposit_i32
+DEF(deposit_i32, 1, 2, 2, 0)
 #endif
 
-DEF2(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #if TCG_TARGET_REG_BITS == 32
-DEF2(add2_i32, 2, 4, 0, 0)
-DEF2(sub2_i32, 2, 4, 0, 0)
-DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF2(mulu2_i32, 2, 2, 0, 0)
-DEF2(setcond2_i32, 1, 4, 1, 0)
+DEF(add2_i32, 2, 4, 0, 0)
+DEF(sub2_i32, 2, 4, 0, 0)
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(mulu2_i32, 2, 2, 0, 0)
+DEF(setcond2_i32, 1, 4, 1, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8s_i32
-DEF2(ext8s_i32, 1, 1, 0, 0)
+DEF(ext8s_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext16s_i32
-DEF2(ext16s_i32, 1, 1, 0, 0)
+DEF(ext16s_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8u_i32
-DEF2(ext8u_i32, 1, 1, 0, 0)
+DEF(ext8u_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext16u_i32
-DEF2(ext16u_i32, 1, 1, 0, 0)
+DEF(ext16u_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_bswap16_i32
-DEF2(bswap16_i32, 1, 1, 0, 0)
+DEF(bswap16_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_bswap32_i32
-DEF2(bswap32_i32, 1, 1, 0, 0)
+DEF(bswap32_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_not_i32
-DEF2(not_i32, 1, 1, 0, 0)
+DEF(not_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_neg_i32
-DEF2(neg_i32, 1, 1, 0, 0)
+DEF(neg_i32, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_andc_i32
-DEF2(andc_i32, 1, 2, 0, 0)
+DEF(andc_i32, 1, 2, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_orc_i32
-DEF2(orc_i32, 1, 2, 0, 0)
+DEF(orc_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_eqv_i32
+DEF(eqv_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nand_i32
+DEF(nand_i32, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nor_i32
+DEF(nor_i32, 1, 2, 0, 0)
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
-DEF2(mov_i64, 1, 1, 0, 0)
-DEF2(movi_i64, 1, 0, 1, 0)
-DEF2(setcond_i64, 1, 2, 1, 0)
+DEF(mov_i64, 1, 1, 0, 0)
+DEF(movi_i64, 1, 0, 1, 0)
+DEF(setcond_i64, 1, 2, 1, 0)
 /* load/store */
-DEF2(ld8u_i64, 1, 1, 1, 0)
-DEF2(ld8s_i64, 1, 1, 1, 0)
-DEF2(ld16u_i64, 1, 1, 1, 0)
-DEF2(ld16s_i64, 1, 1, 1, 0)
-DEF2(ld32u_i64, 1, 1, 1, 0)
-DEF2(ld32s_i64, 1, 1, 1, 0)
-DEF2(ld_i64, 1, 1, 1, 0)
-DEF2(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF2(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF2(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF2(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(ld8u_i64, 1, 1, 1, 0)
+DEF(ld8s_i64, 1, 1, 1, 0)
+DEF(ld16u_i64, 1, 1, 1, 0)
+DEF(ld16s_i64, 1, 1, 1, 0)
+DEF(ld32u_i64, 1, 1, 1, 0)
+DEF(ld32s_i64, 1, 1, 1, 0)
+DEF(ld_i64, 1, 1, 1, 0)
+DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
 /* arith */
-DEF2(add_i64, 1, 2, 0, 0)
-DEF2(sub_i64, 1, 2, 0, 0)
-DEF2(mul_i64, 1, 2, 0, 0)
+DEF(add_i64, 1, 2, 0, 0)
+DEF(sub_i64, 1, 2, 0, 0)
+DEF(mul_i64, 1, 2, 0, 0)
 #ifdef TCG_TARGET_HAS_div_i64
-DEF2(div_i64, 1, 2, 0, 0)
-DEF2(divu_i64, 1, 2, 0, 0)
-DEF2(rem_i64, 1, 2, 0, 0)
-DEF2(remu_i64, 1, 2, 0, 0)
-#else
-DEF2(div2_i64, 2, 3, 0, 0)
-DEF2(divu2_i64, 2, 3, 0, 0)
+DEF(div_i64, 1, 2, 0, 0)
+DEF(divu_i64, 1, 2, 0, 0)
+DEF(rem_i64, 1, 2, 0, 0)
+DEF(remu_i64, 1, 2, 0, 0)
 #endif
-DEF2(and_i64, 1, 2, 0, 0)
-DEF2(or_i64, 1, 2, 0, 0)
-DEF2(xor_i64, 1, 2, 0, 0)
+#ifdef TCG_TARGET_HAS_div2_i64
+DEF(div2_i64, 2, 3, 0, 0)
+DEF(divu2_i64, 2, 3, 0, 0)
+#endif
+DEF(and_i64, 1, 2, 0, 0)
+DEF(or_i64, 1, 2, 0, 0)
+DEF(xor_i64, 1, 2, 0, 0)
 /* shifts/rotates */
-DEF2(shl_i64, 1, 2, 0, 0)
-DEF2(shr_i64, 1, 2, 0, 0)
-DEF2(sar_i64, 1, 2, 0, 0)
+DEF(shl_i64, 1, 2, 0, 0)
+DEF(shr_i64, 1, 2, 0, 0)
+DEF(sar_i64, 1, 2, 0, 0)
 #ifdef TCG_TARGET_HAS_rot_i64
-DEF2(rotl_i64, 1, 2, 0, 0)
-DEF2(rotr_i64, 1, 2, 0, 0)
+DEF(rotl_i64, 1, 2, 0, 0)
+DEF(rotr_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_deposit_i64
+DEF(deposit_i64, 1, 2, 2, 0)
 #endif
 
-DEF2(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 #ifdef TCG_TARGET_HAS_ext8s_i64
-DEF2(ext8s_i64, 1, 1, 0, 0)
+DEF(ext8s_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext16s_i64
-DEF2(ext16s_i64, 1, 1, 0, 0)
+DEF(ext16s_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext32s_i64
-DEF2(ext32s_i64, 1, 1, 0, 0)
+DEF(ext32s_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext8u_i64
-DEF2(ext8u_i64, 1, 1, 0, 0)
+DEF(ext8u_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext16u_i64
-DEF2(ext16u_i64, 1, 1, 0, 0)
+DEF(ext16u_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_ext32u_i64
-DEF2(ext32u_i64, 1, 1, 0, 0)
+DEF(ext32u_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_bswap16_i64
-DEF2(bswap16_i64, 1, 1, 0, 0)
+DEF(bswap16_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_bswap32_i64
-DEF2(bswap32_i64, 1, 1, 0, 0)
+DEF(bswap32_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_bswap64_i64
-DEF2(bswap64_i64, 1, 1, 0, 0)
+DEF(bswap64_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_not_i64
-DEF2(not_i64, 1, 1, 0, 0)
+DEF(not_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_neg_i64
-DEF2(neg_i64, 1, 1, 0, 0)
+DEF(neg_i64, 1, 1, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_andc_i64
-DEF2(andc_i64, 1, 2, 0, 0)
+DEF(andc_i64, 1, 2, 0, 0)
 #endif
 #ifdef TCG_TARGET_HAS_orc_i64
-DEF2(orc_i64, 1, 2, 0, 0)
+DEF(orc_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_eqv_i64
+DEF(eqv_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nand_i64
+DEF(nand_i64, 1, 2, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_nor_i64
+DEF(nor_i64, 1, 2, 0, 0)
 #endif
 #endif
 
 /* QEMU specific */
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-DEF2(debug_insn_start, 0, 0, 2, 0)
+DEF(debug_insn_start, 0, 0, 2, 0)
 #else
-DEF2(debug_insn_start, 0, 0, 1, 0)
+DEF(debug_insn_start, 0, 0, 1, 0)
 #endif
-DEF2(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF2(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 /* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
    constants must be defined */
 #if TCG_TARGET_REG_BITS == 32
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld32u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld32s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 #if TARGET_LONG_BITS == 32
-DEF2(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #else
-DEF2(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 #endif
 
 #else /* TCG_TARGET_REG_BITS == 32 */
 
-DEF2(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
-DEF2(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
-DEF2(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
+DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
 #endif /* TCG_TARGET_REG_BITS != 32 */
 
-#undef DEF2
+#undef DEF
diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
index e750cc1..5615b13 100644
--- a/tcg/tcg-runtime.h
+++ b/tcg/tcg-runtime.h
@@ -2,6 +2,11 @@
 #define TCG_RUNTIME_H
 
 /* tcg-runtime.c */
+int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2);
+int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2);
+uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2);
+uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2);
+
 int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2);
 int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a57edb1..5882f00 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -63,15 +63,15 @@
 #error GUEST_BASE not supported on this host.
 #endif
 
+static void tcg_target_init(TCGContext *s);
+static void tcg_target_qemu_prologue(TCGContext *s);
 static void patch_reloc(uint8_t *code_ptr, int type, 
                         tcg_target_long value, tcg_target_long addend);
 
 static TCGOpDef tcg_op_defs[] = {
-#define DEF(s, n, copy_size) { #s, 0, 0, n, n, 0, copy_size },
-#define DEF2(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags, 0 },
+#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
 #include "tcg-opc.h"
 #undef DEF
-#undef DEF2
 };
 
 static TCGRegSet tcg_target_available_regs[2];
@@ -110,7 +110,7 @@
 
 /* label relocation processing */
 
-void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, 
+static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
                    int label_index, long addend)
 {
     TCGLabel *l;
@@ -250,7 +250,10 @@
     }
     
     tcg_target_init(s);
+}
 
+void tcg_prologue_init(TCGContext *s)
+{
     /* init global prologue and epilogue */
     s->code_buf = code_gen_prologue;
     s->code_ptr = s->code_buf;
@@ -457,6 +460,10 @@
             s->nb_temps++;
         }
     }
+
+#if defined(CONFIG_DEBUG_TCG)
+    s->temps_in_use++;
+#endif
     return idx;
 }
 
@@ -482,6 +489,13 @@
     TCGTemp *ts;
     int k;
 
+#if defined(CONFIG_DEBUG_TCG)
+    s->temps_in_use--;
+    if (s->temps_in_use < 0) {
+        fprintf(stderr, "More temporaries freed than allocated!\n");
+    }
+#endif
+
     assert(idx >= s->nb_globals && idx < s->nb_temps);
     ts = &s->temps[idx];
     assert(ts->temp_allocated != 0);
@@ -535,6 +549,27 @@
     return t0;
 }
 
+#if defined(CONFIG_DEBUG_TCG)
+void tcg_clear_temp_count(void)
+{
+    TCGContext *s = &tcg_ctx;
+    s->temps_in_use = 0;
+}
+
+int tcg_check_temp_count(void)
+{
+    TCGContext *s = &tcg_ctx;
+    if (s->temps_in_use) {
+        /* Clear the count so that we don't give another
+         * warning immediately next time around.
+         */
+        s->temps_in_use = 0;
+        return 1;
+    }
+    return 0;
+}
+#endif
+
 void tcg_register_helper(void *func, const char *name)
 {
     TCGContext *s = &tcg_ctx;
@@ -560,14 +595,36 @@
 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
 {
+#ifdef TCG_TARGET_I386
     int call_type;
+#endif
     int i;
     int real_args;
     int nb_rets;
     TCGArg *nparam;
+
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+    for (i = 0; i < nargs; ++i) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        int is_signed = sizemask & (2 << (i+1)*2);
+        if (!is_64bit) {
+            TCGv_i64 temp = tcg_temp_new_i64();
+            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+            if (is_signed) {
+                tcg_gen_ext32s_i64(temp, orig);
+            } else {
+                tcg_gen_ext32u_i64(temp, orig);
+            }
+            args[i] = GET_TCGV_I64(temp);
+        }
+    }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
     *gen_opc_ptr++ = INDEX_op_call;
     nparam = gen_opparam_ptr++;
+#ifdef TCG_TARGET_I386
     call_type = (flags & TCG_CALL_TYPE_MASK);
+#endif
     if (ret != TCG_CALL_DUMMY_ARG) {
 #if TCG_TARGET_REG_BITS < 64
         if (sizemask & 1) {
@@ -591,7 +648,8 @@
     real_args = 0;
     for (i = 0; i < nargs; i++) {
 #if TCG_TARGET_REG_BITS < 64
-        if (sizemask & (2 << i)) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        if (is_64bit) {
 #ifdef TCG_TARGET_I386
             /* REGPARM case: if the third parameter is 64 bit, it is
                allocated on the stack */
@@ -607,7 +665,17 @@
                 real_args++;
             }
 #endif
-#ifdef TCG_TARGET_WORDS_BIGENDIAN
+	    /* If stack grows up, then we will be placing successive
+	       arguments at lower addresses, which means we need to
+	       reverse the order compared to how we would normally
+	       treat either big or little-endian.  For those arguments
+	       that will wind up in registers, this still works for
+	       HPPA (the only current STACK_GROWSUP target) since the
+	       argument registers are *also* allocated in decreasing
+	       order.  If another such target is added, this logic may
+	       have to get more complicated to differentiate between
+	       stack arguments and register arguments.  */
+#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
             *gen_opparam_ptr++ = args[i] + 1;
             *gen_opparam_ptr++ = args[i];
 #else
@@ -615,13 +683,13 @@
             *gen_opparam_ptr++ = args[i] + 1;
 #endif
             real_args += 2;
-        } else
-#endif
-        {
+            continue;
+        }
+#endif /* TCG_TARGET_REG_BITS < 64 */
+
             *gen_opparam_ptr++ = args[i];
             real_args++;
         }
-    }
     *gen_opparam_ptr++ = GET_TCGV_PTR(func);
 
     *gen_opparam_ptr++ = flags;
@@ -630,6 +698,16 @@
 
     /* total parameters, needed to go backward in the instruction stream */
     *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+    for (i = 0; i < nargs; ++i) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        if (!is_64bit) {
+            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
+            tcg_temp_free_i64(temp);
+        }
+    }
+#endif /* TCG_TARGET_EXTEND_ARGS */
 }
 
 #if TCG_TARGET_REG_BITS == 32
@@ -681,6 +759,7 @@
 }
 #endif
 
+
 static void tcg_reg_alloc_start(TCGContext *s)
 {
     int i;
@@ -793,7 +872,8 @@
     const uint16_t *opc_ptr;
     const TCGArg *args;
     TCGArg arg;
-    int c, i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
+    TCGOpcode c;
+    int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
     const TCGOpDef *def;
     char buf[128];
 
@@ -980,16 +1060,16 @@
 
 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
 {
-    int op;
+    TCGOpcode op;
     TCGOpDef *def;
     const char *ct_str;
     int i, nb_args;
 
     for(;;) {
-        if (tdefs->op < 0)
+        if (tdefs->op == (TCGOpcode)-1)
             break;
         op = tdefs->op;
-        assert(op >= 0 && op < NB_OPS);
+        assert((unsigned)op < NB_OPS);
         def = &tcg_op_defs[op];
 #if defined(CONFIG_DEBUG_TCG)
         /* Duplicate entry in op definitions? */
@@ -1056,14 +1136,26 @@
     }
 
 #if defined(CONFIG_DEBUG_TCG)
+    i = 0;
     for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
         if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) {
             /* Wrong entry in op definitions? */
-            assert(!tcg_op_defs[op].used);
+            if (tcg_op_defs[op].used) {
+                fprintf(stderr, "Invalid op definition for %s\n",
+                        tcg_op_defs[op].name);
+                i = 1;
+            }
         } else {
             /* Missing entry in op definitions? */
-            assert(tcg_op_defs[op].used);
+            if (!tcg_op_defs[op].used) {
+                fprintf(stderr, "Missing op definition for %s\n",
+                        tcg_op_defs[op].name);
+                i = 1;
         }
+        }
+    }
+    if (i == 1) {
+        tcg_abort();
     }
 #endif
 }
@@ -1116,7 +1208,8 @@
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
 {
-    int i, op_index, op, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
+    int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
+    TCGOpcode op;
     TCGArg *args;
     const TCGOpDef *def;
     uint8_t *dead_temps;
@@ -1271,7 +1364,7 @@
 }
 #else
 /* dummy liveness analysis */
-void tcg_liveness_analysis(TCGContext *s)
+static void tcg_liveness_analysis(TCGContext *s)
 {
     int nb_ops;
     nb_ops = gen_opc_ptr - gen_opc_buf;
@@ -1532,7 +1625,7 @@
                 reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
             }
             if (ts->reg != reg) {
-                tcg_out_mov(s, reg, ts->reg);
+                tcg_out_mov(s, ots->type, reg, ts->reg);
             }
         }
     } else if (ts->val_type == TEMP_VAL_MEM) {
@@ -1564,7 +1657,7 @@
 }
 
 static void tcg_reg_alloc_op(TCGContext *s,
-                             const TCGOpDef *def, int opc,
+                             const TCGOpDef *def, TCGOpcode opc,
                              const TCGArg *args,
                              unsigned int dead_iargs)
 {
@@ -1637,7 +1730,7 @@
             /* allocate a new register matching the constraint
                and move the temporary register into it */
             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-            tcg_out_mov(s, reg, ts->reg);
+            tcg_out_mov(s, ts->type, reg, ts->reg);
         }
         new_args[i] = reg;
         const_args[i] = 0;
@@ -1719,7 +1812,7 @@
         ts = &s->temps[args[i]];
         reg = new_args[i];
         if (ts->fixed_reg && ts->reg != reg) {
-            tcg_out_mov(s, ts->reg, reg);
+            tcg_out_mov(s, ts->type, ts->reg, reg);
         }
     }
 }
@@ -1731,7 +1824,7 @@
 #endif
 
 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
-                              int opc, const TCGArg *args,
+                              TCGOpcode opc, const TCGArg *args,
                               unsigned int dead_iargs)
 {
     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
@@ -1805,7 +1898,7 @@
             tcg_reg_free(s, reg);
             if (ts->val_type == TEMP_VAL_REG) {
                 if (ts->reg != reg) {
-                    tcg_out_mov(s, reg, ts->reg);
+                    tcg_out_mov(s, ts->type, reg, ts->reg);
                 }
             } else if (ts->val_type == TEMP_VAL_MEM) {
                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
@@ -1834,7 +1927,7 @@
         reg = ts->reg;
         if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) {
             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-            tcg_out_mov(s, reg, ts->reg);
+            tcg_out_mov(s, ts->type, reg, ts->reg);
         }
         func_arg = reg;
         tcg_regset_set_reg(allocated_regs, reg);
@@ -1893,7 +1986,7 @@
         assert(s->reg_to_temp[reg] == -1);
         if (ts->fixed_reg) {
             if (ts->reg != reg) {
-                tcg_out_mov(s, ts->reg, reg);
+                tcg_out_mov(s, ts->type, ts->reg, reg);
             }
         } else {
             if (ts->val_type == TEMP_VAL_REG)
@@ -1912,7 +2005,7 @@
 
 static int64_t tcg_table_op_count[NB_OPS];
 
-void dump_op_count(void)
+static void dump_op_count(void)
 {
     int i;
     FILE *f;
@@ -1928,7 +2021,8 @@
 static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
                                       long search_pc)
 {
-    int opc, op_index;
+    TCGOpcode opc;
+    int op_index;
     const TCGOpDef *def;
     unsigned int dead_iargs;
     const TCGArg *args;
@@ -2109,8 +2203,7 @@
 }
 
 #ifdef CONFIG_PROFILER
-void tcg_dump_info(FILE *f,
-                   int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
     TCGContext *s = &tcg_ctx;
     int64_t tot;
@@ -2154,8 +2247,7 @@
     dump_op_count();
 }
 #else
-void tcg_dump_info(FILE *f,
-                   int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
     cpu_fprintf(f, "[TCG profiler not compiled]\n");
 }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index cf3a508..3fab8d6 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -47,12 +47,12 @@
 #error unsupported
 #endif
 
-enum {
-#define DEF(s, n, copy_size) INDEX_op_ ## s,
+typedef enum TCGOpcode {
+#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name,
 #include "tcg-opc.h"
 #undef DEF
     NB_OPS,
-};
+} TCGOpcode;
 
 #define tcg_regset_clear(d) (d) = 0
 #define tcg_regset_set(d, s) (d) = (s)
@@ -96,18 +96,30 @@
    this value, they are statically allocated in the TB stack frame */
 #define TCG_STATIC_CALL_ARGS_SIZE 128
 
-typedef int TCGType;
+typedef enum TCGType {
+    TCG_TYPE_I32,
+    TCG_TYPE_I64,
+    TCG_TYPE_COUNT, /* number of different types */
 
-#define TCG_TYPE_I32 0
-#define TCG_TYPE_I64 1
-#define TCG_TYPE_COUNT 2 /* number of different types */
-
+    /* An alias for the size of the host register.  */
 #if TCG_TARGET_REG_BITS == 32
-#define TCG_TYPE_PTR TCG_TYPE_I32
+    TCG_TYPE_REG = TCG_TYPE_I32,
 #else
-#define TCG_TYPE_PTR TCG_TYPE_I64
+    TCG_TYPE_REG = TCG_TYPE_I64,
 #endif
 
+    /* An alias for the size of the native pointer.  We don't currently
+       support any hosts with 64-bit registers and 32-bit pointers.  */
+    TCG_TYPE_PTR = TCG_TYPE_REG,
+
+    /* An alias for the size of the target "long", aka register.  */
+#if TARGET_LONG_BITS == 64
+    TCG_TYPE_TL = TCG_TYPE_I64,
+#else
+    TCG_TYPE_TL = TCG_TYPE_I32,
+#endif
+} TCGType;
+
 typedef tcg_target_ulong TCGArg;
 
 /* Define a type and accessor macros for varables.  Using a struct is
@@ -311,6 +323,10 @@
     int64_t restore_count;
     int64_t restore_time;
 #endif
+
+#ifdef CONFIG_DEBUG_TCG
+    int temps_in_use;
+#endif
 };
 
 extern TCGContext tcg_ctx;
@@ -341,6 +357,7 @@
 }
 
 void tcg_context_init(TCGContext *s);
+void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
 int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf);
@@ -379,8 +396,20 @@
 void tcg_temp_free_i64(TCGv_i64 arg);
 char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
 
-void tcg_dump_info(FILE *f,
-                   int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
+#if defined(CONFIG_DEBUG_TCG)
+/* If you call tcg_clear_temp_count() at the start of a section of
+ * code which is not supposed to leak any TCG temporaries, then
+ * calling tcg_check_temp_count() at the end of the section will
+ * return 1 if the section did in fact leak a temporary.
+ */
+void tcg_clear_temp_count(void);
+int tcg_check_temp_count(void);
+#else
+#define tcg_clear_temp_count() do { } while (0)
+#define tcg_check_temp_count() 0
+#endif
+
+void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf);
 
 #define TCG_CT_ALIAS  0x80
 #define TCG_CT_IALIAS 0x40
@@ -409,7 +438,6 @@
     const char *name;
     uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args;
     uint8_t flags;
-    uint16_t copy_size;
     TCGArgConstraint *args_ct;
     int *sorted_args;
 #if defined(CONFIG_DEBUG_TCG)
@@ -418,13 +446,10 @@
 } TCGOpDef;
         
 typedef struct TCGTargetOpDef {
-    int op;
+    TCGOpcode op;
     const char *args_ct_str[TCG_MAX_OP_ARGS];
 } TCGTargetOpDef;
 
-void tcg_target_init(TCGContext *s);
-void tcg_target_qemu_prologue(TCGContext *s);
-
 #define tcg_abort() \
 do {\
     fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\
@@ -472,9 +497,6 @@
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, 
-                   int label_index, long addend);
-
 extern uint8_t code_gen_prologue[];
 #if defined(_ARCH_PPC) && !defined(_ARCH_PPC64)
 #define tcg_qemu_tb_exec(tb_ptr) \
diff --git a/telephony/sysdeps_qemu.c b/telephony/sysdeps_qemu.c
index e464597..6aa5038 100644
--- a/telephony/sysdeps_qemu.c
+++ b/telephony/sysdeps_qemu.c
@@ -41,7 +41,7 @@
 SysTime
 sys_time_ms( void )
 {
-    return qemu_get_clock( rt_clock );
+    return qemu_get_clock_ms(rt_clock);
 }
 
 /** TIMERS
@@ -126,7 +126,7 @@
          qemu_free_timer( timer->timer );
     }
 
-    timer->timer    = qemu_new_timer( rt_clock, callback, opaque );
+    timer->timer    = qemu_new_timer_ms( rt_clock, callback, opaque );
     timer->callback = callback;
     timer->opaque   = opaque;
 
@@ -320,7 +320,7 @@
 
     channel->fd = socket_anyaddr_server( port, SOCKET_STREAM );
     if (channel->fd < 0) {
-        D( "%s: failed to created network socket on TCP:%d\n", 
+        D( "%s: failed to created network socket on TCP:%d\n",
             __FUNCTION__, port );
         sys_channel_free( channel );
         return NULL;
diff --git a/trace.c b/trace.c
index 9eb8f5f..caf5878 100644
--- a/trace.c
+++ b/trace.c
@@ -22,10 +22,106 @@
 #include <time.h>
 #include "cpu.h"
 #include "exec-all.h"
-#include "trace.h"
+#include "android-trace.h"
 #include "varint.h"
 #include "android/utils/path.h"
 
+// For tracing dynamic execution of basic blocks
+typedef struct TraceBB {
+    char        *filename;
+    FILE        *fstream;
+    BBRec       buffer[kMaxNumBasicBlocks];
+    BBRec       *next;          // points to next record in buffer
+    uint64_t    flush_time;     // time of last buffer flush
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    char        *high_water_ptr;
+    int64_t     prev_bb_num;
+    uint64_t    prev_bb_time;
+    uint64_t    current_bb_num;
+    uint64_t    current_bb_start_time;
+    uint64_t    recnum;         // counts number of trace records
+    uint32_t    current_bb_addr;
+    int         num_insns;
+} TraceBB;
+
+// For tracing simuation start times of instructions
+typedef struct TraceInsn {
+    char        *filename;
+    FILE        *fstream;
+    InsnRec     dummy;          // this is here so we can use buffer[-1]
+    InsnRec     buffer[kInsnBufferSize];
+    InsnRec     *current;
+    uint64_t    prev_time;      // time of last instruction start
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    char        *high_water_ptr;
+} TraceInsn;
+
+// For tracing the static information about a basic block
+typedef struct TraceStatic {
+    char        *filename;
+    FILE        *fstream;
+    uint32_t    insns[kMaxInsnPerBB];
+    int         next_insn;
+    uint64_t    bb_num;
+    uint32_t    bb_addr;
+    int         is_thumb;
+} TraceStatic;
+
+// For tracing load and store addresses
+typedef struct TraceAddr {
+    char        *filename;
+    FILE        *fstream;
+    AddrRec     buffer[kMaxNumAddrs];
+    AddrRec     *next;
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    char        *high_water_ptr;
+    uint32_t    prev_addr;
+    uint64_t    prev_time;
+} TraceAddr;
+
+// For tracing exceptions
+typedef struct TraceExc {
+    char        *filename;
+    FILE        *fstream;
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    char        *high_water_ptr;
+    uint64_t    prev_time;
+    uint64_t    prev_bb_recnum;
+} TraceExc;
+
+// For tracing process id changes
+typedef struct TracePid {
+    char        *filename;
+    FILE        *fstream;
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    uint64_t    prev_time;
+} TracePid;
+
+// For tracing Dalvik VM method enter and exit
+typedef struct TraceMethod {
+    char        *filename;
+    FILE        *fstream;
+    char        compressed[kCompressedSize];
+    char        *compressed_ptr;
+    uint64_t    prev_time;
+    uint32_t    prev_addr;
+    int32_t     prev_pid;
+} TraceMethod;
+
+extern TraceBB trace_bb;
+extern TraceInsn trace_insn;
+extern TraceStatic trace_static;
+extern TraceAddr trace_load;
+extern TraceAddr trace_store;
+extern TraceExc trace_exc;
+extern TracePid trace_pid;
+extern TraceMethod trace_method;
+
 TraceBB trace_bb;
 TraceInsn trace_insn;
 TraceStatic trace_static;
@@ -1878,3 +1974,8 @@
     comp_ptr = varint_encode(call_type, comp_ptr);
     trace_method.compressed_ptr = comp_ptr;
 }
+
+uint64_t trace_static_bb_num(void)
+{
+    return trace_static.bb_num;
+}
diff --git a/trace.h b/trace.h
index 7bf4b82..e69de29 100644
--- a/trace.h
+++ b/trace.h
@@ -1,159 +0,0 @@
-/* Copyright (C) 2006-2007 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#ifndef TRACE_H
-#define TRACE_H
-
-#include <inttypes.h>
-#include "trace_common.h"
-
-extern uint64_t start_time, end_time;
-extern uint64_t elapsed_usecs;
-extern uint64 Now();
-
-struct TranslationBlock;
-
-// For tracing dynamic execution of basic blocks
-typedef struct TraceBB {
-    char	*filename;
-    FILE	*fstream;
-    BBRec	buffer[kMaxNumBasicBlocks];
-    BBRec	*next;		// points to next record in buffer
-    uint64_t	flush_time;	// time of last buffer flush
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    char	*high_water_ptr;
-    int64_t	prev_bb_num;
-    uint64_t	prev_bb_time;
-    uint64_t	current_bb_num;
-    uint64_t	current_bb_start_time;
-    uint64_t	recnum;		// counts number of trace records
-    uint32_t	current_bb_addr;
-    int		num_insns;
-} TraceBB;
-
-// For tracing simuation start times of instructions
-typedef struct TraceInsn {
-    char	*filename;
-    FILE	*fstream;
-    InsnRec	dummy;		// this is here so we can use buffer[-1]
-    InsnRec	buffer[kInsnBufferSize];
-    InsnRec	*current;
-    uint64_t	prev_time;	// time of last instruction start
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    char	*high_water_ptr;
-} TraceInsn;
-
-// For tracing the static information about a basic block
-typedef struct TraceStatic {
-    char	*filename;
-    FILE	*fstream;
-    uint32_t	insns[kMaxInsnPerBB];
-    int		next_insn;
-    uint64_t	bb_num;
-    uint32_t	bb_addr;
-    int		is_thumb;
-} TraceStatic;
-
-// For tracing load and store addresses
-typedef struct TraceAddr {
-    char	*filename;
-    FILE	*fstream;
-    AddrRec	buffer[kMaxNumAddrs];
-    AddrRec	*next;
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    char	*high_water_ptr;
-    uint32_t	prev_addr;
-    uint64_t	prev_time;
-} TraceAddr;
-
-// For tracing exceptions
-typedef struct TraceExc {
-    char	*filename;
-    FILE	*fstream;
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    char	*high_water_ptr;
-    uint64_t	prev_time;
-    uint64_t	prev_bb_recnum;
-} TraceExc;
-
-// For tracing process id changes
-typedef struct TracePid {
-    char	*filename;
-    FILE	*fstream;
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    uint64_t	prev_time;
-} TracePid;
-
-// For tracing Dalvik VM method enter and exit
-typedef struct TraceMethod {
-    char	*filename;
-    FILE	*fstream;
-    char	compressed[kCompressedSize];
-    char	*compressed_ptr;
-    uint64_t	prev_time;
-    uint32_t	prev_addr;
-    int32_t	prev_pid;
-} TraceMethod;
-
-extern TraceBB trace_bb;
-extern TraceInsn trace_insn;
-extern TraceStatic trace_static;
-extern TraceAddr trace_load;
-extern TraceAddr trace_store;
-extern TraceExc trace_exc;
-extern TracePid trace_pid;
-extern TraceMethod trace_method;
-
-// The simulated time, in clock ticks, starting with one.
-extern uint64_t sim_time;
-
-// This variable == 1 if we are currently tracing, otherwise == 0.
-extern int tracing;
-extern int trace_all_addr;
-extern int trace_cache_miss;
-
-extern void start_tracing();
-extern void stop_tracing();
-extern void trace_init(const char *filename);
-extern void trace_bb_start(uint32_t bb_addr);
-extern void trace_add_insn(uint32_t insn, int is_thumb);
-extern void trace_bb_end();
-
-extern int get_insn_ticks_arm(uint32_t insn);
-extern int get_insn_ticks_thumb(uint32_t  insn);
-
-extern void trace_exception(uint32 pc);
-extern void trace_bb_helper(uint64_t bb_num, TranslationBlock *tb);
-extern void trace_insn_helper();
-extern void sim_dcache_load(uint32_t addr);
-extern void sim_dcache_store(uint32_t addr, uint32_t val);
-extern void sim_dcache_swp(uint32_t addr);
-extern void trace_interpreted_method(uint32_t addr, int call_type);
-
-extern const char *trace_filename;
-extern int tracing;
-extern int trace_cache_miss;
-extern int trace_all_addr;
-
-// Trace process/thread operations
-extern void trace_switch(int pid);
-extern void trace_fork(int tgid, int pid);
-extern void trace_clone(int tgid, int pid);
-extern void trace_exit(int exitcode);
-extern void trace_name(char *name);
-
-#endif /* TRACE_H */
diff --git a/trace_common.h b/trace_common.h
index fe84c1a..b2a2188 100644
--- a/trace_common.h
+++ b/trace_common.h
@@ -1,142 +1,4 @@
-/* Copyright (C) 2006-2007 The Android Open Source Project
-**
-** This software is licensed under the terms of the GNU General Public
-** License version 2, as published by the Free Software Foundation, and
-** may be copied, distributed, and modified under those terms.
-**
-** This program is distributed in the hope that it will be useful,
-** but WITHOUT ANY WARRANTY; without even the implied warranty of
-** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-** GNU General Public License for more details.
-*/
-
-#ifndef TRACE_COMMON_H
-#define TRACE_COMMON_H
-
-#include <inttypes.h>
-
-// This should be the same as OPC_BUF_SIZE
-#define kMaxInsnPerBB 512
-
-#define kMaxNumBasicBlocks 1024
-
-#define kMaxNumAddrs 1024
-
-#define kInsnBufferSize 1024
-
-#define kCompressedSize 8192
-
-#define kMethodEnter		0
-#define kMethodExit		1
-#define kMethodException	2
-#define kNativeEnter		4
-#define kNativeExit		5
-#define kNativeException	6
-
-// The trace identifier string must be less than 16 characters.
-#define TRACE_IDENT "qemu_trace_file"
-#define TRACE_VERSION 2
-
-typedef struct TraceHeader {
-    char	ident[16];
-    int		version;
-    uint32_t	start_sec;
-    uint32_t	start_usec;
-    uint32_t	pdate;
-    uint32_t	ptime;
-    uint32_t	num_used_pids;		// number of distinct process ids used
-    int		first_unused_pid;	// -1 if all 32,768 pids are used (unlikely)
-    uint8_t	padding[4];		// next field is 8-byte aligned
-    uint64_t	num_static_bb;
-    uint64_t	num_static_insn;
-    uint64_t	num_dynamic_bb;
-    uint64_t	num_dynamic_insn;
-    uint64_t	elapsed_usecs;
-} TraceHeader;
-
-typedef struct BBRec {
-    uint64_t	start_time;	// time of first occurrence
-    uint64_t	bb_num;		// basic block number
-    uint32_t	repeat;		// repeat count (= 0 if just one occurrence)
-    uint64_t	time_diff;	// diff from previous time (if repeat > 0)
-} BBRec;
-
-// Define a trace record for addresses that miss in the cache
-typedef struct AddrRec {
-    uint64_t	time;
-    uint32_t	addr;
-} AddrRec;
-
-// Define a trace record for the start time of each instruction
-typedef struct InsnRec {
-    uint64_t	time_diff;	// time difference from last instruction
-    uint32_t	repeat;		// repeat count
-} InsnRec;
-
-// Define record types for process id changes.
-#define kPidEndOfFile		0
-#define kPidFork		1
-#define kPidClone		2
-#define kPidSwitch		3
-#define kPidExec		4
-#define kPidMmap		5
-#define kPidExit		6
-#define kPidKthreadName		7
-#define kPidSymbolAdd		8
-#define kPidSymbolRemove	9
-#define kPidMunmap		10
-#define kPidNoAction		11
-#define kPidName		12
-
-#define bswap16(x) ((((x) & 0xff) << 8) | (((x) >> 8) & 0xff))
-
-#define bswap32(x) ((((x) & 0xff) << 24) | (((x) & 0xff00) << 8) \
-        | (((x) >> 8) & 0xff00) | (((x) >> 24) & 0xff))
-
-#define bswap64(x) (((x) << 56) | (((x) & 0xff00) << 40) \
-        | (((x) & 0xff0000) << 24) | (((x) & 0xff000000ull) << 8) \
-        | (((x) >> 8) & 0xff000000ull) | (((x) >> 24) & 0xff0000) \
-        | (((x) >> 40) & 0xff00) | ((x) >> 56))
-
-#if BYTE_ORDER == LITTLE_ENDIAN
-#define hostToLE16(x)	(x)
-#define hostToLE32(x)	(x)
-#define hostToLE64(x)	(x)
-#define LE16ToHost(x)	(x)
-#define LE32ToHost(x)	(x)
-#define LE64ToHost(x)	(x)
-#define convert16(x)
-#define convert32(x)
-#define convert64(x)
-#else
-#define hostToLE16(x)	bswap16(x)
-#define hostToLE32(x)	bswap32(x)
-#define hostToLE64(x)	bswap64(x)
-#define LE16ToHost(x)	bswap16(x)
-#define LE32ToHost(x)	bswap32(x)
-#define LE64ToHost(x)	bswap64(x)
-#define convert16(x) (x = bswap16(x))
-#define convert32(x) (x = bswap32(x))
-#define convert64(x) (x = bswap64(x))
-#endif
-
-/* XXX: we wrap 16-bit thumb instructions into 32-bit undefined ARM instructions
- *      for simplicity reasons. See section 3.13.1 section of the ARM ARM for details
- *      on the undefined instruction space we're using
+/* This file should be removed once we update the Android qtools to
+ * include 'android-trace_common.h' directly.
  */
-static __inline__ int   insn_is_thumb(uint32_t   insn)
-{
-    return ((insn & 0xfff000f0) == 0xf7f000f0);
-}
-
-static __inline__ uint32_t  insn_wrap_thumb(uint32_t  insn)
-{
-    return 0xf7f000f0 | ((insn & 0xfff0) << 4) | (insn & 0x000f);
-}
-
-static __inline__ uint32_t  insn_unwrap_thumb(uint32_t  insn)
-{
-    return ((insn >> 4) & 0xfff0) | (insn & 0x000f);
-}
-
-#endif /* TRACE_COMMON_H */
+#include "android-trace_common.h"
diff --git a/translate-all.c b/translate-all.c
index 3830007..ed174e7 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -72,9 +72,11 @@
 
     if (max == 0) {
         max = TCG_MAX_OP_SIZE;
-#define DEF(s, n, copy_size) max = copy_size > max? copy_size : max;
+#define DEF(name, iarg, oarg, carg, flags) DEF2((iarg) + (oarg) + (carg))
+#define DEF2(copy_size) max = (copy_size > max) ? copy_size : max;
 #include "tcg-opc.h"
 #undef DEF
+#undef DEF2
         max *= OPC_MAX_SIZE;
     }
 
@@ -169,8 +171,7 @@
 /* The cpu state corresponding to 'searched_pc' is restored.
  */
 int cpu_restore_state(TranslationBlock *tb,
-                      CPUState *env, unsigned long searched_pc,
-                      void *puc)
+                      CPUState *env, unsigned long searched_pc)
 {
     TCGContext *s = &tcg_ctx;
     int j;
@@ -214,7 +215,7 @@
         j--;
     env->icount_decr.u16.low -= gen_opc_icount[j];
 
-    gen_pc_load(env, tb, searched_pc, j, puc);
+    restore_state_to_opc(env, tb, j);
 
 #ifdef CONFIG_PROFILER
     s->restore_time += profile_getclock() - ti;
diff --git a/usb-linux.c b/usb-linux.c
index d8610e8..51aee0c 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -1374,7 +1374,7 @@
 static void usb_host_auto_timer(void *unused)
 {
     usb_host_scan(NULL, usb_host_auto_scan);
-    qemu_mod_timer(usb_auto_timer, qemu_get_clock(rt_clock) + 2000);
+    qemu_mod_timer(usb_auto_timer, qemu_get_clock_ms(rt_clock) + 2000);
 }
 
 /*
@@ -1452,7 +1452,7 @@
          * If this turns out to be too expensive we can move that into a
          * separate thread.
          */
-	usb_auto_timer = qemu_new_timer(rt_clock, usb_host_auto_timer, NULL);
+	usb_auto_timer = qemu_new_timer_ms(rt_clock, usb_host_auto_timer, NULL);
 	if (!usb_auto_timer) {
             fprintf(stderr, "husb: failed to allocate auto scan timer\n");
             qemu_free(f);
@@ -1460,7 +1460,7 @@
         }
 
         /* Check for new devices every two seconds */
-        qemu_mod_timer(usb_auto_timer, qemu_get_clock(rt_clock) + 2000);
+        qemu_mod_timer(usb_auto_timer, qemu_get_clock_ms(rt_clock) + 2000);
     }
 
     dprintf("husb: added auto filter: bus_num %d addr %d vid %d pid %d\n",
diff --git a/vl-android.c b/vl-android.c
index c240a44..8436d68 100644
--- a/vl-android.c
+++ b/vl-android.c
@@ -85,7 +85,6 @@
 
 #ifndef _WIN32
 #include <libgen.h>
-#include <pwd.h>
 #include <sys/times.h>
 #include <sys/wait.h>
 #include <termios.h>
@@ -163,8 +162,11 @@
 #define memalign(align, size) malloc(size)
 #endif
 
+#include "cpus.h"
+#include "arch_init.h"
 
 #ifdef CONFIG_COCOA
+int qemu_main(int argc, char **argv, char **envp);
 #undef main
 #define main qemu_main
 #endif /* CONFIG_COCOA */
@@ -174,7 +176,6 @@
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
-#include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
 #include "hw/bt.h"
@@ -222,11 +223,8 @@
 
 #include "disas.h"
 
-#include "exec-all.h"
-
 #ifdef CONFIG_TRACE
-#include "trace.h"
-#include "dcache.h"
+#include "android-trace.h"
 #endif
 
 #include "qemu_socket.h"
@@ -263,6 +261,10 @@
 const char* keyboard_layout = NULL;
 int64_t ticks_per_sec;
 ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 0; /* force preallocation of physical target memory */
+#endif
 int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int vm_running;
@@ -274,15 +276,6 @@
 int vmsvga_enabled = 0;
 int xenfb_enabled = 0;
 QEMUClock *rtc_clock;
-#ifdef TARGET_SPARC
-int graphic_width = 1024;
-int graphic_height = 768;
-int graphic_depth = 8;
-#else
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 15;
-#endif
 static int full_screen = 0;
 #ifdef CONFIG_SDL
 static int no_frame = 0;
@@ -309,9 +302,6 @@
 int no_shutdown = 0;
 int cursor_hide = 1;
 int graphic_rotate = 0;
-#ifndef _WIN32
-int daemonize = 0;
-#endif
 WatchdogTimerModel *watchdog = NULL;
 int watchdog_action = WDT_RESET;
 const char *option_rom[MAX_OPTION_ROMS];
@@ -334,8 +324,6 @@
 uint64_t node_mem[MAX_NODES];
 uint64_t node_cpumask[MAX_NODES];
 
-static CPUState *cur_cpu;
-static CPUState *next_cpu;
 static QEMUTimer *nographic_timer;
 
 uint8_t qemu_uuid[16];
@@ -567,28 +555,6 @@
   return new_log_fd;
 }
 
-/***********************************************************/
-void hw_error(const char *fmt, ...)
-{
-    va_list ap;
-    CPUState *env;
-
-    va_start(ap, fmt);
-    fprintf(stderr, "qemu: hardware error: ");
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
-#ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
-#else
-        cpu_dump_state(env, stderr, fprintf, 0);
-#endif
-    }
-    va_end(ap);
-    abort();
-}
-
 /***************/
 /* ballooning */
 
@@ -615,33 +581,6 @@
 }
 
 /***********************************************************/
-/* real time host monotonic timer */
-
-/* compute with 96 bit intermediate result: (a*b)/c */
-uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
-{
-    union {
-        uint64_t ll;
-        struct {
-#ifdef HOST_WORDS_BIGENDIAN
-            uint32_t high, low;
-#else
-            uint32_t low, high;
-#endif
-        } l;
-    } u, res;
-    uint64_t rl, rh;
-
-    u.ll = a;
-    rl = (uint64_t)u.l.low * (uint64_t)b;
-    rh = (uint64_t)u.l.high * (uint64_t)b;
-    rh += (rl >> 32);
-    res.l.high = rh / c;
-    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
-    return res.ll;
-}
-
-/***********************************************************/
 /* host time/date access */
 void qemu_get_timedate(struct tm *tm, int offset)
 {
@@ -680,7 +619,7 @@
 
 
 #ifdef CONFIG_TRACE
-static int tbflush_requested;
+int tbflush_requested;
 static int exit_requested;
 
 void start_tracing()
@@ -1750,468 +1689,6 @@
 }
 
 /***********************************************************/
-/* I/O handling */
-
-typedef struct IOHandlerRecord {
-    int fd;
-    IOCanReadHandler *fd_read_poll;
-    IOHandler *fd_read;
-    IOHandler *fd_write;
-    int deleted;
-    void *opaque;
-    /* temporary data */
-    struct pollfd *ufd;
-    struct IOHandlerRecord *next;
-} IOHandlerRecord;
-
-static IOHandlerRecord *first_io_handler;
-
-/* XXX: fd_read_poll should be suppressed, but an API change is
-   necessary in the character devices to suppress fd_can_read(). */
-int qemu_set_fd_handler2(int fd,
-                         IOCanReadHandler *fd_read_poll,
-                         IOHandler *fd_read,
-                         IOHandler *fd_write,
-                         void *opaque)
-{
-    IOHandlerRecord **pioh, *ioh;
-
-    if (!fd_read && !fd_write) {
-        pioh = &first_io_handler;
-        for(;;) {
-            ioh = *pioh;
-            if (ioh == NULL)
-                break;
-            if (ioh->fd == fd) {
-                ioh->deleted = 1;
-                break;
-            }
-            pioh = &ioh->next;
-        }
-    } else {
-        for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
-            if (ioh->fd == fd)
-                goto found;
-        }
-        ioh = qemu_mallocz(sizeof(IOHandlerRecord));
-        ioh->next = first_io_handler;
-        first_io_handler = ioh;
-    found:
-        ioh->fd = fd;
-        ioh->fd_read_poll = fd_read_poll;
-        ioh->fd_read = fd_read;
-        ioh->fd_write = fd_write;
-        ioh->opaque = opaque;
-        ioh->deleted = 0;
-    }
-    return 0;
-}
-
-int qemu_set_fd_handler(int fd,
-                        IOHandler *fd_read,
-                        IOHandler *fd_write,
-                        void *opaque)
-{
-    return qemu_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque);
-}
-
-#ifdef _WIN32
-/***********************************************************/
-/* Polling handling */
-
-typedef struct PollingEntry {
-    PollingFunc *func;
-    void *opaque;
-    struct PollingEntry *next;
-} PollingEntry;
-
-static PollingEntry *first_polling_entry;
-
-int qemu_add_polling_cb(PollingFunc *func, void *opaque)
-{
-    PollingEntry **ppe, *pe;
-    pe = qemu_mallocz(sizeof(PollingEntry));
-    pe->func = func;
-    pe->opaque = opaque;
-    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
-    *ppe = pe;
-    return 0;
-}
-
-void qemu_del_polling_cb(PollingFunc *func, void *opaque)
-{
-    PollingEntry **ppe, *pe;
-    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
-        pe = *ppe;
-        if (pe->func == func && pe->opaque == opaque) {
-            *ppe = pe->next;
-            qemu_free(pe);
-            break;
-        }
-    }
-}
-
-/***********************************************************/
-/* Wait objects support */
-typedef struct WaitObjects {
-    int num;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
-    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
-} WaitObjects;
-
-static WaitObjects wait_objects = {0};
-
-int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
-{
-    WaitObjects *w = &wait_objects;
-
-    if (w->num >= MAXIMUM_WAIT_OBJECTS)
-        return -1;
-    w->events[w->num] = handle;
-    w->func[w->num] = func;
-    w->opaque[w->num] = opaque;
-    w->num++;
-    return 0;
-}
-
-void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
-{
-    int i, found;
-    WaitObjects *w = &wait_objects;
-
-    found = 0;
-    for (i = 0; i < w->num; i++) {
-        if (w->events[i] == handle)
-            found = 1;
-        if (found) {
-            w->events[i] = w->events[i + 1];
-            w->func[i] = w->func[i + 1];
-            w->opaque[i] = w->opaque[i + 1];
-        }
-    }
-    if (found)
-        w->num--;
-}
-#endif
-
-/***********************************************************/
-/* ram save/restore */
-
-static int ram_get_page(QEMUFile *f, uint8_t *buf, int len)
-{
-    int v;
-
-    v = qemu_get_byte(f);
-    switch(v) {
-    case 0:
-        if (qemu_get_buffer(f, buf, len) != len)
-            return -EIO;
-        break;
-    case 1:
-        v = qemu_get_byte(f);
-        memset(buf, v, len);
-        break;
-    default:
-        return -EINVAL;
-    }
-
-    if (qemu_file_has_error(f))
-        return -EIO;
-
-    return 0;
-}
-
-static int ram_load_v1(QEMUFile *f, void *opaque)
-{
-    int ret;
-    ram_addr_t i;
-
-    if (qemu_get_be32(f) != last_ram_offset)
-        return -EINVAL;
-    for(i = 0; i < last_ram_offset; i+= TARGET_PAGE_SIZE) {
-        ret = ram_get_page(f, qemu_get_ram_ptr(i), TARGET_PAGE_SIZE);
-        if (ret)
-            return ret;
-    }
-    return 0;
-}
-
-#define BDRV_HASH_BLOCK_SIZE 1024
-#define IOBUF_SIZE 4096
-#define RAM_CBLOCK_MAGIC 0xfabe
-
-typedef struct RamDecompressState {
-    z_stream zstream;
-    QEMUFile *f;
-    uint8_t buf[IOBUF_SIZE];
-} RamDecompressState;
-
-static int ram_decompress_open(RamDecompressState *s, QEMUFile *f)
-{
-    int ret;
-    memset(s, 0, sizeof(*s));
-    s->f = f;
-    ret = inflateInit(&s->zstream);
-    if (ret != Z_OK)
-        return -1;
-    return 0;
-}
-
-static int ram_decompress_buf(RamDecompressState *s, uint8_t *buf, int len)
-{
-    int ret, clen;
-
-    s->zstream.avail_out = len;
-    s->zstream.next_out = buf;
-    while (s->zstream.avail_out > 0) {
-        if (s->zstream.avail_in == 0) {
-            if (qemu_get_be16(s->f) != RAM_CBLOCK_MAGIC)
-                return -1;
-            clen = qemu_get_be16(s->f);
-            if (clen > IOBUF_SIZE)
-                return -1;
-            qemu_get_buffer(s->f, s->buf, clen);
-            s->zstream.avail_in = clen;
-            s->zstream.next_in = s->buf;
-        }
-        ret = inflate(&s->zstream, Z_PARTIAL_FLUSH);
-        if (ret != Z_OK && ret != Z_STREAM_END) {
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static void ram_decompress_close(RamDecompressState *s)
-{
-    inflateEnd(&s->zstream);
-}
-
-#define RAM_SAVE_FLAG_FULL	0x01
-#define RAM_SAVE_FLAG_COMPRESS	0x02
-#define RAM_SAVE_FLAG_MEM_SIZE	0x04
-#define RAM_SAVE_FLAG_PAGE	0x08
-#define RAM_SAVE_FLAG_EOS	0x10
-
-static int is_dup_page(uint8_t *page, uint8_t ch)
-{
-    uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
-    uint32_t *array = (uint32_t *)page;
-    int i;
-
-    for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
-        if (array[i] != val)
-            return 0;
-    }
-
-    return 1;
-}
-
-static int ram_save_block(QEMUFile *f)
-{
-    static ram_addr_t current_addr = 0;
-    ram_addr_t saved_addr = current_addr;
-    ram_addr_t addr = 0;
-    int found = 0;
-
-    while (addr < last_ram_offset) {
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
-            uint8_t *p;
-
-            cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
-
-            p = qemu_get_ram_ptr(current_addr);
-
-            if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-            } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-            }
-
-            found = 1;
-            break;
-        }
-        addr += TARGET_PAGE_SIZE;
-        current_addr = (saved_addr + addr) % last_ram_offset;
-    }
-
-    return found;
-}
-
-static uint64_t bytes_transferred = 0;
-
-static ram_addr_t ram_save_remaining(void)
-{
-    ram_addr_t addr;
-    ram_addr_t count = 0;
-
-    for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-            count++;
-    }
-
-    return count;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
-    return last_ram_offset;
-}
-
-static int ram_save_live(QEMUFile *f, int stage, void *opaque)
-{
-    ram_addr_t addr;
-    uint64_t bytes_transferred_last;
-    double bwidth = 0;
-    uint64_t expected_time = 0;
-
-    cpu_physical_sync_dirty_bitmap(0, TARGET_PHYS_ADDR_MAX);
-
-    if (stage == 1) {
-        /* Make sure all dirty bits are set */
-        for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-            if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-                cpu_physical_memory_set_dirty(addr);
-        }
-
-        /* Enable dirty memory tracking */
-        cpu_physical_memory_set_dirty_tracking(1);
-
-        qemu_put_be64(f, last_ram_offset | RAM_SAVE_FLAG_MEM_SIZE);
-    }
-
-    bytes_transferred_last = bytes_transferred;
-    bwidth = qemu_get_clock_ns(rt_clock);
-
-    while (!qemu_file_rate_limit(f)) {
-        int ret;
-
-        ret = ram_save_block(f);
-        bytes_transferred += ret * TARGET_PAGE_SIZE;
-        if (ret == 0) /* no more blocks */
-            break;
-    }
-
-    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
-    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
-
-    /* if we haven't transferred anything this round, force expected_time to a
-     * a very high value, but without crashing */
-    if (bwidth == 0)
-        bwidth = 0.000001;
-
-    /* try transferring iterative blocks of memory */
-
-    if (stage == 3) {
-
-        /* flush all remaining blocks regardless of rate limiting */
-        while (ram_save_block(f) != 0) {
-            bytes_transferred += TARGET_PAGE_SIZE;
-        }
-        cpu_physical_memory_set_dirty_tracking(0);
-    }
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-    return (stage == 2) && (expected_time <= migrate_max_downtime());
-}
-
-static int ram_load_dead(QEMUFile *f, void *opaque)
-{
-    RamDecompressState s1, *s = &s1;
-    uint8_t buf[10];
-    ram_addr_t i;
-
-    if (ram_decompress_open(s, f) < 0)
-        return -EINVAL;
-    for(i = 0; i < last_ram_offset; i+= BDRV_HASH_BLOCK_SIZE) {
-        if (ram_decompress_buf(s, buf, 1) < 0) {
-            fprintf(stderr, "Error while reading ram block header\n");
-            goto error;
-        }
-        if (buf[0] == 0) {
-            if (ram_decompress_buf(s, qemu_get_ram_ptr(i),
-                                   BDRV_HASH_BLOCK_SIZE) < 0) {
-                fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i);
-                goto error;
-            }
-        } else {
-        error:
-            printf("Error block header\n");
-            return -EINVAL;
-        }
-    }
-    ram_decompress_close(s);
-
-    return 0;
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
-    ram_addr_t addr;
-    int flags;
-
-    if (version_id == 1)
-        return ram_load_v1(f, opaque);
-
-    if (version_id == 2) {
-        if (qemu_get_be32(f) != last_ram_offset)
-            return -EINVAL;
-        return ram_load_dead(f, opaque);
-    }
-
-    if (version_id != 3)
-        return -EINVAL;
-
-    do {
-        addr = qemu_get_be64(f);
-
-        flags = addr & ~TARGET_PAGE_MASK;
-        addr &= TARGET_PAGE_MASK;
-
-        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (addr != last_ram_offset)
-                return -EINVAL;
-        }
-
-        if (flags & RAM_SAVE_FLAG_FULL) {
-            if (ram_load_dead(f, opaque) < 0)
-                return -EINVAL;
-        }
-
-        if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            uint8_t ch = qemu_get_byte(f);
-            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
-        } else if (flags & RAM_SAVE_FLAG_PAGE)
-            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
-    } while (!(flags & RAM_SAVE_FLAG_EOS));
-
-    return 0;
-}
-
-void qemu_service_io(void)
-{
-    qemu_notify_event();
-}
-
-/***********************************************************/
 /* machine registration */
 
 static QEMUMachine *first_machine = NULL;
@@ -2268,14 +1745,14 @@
             interval = dcl->gui_timer_interval;
         dcl = dcl->next;
     }
-    qemu_mod_timer(ds->gui_timer, interval + qemu_get_clock(rt_clock));
+    qemu_mod_timer(ds->gui_timer, interval + qemu_get_clock_ms(rt_clock));
 }
 
 static void nographic_update(void *opaque)
 {
     uint64_t interval = GUI_REFRESH_INTERVAL;
 
-    qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
+    qemu_mod_timer(nographic_timer, interval + qemu_get_clock_ms(rt_clock));
 }
 
 struct vm_change_state_entry {
@@ -2305,7 +1782,7 @@
     qemu_free (e);
 }
 
-static void vm_state_notify(int running, int reason)
+void vm_state_notify(int running, int reason)
 {
     VMChangeStateEntry *e;
 
@@ -2314,9 +1791,6 @@
     }
 }
 
-static void resume_all_vcpus(void);
-static void pause_all_vcpus(void);
-
 void vm_start(void)
 {
     if (!vm_running) {
@@ -2339,9 +1813,10 @@
 
 static QEMUResetEntry *first_reset_entry;
 static int reset_requested;
-static int shutdown_requested;
+static int shutdown_requested, shutdown_signal = -1;
+static pid_t shutdown_pid;
 static int powerdown_requested;
-static int debug_requested;
+int debug_requested;
 static int vmstop_requested;
 
 int qemu_shutdown_requested(void)
@@ -2379,16 +1854,6 @@
     return r;
 }
 
-static void do_vm_stop(int reason)
-{
-    if (vm_running) {
-        cpu_disable_ticks();
-        vm_running = 0;
-        pause_all_vcpus();
-        vm_state_notify(0, reason);
-    }
-}
-
 void qemu_register_reset(QEMUResetHandler *func, int order, void *opaque)
 {
     QEMUResetEntry **pre, *re;
@@ -2425,6 +1890,13 @@
     qemu_notify_event();
 }
 
+void qemu_system_killed(int signal, pid_t pid)
+{
+    shutdown_signal = signal;
+    shutdown_pid = pid;
+    qemu_system_shutdown_request();
+}
+
 void qemu_system_shutdown_request(void)
 {
     shutdown_requested = 1;
@@ -2445,586 +1917,36 @@
 }
 #endif
 
-#ifndef _WIN32
-static int io_thread_fd = -1;
-
-#if 0
-static void qemu_event_increment(void)
-{
-    static const char byte = 0;
-
-    if (io_thread_fd == -1)
-        return;
-
-    write(io_thread_fd, &byte, sizeof(byte));
-}
-#endif
-
-static void qemu_event_read(void *opaque)
-{
-    int fd = (unsigned long)opaque;
-    ssize_t len;
-
-    /* Drain the notify pipe */
-    do {
-        char buffer[512];
-        len = read(fd, buffer, sizeof(buffer));
-    } while ((len == -1 && errno == EINTR) || len > 0);
-}
-
-static int qemu_event_init(void)
-{
-    int err;
-    int fds[2];
-
-    err = pipe(fds);
-    if (err == -1)
-        return -errno;
-
-    err = fcntl_setfl(fds[0], O_NONBLOCK);
-    if (err < 0)
-        goto fail;
-
-    err = fcntl_setfl(fds[1], O_NONBLOCK);
-    if (err < 0)
-        goto fail;
-
-    qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
-                         (void *)(unsigned long)fds[0]);
-
-    io_thread_fd = fds[1];
-    return 0;
-
-fail:
-    close(fds[0]);
-    close(fds[1]);
-    return err;
-}
-#else
-HANDLE qemu_event_handle;
-
-static void dummy_event_handler(void *opaque)
-{
-}
-
-static int qemu_event_init(void)
-{
-    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
-    if (!qemu_event_handle) {
-        perror("Failed CreateEvent");
-        return -1;
-    }
-    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
-    return 0;
-}
-
-#if 0
-static void qemu_event_increment(void)
-{
-    SetEvent(qemu_event_handle);
-}
-#endif
-#endif
-
-static int cpu_can_run(CPUState *env)
-{
-    if (env->stop)
-        return 0;
-    if (env->stopped)
-        return 0;
-    return 1;
-}
-
-#ifndef CONFIG_IOTHREAD
-static int qemu_init_main_loop(void)
-{
-    return qemu_event_init();
-}
-
-void qemu_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-
-    if (kvm_enabled())
-        kvm_init_vcpu(env);
-    return;
-}
-
-int qemu_cpu_self(void *env)
-{
-    return 1;
-}
-
-static void resume_all_vcpus(void)
-{
-}
-
-static void pause_all_vcpus(void)
-{
-}
-
-void qemu_cpu_kick(void *env)
-{
-    return;
-}
-
-void qemu_notify_event(void)
-{
-    CPUState *env = cpu_single_env;
-
-    if (env) {
-        cpu_exit(env);
-#ifdef USE_KQEMU
-        if (env->kqemu_enabled)
-            kqemu_cpu_interrupt(env);
-#endif
-     }
-}
-
-#define qemu_mutex_lock_iothread() do { } while (0)
-#define qemu_mutex_unlock_iothread() do { } while (0)
-
-void vm_stop(int reason)
-{
-    do_vm_stop(reason);
-}
-
-#else /* CONFIG_IOTHREAD */
-
-#include "qemu-thread.h"
-
-QemuMutex qemu_global_mutex;
-static QemuMutex qemu_fair_mutex;
-
-static QemuThread io_thread;
-
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
-
-static int qemu_system_ready;
-/* cpu creation */
-static QemuCond qemu_cpu_cond;
-/* system init */
-static QemuCond qemu_system_cond;
-static QemuCond qemu_pause_cond;
-
-static void block_io_signals(void);
-static void unblock_io_signals(void);
-static int tcg_has_work(void);
-
-static int qemu_init_main_loop(void)
-{
-    int ret;
-
-    ret = qemu_event_init();
-    if (ret)
-        return ret;
-
-    qemu_cond_init(&qemu_pause_cond);
-    qemu_mutex_init(&qemu_fair_mutex);
-    qemu_mutex_init(&qemu_global_mutex);
-    qemu_mutex_lock(&qemu_global_mutex);
-
-    unblock_io_signals();
-    qemu_thread_self(&io_thread);
-
-    return 0;
-}
-
-static void qemu_wait_io_event(CPUState *env)
-{
-    while (!tcg_has_work())
-        qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
-
-    qemu_mutex_unlock(&qemu_global_mutex);
-
-    /*
-     * Users of qemu_global_mutex can be starved, having no chance
-     * to acquire it since this path will get to it first.
-     * So use another lock to provide fairness.
-     */
-    qemu_mutex_lock(&qemu_fair_mutex);
-    qemu_mutex_unlock(&qemu_fair_mutex);
-
-    qemu_mutex_lock(&qemu_global_mutex);
-    if (env->stop) {
-        env->stop = 0;
-        env->stopped = 1;
-        qemu_cond_signal(&qemu_pause_cond);
-    }
-}
-
-static int qemu_cpu_exec(CPUState *env);
-
-static void *kvm_cpu_thread_fn(void *arg)
-{
-    CPUState *env = arg;
-
-    block_io_signals();
-    qemu_thread_self(env->thread);
-
-    /* signal CPU creation */
-    qemu_mutex_lock(&qemu_global_mutex);
-    env->created = 1;
-    qemu_cond_signal(&qemu_cpu_cond);
-
-    /* and wait for machine initialization */
-    while (!qemu_system_ready)
-        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
-
-    while (1) {
-        if (cpu_can_run(env))
-            qemu_cpu_exec(env);
-        qemu_wait_io_event(env);
-    }
-
-    return NULL;
-}
-
-static void tcg_cpu_exec(void);
-
-static void *tcg_cpu_thread_fn(void *arg)
-{
-    CPUState *env = arg;
-
-    block_io_signals();
-    qemu_thread_self(env->thread);
-
-    /* signal CPU creation */
-    qemu_mutex_lock(&qemu_global_mutex);
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        env->created = 1;
-    qemu_cond_signal(&qemu_cpu_cond);
-
-    /* and wait for machine initialization */
-    while (!qemu_system_ready)
-        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
-
-    while (1) {
-        tcg_cpu_exec();
-        qemu_wait_io_event(cur_cpu);
-    }
-
-    return NULL;
-}
-
-void qemu_cpu_kick(void *_env)
-{
-    CPUState *env = _env;
-    qemu_cond_broadcast(env->halt_cond);
-    if (kvm_enabled())
-        qemu_thread_signal(env->thread, SIGUSR1);
-}
-
-int qemu_cpu_self(void *env)
-{
-    return (cpu_single_env != NULL);
-}
-
-static void cpu_signal(int sig)
-{
-    if (cpu_single_env)
-        cpu_exit(cpu_single_env);
-}
-
-static void block_io_signals(void)
-{
-    sigset_t set;
-    struct sigaction sigact;
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    sigaddset(&set, SIGIO);
-    sigaddset(&set, SIGALRM);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR1);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = cpu_signal;
-    sigaction(SIGUSR1, &sigact, NULL);
-}
-
-static void unblock_io_signals(void)
-{
-    sigset_t set;
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    sigaddset(&set, SIGIO);
-    sigaddset(&set, SIGALRM);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR1);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
-}
-
-static void qemu_signal_lock(unsigned int msecs)
-{
-    qemu_mutex_lock(&qemu_fair_mutex);
-
-    while (qemu_mutex_trylock(&qemu_global_mutex)) {
-        qemu_thread_signal(tcg_cpu_thread, SIGUSR1);
-        if (!qemu_mutex_timedlock(&qemu_global_mutex, msecs))
-            break;
-    }
-    qemu_mutex_unlock(&qemu_fair_mutex);
-}
-
-void qemu_mutex_lock_iothread(void)
-{
-    if (kvm_enabled()) {
-        qemu_mutex_lock(&qemu_fair_mutex);
-        qemu_mutex_lock(&qemu_global_mutex);
-        qemu_mutex_unlock(&qemu_fair_mutex);
-    } else
-        qemu_signal_lock(100);
-}
-
-void qemu_mutex_unlock_iothread(void)
-{
-    qemu_mutex_unlock(&qemu_global_mutex);
-}
-
-static int all_vcpus_paused(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        if (!penv->stopped)
-            return 0;
-        penv = (CPUState *)penv->next_cpu;
-    }
-
-    return 1;
-}
-
-static void pause_all_vcpus(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        penv->stop = 1;
-        qemu_thread_signal(penv->thread, SIGUSR1);
-        qemu_cpu_kick(penv);
-        penv = (CPUState *)penv->next_cpu;
-    }
-
-    while (!all_vcpus_paused()) {
-        qemu_cond_timedwait(&qemu_pause_cond, &qemu_global_mutex, 100);
-        penv = first_cpu;
-        while (penv) {
-            qemu_thread_signal(penv->thread, SIGUSR1);
-            penv = (CPUState *)penv->next_cpu;
-        }
-    }
-}
-
-static void resume_all_vcpus(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        penv->stop = 0;
-        penv->stopped = 0;
-        qemu_thread_signal(penv->thread, SIGUSR1);
-        qemu_cpu_kick(penv);
-        penv = (CPUState *)penv->next_cpu;
-    }
-}
-
-static void tcg_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-    /* share a single thread for all cpus with TCG */
-    if (!tcg_cpu_thread) {
-        env->thread = qemu_mallocz(sizeof(QemuThread));
-        env->halt_cond = qemu_mallocz(sizeof(QemuCond));
-        qemu_cond_init(env->halt_cond);
-        qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
-        while (env->created == 0)
-            qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
-        tcg_cpu_thread = env->thread;
-        tcg_halt_cond = env->halt_cond;
-    } else {
-        env->thread = tcg_cpu_thread;
-        env->halt_cond = tcg_halt_cond;
-    }
-}
-
-static void kvm_start_vcpu(CPUState *env)
-{
-#if 0
-    kvm_init_vcpu(env);
-    env->thread = qemu_mallocz(sizeof(QemuThread));
-    env->halt_cond = qemu_mallocz(sizeof(QemuCond));
-    qemu_cond_init(env->halt_cond);
-    qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
-    while (env->created == 0)
-        qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
-#endif
-}
-
-void qemu_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-
-    if (kvm_enabled())
-        kvm_start_vcpu(env);
-    else
-        tcg_init_vcpu(env);
-}
-
-void qemu_notify_event(void)
-{
-    qemu_event_increment();
-}
-
-void vm_stop(int reason)
-{
-    QemuThread me;
-    qemu_thread_self(&me);
-
-    if (!qemu_thread_equal(&me, &io_thread)) {
-        qemu_system_vmstop_request(reason);
-        /*
-         * FIXME: should not return to device code in case
-         * vm_stop() has been requested.
-         */
-        if (cpu_single_env) {
-            cpu_exit(cpu_single_env);
-            cpu_single_env->stop = 1;
-        }
-        return;
-    }
-    do_vm_stop(reason);
-}
-
-#endif
-
-
-#ifdef _WIN32
-static void host_main_loop_wait(int *timeout)
-{
-    int ret, ret2, i;
-    PollingEntry *pe;
-
-
-    /* XXX: need to suppress polling by better using win32 events */
-    ret = 0;
-    for(pe = first_polling_entry; pe != NULL; pe = pe->next) {
-        ret |= pe->func(pe->opaque);
-    }
-    if (ret == 0) {
-        int err;
-        WaitObjects *w = &wait_objects;
-
-        ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout);
-        if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) {
-            if (w->func[ret - WAIT_OBJECT_0])
-                w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]);
-
-            /* Check for additional signaled events */
-            for(i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) {
-
-                /* Check if event is signaled */
-                ret2 = WaitForSingleObject(w->events[i], 0);
-                if(ret2 == WAIT_OBJECT_0) {
-                    if (w->func[i])
-                        w->func[i](w->opaque[i]);
-                } else if (ret2 == WAIT_TIMEOUT) {
-                } else {
-                    err = GetLastError();
-                    fprintf(stderr, "WaitForSingleObject error %d %d\n", i, err);
-                }
-            }
-        } else if (ret == WAIT_TIMEOUT) {
-        } else {
-            err = GetLastError();
-            fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err);
-        }
-    }
-
-    *timeout = 0;
-}
-#else
-static void host_main_loop_wait(int *timeout)
-{
-}
-#endif
-
 void main_loop_wait(int timeout)
 {
-    IOHandlerRecord *ioh;
     fd_set rfds, wfds, xfds;
     int ret, nfds;
     struct timeval tv;
 
     qemu_bh_update_timeout(&timeout);
 
-    host_main_loop_wait(&timeout);
+    os_host_main_loop_wait(&timeout);
+
+
+    tv.tv_sec = timeout / 1000;
+    tv.tv_usec = (timeout % 1000) * 1000;
 
     /* poll any events */
+
     /* XXX: separate device handlers from system ones */
     nfds = -1;
     FD_ZERO(&rfds);
     FD_ZERO(&wfds);
     FD_ZERO(&xfds);
-    for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
-        if (ioh->deleted)
-            continue;
-        if (ioh->fd_read &&
-            (!ioh->fd_read_poll ||
-             ioh->fd_read_poll(ioh->opaque) != 0)) {
-            FD_SET(ioh->fd, &rfds);
-            if (ioh->fd > nfds)
-                nfds = ioh->fd;
-        }
-        if (ioh->fd_write) {
-            FD_SET(ioh->fd, &wfds);
-            if (ioh->fd > nfds)
-                nfds = ioh->fd;
-        }
-    }
-
-    tv.tv_sec = timeout / 1000;
-    tv.tv_usec = (timeout % 1000) * 1000;
-
-#if defined(CONFIG_SLIRP)
+    qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
     if (slirp_is_inited()) {
         slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
     }
-#endif
+
     qemu_mutex_unlock_iothread();
     ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
     qemu_mutex_lock_iothread();
-    if (ret > 0) {
-        IOHandlerRecord **pioh;
-
-        for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
-            if (!ioh->deleted && ioh->fd_read && FD_ISSET(ioh->fd, &rfds)) {
-                ioh->fd_read(ioh->opaque);
-            }
-            if (!ioh->deleted && ioh->fd_write && FD_ISSET(ioh->fd, &wfds)) {
-                ioh->fd_write(ioh->opaque);
-            }
-        }
-
-        /* remove deleted IO handlers */
-        pioh = &first_io_handler;
-        while (*pioh) {
-            ioh = *pioh;
-            if (ioh->deleted) {
-                *pioh = ioh->next;
-                qemu_free(ioh);
-            } else
-                pioh = &ioh->next;
-        }
-    }
-#if defined(CONFIG_SLIRP)
+    qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
     if (slirp_is_inited()) {
         if (ret < 0) {
             FD_ZERO(&rfds);
@@ -3033,7 +1955,6 @@
         }
         slirp_select_poll(&rfds, &wfds, &xfds);
     }
-#endif
     charpipe_poll();
 
     qemu_run_all_timers();
@@ -3044,102 +1965,6 @@
 
 }
 
-static int qemu_cpu_exec(CPUState *env)
-{
-    int ret;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
-
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    if (use_icount) {
-        int64_t count;
-        int decr;
-        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
-        env->icount_decr.u16.low = 0;
-        env->icount_extra = 0;
-        count = qemu_next_deadline();
-        count = (count + (1 << icount_time_shift) - 1)
-                >> icount_time_shift;
-        qemu_icount += count;
-        decr = (count > 0xffff) ? 0xffff : count;
-        count -= decr;
-        env->icount_decr.u16.low = decr;
-        env->icount_extra = count;
-    }
-#ifdef CONFIG_TRACE
-    if (tbflush_requested) {
-        tbflush_requested = 0;
-        tb_flush(env);
-        return EXCP_INTERRUPT;
-    }
-#endif
-
-
-    ret = cpu_exec(env);
-#ifdef CONFIG_PROFILER
-    qemu_time += profile_getclock() - ti;
-#endif
-    if (use_icount) {
-        /* Fold pending instructions back into the
-           instruction counter, and clear the interrupt flag.  */
-        qemu_icount -= (env->icount_decr.u16.low
-                        + env->icount_extra);
-        env->icount_decr.u32 = 0;
-        env->icount_extra = 0;
-    }
-    return ret;
-}
-
-static void tcg_cpu_exec(void)
-{
-    int ret = 0;
-
-    if (next_cpu == NULL)
-        next_cpu = first_cpu;
-    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
-        CPUState *env = cur_cpu = next_cpu;
-
-        if (!vm_running)
-            break;
-        if (qemu_timer_alarm_pending()) {
-            break;
-        }
-        if (cpu_can_run(env))
-            ret = qemu_cpu_exec(env);
-        if (ret == EXCP_DEBUG) {
-            gdb_set_stop_cpu(env);
-            debug_requested = 1;
-            break;
-        }
-    }
-}
-
-static int cpu_has_work(CPUState *env)
-{
-    if (env->stop)
-        return 1;
-    if (env->stopped)
-        return 0;
-    if (!env->halted)
-        return 1;
-    if (qemu_cpu_has_work(env))
-        return 1;
-    return 0;
-}
-
-int tcg_has_work(void)
-{
-    CPUState *env;
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        if (cpu_has_work(env))
-            return 1;
-    return 0;
-}
-
 static int vm_can_run(void)
 {
     if (powerdown_requested)
@@ -3231,7 +2056,7 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
            opt_help
 #define DEFHEADING(text) stringify(text) "\n"
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
@@ -3260,7 +2085,7 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
     opt_enum,
 #define DEFHEADING(text)
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
@@ -3277,159 +2102,13 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
     { option, opt_arg, opt_enum },
 #define DEFHEADING(text)
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
     { NULL, 0, 0 },
 };
 
-#ifdef HAS_AUDIO
-struct soundhw soundhw[] = {
-#ifdef HAS_AUDIO_CHOICE
-#if defined(TARGET_I386) || defined(TARGET_MIPS)
-    {
-        "pcspk",
-        "PC speaker",
-        0,
-        1,
-        { .init_isa = pcspk_audio_init }
-    },
-#endif
-
-#ifdef CONFIG_SB16
-    {
-        "sb16",
-        "Creative Sound Blaster 16",
-        0,
-        1,
-        { .init_isa = SB16_init }
-    },
-#endif
-
-#ifdef CONFIG_CS4231A
-    {
-        "cs4231a",
-        "CS4231A",
-        0,
-        1,
-        { .init_isa = cs4231a_init }
-    },
-#endif
-
-#ifdef CONFIG_ADLIB
-    {
-        "adlib",
-#ifdef HAS_YMF262
-        "Yamaha YMF262 (OPL3)",
-#else
-        "Yamaha YM3812 (OPL2)",
-#endif
-        0,
-        1,
-        { .init_isa = Adlib_init }
-    },
-#endif
-
-#ifdef CONFIG_GUS
-    {
-        "gus",
-        "Gravis Ultrasound GF1",
-        0,
-        1,
-        { .init_isa = GUS_init }
-    },
-#endif
-
-#ifdef CONFIG_AC97
-    {
-        "ac97",
-        "Intel 82801AA AC97 Audio",
-        0,
-        0,
-        { .init_pci = ac97_init }
-    },
-#endif
-
-#ifdef CONFIG_ES1370
-    {
-        "es1370",
-        "ENSONIQ AudioPCI ES1370",
-        0,
-        0,
-        { .init_pci = es1370_init }
-    },
-#endif
-
-#endif /* HAS_AUDIO_CHOICE */
-
-    { NULL, NULL, 0, 0, { NULL } }
-};
-
-static void select_soundhw (const char *optarg)
-{
-    struct soundhw *c;
-
-    if (*optarg == '?') {
-    show_valid_cards:
-
-        printf ("Valid sound card names (comma separated):\n");
-        for (c = soundhw; c->name; ++c) {
-            printf ("%-11s %s\n", c->name, c->descr);
-        }
-        printf ("\n-soundhw all will enable all of the above\n");
-        if (*optarg != '?') {
-            PANIC("Unknown sound card name: %s", optarg);
-        } else {
-            QEMU_EXIT(0);
-        }
-    }
-    else {
-        size_t l;
-        const char *p;
-        char *e;
-        int bad_card = 0;
-
-        if (!strcmp (optarg, "all")) {
-            for (c = soundhw; c->name; ++c) {
-                c->enabled = 1;
-            }
-            return;
-        }
-
-        p = optarg;
-        while (*p) {
-            e = strchr (p, ',');
-            l = !e ? strlen (p) : (size_t) (e - p);
-
-            for (c = soundhw; c->name; ++c) {
-                if (!strncmp (c->name, p, l)) {
-                    c->enabled = 1;
-                    break;
-                }
-            }
-
-            if (!c->name) {
-#ifndef CONFIG_ANDROID
-                if (l > 80) {
-                    fprintf (stderr,
-                             "Unknown sound card name (too big to show)\n");
-                } else {
-                    fprintf (stderr, "Unknown sound card name `%.*s'\n",
-                             (int) l, p);
-                }
-#endif  // !CONFIG_ANDROID
-	            bad_card = 1;
-            }
-            p += l + (e != NULL);
-        }
-
-        if (bad_card)
-            goto show_valid_cards;
-    }
-}
-#endif
-
 static void select_vgahw (const char *p)
 {
     const char *opts;
@@ -3465,66 +2144,8 @@
     }
 }
 
-#ifdef _WIN32
-static BOOL WINAPI qemu_ctrl_handler(DWORD type)
-{
-    exit(STATUS_CONTROL_C_EXIT);
-    return TRUE;
-}
-#endif
-
-int qemu_uuid_parse(const char *str, uint8_t *uuid)
-{
-    int ret;
-
-    if(strlen(str) != 36)
-        return -1;
-
-    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
-            &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
-            &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], &uuid[15]);
-
-    if(ret != 16)
-        return -1;
-
-#ifdef TARGET_I386
-    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
-#endif
-
-    return 0;
-}
-
 #define MAX_NET_CLIENTS 32
 
-#ifndef _WIN32
-
-static void termsig_handler(int signal)
-{
-    qemu_system_shutdown_request();
-}
-
-static void sigchld_handler(int signal)
-{
-    waitpid(-1, NULL, WNOHANG);
-}
-
-static void sighandler_setup(void)
-{
-    struct sigaction act;
-
-    memset(&act, 0, sizeof(act));
-    act.sa_handler = termsig_handler;
-    sigaction(SIGINT,  &act, NULL);
-    sigaction(SIGHUP,  &act, NULL);
-    sigaction(SIGTERM, &act, NULL);
-
-    act.sa_handler = sigchld_handler;
-    act.sa_flags = SA_NOCLDSTOP;
-    sigaction(SIGCHLD, &act, NULL);
-}
-
-#endif
-
 #ifdef _WIN32
 /* Look for support files in the same directory as the executable.  */
 static char *find_datadir(const char *argv0)
@@ -3920,18 +2541,9 @@
     const char *cpu_model;
     const char *usb_devices[MAX_USB_CMDLINE];
     int usb_devices_index;
-#ifndef _WIN32
-    int fds[2];
-#endif
     int tb_size;
     const char *pid_file = NULL;
     const char *incoming = NULL;
-#ifndef _WIN32
-    int fd = 0;
-    struct passwd *pwd = NULL;
-    const char *chroot_dir = NULL;
-    const char *run_as = NULL;
-#endif
     CPUState *env;
     int show_vnc_port = 0;
     IniFile*  hw_ini = NULL;
@@ -3950,35 +2562,7 @@
     qemu_cache_utils_init(envp);
 
     QLIST_INIT (&vm_change_state_head);
-#ifndef _WIN32
-    {
-        struct sigaction act;
-        sigfillset(&act.sa_mask);
-        act.sa_flags = 0;
-        act.sa_handler = SIG_IGN;
-        sigaction(SIGPIPE, &act, NULL);
-    }
-#else
-    SetConsoleCtrlHandler(qemu_ctrl_handler, TRUE);
-    /* Note: cpu_interrupt() is currently not SMP safe, so we force
-       QEMU to run on a single CPU */
-    {
-        HANDLE h;
-        DWORD mask, smask;
-        int i;
-        h = GetCurrentProcess();
-        if (GetProcessAffinityMask(h, &mask, &smask)) {
-            for(i = 0; i < 32; i++) {
-                if (mask & (1 << i))
-                    break;
-            }
-            if (i != 32) {
-                mask = 1 << i;
-                SetProcessAffinityMask(h, mask);
-            }
-        }
-    }
-#endif
+    os_setup_early_signal_handling();
 
     module_call_init(MODULE_INIT_MACHINE);
     machine = find_default_machine();
@@ -4268,13 +2852,6 @@
             case QEMU_OPTION_bootp:
                 bootp_filename = optarg;
                 break;
-#if 0  /* ANDROID disabled */
-#ifndef _WIN32
-            case QEMU_OPTION_smb:
-		net_slirp_smb(optarg);
-                break;
-#endif
-#endif /* ANDROID */
             case QEMU_OPTION_redir:
                 net_slirp_redir(NULL, optarg, NULL);
                 break;
@@ -4363,9 +2940,6 @@
                 singlestep = 1;
                 break;
             case QEMU_OPTION_S:
-#if 0  /* ANDROID */
-                PANIC("Sorry, stopped launch is not supported in the Android emulator" );
-#endif
                 autostart = 0;
                 break;
 #ifndef _WIN32
@@ -4503,17 +3077,7 @@
                 break;
 #endif
             case QEMU_OPTION_smbios:
-                if(smbios_entry_add(optarg) < 0) {
-                    PANIC("Wrong smbios provided");
-                }
-                break;
-#endif
-#ifdef CONFIG_KQEMU
-            case QEMU_OPTION_no_kqemu:
-                kqemu_allowed = 0;
-                break;
-            case QEMU_OPTION_kernel_kqemu:
-                kqemu_allowed = 2;
+                do_smbios_option(optarg);
                 break;
 #endif
 #ifdef CONFIG_KVM
@@ -4570,11 +3134,6 @@
                     PANIC("Fail to parse UUID string. Wrong format.");
                 }
                 break;
-#ifndef _WIN32
-	    case QEMU_OPTION_daemonize:
-		daemonize = 1;
-		break;
-#endif
 	    case QEMU_OPTION_option_rom:
 		if (nb_option_roms >= MAX_OPTION_ROMS) {
 		    PANIC("Too many option ROMs");
@@ -4740,14 +3299,6 @@
             case QEMU_OPTION_incoming:
                 incoming = optarg;
                 break;
-#ifndef _WIN32
-            case QEMU_OPTION_chroot:
-                chroot_dir = optarg;
-                break;
-            case QEMU_OPTION_runas:
-                run_as = optarg;
-                break;
-#endif
 #ifdef CONFIG_XEN
             case QEMU_OPTION_xen_domid:
                 xen_domid = atoi(optarg);
@@ -4914,6 +3465,8 @@
             case QEMU_OPTION_snapshot_no_time_update:
                 android_snapshot_update_time = 0;
                 break;
+            default:
+                os_parse_cmd_args(popt->index, optarg);
             }
         }
     }
@@ -5275,6 +3828,11 @@
     serial_hds_add_at(1, "android-qemud");
     stralloc_add_str(kernel_params, " android.qemud=ttyS1");
 
+    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
+        os_pidfile_error();
+        exit(1);
+    }
+
 #if defined(CONFIG_KVM)
     if (kvm_allowed < 0) {
         kvm_allowed = kvm_check_allowed();
@@ -5304,68 +3862,6 @@
            monitor_device = "stdio";
     }
 
-#ifndef _WIN32
-    if (daemonize) {
-	pid_t pid;
-
-	if (pipe(fds) == -1) {
-	    PANIC("Unable to aquire pidfile");
-	}
-
-	pid = fork();
-	if (pid > 0) {
-	    uint8_t status;
-	    ssize_t len;
-
-	    close(fds[1]);
-
-	again:
-            len = read(fds[0], &status, 1);
-            if (len == -1 && (errno == EINTR))
-                goto again;
-
-            if (len != 1) {
-                PANIC("Error when aquiring pidfile");
-            }
-            else if (status == 1) {
-                PANIC("Could not acquire pidfile");
-            } else {
-                QEMU_EXIT(0);
-            }
-	} else if (pid < 0) {
-        PANIC("Unable to daemonize");
-	}
-
-	setsid();
-
-	pid = fork();
-	if (pid > 0) {
-	    QEMU_EXIT(0);
-	} else if (pid < 0) {
-         PANIC("Could not acquire pid file");
-	}
-
-	umask(027);
-
-        signal(SIGTSTP, SIG_IGN);
-        signal(SIGTTOU, SIG_IGN);
-        signal(SIGTTIN, SIG_IGN);
-    }
-
-    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
-        if (daemonize) {
-            uint8_t status = 1;
-            int ret;
-            do {
-                ret = write(fds[1], &status, 1);
-            } while (ret < 0 && errno == EINTR);
-            PANIC("Could not acquire pid file");
-        } else {
-            PANIC("Could not acquire pid file");
-        }
-    }
-#endif
-
 #ifdef CONFIG_KQEMU
     if (smp_cpus > 1)
         kqemu_allowed = 0;
@@ -5396,7 +3892,7 @@
     if (!boot_devices[0]) {
         boot_devices = "cad";
     }
-    setvbuf(stdout, NULL, _IOLBF, 0);
+    os_set_line_buffering();
 
     if (init_timer_alarm() < 0) {
         PANIC("could not initialize alarm timer");
@@ -5511,10 +4007,8 @@
     //register_savevm("timer", 0, 2, timer_save, timer_load, &timers_state);
     register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
 
-#ifndef _WIN32
     /* must be after terminal init, SDL library changes signal handlers */
-    sighandler_setup();
-#endif
+    os_setup_signal_handling();
 
     /* Maintain compatibility with multiple stdio monitors */
     if (!strcmp(monitor_device,"stdio")) {
@@ -5626,13 +4120,6 @@
 #ifdef CONFIG_TRACE
     if (trace_filename) {
         trace_init(trace_filename);
-#if 0
-        // We don't need the dcache code until we can get load and store tracing
-        // working again.
-        dcache_init(dcache_size, dcache_ways, dcache_line_size,
-                    dcache_replace_policy, dcache_load_miss_penalty,
-                    dcache_store_miss_penalty);
-#endif
         fprintf(stderr, "-- When done tracing, exit the emulator. --\n");
     }
 #endif
@@ -5791,15 +4278,15 @@
     dcl = ds->listeners;
     while (dcl != NULL) {
         if (dcl->dpy_refresh != NULL) {
-            ds->gui_timer = qemu_new_timer(rt_clock, gui_update, ds);
-            qemu_mod_timer(ds->gui_timer, qemu_get_clock(rt_clock));
+            ds->gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds);
+            qemu_mod_timer(ds->gui_timer, qemu_get_clock_ms(rt_clock));
         }
         dcl = dcl->next;
     }
 
     if (display_type == DT_NOGRAPHIC || display_type == DT_VNC) {
-        nographic_timer = qemu_new_timer(rt_clock, nographic_update, NULL);
-        qemu_mod_timer(nographic_timer, qemu_get_clock(rt_clock));
+        nographic_timer = qemu_new_timer_ms(rt_clock, nographic_update, NULL);
+        qemu_mod_timer(nographic_timer, qemu_get_clock_ms(rt_clock));
     }
 
     text_consoles_set_display(ds);
@@ -5857,66 +4344,7 @@
     if (autostart)
         vm_start();
 
-#ifndef _WIN32
-    if (daemonize) {
-	uint8_t status = 0;
-	ssize_t len;
-
-    again1:
-	len = write(fds[1], &status, 1);
-	if (len == -1 && (errno == EINTR))
-	    goto again1;
-
-	if (len != 1) {
-	    PANIC("Unable to daemonize");
-	}
-
-        if (chdir("/")) {
-            perror("not able to chdir to /");
-            PANIC("not able to chdir to /");
-        }
-	TFR(fd = open("/dev/null", O_RDWR));
-	if (fd == -1)
-	    PANIC("open(\"/dev/null\") failed: %s", errno_str);
-    }
-
-    if (run_as) {
-        pwd = getpwnam(run_as);
-        if (!pwd) {
-            PANIC("User \"%s\" doesn't exist", run_as);
-        }
-    }
-
-    if (chroot_dir) {
-        if (chroot(chroot_dir) < 0) {
-            PANIC("chroot failed");
-        }
-        if (chdir("/")) {
-            perror("not able to chdir to /");
-            PANIC("not able to chdir to /");
-        }
-    }
-
-    if (run_as) {
-        if (setgid(pwd->pw_gid) < 0) {
-            PANIC("Failed to setgid(%d)", pwd->pw_gid);
-        }
-        if (setuid(pwd->pw_uid) < 0) {
-            PANIC("Failed to setuid(%d)", pwd->pw_uid);
-        }
-        if (setuid(0) != -1) {
-            PANIC("Dropping privileges failed");
-        }
-    }
-
-    if (daemonize) {
-        dup2(fd, 0);
-        dup2(fd, 1);
-        dup2(fd, 2);
-
-        close(fd);
-    }
-#endif
+    os_setup_post();
 
 #ifdef CONFIG_ANDROID
     // This will notify the UI that the core is successfuly initialized
diff --git a/vl.c b/vl.c
index 97c3aa3..3102679 100644
--- a/vl.c
+++ b/vl.c
@@ -34,7 +34,6 @@
 
 #ifndef _WIN32
 #include <libgen.h>
-#include <pwd.h>
 #include <sys/times.h>
 #include <sys/wait.h>
 #include <termios.h>
@@ -112,6 +111,9 @@
 #define memalign(align, size) malloc(size)
 #endif
 
+#include "cpus.h"
+#include "arch_init.h"
+
 #ifdef CONFIG_SDL
 #ifdef __APPLE__
 #include <SDL.h>
@@ -126,6 +128,7 @@
 #endif /* CONFIG_SDL */
 
 #ifdef CONFIG_COCOA
+int qemu_main(int argc, char **argv, char **envp);
 #undef main
 #define main qemu_main
 #endif /* CONFIG_COCOA */
@@ -135,7 +138,6 @@
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
-#include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
 #include "hw/bt.h"
@@ -192,10 +194,13 @@
 DriveInfo drives_table[MAX_DRIVES+1];
 int nb_drives;
 enum vga_retrace_method vga_retrace_method = VGA_RETRACE_DUMB;
-static DisplayState *display_state;
 DisplayType display_type = DT_DEFAULT;
 const char* keyboard_layout = NULL;
 ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 0; /* force preallocation of physical target memory */
+#endif
 int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int vm_running;
@@ -207,15 +212,6 @@
 int vmsvga_enabled = 0;
 int xenfb_enabled = 0;
 QEMUClock *rtc_clock;
-#ifdef TARGET_SPARC
-int graphic_width = 1024;
-int graphic_height = 768;
-int graphic_depth = 8;
-#else
-int graphic_width = 800;
-int graphic_height = 600;
-int graphic_depth = 15;
-#endif
 static int full_screen = 0;
 #ifdef CONFIG_SDL
 static int no_frame = 0;
@@ -240,9 +236,6 @@
 int no_shutdown = 0;
 int cursor_hide = 1;
 int graphic_rotate = 0;
-#ifndef _WIN32
-int daemonize = 0;
-#endif
 WatchdogTimerModel *watchdog = NULL;
 int watchdog_action = WDT_RESET;
 const char *option_rom[MAX_OPTION_ROMS];
@@ -264,9 +257,6 @@
 uint64_t node_mem[MAX_NODES];
 uint64_t node_cpumask[MAX_NODES];
 
-static CPUState *cur_cpu;
-static CPUState *next_cpu;
-static int timer_alarm_pending = 1;
 static QEMUTimer *nographic_timer;
 
 uint8_t qemu_uuid[16];
@@ -354,28 +344,6 @@
 }
 
 
-/***********************************************************/
-void hw_error(const char *fmt, ...)
-{
-    va_list ap;
-    CPUState *env;
-
-    va_start(ap, fmt);
-    fprintf(stderr, "qemu: hardware error: ");
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
-#ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
-#else
-        cpu_dump_state(env, stderr, fprintf, 0);
-#endif
-    }
-    va_end(ap);
-    abort();
-}
-
 /***************/
 /* ballooning */
 
@@ -402,33 +370,6 @@
 }
 
 /***********************************************************/
-/* real time host monotonic timer */
-
-/* compute with 96 bit intermediate result: (a*b)/c */
-uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
-{
-    union {
-        uint64_t ll;
-        struct {
-#ifdef HOST_WORDS_BIGENDIAN
-            uint32_t high, low;
-#else
-            uint32_t low, high;
-#endif
-        } l;
-    } u, res;
-    uint64_t rl, rh;
-
-    u.ll = a;
-    rl = (uint64_t)u.l.low * (uint64_t)b;
-    rh = (uint64_t)u.l.high * (uint64_t)b;
-    rh += (rl >> 32);
-    res.l.high = rh / c;
-    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
-    return res.ll;
-}
-
-/***********************************************************/
 /* host time/date access */
 void qemu_get_timedate(struct tm *tm, int offset)
 {
@@ -1550,507 +1491,6 @@
 }
 
 /***********************************************************/
-/* register display */
-
-struct DisplayAllocator default_allocator = {
-    defaultallocator_create_displaysurface,
-    defaultallocator_resize_displaysurface,
-    defaultallocator_free_displaysurface
-};
-
-void register_displaystate(DisplayState *ds)
-{
-    DisplayState **s;
-    s = &display_state;
-    while (*s != NULL)
-        s = &(*s)->next;
-    ds->next = NULL;
-    *s = ds;
-}
-
-DisplayState *get_displaystate(void)
-{
-    return display_state;
-}
-
-DisplayAllocator *register_displayallocator(DisplayState *ds, DisplayAllocator *da)
-{
-    if(ds->allocator ==  &default_allocator) ds->allocator = da;
-    return ds->allocator;
-}
-
-/* dumb display */
-
-static void dumb_display_init(void)
-{
-    DisplayState *ds = qemu_mallocz(sizeof(DisplayState));
-    ds->allocator = &default_allocator;
-    ds->surface = qemu_create_displaysurface(ds, 640, 480);
-    register_displaystate(ds);
-}
-
-/***********************************************************/
-/* I/O handling */
-
-typedef struct IOHandlerRecord {
-    int fd;
-    IOCanReadHandler *fd_read_poll;
-    IOHandler *fd_read;
-    IOHandler *fd_write;
-    int deleted;
-    void *opaque;
-    /* temporary data */
-    struct pollfd *ufd;
-    QLIST_ENTRY(IOHandlerRecord) next;
-} IOHandlerRecord;
-
-static QLIST_HEAD(, IOHandlerRecord) io_handlers =
-    QLIST_HEAD_INITIALIZER(io_handlers);
-
-
-/* XXX: fd_read_poll should be suppressed, but an API change is
-   necessary in the character devices to suppress fd_can_read(). */
-int qemu_set_fd_handler2(int fd,
-                         IOCanReadHandler *fd_read_poll,
-                         IOHandler *fd_read,
-                         IOHandler *fd_write,
-                         void *opaque)
-{
-    IOHandlerRecord *ioh;
-
-    if (!fd_read && !fd_write) {
-        QLIST_FOREACH(ioh, &io_handlers, next) {
-            if (ioh->fd == fd) {
-                ioh->deleted = 1;
-                break;
-            }
-        }
-    } else {
-        QLIST_FOREACH(ioh, &io_handlers, next) {
-            if (ioh->fd == fd)
-                goto found;
-        }
-        ioh = qemu_mallocz(sizeof(IOHandlerRecord));
-        QLIST_INSERT_HEAD(&io_handlers, ioh, next);
-    found:
-        ioh->fd = fd;
-        ioh->fd_read_poll = fd_read_poll;
-        ioh->fd_read = fd_read;
-        ioh->fd_write = fd_write;
-        ioh->opaque = opaque;
-        ioh->deleted = 0;
-    }
-    return 0;
-}
-
-int qemu_set_fd_handler(int fd,
-                        IOHandler *fd_read,
-                        IOHandler *fd_write,
-                        void *opaque)
-{
-    return qemu_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque);
-}
-
-#ifdef _WIN32
-/***********************************************************/
-/* Polling handling */
-
-typedef struct PollingEntry {
-    PollingFunc *func;
-    void *opaque;
-    struct PollingEntry *next;
-} PollingEntry;
-
-static PollingEntry *first_polling_entry;
-
-int qemu_add_polling_cb(PollingFunc *func, void *opaque)
-{
-    PollingEntry **ppe, *pe;
-    pe = qemu_mallocz(sizeof(PollingEntry));
-    pe->func = func;
-    pe->opaque = opaque;
-    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
-    *ppe = pe;
-    return 0;
-}
-
-void qemu_del_polling_cb(PollingFunc *func, void *opaque)
-{
-    PollingEntry **ppe, *pe;
-    for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
-        pe = *ppe;
-        if (pe->func == func && pe->opaque == opaque) {
-            *ppe = pe->next;
-            qemu_free(pe);
-            break;
-        }
-    }
-}
-
-/***********************************************************/
-/* Wait objects support */
-typedef struct WaitObjects {
-    int num;
-    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
-    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
-    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
-} WaitObjects;
-
-static WaitObjects wait_objects = {0};
-
-int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
-{
-    WaitObjects *w = &wait_objects;
-
-    if (w->num >= MAXIMUM_WAIT_OBJECTS)
-        return -1;
-    w->events[w->num] = handle;
-    w->func[w->num] = func;
-    w->opaque[w->num] = opaque;
-    w->num++;
-    return 0;
-}
-
-void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
-{
-    int i, found;
-    WaitObjects *w = &wait_objects;
-
-    found = 0;
-    for (i = 0; i < w->num; i++) {
-        if (w->events[i] == handle)
-            found = 1;
-        if (found) {
-            w->events[i] = w->events[i + 1];
-            w->func[i] = w->func[i + 1];
-            w->opaque[i] = w->opaque[i + 1];
-        }
-    }
-    if (found)
-        w->num--;
-}
-#endif
-
-/***********************************************************/
-/* ram save/restore */
-
-static int ram_get_page(QEMUFile *f, uint8_t *buf, int len)
-{
-    int v;
-
-    v = qemu_get_byte(f);
-    switch(v) {
-    case 0:
-        if (qemu_get_buffer(f, buf, len) != len)
-            return -EIO;
-        break;
-    case 1:
-        v = qemu_get_byte(f);
-        memset(buf, v, len);
-        break;
-    default:
-        return -EINVAL;
-    }
-
-    if (qemu_file_has_error(f))
-        return -EIO;
-
-    return 0;
-}
-
-static int ram_load_v1(QEMUFile *f, void *opaque)
-{
-    int ret;
-    ram_addr_t i;
-
-    if (qemu_get_be32(f) != last_ram_offset)
-        return -EINVAL;
-    for(i = 0; i < last_ram_offset; i+= TARGET_PAGE_SIZE) {
-        ret = ram_get_page(f, qemu_get_ram_ptr(i), TARGET_PAGE_SIZE);
-        if (ret)
-            return ret;
-    }
-    return 0;
-}
-
-#define BDRV_HASH_BLOCK_SIZE 1024
-#define IOBUF_SIZE 4096
-#define RAM_CBLOCK_MAGIC 0xfabe
-
-typedef struct RamDecompressState {
-    z_stream zstream;
-    QEMUFile *f;
-    uint8_t buf[IOBUF_SIZE];
-} RamDecompressState;
-
-static int ram_decompress_open(RamDecompressState *s, QEMUFile *f)
-{
-    int ret;
-    memset(s, 0, sizeof(*s));
-    s->f = f;
-    ret = inflateInit(&s->zstream);
-    if (ret != Z_OK)
-        return -1;
-    return 0;
-}
-
-static int ram_decompress_buf(RamDecompressState *s, uint8_t *buf, int len)
-{
-    int ret, clen;
-
-    s->zstream.avail_out = len;
-    s->zstream.next_out = buf;
-    while (s->zstream.avail_out > 0) {
-        if (s->zstream.avail_in == 0) {
-            if (qemu_get_be16(s->f) != RAM_CBLOCK_MAGIC)
-                return -1;
-            clen = qemu_get_be16(s->f);
-            if (clen > IOBUF_SIZE)
-                return -1;
-            qemu_get_buffer(s->f, s->buf, clen);
-            s->zstream.avail_in = clen;
-            s->zstream.next_in = s->buf;
-        }
-        ret = inflate(&s->zstream, Z_PARTIAL_FLUSH);
-        if (ret != Z_OK && ret != Z_STREAM_END) {
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static void ram_decompress_close(RamDecompressState *s)
-{
-    inflateEnd(&s->zstream);
-}
-
-#define RAM_SAVE_FLAG_FULL	0x01
-#define RAM_SAVE_FLAG_COMPRESS	0x02
-#define RAM_SAVE_FLAG_MEM_SIZE	0x04
-#define RAM_SAVE_FLAG_PAGE	0x08
-#define RAM_SAVE_FLAG_EOS	0x10
-
-static int is_dup_page(uint8_t *page, uint8_t ch)
-{
-    uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
-    uint32_t *array = (uint32_t *)page;
-    int i;
-
-    for (i = 0; i < (TARGET_PAGE_SIZE / 4); i++) {
-        if (array[i] != val)
-            return 0;
-    }
-
-    return 1;
-}
-
-static int ram_save_block(QEMUFile *f)
-{
-    static ram_addr_t current_addr = 0;
-    ram_addr_t saved_addr = current_addr;
-    ram_addr_t addr = 0;
-    int found = 0;
-
-    while (addr < last_ram_offset) {
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
-            uint8_t *p;
-
-            cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
-
-            p = qemu_get_ram_ptr(current_addr);
-
-            if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-            } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-            }
-
-            found = 1;
-            break;
-        }
-        addr += TARGET_PAGE_SIZE;
-        current_addr = (saved_addr + addr) % last_ram_offset;
-    }
-
-    return found;
-}
-
-static uint64_t bytes_transferred;
-
-static ram_addr_t ram_save_remaining(void)
-{
-    ram_addr_t addr;
-    ram_addr_t count = 0;
-
-    for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-            count++;
-    }
-
-    return count;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
-    return last_ram_offset;
-}
-
-static int ram_save_live(QEMUFile *f, int stage, void *opaque)
-{
-    ram_addr_t addr;
-    uint64_t bytes_transferred_last;
-    double bwidth = 0;
-    uint64_t expected_time = 0;
-
-    if (cpu_physical_sync_dirty_bitmap(0, TARGET_PHYS_ADDR_MAX) != 0) {
-        qemu_file_set_error(f);
-        return 0;
-    }
-
-    if (stage == 1) {
-        bytes_transferred = 0;
-
-        /* Make sure all dirty bits are set */
-        for (addr = 0; addr < last_ram_offset; addr += TARGET_PAGE_SIZE) {
-            if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-                cpu_physical_memory_set_dirty(addr);
-        }
-
-        /* Enable dirty memory tracking */
-        cpu_physical_memory_set_dirty_tracking(1);
-
-        qemu_put_be64(f, last_ram_offset | RAM_SAVE_FLAG_MEM_SIZE);
-    }
-
-    bytes_transferred_last = bytes_transferred;
-    bwidth = qemu_get_clock_ns(rt_clock);
-
-    while (!qemu_file_rate_limit(f)) {
-        int ret;
-
-        ret = ram_save_block(f);
-        bytes_transferred += ret * TARGET_PAGE_SIZE;
-        if (ret == 0) /* no more blocks */
-            break;
-    }
-
-    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
-    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
-
-    /* if we haven't transferred anything this round, force expected_time to a
-     * a very high value, but without crashing */
-    if (bwidth == 0)
-        bwidth = 0.000001;
-
-    /* try transferring iterative blocks of memory */
-    if (stage == 3) {
-        /* flush all remaining blocks regardless of rate limiting */
-        while (ram_save_block(f) != 0) {
-            bytes_transferred += TARGET_PAGE_SIZE;
-        }
-        cpu_physical_memory_set_dirty_tracking(0);
-    }
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-    return (stage == 2) && (expected_time <= migrate_max_downtime());
-}
-
-static int ram_load_dead(QEMUFile *f, void *opaque)
-{
-    RamDecompressState s1, *s = &s1;
-    uint8_t buf[10];
-    ram_addr_t i;
-
-    if (ram_decompress_open(s, f) < 0)
-        return -EINVAL;
-    for(i = 0; i < last_ram_offset; i+= BDRV_HASH_BLOCK_SIZE) {
-        if (ram_decompress_buf(s, buf, 1) < 0) {
-            fprintf(stderr, "Error while reading ram block header\n");
-            goto error;
-        }
-        if (buf[0] == 0) {
-            if (ram_decompress_buf(s, qemu_get_ram_ptr(i),
-                                   BDRV_HASH_BLOCK_SIZE) < 0) {
-                fprintf(stderr, "Error while reading ram block address=0x%08" PRIx64, (uint64_t)i);
-                goto error;
-            }
-        } else {
-        error:
-            printf("Error block header\n");
-            return -EINVAL;
-        }
-    }
-    ram_decompress_close(s);
-
-    return 0;
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
-    ram_addr_t addr;
-    int flags;
-
-    if (version_id == 1)
-        return ram_load_v1(f, opaque);
-
-    if (version_id == 2) {
-        if (qemu_get_be32(f) != last_ram_offset)
-            return -EINVAL;
-        return ram_load_dead(f, opaque);
-    }
-
-    if (version_id != 3)
-        return -EINVAL;
-
-    do {
-        addr = qemu_get_be64(f);
-
-        flags = addr & ~TARGET_PAGE_MASK;
-        addr &= TARGET_PAGE_MASK;
-
-        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (addr != last_ram_offset)
-                return -EINVAL;
-        }
-
-        if (flags & RAM_SAVE_FLAG_FULL) {
-            if (ram_load_dead(f, opaque) < 0)
-                return -EINVAL;
-        }
-
-        if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            uint8_t ch = qemu_get_byte(f);
-            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
-        } else if (flags & RAM_SAVE_FLAG_PAGE)
-            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
-    } while (!(flags & RAM_SAVE_FLAG_EOS));
-
-    return 0;
-}
-
-void qemu_service_io(void)
-{
-    qemu_notify_event();
-}
-
-/***********************************************************/
 /* machine registration */
 
 static QEMUMachine *first_machine = NULL;
@@ -2107,14 +1547,14 @@
             interval = dcl->gui_timer_interval;
         dcl = dcl->next;
     }
-    qemu_mod_timer(ds->gui_timer, interval + qemu_get_clock(rt_clock));
+    qemu_mod_timer(ds->gui_timer, interval + qemu_get_clock_ms(rt_clock));
 }
 
 static void nographic_update(void *opaque)
 {
     uint64_t interval = GUI_REFRESH_INTERVAL;
 
-    qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
+    qemu_mod_timer(nographic_timer, interval + qemu_get_clock_ms(rt_clock));
 }
 
 struct vm_change_state_entry {
@@ -2144,7 +1584,7 @@
     qemu_free (e);
 }
 
-static void vm_state_notify(int running, int reason)
+void vm_state_notify(int running, int reason)
 {
     VMChangeStateEntry *e;
 
@@ -2153,9 +1593,6 @@
     }
 }
 
-static void resume_all_vcpus(void);
-static void pause_all_vcpus(void);
-
 void vm_start(void)
 {
     if (!vm_running) {
@@ -2178,9 +1615,10 @@
 static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
     QTAILQ_HEAD_INITIALIZER(reset_handlers);
 static int reset_requested;
-static int shutdown_requested;
+static int shutdown_requested, shutdown_signal = -1;
+static pid_t shutdown_pid;
 static int powerdown_requested;
-static int debug_requested;
+int debug_requested;
 static int vmstop_requested;
 
 int qemu_shutdown_requested(void)
@@ -2218,38 +1656,6 @@
     return r;
 }
 
-void qemu_register_reset(QEMUResetHandler *func, void *opaque)
-{
-    QEMUResetEntry *re = qemu_mallocz(sizeof(QEMUResetEntry));
-
-    re->func = func;
-    re->opaque = opaque;
-    QTAILQ_INSERT_TAIL(&reset_handlers, re, entry);
-}
-
-void qemu_unregister_reset(QEMUResetHandler *func, void *opaque)
-{
-    QEMUResetEntry *re;
-
-    QTAILQ_FOREACH(re, &reset_handlers, entry) {
-        if (re->func == func && re->opaque == opaque) {
-            QTAILQ_REMOVE(&reset_handlers, re, entry);
-            qemu_free(re);
-            return;
-        }
-    }
-}
-
-static void do_vm_stop(int reason)
-{
-    if (vm_running) {
-        cpu_disable_ticks();
-        vm_running = 0;
-        pause_all_vcpus();
-        vm_state_notify(0, reason);
-    }
-}
-
 void qemu_register_reset(QEMUResetHandler *func, int order, void *opaque)
 {
     QEMUResetEntry **pre, *re;
@@ -2286,6 +1692,13 @@
     qemu_notify_event();
 }
 
+void qemu_system_killed(int signal, pid_t pid)
+{
+    shutdown_signal = signal;
+    shutdown_pid = pid;
+    qemu_system_shutdown_request();
+}
+
 void qemu_system_shutdown_request(void)
 {
     shutdown_requested = 1;
@@ -2306,572 +1719,36 @@
 }
 #endif
 
-#ifndef _WIN32
-static int io_thread_fd = -1;
-
-static void qemu_event_increment(void)
-{
-    static const char byte = 0;
-
-    if (io_thread_fd == -1)
-        return;
-
-    write(io_thread_fd, &byte, sizeof(byte));
-}
-
-static void qemu_event_read(void *opaque)
-{
-    int fd = (unsigned long)opaque;
-    ssize_t len;
-
-    /* Drain the notify pipe */
-    do {
-        char buffer[512];
-        len = read(fd, buffer, sizeof(buffer));
-    } while ((len == -1 && errno == EINTR) || len > 0);
-}
-
-static int qemu_event_init(void)
-{
-    int err;
-    int fds[2];
-
-    err = pipe(fds);
-    if (err == -1)
-        return -errno;
-
-    err = fcntl_setfl(fds[0], O_NONBLOCK);
-    if (err < 0)
-        goto fail;
-
-    err = fcntl_setfl(fds[1], O_NONBLOCK);
-    if (err < 0)
-        goto fail;
-
-    qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
-                         (void *)(unsigned long)fds[0]);
-
-    io_thread_fd = fds[1];
-    return 0;
-
-fail:
-    close(fds[0]);
-    close(fds[1]);
-    return err;
-}
-#else
-HANDLE qemu_event_handle;
-
-static void dummy_event_handler(void *opaque)
-{
-}
-
-static int qemu_event_init(void)
-{
-    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
-    if (!qemu_event_handle) {
-        perror("Failed CreateEvent");
-        return -1;
-    }
-    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
-    return 0;
-}
-
-static void qemu_event_increment(void)
-{
-    SetEvent(qemu_event_handle);
-}
-#endif
-
-static int cpu_can_run(CPUState *env)
-{
-    if (env->stop)
-        return 0;
-    if (env->stopped)
-        return 0;
-    if (!vm_running)
-        return 0;
-    return 1;
-}
-
-#ifndef CONFIG_IOTHREAD
-static int qemu_init_main_loop(void)
-{
-    return qemu_event_init();
-}
-
-void qemu_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-
-    if (kvm_enabled())
-        kvm_init_vcpu(env);
-    return;
-}
-
-int qemu_cpu_self(void *env)
-{
-    return 1;
-}
-
-static void resume_all_vcpus(void)
-{
-}
-
-static void pause_all_vcpus(void)
-{
-}
-
-void qemu_cpu_kick(void *env)
-{
-    return;
-}
-
-void qemu_notify_event(void)
-{
-    CPUState *env = cpu_single_env;
-
-    if (env) {
-        cpu_exit(env);
-     }
-}
-
-#define qemu_mutex_lock_iothread() do { } while (0)
-#define qemu_mutex_unlock_iothread() do { } while (0)
-
-void vm_stop(int reason)
-{
-    do_vm_stop(reason);
-}
-
-#else /* CONFIG_IOTHREAD */
-
-#include "qemu-thread.h"
-
-QemuMutex qemu_global_mutex;
-static QemuMutex qemu_fair_mutex;
-
-static QemuThread io_thread;
-
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
-
-static int qemu_system_ready;
-/* cpu creation */
-static QemuCond qemu_cpu_cond;
-/* system init */
-static QemuCond qemu_system_cond;
-static QemuCond qemu_pause_cond;
-
-static void block_io_signals(void);
-static void unblock_io_signals(void);
-static int tcg_has_work(void);
-
-static int qemu_init_main_loop(void)
-{
-    int ret;
-
-    ret = qemu_event_init();
-    if (ret)
-        return ret;
-
-    qemu_cond_init(&qemu_pause_cond);
-    qemu_mutex_init(&qemu_fair_mutex);
-    qemu_mutex_init(&qemu_global_mutex);
-    qemu_mutex_lock(&qemu_global_mutex);
-
-    unblock_io_signals();
-    qemu_thread_self(&io_thread);
-
-    return 0;
-}
-
-static void qemu_wait_io_event(CPUState *env)
-{
-    while (!tcg_has_work())
-        qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
-
-    qemu_mutex_unlock(&qemu_global_mutex);
-
-    /*
-     * Users of qemu_global_mutex can be starved, having no chance
-     * to acquire it since this path will get to it first.
-     * So use another lock to provide fairness.
-     */
-    qemu_mutex_lock(&qemu_fair_mutex);
-    qemu_mutex_unlock(&qemu_fair_mutex);
-
-    qemu_mutex_lock(&qemu_global_mutex);
-    if (env->stop) {
-        env->stop = 0;
-        env->stopped = 1;
-        qemu_cond_signal(&qemu_pause_cond);
-    }
-}
-
-static int qemu_cpu_exec(CPUState *env);
-
-static void *kvm_cpu_thread_fn(void *arg)
-{
-    CPUState *env = arg;
-
-    block_io_signals();
-    qemu_thread_self(env->thread);
-
-    /* signal CPU creation */
-    qemu_mutex_lock(&qemu_global_mutex);
-    env->created = 1;
-    qemu_cond_signal(&qemu_cpu_cond);
-
-    /* and wait for machine initialization */
-    while (!qemu_system_ready)
-        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
-
-    while (1) {
-        if (cpu_can_run(env))
-            qemu_cpu_exec(env);
-        qemu_wait_io_event(env);
-    }
-
-    return NULL;
-}
-
-static void tcg_cpu_exec(void);
-
-static void *tcg_cpu_thread_fn(void *arg)
-{
-    CPUState *env = arg;
-
-    block_io_signals();
-    qemu_thread_self(env->thread);
-
-    /* signal CPU creation */
-    qemu_mutex_lock(&qemu_global_mutex);
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        env->created = 1;
-    qemu_cond_signal(&qemu_cpu_cond);
-
-    /* and wait for machine initialization */
-    while (!qemu_system_ready)
-        qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
-
-    while (1) {
-        tcg_cpu_exec();
-        qemu_wait_io_event(cur_cpu);
-    }
-
-    return NULL;
-}
-
-void qemu_cpu_kick(void *_env)
-{
-    CPUState *env = _env;
-    qemu_cond_broadcast(env->halt_cond);
-    if (kvm_enabled())
-        qemu_thread_signal(env->thread, SIGUSR1);
-}
-
-int qemu_cpu_self(void *env)
-{
-    return (cpu_single_env != NULL);
-}
-
-static void cpu_signal(int sig)
-{
-    if (cpu_single_env)
-        cpu_exit(cpu_single_env);
-}
-
-static void block_io_signals(void)
-{
-    sigset_t set;
-    struct sigaction sigact;
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    sigaddset(&set, SIGIO);
-    sigaddset(&set, SIGALRM);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR1);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = cpu_signal;
-    sigaction(SIGUSR1, &sigact, NULL);
-}
-
-static void unblock_io_signals(void)
-{
-    sigset_t set;
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    sigaddset(&set, SIGIO);
-    sigaddset(&set, SIGALRM);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR1);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
-}
-
-static void qemu_signal_lock(unsigned int msecs)
-{
-    qemu_mutex_lock(&qemu_fair_mutex);
-
-    while (qemu_mutex_trylock(&qemu_global_mutex)) {
-        qemu_thread_signal(tcg_cpu_thread, SIGUSR1);
-        if (!qemu_mutex_timedlock(&qemu_global_mutex, msecs))
-            break;
-    }
-    qemu_mutex_unlock(&qemu_fair_mutex);
-}
-
-void qemu_mutex_lock_iothread(void)
-{
-    if (kvm_enabled()) {
-        qemu_mutex_lock(&qemu_fair_mutex);
-        qemu_mutex_lock(&qemu_global_mutex);
-        qemu_mutex_unlock(&qemu_fair_mutex);
-    } else
-        qemu_signal_lock(100);
-}
-
-void qemu_mutex_unlock_iothread(void)
-{
-    qemu_mutex_unlock(&qemu_global_mutex);
-}
-
-static int all_vcpus_paused(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        if (!penv->stopped)
-            return 0;
-        penv = (CPUState *)penv->next_cpu;
-    }
-
-    return 1;
-}
-
-static void pause_all_vcpus(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        penv->stop = 1;
-        qemu_thread_signal(penv->thread, SIGUSR1);
-        qemu_cpu_kick(penv);
-        penv = (CPUState *)penv->next_cpu;
-    }
-
-    while (!all_vcpus_paused()) {
-        qemu_cond_timedwait(&qemu_pause_cond, &qemu_global_mutex, 100);
-        penv = first_cpu;
-        while (penv) {
-            qemu_thread_signal(penv->thread, SIGUSR1);
-            penv = (CPUState *)penv->next_cpu;
-        }
-    }
-}
-
-static void resume_all_vcpus(void)
-{
-    CPUState *penv = first_cpu;
-
-    while (penv) {
-        penv->stop = 0;
-        penv->stopped = 0;
-        qemu_thread_signal(penv->thread, SIGUSR1);
-        qemu_cpu_kick(penv);
-        penv = (CPUState *)penv->next_cpu;
-    }
-}
-
-static void tcg_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-    /* share a single thread for all cpus with TCG */
-    if (!tcg_cpu_thread) {
-        env->thread = qemu_mallocz(sizeof(QemuThread));
-        env->halt_cond = qemu_mallocz(sizeof(QemuCond));
-        qemu_cond_init(env->halt_cond);
-        qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
-        while (env->created == 0)
-            qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
-        tcg_cpu_thread = env->thread;
-        tcg_halt_cond = env->halt_cond;
-    } else {
-        env->thread = tcg_cpu_thread;
-        env->halt_cond = tcg_halt_cond;
-    }
-}
-
-static void kvm_start_vcpu(CPUState *env)
-{
-    kvm_init_vcpu(env);
-    env->thread = qemu_mallocz(sizeof(QemuThread));
-    env->halt_cond = qemu_mallocz(sizeof(QemuCond));
-    qemu_cond_init(env->halt_cond);
-    qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
-    while (env->created == 0)
-        qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
-}
-
-void qemu_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-
-    if (kvm_enabled())
-        kvm_start_vcpu(env);
-    else
-        tcg_init_vcpu(env);
-}
-
-void qemu_notify_event(void)
-{
-    qemu_event_increment();
-}
-
-void vm_stop(int reason)
-{
-    QemuThread me;
-    qemu_thread_self(&me);
-
-    if (!qemu_thread_equal(&me, &io_thread)) {
-        qemu_system_vmstop_request(reason);
-        /*
-         * FIXME: should not return to device code in case
-         * vm_stop() has been requested.
-         */
-        if (cpu_single_env) {
-            cpu_exit(cpu_single_env);
-            cpu_single_env->stop = 1;
-        }
-        return;
-    }
-    do_vm_stop(reason);
-}
-
-#endif
-
-
-#ifdef _WIN32
-static void host_main_loop_wait(int *timeout)
-{
-    int ret, ret2, i;
-    PollingEntry *pe;
-
-
-    /* XXX: need to suppress polling by better using win32 events */
-    ret = 0;
-    for(pe = first_polling_entry; pe != NULL; pe = pe->next) {
-        ret |= pe->func(pe->opaque);
-    }
-    if (ret == 0) {
-        int err;
-        WaitObjects *w = &wait_objects;
-
-        ret = WaitForMultipleObjects(w->num, w->events, FALSE, *timeout);
-        if (WAIT_OBJECT_0 + 0 <= ret && ret <= WAIT_OBJECT_0 + w->num - 1) {
-            if (w->func[ret - WAIT_OBJECT_0])
-                w->func[ret - WAIT_OBJECT_0](w->opaque[ret - WAIT_OBJECT_0]);
-
-            /* Check for additional signaled events */
-            for(i = (ret - WAIT_OBJECT_0 + 1); i < w->num; i++) {
-
-                /* Check if event is signaled */
-                ret2 = WaitForSingleObject(w->events[i], 0);
-                if(ret2 == WAIT_OBJECT_0) {
-                    if (w->func[i])
-                        w->func[i](w->opaque[i]);
-                } else if (ret2 == WAIT_TIMEOUT) {
-                } else {
-                    err = GetLastError();
-                    fprintf(stderr, "WaitForSingleObject error %d %d\n", i, err);
-                }
-            }
-        } else if (ret == WAIT_TIMEOUT) {
-        } else {
-            err = GetLastError();
-            fprintf(stderr, "WaitForMultipleObjects error %d %d\n", ret, err);
-        }
-    }
-
-    *timeout = 0;
-}
-#else
-static void host_main_loop_wait(int *timeout)
-{
-}
-#endif
-
 void main_loop_wait(int timeout)
 {
-    IOHandlerRecord *ioh;
     fd_set rfds, wfds, xfds;
     int ret, nfds;
     struct timeval tv;
 
     qemu_bh_update_timeout(&timeout);
 
-    host_main_loop_wait(&timeout);
+    os_host_main_loop_wait(&timeout);
+
+
+    tv.tv_sec = timeout / 1000;
+    tv.tv_usec = (timeout % 1000) * 1000;
 
     /* poll any events */
+
     /* XXX: separate device handlers from system ones */
     nfds = -1;
     FD_ZERO(&rfds);
     FD_ZERO(&wfds);
     FD_ZERO(&xfds);
-    QLIST_FOREACH(ioh, &io_handlers, next) {
-        if (ioh->deleted)
-            continue;
-        if (ioh->fd_read &&
-            (!ioh->fd_read_poll ||
-             ioh->fd_read_poll(ioh->opaque) != 0)) {
-            FD_SET(ioh->fd, &rfds);
-            if (ioh->fd > nfds)
-                nfds = ioh->fd;
-        }
-        if (ioh->fd_write) {
-            FD_SET(ioh->fd, &wfds);
-            if (ioh->fd > nfds)
-                nfds = ioh->fd;
-        }
-    }
-
-    tv.tv_sec = timeout / 1000;
-    tv.tv_usec = (timeout % 1000) * 1000;
-
-#if defined(CONFIG_SLIRP)
+    qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds);
     if (slirp_is_inited()) {
         slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
     }
-#endif
+
     qemu_mutex_unlock_iothread();
     ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
     qemu_mutex_lock_iothread();
-    if (ret > 0) {
-        IOHandlerRecord *pioh;
-
-        QLIST_FOREACH_SAFE(ioh, &io_handlers, next, pioh) {
-            if (ioh->deleted) {
-                QLIST_REMOVE(ioh, next);
-                qemu_free(ioh);
-                continue;
-            }
-            if (ioh->fd_read && FD_ISSET(ioh->fd, &rfds)) {
-                ioh->fd_read(ioh->opaque);
-            }
-            if (ioh->fd_write && FD_ISSET(ioh->fd, &wfds)) {
-                ioh->fd_write(ioh->opaque);
-            }
-        }
-
-#if defined(CONFIG_SLIRP)
+    qemu_iohandler_poll(&rfds, &wfds, &xfds, ret);
     if (slirp_is_inited()) {
         if (ret < 0) {
             FD_ZERO(&rfds);
@@ -2880,7 +1757,6 @@
         }
         slirp_select_poll(&rfds, &wfds, &xfds);
     }
-#endif
 
     qemu_run_all_timers();
 
@@ -2890,97 +1766,6 @@
 
 }
 
-static int qemu_cpu_exec(CPUState *env)
-{
-    int ret;
-#ifdef CONFIG_PROFILER
-    int64_t ti;
-#endif
-
-#ifdef CONFIG_PROFILER
-    ti = profile_getclock();
-#endif
-    if (use_icount) {
-        int64_t count;
-        int decr;
-        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
-        env->icount_decr.u16.low = 0;
-        env->icount_extra = 0;
-        count = qemu_next_deadline();
-        count = (count + (1 << icount_time_shift) - 1)
-                >> icount_time_shift;
-        qemu_icount += count;
-        decr = (count > 0xffff) ? 0xffff : count;
-        count -= decr;
-        env->icount_decr.u16.low = decr;
-        env->icount_extra = count;
-    }
-    ret = cpu_exec(env);
-#ifdef CONFIG_PROFILER
-    qemu_time += profile_getclock() - ti;
-#endif
-    if (use_icount) {
-        /* Fold pending instructions back into the
-           instruction counter, and clear the interrupt flag.  */
-        qemu_icount -= (env->icount_decr.u16.low
-                        + env->icount_extra);
-        env->icount_decr.u32 = 0;
-        env->icount_extra = 0;
-    }
-    return ret;
-}
-
-static void tcg_cpu_exec(void)
-{
-    int ret = 0;
-
-    if (next_cpu == NULL)
-        next_cpu = first_cpu;
-    for (; next_cpu != NULL; next_cpu = next_cpu->next_cpu) {
-        CPUState *env = cur_cpu = next_cpu;
-
-        if (timer_alarm_pending) {
-            timer_alarm_pending = 0;
-            break;
-        }
-        if (cpu_can_run(env))
-            ret = qemu_cpu_exec(env);
-        else if (env->stop)
-            break;
-
-        if (ret == EXCP_DEBUG) {
-            gdb_set_stop_cpu(env);
-            debug_requested = 1;
-            break;
-        }
-    }
-}
-
-#if 0
-static int cpu_has_work(CPUState *env)
-{
-    if (env->stop)
-        return 1;
-    if (env->stopped)
-        return 0;
-    if (!env->halted)
-        return 1;
-    if (qemu_cpu_has_work(env))
-        return 1;
-    return 0;
-}
-
-static int tcg_has_work(void)
-{
-    CPUState *env;
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        if (cpu_has_work(env))
-            return 1;
-    return 0;
-}
-#endif
-
 static int vm_can_run(void)
 {
     if (powerdown_requested)
@@ -3057,7 +1842,7 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
            opt_help
 #define DEFHEADING(text) stringify(text) "\n"
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
@@ -3086,7 +1871,7 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
     opt_enum,
 #define DEFHEADING(text)
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
@@ -3103,154 +1888,13 @@
 #define DEF(option, opt_arg, opt_enum, opt_help)        \
     { option, opt_arg, opt_enum },
 #define DEFHEADING(text)
-#include "qemu-options.h"
+#include "qemu-options.def"
 #undef DEF
 #undef DEFHEADING
 #undef GEN_DOCS
-    { NULL },
+    { NULL, 0, 0 },
 };
 
-#ifdef HAS_AUDIO
-struct soundhw soundhw[] = {
-#ifdef HAS_AUDIO_CHOICE
-#if defined(TARGET_I386) || defined(TARGET_MIPS)
-    {
-        "pcspk",
-        "PC speaker",
-        0,
-        1,
-        { .init_isa = pcspk_audio_init }
-    },
-#endif
-
-#ifdef CONFIG_SB16
-    {
-        "sb16",
-        "Creative Sound Blaster 16",
-        0,
-        1,
-        { .init_isa = SB16_init }
-    },
-#endif
-
-#ifdef CONFIG_CS4231A
-    {
-        "cs4231a",
-        "CS4231A",
-        0,
-        1,
-        { .init_isa = cs4231a_init }
-    },
-#endif
-
-#ifdef CONFIG_ADLIB
-    {
-        "adlib",
-#ifdef HAS_YMF262
-        "Yamaha YMF262 (OPL3)",
-#else
-        "Yamaha YM3812 (OPL2)",
-#endif
-        0,
-        1,
-        { .init_isa = Adlib_init }
-    },
-#endif
-
-#ifdef CONFIG_GUS
-    {
-        "gus",
-        "Gravis Ultrasound GF1",
-        0,
-        1,
-        { .init_isa = GUS_init }
-    },
-#endif
-
-#ifdef CONFIG_AC97
-    {
-        "ac97",
-        "Intel 82801AA AC97 Audio",
-        0,
-        0,
-        { .init_pci = ac97_init }
-    },
-#endif
-
-#ifdef CONFIG_ES1370
-    {
-        "es1370",
-        "ENSONIQ AudioPCI ES1370",
-        0,
-        0,
-        { .init_pci = es1370_init }
-    },
-#endif
-
-#endif /* HAS_AUDIO_CHOICE */
-
-    { NULL, NULL, 0, 0, { NULL } }
-};
-
-static void select_soundhw (const char *optarg)
-{
-    struct soundhw *c;
-
-    if (*optarg == '?') {
-    show_valid_cards:
-
-        printf ("Valid sound card names (comma separated):\n");
-        for (c = soundhw; c->name; ++c) {
-            printf ("%-11s %s\n", c->name, c->descr);
-        }
-        printf ("\n-soundhw all will enable all of the above\n");
-        exit (*optarg != '?');
-    }
-    else {
-        size_t l;
-        const char *p;
-        char *e;
-        int bad_card = 0;
-
-        if (!strcmp (optarg, "all")) {
-            for (c = soundhw; c->name; ++c) {
-                c->enabled = 1;
-            }
-            return;
-        }
-
-        p = optarg;
-        while (*p) {
-            e = strchr (p, ',');
-            l = !e ? strlen (p) : (size_t) (e - p);
-
-            for (c = soundhw; c->name; ++c) {
-                if (!strncmp (c->name, p, l)) {
-                    c->enabled = 1;
-                    break;
-                }
-            }
-
-            if (!c->name) {
-                if (l > 80) {
-                    fprintf (stderr,
-                             "Unknown sound card name (too big to show)\n");
-                }
-                else {
-                    fprintf (stderr, "Unknown sound card name `%.*s'\n",
-                             (int) l, p);
-                }
-                bad_card = 1;
-            }
-            p += l + (e != NULL);
-        }
-
-        if (bad_card)
-            goto show_valid_cards;
-    }
-}
-#endif
-
 static void select_vgahw (const char *p)
 {
     const char *opts;
@@ -3287,66 +1931,8 @@
     }
 }
 
-#ifdef _WIN32
-static BOOL WINAPI qemu_ctrl_handler(DWORD type)
-{
-    exit(STATUS_CONTROL_C_EXIT);
-    return TRUE;
-}
-#endif
-
-int qemu_uuid_parse(const char *str, uint8_t *uuid)
-{
-    int ret;
-
-    if(strlen(str) != 36)
-        return -1;
-
-    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
-            &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
-            &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], &uuid[15]);
-
-    if(ret != 16)
-        return -1;
-
-#ifdef TARGET_I386
-    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
-#endif
-
-    return 0;
-}
-
 #define MAX_NET_CLIENTS 32
 
-#ifndef _WIN32
-
-static void termsig_handler(int signal)
-{
-    qemu_system_shutdown_request();
-}
-
-static void sigchld_handler(int signal)
-{
-    waitpid(-1, NULL, WNOHANG);
-}
-
-static void sighandler_setup(void)
-{
-    struct sigaction act;
-
-    memset(&act, 0, sizeof(act));
-    act.sa_handler = termsig_handler;
-    sigaction(SIGINT,  &act, NULL);
-    sigaction(SIGHUP,  &act, NULL);
-    sigaction(SIGTERM, &act, NULL);
-
-    act.sa_handler = sigchld_handler;
-    act.sa_flags = SA_NOCLDSTOP;
-    sigaction(SIGCHLD, &act, NULL);
-}
-
-#endif
-
 #ifdef _WIN32
 /* Look for support files in the same directory as the executable.  */
 static char *find_datadir(const char *argv0)
@@ -3504,18 +2090,9 @@
     const char *cpu_model;
     const char *usb_devices[MAX_USB_CMDLINE];
     int usb_devices_index;
-#ifndef _WIN32
-    int fds[2];
-#endif
     int tb_size;
     const char *pid_file = NULL;
     const char *incoming = NULL;
-#ifndef _WIN32
-    int fd = 0;
-    struct passwd *pwd = NULL;
-    const char *chroot_dir = NULL;
-    const char *run_as = NULL;
-#endif
     CPUState *env;
     int show_vnc_port = 0;
 
@@ -3524,35 +2101,7 @@
     qemu_cache_utils_init(envp);
 
     QLIST_INIT (&vm_change_state_head);
-#ifndef _WIN32
-    {
-        struct sigaction act;
-        sigfillset(&act.sa_mask);
-        act.sa_flags = 0;
-        act.sa_handler = SIG_IGN;
-        sigaction(SIGPIPE, &act, NULL);
-    }
-#else
-    SetConsoleCtrlHandler(qemu_ctrl_handler, TRUE);
-    /* Note: cpu_interrupt() is currently not SMP safe, so we force
-       QEMU to run on a single CPU */
-    {
-        HANDLE h;
-        DWORD mask, smask;
-        int i;
-        h = GetCurrentProcess();
-        if (GetProcessAffinityMask(h, &mask, &smask)) {
-            for(i = 0; i < 32; i++) {
-                if (mask & (1 << i))
-                    break;
-            }
-            if (i != 32) {
-                mask = 1 << i;
-                SetProcessAffinityMask(h, mask);
-            }
-        }
-    }
-#endif
+    os_setup_early_signal_handling();
 
     module_call_init(MODULE_INIT_MACHINE);
     machine = find_default_machine();
@@ -3825,11 +2374,6 @@
             case QEMU_OPTION_bootp:
                 bootp_filename = optarg;
                 break;
-#ifndef _WIN32
-            case QEMU_OPTION_smb:
-		net_slirp_smb(optarg);
-                break;
-#endif
             case QEMU_OPTION_redir:
                 net_slirp_redir(NULL, optarg, NULL);
                 break;
@@ -4122,11 +2666,6 @@
                     exit(1);
                 }
                 break;
-#ifndef _WIN32
-	    case QEMU_OPTION_daemonize:
-		daemonize = 1;
-		break;
-#endif
 	    case QEMU_OPTION_option_rom:
 		if (nb_option_roms >= MAX_OPTION_ROMS) {
 		    fprintf(stderr, "Too many option ROMs\n");
@@ -4164,7 +2703,7 @@
             case QEMU_OPTION_startdate:
                 {
                     struct tm tm;
-                    time_t rtc_start_date;
+                    time_t rtc_start_date = 0;
                     if (!strcmp(optarg, "now")) {
                         rtc_date_offset = -1;
                     } else {
@@ -4210,14 +2749,6 @@
             case QEMU_OPTION_incoming:
                 incoming = optarg;
                 break;
-#ifndef _WIN32
-            case QEMU_OPTION_chroot:
-                chroot_dir = optarg;
-                break;
-            case QEMU_OPTION_runas:
-                run_as = optarg;
-                break;
-#endif
 #ifdef CONFIG_XEN
             case QEMU_OPTION_xen_domid:
                 xen_domid = atoi(optarg);
@@ -4243,6 +2774,11 @@
         data_dir = CONFIG_QEMU_SHAREDIR;
     }
 
+    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
+        os_pidfile_error();
+        exit(1);
+    }
+
 #if defined(CONFIG_KVM) && defined(CONFIG_KQEMU)
     if (kvm_allowed && kqemu_allowed) {
         fprintf(stderr,
@@ -4268,60 +2804,6 @@
            monitor_device = "stdio";
     }
 
-#ifndef _WIN32
-    if (daemonize) {
-	pid_t pid;
-
-	if (pipe(fds) == -1)
-	    exit(1);
-
-	pid = fork();
-	if (pid > 0) {
-	    uint8_t status;
-	    ssize_t len;
-
-	    close(fds[1]);
-
-	again:
-            len = read(fds[0], &status, 1);
-            if (len == -1 && (errno == EINTR))
-                goto again;
-
-            if (len != 1)
-                exit(1);
-            else if (status == 1) {
-                fprintf(stderr, "Could not acquire pidfile\n");
-                exit(1);
-            } else
-                exit(0);
-	} else if (pid < 0)
-            exit(1);
-
-	setsid();
-
-	pid = fork();
-	if (pid > 0)
-	    exit(0);
-	else if (pid < 0)
-	    exit(1);
-
-	umask(027);
-
-        signal(SIGTSTP, SIG_IGN);
-        signal(SIGTTOU, SIG_IGN);
-        signal(SIGTTIN, SIG_IGN);
-    }
-
-    if (pid_file && qemu_create_pidfile(pid_file) != 0) {
-        if (daemonize) {
-            uint8_t status = 1;
-            write(fds[1], &status, 1);
-        } else
-            fprintf(stderr, "Could not acquire pid file\n");
-        exit(1);
-    }
-#endif
-
 #ifdef CONFIG_KQEMU
     if (smp_cpus > 1)
         kqemu_allowed = 0;
@@ -4347,7 +2829,7 @@
     if (!boot_devices[0]) {
         boot_devices = "cad";
     }
-    setvbuf(stdout, NULL, _IOLBF, 0);
+    os_set_line_buffering();
 
     if (init_timer_alarm() < 0) {
         fprintf(stderr, "could not initialize alarm timer\n");
@@ -4460,10 +2942,8 @@
     //register_savevm("timer", 0, 2, timer_save, timer_load, NULL);
     register_savevm_live("ram", 0, 3, ram_save_live, NULL, ram_load, NULL);
 
-#ifndef _WIN32
     /* must be after terminal init, SDL library changes signal handlers */
-    sighandler_setup();
-#endif
+    os_setup_signal_handling();
 
     /* Maintain compatibility with multiple stdio monitors */
     if (!strcmp(monitor_device,"stdio")) {
@@ -4669,15 +3149,15 @@
     dcl = ds->listeners;
     while (dcl != NULL) {
         if (dcl->dpy_refresh != NULL) {
-            ds->gui_timer = qemu_new_timer(rt_clock, gui_update, ds);
-            qemu_mod_timer(ds->gui_timer, qemu_get_clock(rt_clock));
+            ds->gui_timer = qemu_new_timer_ms(rt_clock, gui_update, ds);
+            qemu_mod_timer(ds->gui_timer, qemu_get_clock_ms(rt_clock));
         }
         dcl = dcl->next;
     }
 
     if (display_type == DT_NOGRAPHIC || display_type == DT_VNC) {
-        nographic_timer = qemu_new_timer(rt_clock, nographic_update, NULL);
-        qemu_mod_timer(nographic_timer, qemu_get_clock(rt_clock));
+        nographic_timer = qemu_new_timer_ms(rt_clock, nographic_update, NULL);
+        qemu_mod_timer(nographic_timer, qemu_get_clock_ms(rt_clock));
     }
 
     text_consoles_set_display(display_state);
@@ -4727,70 +3207,7 @@
     if (autostart)
         vm_start();
 
-#ifndef _WIN32
-    if (daemonize) {
-	uint8_t status = 0;
-	ssize_t len;
-
-    again1:
-	len = write(fds[1], &status, 1);
-	if (len == -1 && (errno == EINTR))
-	    goto again1;
-
-	if (len != 1)
-	    exit(1);
-
-        if (chdir("/")) {
-            perror("not able to chdir to /");
-            exit(1);
-        }
-	TFR(fd = qemu_open("/dev/null", O_RDWR));
-	if (fd == -1)
-	    exit(1);
-    }
-
-    if (run_as) {
-        pwd = getpwnam(run_as);
-        if (!pwd) {
-            fprintf(stderr, "User \"%s\" doesn't exist\n", run_as);
-            exit(1);
-        }
-    }
-
-    if (chroot_dir) {
-        if (chroot(chroot_dir) < 0) {
-            fprintf(stderr, "chroot failed\n");
-            exit(1);
-        }
-        if (chdir("/")) {
-            perror("not able to chdir to /");
-            exit(1);
-        }
-    }
-
-    if (run_as) {
-        if (setgid(pwd->pw_gid) < 0) {
-            fprintf(stderr, "Failed to setgid(%d)\n", pwd->pw_gid);
-            exit(1);
-        }
-        if (setuid(pwd->pw_uid) < 0) {
-            fprintf(stderr, "Failed to setuid(%d)\n", pwd->pw_uid);
-            exit(1);
-        }
-        if (setuid(0) != -1) {
-            fprintf(stderr, "Dropping privileges failed\n");
-            exit(1);
-        }
-    }
-
-    if (daemonize) {
-        dup2(fd, 0);
-        dup2(fd, 1);
-        dup2(fd, 2);
-
-        close(fd);
-    }
-#endif
+    os_setup_post();
 
     main_loop();
     quit_timers();
diff --git a/vnc-android.c b/vnc-android.c
index af39dd2..9b259b2 100644
--- a/vnc-android.c
+++ b/vnc-android.c
@@ -722,7 +722,7 @@
 
         if (vs->output.offset && !vs->audio_cap && !vs->force_update) {
             /* kernel send buffers are full -> drop frames to throttle */
-            qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+            qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
             return;
         }
 
@@ -763,7 +763,7 @@
         }
 
         if (!has_dirty && !vs->audio_cap && !vs->force_update) {
-            qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+            qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
             return;
         }
 
@@ -811,7 +811,7 @@
     }
 
     if (vs->csock != -1) {
-        qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+        qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
     } else {
         vnc_disconnect_finish(vs);
     }
@@ -2073,7 +2073,7 @@
 
     vs->vd = vd;
     vs->ds = vd->ds;
-    vs->timer = qemu_new_timer(rt_clock, vnc_update_client, vs);
+    vs->timer = qemu_new_timer_ms(rt_clock, vnc_update_client, vs);
     vs->last_x = -1;
     vs->last_y = -1;
 
diff --git a/vnc.c b/vnc.c
index dfcf34c..cc3ecaa 100644
--- a/vnc.c
+++ b/vnc.c
@@ -716,7 +716,7 @@
 
         if (vs->output.offset && !vs->audio_cap && !vs->force_update) {
             /* kernel send buffers are full -> drop frames to throttle */
-            qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+            qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
             return;
         }
 
@@ -757,7 +757,7 @@
         }
 
         if (!has_dirty && !vs->audio_cap && !vs->force_update) {
-            qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+            qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
             return;
         }
 
@@ -805,7 +805,7 @@
     }
 
     if (vs->csock != -1) {
-        qemu_mod_timer(vs->timer, qemu_get_clock(rt_clock) + VNC_REFRESH_INTERVAL);
+        qemu_mod_timer(vs->timer, qemu_get_clock_ms(rt_clock) + VNC_REFRESH_INTERVAL);
     } else {
         vnc_disconnect_finish(vs);
     }
@@ -1585,7 +1585,7 @@
 static void set_pixel_conversion(VncState *vs)
 {
     if ((vs->clientds.flags & QEMU_BIG_ENDIAN_FLAG) ==
-        (vs->ds->surface->flags & QEMU_BIG_ENDIAN_FLAG) && 
+        (vs->ds->surface->flags & QEMU_BIG_ENDIAN_FLAG) &&
         !memcmp(&(vs->clientds.pf), &(vs->ds->surface->pf), sizeof(PixelFormat))) {
         vs->write_pixels = vnc_write_pixels_copy;
         switch (vs->ds->surface->pf.bits_per_pixel) {
@@ -1693,7 +1693,7 @@
         vnc_write_u8(vs, 0);  /* msg id */
         vnc_write_u8(vs, 0);
         vnc_write_u16(vs, 1); /* number of rects */
-        vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds), 
+        vnc_framebuffer_update(vs, 0, 0, ds_get_width(vs->ds),
                                ds_get_height(vs->ds), VNC_ENCODING_WMVi);
         pixel_format_message(vs);
         vnc_flush(vs);
@@ -2067,7 +2067,7 @@
 
     vs->vd = vd;
     vs->ds = vd->ds;
-    vs->timer = qemu_new_timer(rt_clock, vnc_update_client, vs);
+    vs->timer = qemu_new_timer_ms(rt_clock, vnc_update_client, vs);
     vs->last_x = -1;
     vs->last_y = -1;
 
@@ -2175,7 +2175,7 @@
 char *vnc_display_local_addr(DisplayState *ds)
 {
     VncDisplay *vs = ds ? (VncDisplay *)ds->opaque : vnc_display;
-    
+
     return vnc_socket_local_addr("%s:%s", vs->lsock);
 }